CoroSplit.cpp revision 363496
1//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass builds the coroutine frame and outlines resume and destroy parts
9// of the coroutine into separate functions.
10//
11// We present a coroutine to an LLVM as an ordinary function with suspension
12// points marked up with intrinsics. We let the optimizer party on the coroutine
13// as a single function for as long as possible. Shortly before the coroutine is
14// eligible to be inlined into its callers, we split up the coroutine into parts
15// corresponding to an initial, resume and destroy invocations of the coroutine,
16// add them to the current SCC and restart the IPO pipeline to optimize the
17// coroutine subfunctions we extracted before proceeding to the caller of the
18// coroutine.
19//===----------------------------------------------------------------------===//
20
21#include "CoroInstr.h"
22#include "CoroInternal.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/SmallPtrSet.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/ADT/Twine.h"
28#include "llvm/Analysis/CallGraph.h"
29#include "llvm/Analysis/CallGraphSCCPass.h"
30#include "llvm/IR/Argument.h"
31#include "llvm/IR/Attributes.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/CFG.h"
34#include "llvm/IR/CallSite.h"
35#include "llvm/IR/CallingConv.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/DerivedTypes.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/GlobalValue.h"
41#include "llvm/IR/GlobalVariable.h"
42#include "llvm/IR/IRBuilder.h"
43#include "llvm/IR/InstIterator.h"
44#include "llvm/IR/InstrTypes.h"
45#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Instructions.h"
47#include "llvm/IR/IntrinsicInst.h"
48#include "llvm/IR/LLVMContext.h"
49#include "llvm/IR/LegacyPassManager.h"
50#include "llvm/IR/Module.h"
51#include "llvm/IR/Type.h"
52#include "llvm/IR/Value.h"
53#include "llvm/IR/Verifier.h"
54#include "llvm/InitializePasses.h"
55#include "llvm/Pass.h"
56#include "llvm/Support/Casting.h"
57#include "llvm/Support/Debug.h"
58#include "llvm/Support/PrettyStackTrace.h"
59#include "llvm/Support/raw_ostream.h"
60#include "llvm/Transforms/Scalar.h"
61#include "llvm/Transforms/Utils/BasicBlockUtils.h"
62#include "llvm/Transforms/Utils/Cloning.h"
63#include "llvm/Transforms/Utils/Local.h"
64#include "llvm/Transforms/Utils/ValueMapper.h"
65#include <cassert>
66#include <cstddef>
67#include <cstdint>
68#include <initializer_list>
69#include <iterator>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "coro-split"
74
75namespace {
76
77/// A little helper class for building
78class CoroCloner {
79public:
80  enum class Kind {
81    /// The shared resume function for a switch lowering.
82    SwitchResume,
83
84    /// The shared unwind function for a switch lowering.
85    SwitchUnwind,
86
87    /// The shared cleanup function for a switch lowering.
88    SwitchCleanup,
89
90    /// An individual continuation function.
91    Continuation,
92  };
93private:
94  Function &OrigF;
95  Function *NewF;
96  const Twine &Suffix;
97  coro::Shape &Shape;
98  Kind FKind;
99  ValueToValueMapTy VMap;
100  IRBuilder<> Builder;
101  Value *NewFramePtr = nullptr;
102  Value *SwiftErrorSlot = nullptr;
103
104  /// The active suspend instruction; meaningful only for continuation ABIs.
105  AnyCoroSuspendInst *ActiveSuspend = nullptr;
106
107public:
108  /// Create a cloner for a switch lowering.
109  CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
110             Kind FKind)
111    : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape),
112      FKind(FKind), Builder(OrigF.getContext()) {
113    assert(Shape.ABI == coro::ABI::Switch);
114  }
115
116  /// Create a cloner for a continuation lowering.
117  CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
118             Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
119    : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
120      FKind(Kind::Continuation), Builder(OrigF.getContext()),
121      ActiveSuspend(ActiveSuspend) {
122    assert(Shape.ABI == coro::ABI::Retcon ||
123           Shape.ABI == coro::ABI::RetconOnce);
124    assert(NewF && "need existing function for continuation");
125    assert(ActiveSuspend && "need active suspend point for continuation");
126  }
127
128  Function *getFunction() const {
129    assert(NewF != nullptr && "declaration not yet set");
130    return NewF;
131  }
132
133  void create();
134
135private:
136  bool isSwitchDestroyFunction() {
137    switch (FKind) {
138    case Kind::Continuation:
139    case Kind::SwitchResume:
140      return false;
141    case Kind::SwitchUnwind:
142    case Kind::SwitchCleanup:
143      return true;
144    }
145    llvm_unreachable("Unknown CoroCloner::Kind enum");
146  }
147
148  void createDeclaration();
149  void replaceEntryBlock();
150  Value *deriveNewFramePointer();
151  void replaceRetconSuspendUses();
152  void replaceCoroSuspends();
153  void replaceCoroEnds();
154  void replaceSwiftErrorOps();
155  void handleFinalSuspend();
156  void maybeFreeContinuationStorage();
157};
158
159} // end anonymous namespace
160
161static void maybeFreeRetconStorage(IRBuilder<> &Builder,
162                                   const coro::Shape &Shape, Value *FramePtr,
163                                   CallGraph *CG) {
164  assert(Shape.ABI == coro::ABI::Retcon ||
165         Shape.ABI == coro::ABI::RetconOnce);
166  if (Shape.RetconLowering.IsFrameInlineInStorage)
167    return;
168
169  Shape.emitDealloc(Builder, FramePtr, CG);
170}
171
172/// Replace a non-unwind call to llvm.coro.end.
173static void replaceFallthroughCoroEnd(CoroEndInst *End,
174                                      const coro::Shape &Shape, Value *FramePtr,
175                                      bool InResume, CallGraph *CG) {
176  // Start inserting right before the coro.end.
177  IRBuilder<> Builder(End);
178
179  // Create the return instruction.
180  switch (Shape.ABI) {
181  // The cloned functions in switch-lowering always return void.
182  case coro::ABI::Switch:
183    // coro.end doesn't immediately end the coroutine in the main function
184    // in this lowering, because we need to deallocate the coroutine.
185    if (!InResume)
186      return;
187    Builder.CreateRetVoid();
188    break;
189
190  // In unique continuation lowering, the continuations always return void.
191  // But we may have implicitly allocated storage.
192  case coro::ABI::RetconOnce:
193    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
194    Builder.CreateRetVoid();
195    break;
196
197  // In non-unique continuation lowering, we signal completion by returning
198  // a null continuation.
199  case coro::ABI::Retcon: {
200    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
201    auto RetTy = Shape.getResumeFunctionType()->getReturnType();
202    auto RetStructTy = dyn_cast<StructType>(RetTy);
203    PointerType *ContinuationTy =
204      cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);
205
206    Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
207    if (RetStructTy) {
208      ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
209                                              ReturnValue, 0);
210    }
211    Builder.CreateRet(ReturnValue);
212    break;
213  }
214  }
215
216  // Remove the rest of the block, by splitting it into an unreachable block.
217  auto *BB = End->getParent();
218  BB->splitBasicBlock(End);
219  BB->getTerminator()->eraseFromParent();
220}
221
222/// Replace an unwind call to llvm.coro.end.
223static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
224                                 Value *FramePtr, bool InResume, CallGraph *CG){
225  IRBuilder<> Builder(End);
226
227  switch (Shape.ABI) {
228  // In switch-lowering, this does nothing in the main function.
229  case coro::ABI::Switch:
230    if (!InResume)
231      return;
232    break;
233
234  // In continuation-lowering, this frees the continuation storage.
235  case coro::ABI::Retcon:
236  case coro::ABI::RetconOnce:
237    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
238    break;
239  }
240
241  // If coro.end has an associated bundle, add cleanupret instruction.
242  if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
243    auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
244    auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
245    End->getParent()->splitBasicBlock(End);
246    CleanupRet->getParent()->getTerminator()->eraseFromParent();
247  }
248}
249
250static void replaceCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
251                           Value *FramePtr, bool InResume, CallGraph *CG) {
252  if (End->isUnwind())
253    replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
254  else
255    replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);
256
257  auto &Context = End->getContext();
258  End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
259                                   : ConstantInt::getFalse(Context));
260  End->eraseFromParent();
261}
262
263// Create an entry block for a resume function with a switch that will jump to
264// suspend points.
265static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
266  assert(Shape.ABI == coro::ABI::Switch);
267  LLVMContext &C = F.getContext();
268
269  // resume.entry:
270  //  %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0,
271  //  i32 2
272  //  % index = load i32, i32* %index.addr
273  //  switch i32 %index, label %unreachable [
274  //    i32 0, label %resume.0
275  //    i32 1, label %resume.1
276  //    ...
277  //  ]
278
279  auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
280  auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);
281
282  IRBuilder<> Builder(NewEntry);
283  auto *FramePtr = Shape.FramePtr;
284  auto *FrameTy = Shape.FrameTy;
285  auto *GepIndex = Builder.CreateStructGEP(
286      FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
287  auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
288  auto *Switch =
289      Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
290  Shape.SwitchLowering.ResumeSwitch = Switch;
291
292  size_t SuspendIndex = 0;
293  for (auto *AnyS : Shape.CoroSuspends) {
294    auto *S = cast<CoroSuspendInst>(AnyS);
295    ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
296
297    // Replace CoroSave with a store to Index:
298    //    %index.addr = getelementptr %f.frame... (index field number)
299    //    store i32 0, i32* %index.addr1
300    auto *Save = S->getCoroSave();
301    Builder.SetInsertPoint(Save);
302    if (S->isFinal()) {
303      // Final suspend point is represented by storing zero in ResumeFnAddr.
304      auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr,
305                                 coro::Shape::SwitchFieldIndex::Resume,
306                                  "ResumeFn.addr");
307      auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
308          cast<PointerType>(GepIndex->getType())->getElementType()));
309      Builder.CreateStore(NullPtr, GepIndex);
310    } else {
311      auto *GepIndex = Builder.CreateStructGEP(
312          FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
313      Builder.CreateStore(IndexVal, GepIndex);
314    }
315    Save->replaceAllUsesWith(ConstantTokenNone::get(C));
316    Save->eraseFromParent();
317
318    // Split block before and after coro.suspend and add a jump from an entry
319    // switch:
320    //
321    //  whateverBB:
322    //    whatever
323    //    %0 = call i8 @llvm.coro.suspend(token none, i1 false)
324    //    switch i8 %0, label %suspend[i8 0, label %resume
325    //                                 i8 1, label %cleanup]
326    // becomes:
327    //
328    //  whateverBB:
329    //     whatever
330    //     br label %resume.0.landing
331    //
332    //  resume.0: ; <--- jump from the switch in the resume.entry
333    //     %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
334    //     br label %resume.0.landing
335    //
336    //  resume.0.landing:
337    //     %1 = phi i8[-1, %whateverBB], [%0, %resume.0]
338    //     switch i8 % 1, label %suspend [i8 0, label %resume
339    //                                    i8 1, label %cleanup]
340
341    auto *SuspendBB = S->getParent();
342    auto *ResumeBB =
343        SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
344    auto *LandingBB = ResumeBB->splitBasicBlock(
345        S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
346    Switch->addCase(IndexVal, ResumeBB);
347
348    cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
349    auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front());
350    S->replaceAllUsesWith(PN);
351    PN->addIncoming(Builder.getInt8(-1), SuspendBB);
352    PN->addIncoming(S, ResumeBB);
353
354    ++SuspendIndex;
355  }
356
357  Builder.SetInsertPoint(UnreachBB);
358  Builder.CreateUnreachable();
359
360  Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
361}
362
363
364// Rewrite final suspend point handling. We do not use suspend index to
365// represent the final suspend point. Instead we zero-out ResumeFnAddr in the
366// coroutine frame, since it is undefined behavior to resume a coroutine
367// suspended at the final suspend point. Thus, in the resume function, we can
368// simply remove the last case (when coro::Shape is built, the final suspend
369// point (if present) is always the last element of CoroSuspends array).
370// In the destroy function, we add a code sequence to check if ResumeFnAddress
371// is Null, and if so, jump to the appropriate label to handle cleanup from the
372// final suspend point.
373void CoroCloner::handleFinalSuspend() {
374  assert(Shape.ABI == coro::ABI::Switch &&
375         Shape.SwitchLowering.HasFinalSuspend);
376  auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
377  auto FinalCaseIt = std::prev(Switch->case_end());
378  BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
379  Switch->removeCase(FinalCaseIt);
380  if (isSwitchDestroyFunction()) {
381    BasicBlock *OldSwitchBB = Switch->getParent();
382    auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
383    Builder.SetInsertPoint(OldSwitchBB->getTerminator());
384    auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr,
385                                       coro::Shape::SwitchFieldIndex::Resume,
386                                             "ResumeFn.addr");
387    auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(),
388                                    GepIndex);
389    auto *Cond = Builder.CreateIsNull(Load);
390    Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
391    OldSwitchBB->getTerminator()->eraseFromParent();
392  }
393}
394
395static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
396                                        const Twine &Suffix,
397                                        Module::iterator InsertBefore) {
398  Module *M = OrigF.getParent();
399  auto *FnTy = Shape.getResumeFunctionType();
400
401  Function *NewF =
402      Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
403                       OrigF.getName() + Suffix);
404  NewF->addParamAttr(0, Attribute::NonNull);
405  NewF->addParamAttr(0, Attribute::NoAlias);
406
407  M->getFunctionList().insert(InsertBefore, NewF);
408
409  return NewF;
410}
411
412/// Replace uses of the active llvm.coro.suspend.retcon call with the
413/// arguments to the continuation function.
414///
415/// This assumes that the builder has a meaningful insertion point.
416void CoroCloner::replaceRetconSuspendUses() {
417  assert(Shape.ABI == coro::ABI::Retcon ||
418         Shape.ABI == coro::ABI::RetconOnce);
419
420  auto NewS = VMap[ActiveSuspend];
421  if (NewS->use_empty()) return;
422
423  // Copy out all the continuation arguments after the buffer pointer into
424  // an easily-indexed data structure for convenience.
425  SmallVector<Value*, 8> Args;
426  for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I)
427    Args.push_back(&*I);
428
429  // If the suspend returns a single scalar value, we can just do a simple
430  // replacement.
431  if (!isa<StructType>(NewS->getType())) {
432    assert(Args.size() == 1);
433    NewS->replaceAllUsesWith(Args.front());
434    return;
435  }
436
437  // Try to peephole extracts of an aggregate return.
438  for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) {
439    auto EVI = dyn_cast<ExtractValueInst>((UI++)->getUser());
440    if (!EVI || EVI->getNumIndices() != 1)
441      continue;
442
443    EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
444    EVI->eraseFromParent();
445  }
446
447  // If we have no remaining uses, we're done.
448  if (NewS->use_empty()) return;
449
450  // Otherwise, we need to create an aggregate.
451  Value *Agg = UndefValue::get(NewS->getType());
452  for (size_t I = 0, E = Args.size(); I != E; ++I)
453    Agg = Builder.CreateInsertValue(Agg, Args[I], I);
454
455  NewS->replaceAllUsesWith(Agg);
456}
457
458void CoroCloner::replaceCoroSuspends() {
459  Value *SuspendResult;
460
461  switch (Shape.ABI) {
462  // In switch lowering, replace coro.suspend with the appropriate value
463  // for the type of function we're extracting.
464  // Replacing coro.suspend with (0) will result in control flow proceeding to
465  // a resume label associated with a suspend point, replacing it with (1) will
466  // result in control flow proceeding to a cleanup label associated with this
467  // suspend point.
468  case coro::ABI::Switch:
469    SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
470    break;
471
472  // In returned-continuation lowering, the arguments from earlier
473  // continuations are theoretically arbitrary, and they should have been
474  // spilled.
475  case coro::ABI::RetconOnce:
476  case coro::ABI::Retcon:
477    return;
478  }
479
480  for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
481    // The active suspend was handled earlier.
482    if (CS == ActiveSuspend) continue;
483
484    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
485    MappedCS->replaceAllUsesWith(SuspendResult);
486    MappedCS->eraseFromParent();
487  }
488}
489
490void CoroCloner::replaceCoroEnds() {
491  for (CoroEndInst *CE : Shape.CoroEnds) {
492    // We use a null call graph because there's no call graph node for
493    // the cloned function yet.  We'll just be rebuilding that later.
494    auto NewCE = cast<CoroEndInst>(VMap[CE]);
495    replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
496  }
497}
498
499static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
500                                 ValueToValueMapTy *VMap) {
501  Value *CachedSlot = nullptr;
502  auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
503    if (CachedSlot) {
504      assert(CachedSlot->getType()->getPointerElementType() == ValueTy &&
505             "multiple swifterror slots in function with different types");
506      return CachedSlot;
507    }
508
509    // Check if the function has a swifterror argument.
510    for (auto &Arg : F.args()) {
511      if (Arg.isSwiftError()) {
512        CachedSlot = &Arg;
513        assert(Arg.getType()->getPointerElementType() == ValueTy &&
514               "swifterror argument does not have expected type");
515        return &Arg;
516      }
517    }
518
519    // Create a swifterror alloca.
520    IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
521    auto Alloca = Builder.CreateAlloca(ValueTy);
522    Alloca->setSwiftError(true);
523
524    CachedSlot = Alloca;
525    return Alloca;
526  };
527
528  for (CallInst *Op : Shape.SwiftErrorOps) {
529    auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
530    IRBuilder<> Builder(MappedOp);
531
532    // If there are no arguments, this is a 'get' operation.
533    Value *MappedResult;
534    if (Op->getNumArgOperands() == 0) {
535      auto ValueTy = Op->getType();
536      auto Slot = getSwiftErrorSlot(ValueTy);
537      MappedResult = Builder.CreateLoad(ValueTy, Slot);
538    } else {
539      assert(Op->getNumArgOperands() == 1);
540      auto Value = MappedOp->getArgOperand(0);
541      auto ValueTy = Value->getType();
542      auto Slot = getSwiftErrorSlot(ValueTy);
543      Builder.CreateStore(Value, Slot);
544      MappedResult = Slot;
545    }
546
547    MappedOp->replaceAllUsesWith(MappedResult);
548    MappedOp->eraseFromParent();
549  }
550
551  // If we're updating the original function, we've invalidated SwiftErrorOps.
552  if (VMap == nullptr) {
553    Shape.SwiftErrorOps.clear();
554  }
555}
556
557void CoroCloner::replaceSwiftErrorOps() {
558  ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
559}
560
561void CoroCloner::replaceEntryBlock() {
562  // In the original function, the AllocaSpillBlock is a block immediately
563  // following the allocation of the frame object which defines GEPs for
564  // all the allocas that have been moved into the frame, and it ends by
565  // branching to the original beginning of the coroutine.  Make this
566  // the entry block of the cloned function.
567  auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
568  Entry->setName("entry" + Suffix);
569  Entry->moveBefore(&NewF->getEntryBlock());
570  Entry->getTerminator()->eraseFromParent();
571
572  // Clear all predecessors of the new entry block.  There should be
573  // exactly one predecessor, which we created when splitting out
574  // AllocaSpillBlock to begin with.
575  assert(Entry->hasOneUse());
576  auto BranchToEntry = cast<BranchInst>(Entry->user_back());
577  assert(BranchToEntry->isUnconditional());
578  Builder.SetInsertPoint(BranchToEntry);
579  Builder.CreateUnreachable();
580  BranchToEntry->eraseFromParent();
581
582  // TODO: move any allocas into Entry that weren't moved into the frame.
583  // (Currently we move all allocas into the frame.)
584
585  // Branch from the entry to the appropriate place.
586  Builder.SetInsertPoint(Entry);
587  switch (Shape.ABI) {
588  case coro::ABI::Switch: {
589    // In switch-lowering, we built a resume-entry block in the original
590    // function.  Make the entry block branch to this.
591    auto *SwitchBB =
592      cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
593    Builder.CreateBr(SwitchBB);
594    break;
595  }
596
597  case coro::ABI::Retcon:
598  case coro::ABI::RetconOnce: {
599    // In continuation ABIs, we want to branch to immediately after the
600    // active suspend point.  Earlier phases will have put the suspend in its
601    // own basic block, so just thread our jump directly to its successor.
602    auto MappedCS = cast<CoroSuspendRetconInst>(VMap[ActiveSuspend]);
603    auto Branch = cast<BranchInst>(MappedCS->getNextNode());
604    assert(Branch->isUnconditional());
605    Builder.CreateBr(Branch->getSuccessor(0));
606    break;
607  }
608  }
609}
610
611/// Derive the value of the new frame pointer.
612Value *CoroCloner::deriveNewFramePointer() {
613  // Builder should be inserting to the front of the new entry block.
614
615  switch (Shape.ABI) {
616  // In switch-lowering, the argument is the frame pointer.
617  case coro::ABI::Switch:
618    return &*NewF->arg_begin();
619
620  // In continuation-lowering, the argument is the opaque storage.
621  case coro::ABI::Retcon:
622  case coro::ABI::RetconOnce: {
623    Argument *NewStorage = &*NewF->arg_begin();
624    auto FramePtrTy = Shape.FrameTy->getPointerTo();
625
626    // If the storage is inline, just bitcast to the storage to the frame type.
627    if (Shape.RetconLowering.IsFrameInlineInStorage)
628      return Builder.CreateBitCast(NewStorage, FramePtrTy);
629
630    // Otherwise, load the real frame from the opaque storage.
631    auto FramePtrPtr =
632      Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
633    return Builder.CreateLoad(FramePtrPtr);
634  }
635  }
636  llvm_unreachable("bad ABI");
637}
638
639/// Clone the body of the original function into a resume function of
640/// some sort.
641void CoroCloner::create() {
642  // Create the new function if we don't already have one.
643  if (!NewF) {
644    NewF = createCloneDeclaration(OrigF, Shape, Suffix,
645                                  OrigF.getParent()->end());
646  }
647
648  // Replace all args with undefs. The buildCoroutineFrame algorithm already
649  // rewritten access to the args that occurs after suspend points with loads
650  // and stores to/from the coroutine frame.
651  for (Argument &A : OrigF.args())
652    VMap[&A] = UndefValue::get(A.getType());
653
654  SmallVector<ReturnInst *, 4> Returns;
655
656  // Ignore attempts to change certain attributes of the function.
657  // TODO: maybe there should be a way to suppress this during cloning?
658  auto savedVisibility = NewF->getVisibility();
659  auto savedUnnamedAddr = NewF->getUnnamedAddr();
660  auto savedDLLStorageClass = NewF->getDLLStorageClass();
661
662  // NewF's linkage (which CloneFunctionInto does *not* change) might not
663  // be compatible with the visibility of OrigF (which it *does* change),
664  // so protect against that.
665  auto savedLinkage = NewF->getLinkage();
666  NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
667
668  CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns);
669
670  NewF->setLinkage(savedLinkage);
671  NewF->setVisibility(savedVisibility);
672  NewF->setUnnamedAddr(savedUnnamedAddr);
673  NewF->setDLLStorageClass(savedDLLStorageClass);
674
675  auto &Context = NewF->getContext();
676
677  // Replace the attributes of the new function:
678  auto OrigAttrs = NewF->getAttributes();
679  auto NewAttrs = AttributeList();
680
681  switch (Shape.ABI) {
682  case coro::ABI::Switch:
683    // Bootstrap attributes by copying function attributes from the
684    // original function.  This should include optimization settings and so on.
685    NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex,
686                                      OrigAttrs.getFnAttributes());
687    break;
688
689  case coro::ABI::Retcon:
690  case coro::ABI::RetconOnce:
691    // If we have a continuation prototype, just use its attributes,
692    // full-stop.
693    NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
694    break;
695  }
696
697  // Make the frame parameter nonnull and noalias.
698  NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull);
699  NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias);
700
701  switch (Shape.ABI) {
702  // In these ABIs, the cloned functions always return 'void', and the
703  // existing return sites are meaningless.  Note that for unique
704  // continuations, this includes the returns associated with suspends;
705  // this is fine because we can't suspend twice.
706  case coro::ABI::Switch:
707  case coro::ABI::RetconOnce:
708    // Remove old returns.
709    for (ReturnInst *Return : Returns)
710      changeToUnreachable(Return, /*UseLLVMTrap=*/false);
711    break;
712
713  // With multi-suspend continuations, we'll already have eliminated the
714  // original returns and inserted returns before all the suspend points,
715  // so we want to leave any returns in place.
716  case coro::ABI::Retcon:
717    break;
718  }
719
720  NewF->setAttributes(NewAttrs);
721  NewF->setCallingConv(Shape.getResumeFunctionCC());
722
723  // Set up the new entry block.
724  replaceEntryBlock();
725
726  Builder.SetInsertPoint(&NewF->getEntryBlock().front());
727  NewFramePtr = deriveNewFramePointer();
728
729  // Remap frame pointer.
730  Value *OldFramePtr = VMap[Shape.FramePtr];
731  NewFramePtr->takeName(OldFramePtr);
732  OldFramePtr->replaceAllUsesWith(NewFramePtr);
733
734  // Remap vFrame pointer.
735  auto *NewVFrame = Builder.CreateBitCast(
736      NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame");
737  Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
738  OldVFrame->replaceAllUsesWith(NewVFrame);
739
740  switch (Shape.ABI) {
741  case coro::ABI::Switch:
742    // Rewrite final suspend handling as it is not done via switch (allows to
743    // remove final case from the switch, since it is undefined behavior to
744    // resume the coroutine suspended at the final suspend point.
745    if (Shape.SwitchLowering.HasFinalSuspend)
746      handleFinalSuspend();
747    break;
748
749  case coro::ABI::Retcon:
750  case coro::ABI::RetconOnce:
751    // Replace uses of the active suspend with the corresponding
752    // continuation-function arguments.
753    assert(ActiveSuspend != nullptr &&
754           "no active suspend when lowering a continuation-style coroutine");
755    replaceRetconSuspendUses();
756    break;
757  }
758
759  // Handle suspends.
760  replaceCoroSuspends();
761
762  // Handle swifterror.
763  replaceSwiftErrorOps();
764
765  // Remove coro.end intrinsics.
766  replaceCoroEnds();
767
768  // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
769  // to suppress deallocation code.
770  if (Shape.ABI == coro::ABI::Switch)
771    coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
772                          /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup);
773}
774
775// Create a resume clone by cloning the body of the original function, setting
776// new entry block and replacing coro.suspend an appropriate value to force
777// resume or cleanup pass for every suspend point.
778static Function *createClone(Function &F, const Twine &Suffix,
779                             coro::Shape &Shape, CoroCloner::Kind FKind) {
780  CoroCloner Cloner(F, Suffix, Shape, FKind);
781  Cloner.create();
782  return Cloner.getFunction();
783}
784
785/// Remove calls to llvm.coro.end in the original function.
786static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) {
787  for (auto End : Shape.CoroEnds) {
788    replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG);
789  }
790}
791
792static void replaceFrameSize(coro::Shape &Shape) {
793  if (Shape.CoroSizes.empty())
794    return;
795
796  // In the same function all coro.sizes should have the same result type.
797  auto *SizeIntrin = Shape.CoroSizes.back();
798  Module *M = SizeIntrin->getModule();
799  const DataLayout &DL = M->getDataLayout();
800  auto Size = DL.getTypeAllocSize(Shape.FrameTy);
801  auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
802
803  for (CoroSizeInst *CS : Shape.CoroSizes) {
804    CS->replaceAllUsesWith(SizeConstant);
805    CS->eraseFromParent();
806  }
807}
808
809// Create a global constant array containing pointers to functions provided and
810// set Info parameter of CoroBegin to point at this constant. Example:
811//
812//   @f.resumers = internal constant [2 x void(%f.frame*)*]
813//                    [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
814//   define void @f() {
815//     ...
816//     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
817//                    i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*))
818//
819// Assumes that all the functions have the same signature.
820static void setCoroInfo(Function &F, coro::Shape &Shape,
821                        ArrayRef<Function *> Fns) {
822  // This only works under the switch-lowering ABI because coro elision
823  // only works on the switch-lowering ABI.
824  assert(Shape.ABI == coro::ABI::Switch);
825
826  SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
827  assert(!Args.empty());
828  Function *Part = *Fns.begin();
829  Module *M = Part->getParent();
830  auto *ArrTy = ArrayType::get(Part->getType(), Args.size());
831
832  auto *ConstVal = ConstantArray::get(ArrTy, Args);
833  auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
834                                GlobalVariable::PrivateLinkage, ConstVal,
835                                F.getName() + Twine(".resumers"));
836
837  // Update coro.begin instruction to refer to this constant.
838  LLVMContext &C = F.getContext();
839  auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C));
840  Shape.getSwitchCoroId()->setInfo(BC);
841}
842
843// Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
844static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
845                            Function *DestroyFn, Function *CleanupFn) {
846  assert(Shape.ABI == coro::ABI::Switch);
847
848  IRBuilder<> Builder(Shape.FramePtr->getNextNode());
849  auto *ResumeAddr = Builder.CreateStructGEP(
850      Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
851      "resume.addr");
852  Builder.CreateStore(ResumeFn, ResumeAddr);
853
854  Value *DestroyOrCleanupFn = DestroyFn;
855
856  CoroIdInst *CoroId = Shape.getSwitchCoroId();
857  if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
858    // If there is a CoroAlloc and it returns false (meaning we elide the
859    // allocation, use CleanupFn instead of DestroyFn).
860    DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
861  }
862
863  auto *DestroyAddr = Builder.CreateStructGEP(
864      Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
865      "destroy.addr");
866  Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
867}
868
869static void postSplitCleanup(Function &F) {
870  removeUnreachableBlocks(F);
871
872  // For now, we do a mandatory verification step because we don't
873  // entirely trust this pass.  Note that we don't want to add a verifier
874  // pass to FPM below because it will also verify all the global data.
875  verifyFunction(F);
876
877  legacy::FunctionPassManager FPM(F.getParent());
878
879  FPM.add(createSCCPPass());
880  FPM.add(createCFGSimplificationPass());
881  FPM.add(createEarlyCSEPass());
882  FPM.add(createCFGSimplificationPass());
883
884  FPM.doInitialization();
885  FPM.run(F);
886  FPM.doFinalization();
887}
888
889// Assuming we arrived at the block NewBlock from Prev instruction, store
890// PHI's incoming values in the ResolvedValues map.
891static void
892scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
893                          DenseMap<Value *, Value *> &ResolvedValues) {
894  auto *PrevBB = Prev->getParent();
895  for (PHINode &PN : NewBlock->phis()) {
896    auto V = PN.getIncomingValueForBlock(PrevBB);
897    // See if we already resolved it.
898    auto VI = ResolvedValues.find(V);
899    if (VI != ResolvedValues.end())
900      V = VI->second;
901    // Remember the value.
902    ResolvedValues[&PN] = V;
903  }
904}
905
906// Replace a sequence of branches leading to a ret, with a clone of a ret
907// instruction. Suspend instruction represented by a switch, track the PHI
908// values and select the correct case successor when possible.
909static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
910  DenseMap<Value *, Value *> ResolvedValues;
911  BasicBlock *UnconditionalSucc = nullptr;
912
913  Instruction *I = InitialInst;
914  while (I->isTerminator()) {
915    if (isa<ReturnInst>(I)) {
916      if (I != InitialInst) {
917        // If InitialInst is an unconditional branch,
918        // remove PHI values that come from basic block of InitialInst
919        if (UnconditionalSucc)
920          for (PHINode &PN : UnconditionalSucc->phis()) {
921            int idx = PN.getBasicBlockIndex(InitialInst->getParent());
922            if (idx != -1)
923              PN.removeIncomingValue(idx);
924          }
925        ReplaceInstWithInst(InitialInst, I->clone());
926      }
927      return true;
928    }
929    if (auto *BR = dyn_cast<BranchInst>(I)) {
930      if (BR->isUnconditional()) {
931        BasicBlock *BB = BR->getSuccessor(0);
932        if (I == InitialInst)
933          UnconditionalSucc = BB;
934        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
935        I = BB->getFirstNonPHIOrDbgOrLifetime();
936        continue;
937      }
938    } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
939      Value *V = SI->getCondition();
940      auto it = ResolvedValues.find(V);
941      if (it != ResolvedValues.end())
942        V = it->second;
943      if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
944        BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
945        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
946        I = BB->getFirstNonPHIOrDbgOrLifetime();
947        continue;
948      }
949    }
950    return false;
951  }
952  return false;
953}
954
955// Add musttail to any resume instructions that is immediately followed by a
956// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
957// for symmetrical coroutine control transfer (C++ Coroutines TS extension).
958// This transformation is done only in the resume part of the coroutine that has
959// identical signature and calling convention as the coro.resume call.
960static void addMustTailToCoroResumes(Function &F) {
961  bool changed = false;
962
963  // Collect potential resume instructions.
964  SmallVector<CallInst *, 4> Resumes;
965  for (auto &I : instructions(F))
966    if (auto *Call = dyn_cast<CallInst>(&I))
967      if (auto *CalledValue = Call->getCalledValue())
968        // CoroEarly pass replaced coro resumes with indirect calls to an
969        // address return by CoroSubFnInst intrinsic. See if it is one of those.
970        if (isa<CoroSubFnInst>(CalledValue->stripPointerCasts()))
971          Resumes.push_back(Call);
972
973  // Set musttail on those that are followed by a ret instruction.
974  for (CallInst *Call : Resumes)
975    if (simplifyTerminatorLeadingToRet(Call->getNextNode())) {
976      Call->setTailCallKind(CallInst::TCK_MustTail);
977      changed = true;
978    }
979
980  if (changed)
981    removeUnreachableBlocks(F);
982}
983
984// Coroutine has no suspend points. Remove heap allocation for the coroutine
985// frame if possible.
986static void handleNoSuspendCoroutine(coro::Shape &Shape) {
987  auto *CoroBegin = Shape.CoroBegin;
988  auto *CoroId = CoroBegin->getId();
989  auto *AllocInst = CoroId->getCoroAlloc();
990  switch (Shape.ABI) {
991  case coro::ABI::Switch: {
992    auto SwitchId = cast<CoroIdInst>(CoroId);
993    coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
994    if (AllocInst) {
995      IRBuilder<> Builder(AllocInst);
996      // FIXME: Need to handle overaligned members.
997      auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
998      auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
999      AllocInst->replaceAllUsesWith(Builder.getFalse());
1000      AllocInst->eraseFromParent();
1001      CoroBegin->replaceAllUsesWith(VFrame);
1002    } else {
1003      CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
1004    }
1005    break;
1006  }
1007
1008  case coro::ABI::Retcon:
1009  case coro::ABI::RetconOnce:
1010    CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
1011    break;
1012  }
1013
1014  CoroBegin->eraseFromParent();
1015}
1016
1017// SimplifySuspendPoint needs to check that there is no calls between
1018// coro_save and coro_suspend, since any of the calls may potentially resume
1019// the coroutine and if that is the case we cannot eliminate the suspend point.
1020static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
1021  for (Instruction *I = From; I != To; I = I->getNextNode()) {
1022    // Assume that no intrinsic can resume the coroutine.
1023    if (isa<IntrinsicInst>(I))
1024      continue;
1025
1026    if (CallSite(I))
1027      return true;
1028  }
1029  return false;
1030}
1031
1032static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
1033  SmallPtrSet<BasicBlock *, 8> Set;
1034  SmallVector<BasicBlock *, 8> Worklist;
1035
1036  Set.insert(SaveBB);
1037  Worklist.push_back(ResDesBB);
1038
1039  // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
1040  // returns a token consumed by suspend instruction, all blocks in between
1041  // will have to eventually hit SaveBB when going backwards from ResDesBB.
1042  while (!Worklist.empty()) {
1043    auto *BB = Worklist.pop_back_val();
1044    Set.insert(BB);
1045    for (auto *Pred : predecessors(BB))
1046      if (Set.count(Pred) == 0)
1047        Worklist.push_back(Pred);
1048  }
1049
1050  // SaveBB and ResDesBB are checked separately in hasCallsBetween.
1051  Set.erase(SaveBB);
1052  Set.erase(ResDesBB);
1053
1054  for (auto *BB : Set)
1055    if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
1056      return true;
1057
1058  return false;
1059}
1060
1061static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
1062  auto *SaveBB = Save->getParent();
1063  auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
1064
1065  if (SaveBB == ResumeOrDestroyBB)
1066    return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);
1067
1068  // Any calls from Save to the end of the block?
1069  if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
1070    return true;
1071
1072  // Any calls from begging of the block up to ResumeOrDestroy?
1073  if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
1074                             ResumeOrDestroy))
1075    return true;
1076
1077  // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
1078  if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
1079    return true;
1080
1081  return false;
1082}
1083
1084// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
1085// suspend point and replace it with nornal control flow.
1086static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
1087                                 CoroBeginInst *CoroBegin) {
1088  Instruction *Prev = Suspend->getPrevNode();
1089  if (!Prev) {
1090    auto *Pred = Suspend->getParent()->getSinglePredecessor();
1091    if (!Pred)
1092      return false;
1093    Prev = Pred->getTerminator();
1094  }
1095
1096  CallSite CS{Prev};
1097  if (!CS)
1098    return false;
1099
1100  auto *CallInstr = CS.getInstruction();
1101
1102  auto *Callee = CS.getCalledValue()->stripPointerCasts();
1103
1104  // See if the callsite is for resumption or destruction of the coroutine.
1105  auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
1106  if (!SubFn)
1107    return false;
1108
1109  // Does not refer to the current coroutine, we cannot do anything with it.
1110  if (SubFn->getFrame() != CoroBegin)
1111    return false;
1112
1113  // See if the transformation is safe. Specifically, see if there are any
1114  // calls in between Save and CallInstr. They can potenitally resume the
1115  // coroutine rendering this optimization unsafe.
1116  auto *Save = Suspend->getCoroSave();
1117  if (hasCallsBetween(Save, CallInstr))
1118    return false;
1119
1120  // Replace llvm.coro.suspend with the value that results in resumption over
1121  // the resume or cleanup path.
1122  Suspend->replaceAllUsesWith(SubFn->getRawIndex());
1123  Suspend->eraseFromParent();
1124  Save->eraseFromParent();
1125
1126  // No longer need a call to coro.resume or coro.destroy.
1127  if (auto *Invoke = dyn_cast<InvokeInst>(CallInstr)) {
1128    BranchInst::Create(Invoke->getNormalDest(), Invoke);
1129  }
1130
1131  // Grab the CalledValue from CS before erasing the CallInstr.
1132  auto *CalledValue = CS.getCalledValue();
1133  CallInstr->eraseFromParent();
1134
1135  // If no more users remove it. Usually it is a bitcast of SubFn.
1136  if (CalledValue != SubFn && CalledValue->user_empty())
1137    if (auto *I = dyn_cast<Instruction>(CalledValue))
1138      I->eraseFromParent();
1139
1140  // Now we are good to remove SubFn.
1141  if (SubFn->user_empty())
1142    SubFn->eraseFromParent();
1143
1144  return true;
1145}
1146
1147// Remove suspend points that are simplified.
1148static void simplifySuspendPoints(coro::Shape &Shape) {
1149  // Currently, the only simplification we do is switch-lowering-specific.
1150  if (Shape.ABI != coro::ABI::Switch)
1151    return;
1152
1153  auto &S = Shape.CoroSuspends;
1154  size_t I = 0, N = S.size();
1155  if (N == 0)
1156    return;
1157  while (true) {
1158    auto SI = cast<CoroSuspendInst>(S[I]);
1159    // Leave final.suspend to handleFinalSuspend since it is undefined behavior
1160    // to resume a coroutine suspended at the final suspend point.
1161    if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
1162      if (--N == I)
1163        break;
1164      std::swap(S[I], S[N]);
1165      continue;
1166    }
1167    if (++I == N)
1168      break;
1169  }
1170  S.resize(N);
1171}
1172
1173static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
1174                                 SmallVectorImpl<Function *> &Clones) {
1175  assert(Shape.ABI == coro::ABI::Switch);
1176
1177  createResumeEntryBlock(F, Shape);
1178  auto ResumeClone = createClone(F, ".resume", Shape,
1179                                 CoroCloner::Kind::SwitchResume);
1180  auto DestroyClone = createClone(F, ".destroy", Shape,
1181                                  CoroCloner::Kind::SwitchUnwind);
1182  auto CleanupClone = createClone(F, ".cleanup", Shape,
1183                                  CoroCloner::Kind::SwitchCleanup);
1184
1185  postSplitCleanup(*ResumeClone);
1186  postSplitCleanup(*DestroyClone);
1187  postSplitCleanup(*CleanupClone);
1188
1189  addMustTailToCoroResumes(*ResumeClone);
1190
1191  // Store addresses resume/destroy/cleanup functions in the coroutine frame.
1192  updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
1193
1194  assert(Clones.empty());
1195  Clones.push_back(ResumeClone);
1196  Clones.push_back(DestroyClone);
1197  Clones.push_back(CleanupClone);
1198
1199  // Create a constant array referring to resume/destroy/clone functions pointed
1200  // by the last argument of @llvm.coro.info, so that CoroElide pass can
1201  // determined correct function to call.
1202  setCoroInfo(F, Shape, Clones);
1203}
1204
1205static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
1206                                 SmallVectorImpl<Function *> &Clones) {
1207  assert(Shape.ABI == coro::ABI::Retcon ||
1208         Shape.ABI == coro::ABI::RetconOnce);
1209  assert(Clones.empty());
1210
1211  // Reset various things that the optimizer might have decided it
1212  // "knows" about the coroutine function due to not seeing a return.
1213  F.removeFnAttr(Attribute::NoReturn);
1214  F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
1215  F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
1216
1217  // Allocate the frame.
1218  auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
1219  Value *RawFramePtr;
1220  if (Shape.RetconLowering.IsFrameInlineInStorage) {
1221    RawFramePtr = Id->getStorage();
1222  } else {
1223    IRBuilder<> Builder(Id);
1224
1225    // Determine the size of the frame.
1226    const DataLayout &DL = F.getParent()->getDataLayout();
1227    auto Size = DL.getTypeAllocSize(Shape.FrameTy);
1228
1229    // Allocate.  We don't need to update the call graph node because we're
1230    // going to recompute it from scratch after splitting.
1231    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
1232    RawFramePtr =
1233      Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
1234
1235    // Stash the allocated frame pointer in the continuation storage.
1236    auto Dest = Builder.CreateBitCast(Id->getStorage(),
1237                                      RawFramePtr->getType()->getPointerTo());
1238    Builder.CreateStore(RawFramePtr, Dest);
1239  }
1240
1241  // Map all uses of llvm.coro.begin to the allocated frame pointer.
1242  {
1243    // Make sure we don't invalidate Shape.FramePtr.
1244    TrackingVH<Instruction> Handle(Shape.FramePtr);
1245    Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
1246    Shape.FramePtr = Handle.getValPtr();
1247  }
1248
1249  // Create a unique return block.
1250  BasicBlock *ReturnBB = nullptr;
1251  SmallVector<PHINode *, 4> ReturnPHIs;
1252
1253  // Create all the functions in order after the main function.
1254  auto NextF = std::next(F.getIterator());
1255
1256  // Create a continuation function for each of the suspend points.
1257  Clones.reserve(Shape.CoroSuspends.size());
1258  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
1259    auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);
1260
1261    // Create the clone declaration.
1262    auto Continuation =
1263      createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF);
1264    Clones.push_back(Continuation);
1265
1266    // Insert a branch to the unified return block immediately before
1267    // the suspend point.
1268    auto SuspendBB = Suspend->getParent();
1269    auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
1270    auto Branch = cast<BranchInst>(SuspendBB->getTerminator());
1271
1272    // Create the unified return block.
1273    if (!ReturnBB) {
1274      // Place it before the first suspend.
1275      ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F,
1276                                    NewSuspendBB);
1277      Shape.RetconLowering.ReturnBlock = ReturnBB;
1278
1279      IRBuilder<> Builder(ReturnBB);
1280
1281      // Create PHIs for all the return values.
1282      assert(ReturnPHIs.empty());
1283
1284      // First, the continuation.
1285      ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
1286                                             Shape.CoroSuspends.size()));
1287
1288      // Next, all the directly-yielded values.
1289      for (auto ResultTy : Shape.getRetconResultTypes())
1290        ReturnPHIs.push_back(Builder.CreatePHI(ResultTy,
1291                                               Shape.CoroSuspends.size()));
1292
1293      // Build the return value.
1294      auto RetTy = F.getReturnType();
1295
1296      // Cast the continuation value if necessary.
1297      // We can't rely on the types matching up because that type would
1298      // have to be infinite.
1299      auto CastedContinuationTy =
1300        (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
1301      auto *CastedContinuation =
1302        Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);
1303
1304      Value *RetV;
1305      if (ReturnPHIs.size() == 1) {
1306        RetV = CastedContinuation;
1307      } else {
1308        RetV = UndefValue::get(RetTy);
1309        RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
1310        for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
1311          RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
1312      }
1313
1314      Builder.CreateRet(RetV);
1315    }
1316
1317    // Branch to the return block.
1318    Branch->setSuccessor(0, ReturnBB);
1319    ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
1320    size_t NextPHIIndex = 1;
1321    for (auto &VUse : Suspend->value_operands())
1322      ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
1323    assert(NextPHIIndex == ReturnPHIs.size());
1324  }
1325
1326  assert(Clones.size() == Shape.CoroSuspends.size());
1327  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
1328    auto Suspend = Shape.CoroSuspends[i];
1329    auto Clone = Clones[i];
1330
1331    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
1332  }
1333}
1334
1335namespace {
1336  class PrettyStackTraceFunction : public PrettyStackTraceEntry {
1337    Function &F;
1338  public:
1339    PrettyStackTraceFunction(Function &F) : F(F) {}
1340    void print(raw_ostream &OS) const override {
1341      OS << "While splitting coroutine ";
1342      F.printAsOperand(OS, /*print type*/ false, F.getParent());
1343      OS << "\n";
1344    }
1345  };
1346}
1347
1348static void splitCoroutine(Function &F, coro::Shape &Shape,
1349                           SmallVectorImpl<Function *> &Clones) {
1350  switch (Shape.ABI) {
1351  case coro::ABI::Switch:
1352    return splitSwitchCoroutine(F, Shape, Clones);
1353  case coro::ABI::Retcon:
1354  case coro::ABI::RetconOnce:
1355    return splitRetconCoroutine(F, Shape, Clones);
1356  }
1357  llvm_unreachable("bad ABI kind");
1358}
1359
1360static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
1361  PrettyStackTraceFunction prettyStackTrace(F);
1362
1363  // The suspend-crossing algorithm in buildCoroutineFrame get tripped
1364  // up by uses in unreachable blocks, so remove them as a first pass.
1365  removeUnreachableBlocks(F);
1366
1367  coro::Shape Shape(F);
1368  if (!Shape.CoroBegin)
1369    return;
1370
1371  simplifySuspendPoints(Shape);
1372  buildCoroutineFrame(F, Shape);
1373  replaceFrameSize(Shape);
1374
1375  SmallVector<Function*, 4> Clones;
1376
1377  // If there are no suspend points, no split required, just remove
1378  // the allocation and deallocation blocks, they are not needed.
1379  if (Shape.CoroSuspends.empty()) {
1380    handleNoSuspendCoroutine(Shape);
1381  } else {
1382    splitCoroutine(F, Shape, Clones);
1383  }
1384
1385  // Replace all the swifterror operations in the original function.
1386  // This invalidates SwiftErrorOps in the Shape.
1387  replaceSwiftErrorOps(F, Shape, nullptr);
1388
1389  removeCoroEnds(Shape, &CG);
1390  postSplitCleanup(F);
1391
1392  // Update call graph and add the functions we created to the SCC.
1393  coro::updateCallGraph(F, Clones, CG, SCC);
1394}
1395
1396// When we see the coroutine the first time, we insert an indirect call to a
1397// devirt trigger function and mark the coroutine that it is now ready for
1398// split.
1399static void prepareForSplit(Function &F, CallGraph &CG) {
1400  Module &M = *F.getParent();
1401  LLVMContext &Context = F.getContext();
1402#ifndef NDEBUG
1403  Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
1404  assert(DevirtFn && "coro.devirt.trigger function not found");
1405#endif
1406
1407  F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
1408
1409  // Insert an indirect call sequence that will be devirtualized by CoroElide
1410  // pass:
1411  //    %0 = call i8* @llvm.coro.subfn.addr(i8* null, i8 -1)
1412  //    %1 = bitcast i8* %0 to void(i8*)*
1413  //    call void %1(i8* null)
1414  coro::LowererBase Lowerer(M);
1415  Instruction *InsertPt = F.getEntryBlock().getTerminator();
1416  auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
1417  auto *DevirtFnAddr =
1418      Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
1419  FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context),
1420                                         {Type::getInt8PtrTy(Context)}, false);
1421  auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt);
1422
1423  // Update CG graph with an indirect call we just added.
1424  CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
1425}
1426
1427// Make sure that there is a devirtualization trigger function that the
1428// coro-split pass uses to force a restart of the CGSCC pipeline. If the devirt
1429// trigger function is not found, we will create one and add it to the current
1430// SCC.
1431static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
1432  Module &M = CG.getModule();
1433  if (M.getFunction(CORO_DEVIRT_TRIGGER_FN))
1434    return;
1435
1436  LLVMContext &C = M.getContext();
1437  auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
1438                                 /*isVarArg=*/false);
1439  Function *DevirtFn =
1440      Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
1441                       CORO_DEVIRT_TRIGGER_FN, &M);
1442  DevirtFn->addFnAttr(Attribute::AlwaysInline);
1443  auto *Entry = BasicBlock::Create(C, "entry", DevirtFn);
1444  ReturnInst::Create(C, Entry);
1445
1446  auto *Node = CG.getOrInsertFunction(DevirtFn);
1447
1448  SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end());
1449  Nodes.push_back(Node);
1450  SCC.initialize(Nodes);
1451}
1452
1453/// Replace a call to llvm.coro.prepare.retcon.
1454static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
1455  auto CastFn = Prepare->getArgOperand(0); // as an i8*
1456  auto Fn = CastFn->stripPointerCasts(); // as its original type
1457
1458  // Find call graph nodes for the preparation.
1459  CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr;
1460  if (auto ConcreteFn = dyn_cast<Function>(Fn)) {
1461    PrepareUserNode = CG[Prepare->getFunction()];
1462    FnNode = CG[ConcreteFn];
1463  }
1464
1465  // Attempt to peephole this pattern:
1466  //    %0 = bitcast [[TYPE]] @some_function to i8*
1467  //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
1468  //    %2 = bitcast %1 to [[TYPE]]
1469  // ==>
1470  //    %2 = @some_function
1471  for (auto UI = Prepare->use_begin(), UE = Prepare->use_end();
1472         UI != UE; ) {
1473    // Look for bitcasts back to the original function type.
1474    auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
1475    if (!Cast || Cast->getType() != Fn->getType()) continue;
1476
1477    // Check whether the replacement will introduce new direct calls.
1478    // If so, we'll need to update the call graph.
1479    if (PrepareUserNode) {
1480      for (auto &Use : Cast->uses()) {
1481        if (auto *CB = dyn_cast<CallBase>(Use.getUser())) {
1482          if (!CB->isCallee(&Use))
1483            continue;
1484          PrepareUserNode->removeCallEdgeFor(*CB);
1485          PrepareUserNode->addCalledFunction(CB, FnNode);
1486        }
1487      }
1488    }
1489
1490    // Replace and remove the cast.
1491    Cast->replaceAllUsesWith(Fn);
1492    Cast->eraseFromParent();
1493  }
1494
1495  // Replace any remaining uses with the function as an i8*.
1496  // This can never directly be a callee, so we don't need to update CG.
1497  Prepare->replaceAllUsesWith(CastFn);
1498  Prepare->eraseFromParent();
1499
1500  // Kill dead bitcasts.
1501  while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
1502    if (!Cast->use_empty()) break;
1503    CastFn = Cast->getOperand(0);
1504    Cast->eraseFromParent();
1505  }
1506}
1507
1508/// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent
1509/// IPO from operating on calls to a retcon coroutine before it's been
1510/// split.  This is only safe to do after we've split all retcon
1511/// coroutines in the module.  We can do that this in this pass because
1512/// this pass does promise to split all retcon coroutines (as opposed to
1513/// switch coroutines, which are lowered in multiple stages).
1514static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
1515  bool Changed = false;
1516  for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end();
1517         PI != PE; ) {
1518    // Intrinsics can only be used in calls.
1519    auto *Prepare = cast<CallInst>((PI++)->getUser());
1520    replacePrepare(Prepare, CG);
1521    Changed = true;
1522  }
1523
1524  return Changed;
1525}
1526
1527//===----------------------------------------------------------------------===//
1528//                              Top Level Driver
1529//===----------------------------------------------------------------------===//
1530
1531namespace {
1532
1533struct CoroSplitLegacy : public CallGraphSCCPass {
1534  static char ID; // Pass identification, replacement for typeid
1535
1536  CoroSplitLegacy() : CallGraphSCCPass(ID) {
1537    initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry());
1538  }
1539
1540  bool Run = false;
1541
1542  // A coroutine is identified by the presence of coro.begin intrinsic, if
1543  // we don't have any, this pass has nothing to do.
1544  bool doInitialization(CallGraph &CG) override {
1545    Run = coro::declaresIntrinsics(CG.getModule(),
1546                                   {"llvm.coro.begin",
1547                                    "llvm.coro.prepare.retcon"});
1548    return CallGraphSCCPass::doInitialization(CG);
1549  }
1550
1551  bool runOnSCC(CallGraphSCC &SCC) override {
1552    if (!Run)
1553      return false;
1554
1555    // Check for uses of llvm.coro.prepare.retcon.
1556    auto PrepareFn =
1557      SCC.getCallGraph().getModule().getFunction("llvm.coro.prepare.retcon");
1558    if (PrepareFn && PrepareFn->use_empty())
1559      PrepareFn = nullptr;
1560
1561    // Find coroutines for processing.
1562    SmallVector<Function *, 4> Coroutines;
1563    for (CallGraphNode *CGN : SCC)
1564      if (auto *F = CGN->getFunction())
1565        if (F->hasFnAttribute(CORO_PRESPLIT_ATTR))
1566          Coroutines.push_back(F);
1567
1568    if (Coroutines.empty() && !PrepareFn)
1569      return false;
1570
1571    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
1572
1573    if (Coroutines.empty())
1574      return replaceAllPrepares(PrepareFn, CG);
1575
1576    createDevirtTriggerFunc(CG, SCC);
1577
1578    // Split all the coroutines.
1579    for (Function *F : Coroutines) {
1580      Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR);
1581      StringRef Value = Attr.getValueAsString();
1582      LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName()
1583                        << "' state: " << Value << "\n");
1584      if (Value == UNPREPARED_FOR_SPLIT) {
1585        prepareForSplit(*F, CG);
1586        continue;
1587      }
1588      F->removeFnAttr(CORO_PRESPLIT_ATTR);
1589      splitCoroutine(*F, CG, SCC);
1590    }
1591
1592    if (PrepareFn)
1593      replaceAllPrepares(PrepareFn, CG);
1594
1595    return true;
1596  }
1597
1598  void getAnalysisUsage(AnalysisUsage &AU) const override {
1599    CallGraphSCCPass::getAnalysisUsage(AU);
1600  }
1601
1602  StringRef getPassName() const override { return "Coroutine Splitting"; }
1603};
1604
1605} // end anonymous namespace
1606
1607char CoroSplitLegacy::ID = 0;
1608
1609INITIALIZE_PASS_BEGIN(
1610    CoroSplitLegacy, "coro-split",
1611    "Split coroutine into a set of functions driving its state machine", false,
1612    false)
1613INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
1614INITIALIZE_PASS_END(
1615    CoroSplitLegacy, "coro-split",
1616    "Split coroutine into a set of functions driving its state machine", false,
1617    false)
1618
1619Pass *llvm::createCoroSplitLegacyPass() { return new CoroSplitLegacy(); }
1620