CoroSplit.cpp revision 363496
1//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// This pass builds the coroutine frame and outlines resume and destroy parts 9// of the coroutine into separate functions. 10// 11// We present a coroutine to an LLVM as an ordinary function with suspension 12// points marked up with intrinsics. We let the optimizer party on the coroutine 13// as a single function for as long as possible. Shortly before the coroutine is 14// eligible to be inlined into its callers, we split up the coroutine into parts 15// corresponding to an initial, resume and destroy invocations of the coroutine, 16// add them to the current SCC and restart the IPO pipeline to optimize the 17// coroutine subfunctions we extracted before proceeding to the caller of the 18// coroutine. 19//===----------------------------------------------------------------------===// 20 21#include "CoroInstr.h" 22#include "CoroInternal.h" 23#include "llvm/ADT/DenseMap.h" 24#include "llvm/ADT/SmallPtrSet.h" 25#include "llvm/ADT/SmallVector.h" 26#include "llvm/ADT/StringRef.h" 27#include "llvm/ADT/Twine.h" 28#include "llvm/Analysis/CallGraph.h" 29#include "llvm/Analysis/CallGraphSCCPass.h" 30#include "llvm/IR/Argument.h" 31#include "llvm/IR/Attributes.h" 32#include "llvm/IR/BasicBlock.h" 33#include "llvm/IR/CFG.h" 34#include "llvm/IR/CallSite.h" 35#include "llvm/IR/CallingConv.h" 36#include "llvm/IR/Constants.h" 37#include "llvm/IR/DataLayout.h" 38#include "llvm/IR/DerivedTypes.h" 39#include "llvm/IR/Function.h" 40#include "llvm/IR/GlobalValue.h" 41#include "llvm/IR/GlobalVariable.h" 42#include "llvm/IR/IRBuilder.h" 43#include "llvm/IR/InstIterator.h" 44#include "llvm/IR/InstrTypes.h" 45#include "llvm/IR/Instruction.h" 46#include "llvm/IR/Instructions.h" 47#include "llvm/IR/IntrinsicInst.h" 48#include "llvm/IR/LLVMContext.h" 49#include "llvm/IR/LegacyPassManager.h" 50#include "llvm/IR/Module.h" 51#include "llvm/IR/Type.h" 52#include "llvm/IR/Value.h" 53#include "llvm/IR/Verifier.h" 54#include "llvm/InitializePasses.h" 55#include "llvm/Pass.h" 56#include "llvm/Support/Casting.h" 57#include "llvm/Support/Debug.h" 58#include "llvm/Support/PrettyStackTrace.h" 59#include "llvm/Support/raw_ostream.h" 60#include "llvm/Transforms/Scalar.h" 61#include "llvm/Transforms/Utils/BasicBlockUtils.h" 62#include "llvm/Transforms/Utils/Cloning.h" 63#include "llvm/Transforms/Utils/Local.h" 64#include "llvm/Transforms/Utils/ValueMapper.h" 65#include <cassert> 66#include <cstddef> 67#include <cstdint> 68#include <initializer_list> 69#include <iterator> 70 71using namespace llvm; 72 73#define DEBUG_TYPE "coro-split" 74 75namespace { 76 77/// A little helper class for building 78class CoroCloner { 79public: 80 enum class Kind { 81 /// The shared resume function for a switch lowering. 82 SwitchResume, 83 84 /// The shared unwind function for a switch lowering. 85 SwitchUnwind, 86 87 /// The shared cleanup function for a switch lowering. 88 SwitchCleanup, 89 90 /// An individual continuation function. 91 Continuation, 92 }; 93private: 94 Function &OrigF; 95 Function *NewF; 96 const Twine &Suffix; 97 coro::Shape &Shape; 98 Kind FKind; 99 ValueToValueMapTy VMap; 100 IRBuilder<> Builder; 101 Value *NewFramePtr = nullptr; 102 Value *SwiftErrorSlot = nullptr; 103 104 /// The active suspend instruction; meaningful only for continuation ABIs. 105 AnyCoroSuspendInst *ActiveSuspend = nullptr; 106 107public: 108 /// Create a cloner for a switch lowering. 109 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, 110 Kind FKind) 111 : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), 112 FKind(FKind), Builder(OrigF.getContext()) { 113 assert(Shape.ABI == coro::ABI::Switch); 114 } 115 116 /// Create a cloner for a continuation lowering. 117 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, 118 Function *NewF, AnyCoroSuspendInst *ActiveSuspend) 119 : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape), 120 FKind(Kind::Continuation), Builder(OrigF.getContext()), 121 ActiveSuspend(ActiveSuspend) { 122 assert(Shape.ABI == coro::ABI::Retcon || 123 Shape.ABI == coro::ABI::RetconOnce); 124 assert(NewF && "need existing function for continuation"); 125 assert(ActiveSuspend && "need active suspend point for continuation"); 126 } 127 128 Function *getFunction() const { 129 assert(NewF != nullptr && "declaration not yet set"); 130 return NewF; 131 } 132 133 void create(); 134 135private: 136 bool isSwitchDestroyFunction() { 137 switch (FKind) { 138 case Kind::Continuation: 139 case Kind::SwitchResume: 140 return false; 141 case Kind::SwitchUnwind: 142 case Kind::SwitchCleanup: 143 return true; 144 } 145 llvm_unreachable("Unknown CoroCloner::Kind enum"); 146 } 147 148 void createDeclaration(); 149 void replaceEntryBlock(); 150 Value *deriveNewFramePointer(); 151 void replaceRetconSuspendUses(); 152 void replaceCoroSuspends(); 153 void replaceCoroEnds(); 154 void replaceSwiftErrorOps(); 155 void handleFinalSuspend(); 156 void maybeFreeContinuationStorage(); 157}; 158 159} // end anonymous namespace 160 161static void maybeFreeRetconStorage(IRBuilder<> &Builder, 162 const coro::Shape &Shape, Value *FramePtr, 163 CallGraph *CG) { 164 assert(Shape.ABI == coro::ABI::Retcon || 165 Shape.ABI == coro::ABI::RetconOnce); 166 if (Shape.RetconLowering.IsFrameInlineInStorage) 167 return; 168 169 Shape.emitDealloc(Builder, FramePtr, CG); 170} 171 172/// Replace a non-unwind call to llvm.coro.end. 173static void replaceFallthroughCoroEnd(CoroEndInst *End, 174 const coro::Shape &Shape, Value *FramePtr, 175 bool InResume, CallGraph *CG) { 176 // Start inserting right before the coro.end. 177 IRBuilder<> Builder(End); 178 179 // Create the return instruction. 180 switch (Shape.ABI) { 181 // The cloned functions in switch-lowering always return void. 182 case coro::ABI::Switch: 183 // coro.end doesn't immediately end the coroutine in the main function 184 // in this lowering, because we need to deallocate the coroutine. 185 if (!InResume) 186 return; 187 Builder.CreateRetVoid(); 188 break; 189 190 // In unique continuation lowering, the continuations always return void. 191 // But we may have implicitly allocated storage. 192 case coro::ABI::RetconOnce: 193 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); 194 Builder.CreateRetVoid(); 195 break; 196 197 // In non-unique continuation lowering, we signal completion by returning 198 // a null continuation. 199 case coro::ABI::Retcon: { 200 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); 201 auto RetTy = Shape.getResumeFunctionType()->getReturnType(); 202 auto RetStructTy = dyn_cast<StructType>(RetTy); 203 PointerType *ContinuationTy = 204 cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy); 205 206 Value *ReturnValue = ConstantPointerNull::get(ContinuationTy); 207 if (RetStructTy) { 208 ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy), 209 ReturnValue, 0); 210 } 211 Builder.CreateRet(ReturnValue); 212 break; 213 } 214 } 215 216 // Remove the rest of the block, by splitting it into an unreachable block. 217 auto *BB = End->getParent(); 218 BB->splitBasicBlock(End); 219 BB->getTerminator()->eraseFromParent(); 220} 221 222/// Replace an unwind call to llvm.coro.end. 223static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape, 224 Value *FramePtr, bool InResume, CallGraph *CG){ 225 IRBuilder<> Builder(End); 226 227 switch (Shape.ABI) { 228 // In switch-lowering, this does nothing in the main function. 229 case coro::ABI::Switch: 230 if (!InResume) 231 return; 232 break; 233 234 // In continuation-lowering, this frees the continuation storage. 235 case coro::ABI::Retcon: 236 case coro::ABI::RetconOnce: 237 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); 238 break; 239 } 240 241 // If coro.end has an associated bundle, add cleanupret instruction. 242 if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) { 243 auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]); 244 auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr); 245 End->getParent()->splitBasicBlock(End); 246 CleanupRet->getParent()->getTerminator()->eraseFromParent(); 247 } 248} 249 250static void replaceCoroEnd(CoroEndInst *End, const coro::Shape &Shape, 251 Value *FramePtr, bool InResume, CallGraph *CG) { 252 if (End->isUnwind()) 253 replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); 254 else 255 replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG); 256 257 auto &Context = End->getContext(); 258 End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context) 259 : ConstantInt::getFalse(Context)); 260 End->eraseFromParent(); 261} 262 263// Create an entry block for a resume function with a switch that will jump to 264// suspend points. 265static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { 266 assert(Shape.ABI == coro::ABI::Switch); 267 LLVMContext &C = F.getContext(); 268 269 // resume.entry: 270 // %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, 271 // i32 2 272 // % index = load i32, i32* %index.addr 273 // switch i32 %index, label %unreachable [ 274 // i32 0, label %resume.0 275 // i32 1, label %resume.1 276 // ... 277 // ] 278 279 auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F); 280 auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F); 281 282 IRBuilder<> Builder(NewEntry); 283 auto *FramePtr = Shape.FramePtr; 284 auto *FrameTy = Shape.FrameTy; 285 auto *GepIndex = Builder.CreateStructGEP( 286 FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); 287 auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index"); 288 auto *Switch = 289 Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size()); 290 Shape.SwitchLowering.ResumeSwitch = Switch; 291 292 size_t SuspendIndex = 0; 293 for (auto *AnyS : Shape.CoroSuspends) { 294 auto *S = cast<CoroSuspendInst>(AnyS); 295 ConstantInt *IndexVal = Shape.getIndex(SuspendIndex); 296 297 // Replace CoroSave with a store to Index: 298 // %index.addr = getelementptr %f.frame... (index field number) 299 // store i32 0, i32* %index.addr1 300 auto *Save = S->getCoroSave(); 301 Builder.SetInsertPoint(Save); 302 if (S->isFinal()) { 303 // Final suspend point is represented by storing zero in ResumeFnAddr. 304 auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr, 305 coro::Shape::SwitchFieldIndex::Resume, 306 "ResumeFn.addr"); 307 auto *NullPtr = ConstantPointerNull::get(cast<PointerType>( 308 cast<PointerType>(GepIndex->getType())->getElementType())); 309 Builder.CreateStore(NullPtr, GepIndex); 310 } else { 311 auto *GepIndex = Builder.CreateStructGEP( 312 FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); 313 Builder.CreateStore(IndexVal, GepIndex); 314 } 315 Save->replaceAllUsesWith(ConstantTokenNone::get(C)); 316 Save->eraseFromParent(); 317 318 // Split block before and after coro.suspend and add a jump from an entry 319 // switch: 320 // 321 // whateverBB: 322 // whatever 323 // %0 = call i8 @llvm.coro.suspend(token none, i1 false) 324 // switch i8 %0, label %suspend[i8 0, label %resume 325 // i8 1, label %cleanup] 326 // becomes: 327 // 328 // whateverBB: 329 // whatever 330 // br label %resume.0.landing 331 // 332 // resume.0: ; <--- jump from the switch in the resume.entry 333 // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false) 334 // br label %resume.0.landing 335 // 336 // resume.0.landing: 337 // %1 = phi i8[-1, %whateverBB], [%0, %resume.0] 338 // switch i8 % 1, label %suspend [i8 0, label %resume 339 // i8 1, label %cleanup] 340 341 auto *SuspendBB = S->getParent(); 342 auto *ResumeBB = 343 SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex)); 344 auto *LandingBB = ResumeBB->splitBasicBlock( 345 S->getNextNode(), ResumeBB->getName() + Twine(".landing")); 346 Switch->addCase(IndexVal, ResumeBB); 347 348 cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB); 349 auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front()); 350 S->replaceAllUsesWith(PN); 351 PN->addIncoming(Builder.getInt8(-1), SuspendBB); 352 PN->addIncoming(S, ResumeBB); 353 354 ++SuspendIndex; 355 } 356 357 Builder.SetInsertPoint(UnreachBB); 358 Builder.CreateUnreachable(); 359 360 Shape.SwitchLowering.ResumeEntryBlock = NewEntry; 361} 362 363 364// Rewrite final suspend point handling. We do not use suspend index to 365// represent the final suspend point. Instead we zero-out ResumeFnAddr in the 366// coroutine frame, since it is undefined behavior to resume a coroutine 367// suspended at the final suspend point. Thus, in the resume function, we can 368// simply remove the last case (when coro::Shape is built, the final suspend 369// point (if present) is always the last element of CoroSuspends array). 370// In the destroy function, we add a code sequence to check if ResumeFnAddress 371// is Null, and if so, jump to the appropriate label to handle cleanup from the 372// final suspend point. 373void CoroCloner::handleFinalSuspend() { 374 assert(Shape.ABI == coro::ABI::Switch && 375 Shape.SwitchLowering.HasFinalSuspend); 376 auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]); 377 auto FinalCaseIt = std::prev(Switch->case_end()); 378 BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor(); 379 Switch->removeCase(FinalCaseIt); 380 if (isSwitchDestroyFunction()) { 381 BasicBlock *OldSwitchBB = Switch->getParent(); 382 auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch"); 383 Builder.SetInsertPoint(OldSwitchBB->getTerminator()); 384 auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr, 385 coro::Shape::SwitchFieldIndex::Resume, 386 "ResumeFn.addr"); 387 auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(), 388 GepIndex); 389 auto *Cond = Builder.CreateIsNull(Load); 390 Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); 391 OldSwitchBB->getTerminator()->eraseFromParent(); 392 } 393} 394 395static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape, 396 const Twine &Suffix, 397 Module::iterator InsertBefore) { 398 Module *M = OrigF.getParent(); 399 auto *FnTy = Shape.getResumeFunctionType(); 400 401 Function *NewF = 402 Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage, 403 OrigF.getName() + Suffix); 404 NewF->addParamAttr(0, Attribute::NonNull); 405 NewF->addParamAttr(0, Attribute::NoAlias); 406 407 M->getFunctionList().insert(InsertBefore, NewF); 408 409 return NewF; 410} 411 412/// Replace uses of the active llvm.coro.suspend.retcon call with the 413/// arguments to the continuation function. 414/// 415/// This assumes that the builder has a meaningful insertion point. 416void CoroCloner::replaceRetconSuspendUses() { 417 assert(Shape.ABI == coro::ABI::Retcon || 418 Shape.ABI == coro::ABI::RetconOnce); 419 420 auto NewS = VMap[ActiveSuspend]; 421 if (NewS->use_empty()) return; 422 423 // Copy out all the continuation arguments after the buffer pointer into 424 // an easily-indexed data structure for convenience. 425 SmallVector<Value*, 8> Args; 426 for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I) 427 Args.push_back(&*I); 428 429 // If the suspend returns a single scalar value, we can just do a simple 430 // replacement. 431 if (!isa<StructType>(NewS->getType())) { 432 assert(Args.size() == 1); 433 NewS->replaceAllUsesWith(Args.front()); 434 return; 435 } 436 437 // Try to peephole extracts of an aggregate return. 438 for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) { 439 auto EVI = dyn_cast<ExtractValueInst>((UI++)->getUser()); 440 if (!EVI || EVI->getNumIndices() != 1) 441 continue; 442 443 EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]); 444 EVI->eraseFromParent(); 445 } 446 447 // If we have no remaining uses, we're done. 448 if (NewS->use_empty()) return; 449 450 // Otherwise, we need to create an aggregate. 451 Value *Agg = UndefValue::get(NewS->getType()); 452 for (size_t I = 0, E = Args.size(); I != E; ++I) 453 Agg = Builder.CreateInsertValue(Agg, Args[I], I); 454 455 NewS->replaceAllUsesWith(Agg); 456} 457 458void CoroCloner::replaceCoroSuspends() { 459 Value *SuspendResult; 460 461 switch (Shape.ABI) { 462 // In switch lowering, replace coro.suspend with the appropriate value 463 // for the type of function we're extracting. 464 // Replacing coro.suspend with (0) will result in control flow proceeding to 465 // a resume label associated with a suspend point, replacing it with (1) will 466 // result in control flow proceeding to a cleanup label associated with this 467 // suspend point. 468 case coro::ABI::Switch: 469 SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0); 470 break; 471 472 // In returned-continuation lowering, the arguments from earlier 473 // continuations are theoretically arbitrary, and they should have been 474 // spilled. 475 case coro::ABI::RetconOnce: 476 case coro::ABI::Retcon: 477 return; 478 } 479 480 for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) { 481 // The active suspend was handled earlier. 482 if (CS == ActiveSuspend) continue; 483 484 auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]); 485 MappedCS->replaceAllUsesWith(SuspendResult); 486 MappedCS->eraseFromParent(); 487 } 488} 489 490void CoroCloner::replaceCoroEnds() { 491 for (CoroEndInst *CE : Shape.CoroEnds) { 492 // We use a null call graph because there's no call graph node for 493 // the cloned function yet. We'll just be rebuilding that later. 494 auto NewCE = cast<CoroEndInst>(VMap[CE]); 495 replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr); 496 } 497} 498 499static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, 500 ValueToValueMapTy *VMap) { 501 Value *CachedSlot = nullptr; 502 auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * { 503 if (CachedSlot) { 504 assert(CachedSlot->getType()->getPointerElementType() == ValueTy && 505 "multiple swifterror slots in function with different types"); 506 return CachedSlot; 507 } 508 509 // Check if the function has a swifterror argument. 510 for (auto &Arg : F.args()) { 511 if (Arg.isSwiftError()) { 512 CachedSlot = &Arg; 513 assert(Arg.getType()->getPointerElementType() == ValueTy && 514 "swifterror argument does not have expected type"); 515 return &Arg; 516 } 517 } 518 519 // Create a swifterror alloca. 520 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); 521 auto Alloca = Builder.CreateAlloca(ValueTy); 522 Alloca->setSwiftError(true); 523 524 CachedSlot = Alloca; 525 return Alloca; 526 }; 527 528 for (CallInst *Op : Shape.SwiftErrorOps) { 529 auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op; 530 IRBuilder<> Builder(MappedOp); 531 532 // If there are no arguments, this is a 'get' operation. 533 Value *MappedResult; 534 if (Op->getNumArgOperands() == 0) { 535 auto ValueTy = Op->getType(); 536 auto Slot = getSwiftErrorSlot(ValueTy); 537 MappedResult = Builder.CreateLoad(ValueTy, Slot); 538 } else { 539 assert(Op->getNumArgOperands() == 1); 540 auto Value = MappedOp->getArgOperand(0); 541 auto ValueTy = Value->getType(); 542 auto Slot = getSwiftErrorSlot(ValueTy); 543 Builder.CreateStore(Value, Slot); 544 MappedResult = Slot; 545 } 546 547 MappedOp->replaceAllUsesWith(MappedResult); 548 MappedOp->eraseFromParent(); 549 } 550 551 // If we're updating the original function, we've invalidated SwiftErrorOps. 552 if (VMap == nullptr) { 553 Shape.SwiftErrorOps.clear(); 554 } 555} 556 557void CoroCloner::replaceSwiftErrorOps() { 558 ::replaceSwiftErrorOps(*NewF, Shape, &VMap); 559} 560 561void CoroCloner::replaceEntryBlock() { 562 // In the original function, the AllocaSpillBlock is a block immediately 563 // following the allocation of the frame object which defines GEPs for 564 // all the allocas that have been moved into the frame, and it ends by 565 // branching to the original beginning of the coroutine. Make this 566 // the entry block of the cloned function. 567 auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]); 568 Entry->setName("entry" + Suffix); 569 Entry->moveBefore(&NewF->getEntryBlock()); 570 Entry->getTerminator()->eraseFromParent(); 571 572 // Clear all predecessors of the new entry block. There should be 573 // exactly one predecessor, which we created when splitting out 574 // AllocaSpillBlock to begin with. 575 assert(Entry->hasOneUse()); 576 auto BranchToEntry = cast<BranchInst>(Entry->user_back()); 577 assert(BranchToEntry->isUnconditional()); 578 Builder.SetInsertPoint(BranchToEntry); 579 Builder.CreateUnreachable(); 580 BranchToEntry->eraseFromParent(); 581 582 // TODO: move any allocas into Entry that weren't moved into the frame. 583 // (Currently we move all allocas into the frame.) 584 585 // Branch from the entry to the appropriate place. 586 Builder.SetInsertPoint(Entry); 587 switch (Shape.ABI) { 588 case coro::ABI::Switch: { 589 // In switch-lowering, we built a resume-entry block in the original 590 // function. Make the entry block branch to this. 591 auto *SwitchBB = 592 cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]); 593 Builder.CreateBr(SwitchBB); 594 break; 595 } 596 597 case coro::ABI::Retcon: 598 case coro::ABI::RetconOnce: { 599 // In continuation ABIs, we want to branch to immediately after the 600 // active suspend point. Earlier phases will have put the suspend in its 601 // own basic block, so just thread our jump directly to its successor. 602 auto MappedCS = cast<CoroSuspendRetconInst>(VMap[ActiveSuspend]); 603 auto Branch = cast<BranchInst>(MappedCS->getNextNode()); 604 assert(Branch->isUnconditional()); 605 Builder.CreateBr(Branch->getSuccessor(0)); 606 break; 607 } 608 } 609} 610 611/// Derive the value of the new frame pointer. 612Value *CoroCloner::deriveNewFramePointer() { 613 // Builder should be inserting to the front of the new entry block. 614 615 switch (Shape.ABI) { 616 // In switch-lowering, the argument is the frame pointer. 617 case coro::ABI::Switch: 618 return &*NewF->arg_begin(); 619 620 // In continuation-lowering, the argument is the opaque storage. 621 case coro::ABI::Retcon: 622 case coro::ABI::RetconOnce: { 623 Argument *NewStorage = &*NewF->arg_begin(); 624 auto FramePtrTy = Shape.FrameTy->getPointerTo(); 625 626 // If the storage is inline, just bitcast to the storage to the frame type. 627 if (Shape.RetconLowering.IsFrameInlineInStorage) 628 return Builder.CreateBitCast(NewStorage, FramePtrTy); 629 630 // Otherwise, load the real frame from the opaque storage. 631 auto FramePtrPtr = 632 Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo()); 633 return Builder.CreateLoad(FramePtrPtr); 634 } 635 } 636 llvm_unreachable("bad ABI"); 637} 638 639/// Clone the body of the original function into a resume function of 640/// some sort. 641void CoroCloner::create() { 642 // Create the new function if we don't already have one. 643 if (!NewF) { 644 NewF = createCloneDeclaration(OrigF, Shape, Suffix, 645 OrigF.getParent()->end()); 646 } 647 648 // Replace all args with undefs. The buildCoroutineFrame algorithm already 649 // rewritten access to the args that occurs after suspend points with loads 650 // and stores to/from the coroutine frame. 651 for (Argument &A : OrigF.args()) 652 VMap[&A] = UndefValue::get(A.getType()); 653 654 SmallVector<ReturnInst *, 4> Returns; 655 656 // Ignore attempts to change certain attributes of the function. 657 // TODO: maybe there should be a way to suppress this during cloning? 658 auto savedVisibility = NewF->getVisibility(); 659 auto savedUnnamedAddr = NewF->getUnnamedAddr(); 660 auto savedDLLStorageClass = NewF->getDLLStorageClass(); 661 662 // NewF's linkage (which CloneFunctionInto does *not* change) might not 663 // be compatible with the visibility of OrigF (which it *does* change), 664 // so protect against that. 665 auto savedLinkage = NewF->getLinkage(); 666 NewF->setLinkage(llvm::GlobalValue::ExternalLinkage); 667 668 CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns); 669 670 NewF->setLinkage(savedLinkage); 671 NewF->setVisibility(savedVisibility); 672 NewF->setUnnamedAddr(savedUnnamedAddr); 673 NewF->setDLLStorageClass(savedDLLStorageClass); 674 675 auto &Context = NewF->getContext(); 676 677 // Replace the attributes of the new function: 678 auto OrigAttrs = NewF->getAttributes(); 679 auto NewAttrs = AttributeList(); 680 681 switch (Shape.ABI) { 682 case coro::ABI::Switch: 683 // Bootstrap attributes by copying function attributes from the 684 // original function. This should include optimization settings and so on. 685 NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, 686 OrigAttrs.getFnAttributes()); 687 break; 688 689 case coro::ABI::Retcon: 690 case coro::ABI::RetconOnce: 691 // If we have a continuation prototype, just use its attributes, 692 // full-stop. 693 NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); 694 break; 695 } 696 697 // Make the frame parameter nonnull and noalias. 698 NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull); 699 NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias); 700 701 switch (Shape.ABI) { 702 // In these ABIs, the cloned functions always return 'void', and the 703 // existing return sites are meaningless. Note that for unique 704 // continuations, this includes the returns associated with suspends; 705 // this is fine because we can't suspend twice. 706 case coro::ABI::Switch: 707 case coro::ABI::RetconOnce: 708 // Remove old returns. 709 for (ReturnInst *Return : Returns) 710 changeToUnreachable(Return, /*UseLLVMTrap=*/false); 711 break; 712 713 // With multi-suspend continuations, we'll already have eliminated the 714 // original returns and inserted returns before all the suspend points, 715 // so we want to leave any returns in place. 716 case coro::ABI::Retcon: 717 break; 718 } 719 720 NewF->setAttributes(NewAttrs); 721 NewF->setCallingConv(Shape.getResumeFunctionCC()); 722 723 // Set up the new entry block. 724 replaceEntryBlock(); 725 726 Builder.SetInsertPoint(&NewF->getEntryBlock().front()); 727 NewFramePtr = deriveNewFramePointer(); 728 729 // Remap frame pointer. 730 Value *OldFramePtr = VMap[Shape.FramePtr]; 731 NewFramePtr->takeName(OldFramePtr); 732 OldFramePtr->replaceAllUsesWith(NewFramePtr); 733 734 // Remap vFrame pointer. 735 auto *NewVFrame = Builder.CreateBitCast( 736 NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame"); 737 Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]); 738 OldVFrame->replaceAllUsesWith(NewVFrame); 739 740 switch (Shape.ABI) { 741 case coro::ABI::Switch: 742 // Rewrite final suspend handling as it is not done via switch (allows to 743 // remove final case from the switch, since it is undefined behavior to 744 // resume the coroutine suspended at the final suspend point. 745 if (Shape.SwitchLowering.HasFinalSuspend) 746 handleFinalSuspend(); 747 break; 748 749 case coro::ABI::Retcon: 750 case coro::ABI::RetconOnce: 751 // Replace uses of the active suspend with the corresponding 752 // continuation-function arguments. 753 assert(ActiveSuspend != nullptr && 754 "no active suspend when lowering a continuation-style coroutine"); 755 replaceRetconSuspendUses(); 756 break; 757 } 758 759 // Handle suspends. 760 replaceCoroSuspends(); 761 762 // Handle swifterror. 763 replaceSwiftErrorOps(); 764 765 // Remove coro.end intrinsics. 766 replaceCoroEnds(); 767 768 // Eliminate coro.free from the clones, replacing it with 'null' in cleanup, 769 // to suppress deallocation code. 770 if (Shape.ABI == coro::ABI::Switch) 771 coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]), 772 /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup); 773} 774 775// Create a resume clone by cloning the body of the original function, setting 776// new entry block and replacing coro.suspend an appropriate value to force 777// resume or cleanup pass for every suspend point. 778static Function *createClone(Function &F, const Twine &Suffix, 779 coro::Shape &Shape, CoroCloner::Kind FKind) { 780 CoroCloner Cloner(F, Suffix, Shape, FKind); 781 Cloner.create(); 782 return Cloner.getFunction(); 783} 784 785/// Remove calls to llvm.coro.end in the original function. 786static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) { 787 for (auto End : Shape.CoroEnds) { 788 replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG); 789 } 790} 791 792static void replaceFrameSize(coro::Shape &Shape) { 793 if (Shape.CoroSizes.empty()) 794 return; 795 796 // In the same function all coro.sizes should have the same result type. 797 auto *SizeIntrin = Shape.CoroSizes.back(); 798 Module *M = SizeIntrin->getModule(); 799 const DataLayout &DL = M->getDataLayout(); 800 auto Size = DL.getTypeAllocSize(Shape.FrameTy); 801 auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); 802 803 for (CoroSizeInst *CS : Shape.CoroSizes) { 804 CS->replaceAllUsesWith(SizeConstant); 805 CS->eraseFromParent(); 806 } 807} 808 809// Create a global constant array containing pointers to functions provided and 810// set Info parameter of CoroBegin to point at this constant. Example: 811// 812// @f.resumers = internal constant [2 x void(%f.frame*)*] 813// [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy] 814// define void @f() { 815// ... 816// call i8* @llvm.coro.begin(i8* null, i32 0, i8* null, 817// i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*)) 818// 819// Assumes that all the functions have the same signature. 820static void setCoroInfo(Function &F, coro::Shape &Shape, 821 ArrayRef<Function *> Fns) { 822 // This only works under the switch-lowering ABI because coro elision 823 // only works on the switch-lowering ABI. 824 assert(Shape.ABI == coro::ABI::Switch); 825 826 SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end()); 827 assert(!Args.empty()); 828 Function *Part = *Fns.begin(); 829 Module *M = Part->getParent(); 830 auto *ArrTy = ArrayType::get(Part->getType(), Args.size()); 831 832 auto *ConstVal = ConstantArray::get(ArrTy, Args); 833 auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true, 834 GlobalVariable::PrivateLinkage, ConstVal, 835 F.getName() + Twine(".resumers")); 836 837 // Update coro.begin instruction to refer to this constant. 838 LLVMContext &C = F.getContext(); 839 auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C)); 840 Shape.getSwitchCoroId()->setInfo(BC); 841} 842 843// Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame. 844static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn, 845 Function *DestroyFn, Function *CleanupFn) { 846 assert(Shape.ABI == coro::ABI::Switch); 847 848 IRBuilder<> Builder(Shape.FramePtr->getNextNode()); 849 auto *ResumeAddr = Builder.CreateStructGEP( 850 Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume, 851 "resume.addr"); 852 Builder.CreateStore(ResumeFn, ResumeAddr); 853 854 Value *DestroyOrCleanupFn = DestroyFn; 855 856 CoroIdInst *CoroId = Shape.getSwitchCoroId(); 857 if (CoroAllocInst *CA = CoroId->getCoroAlloc()) { 858 // If there is a CoroAlloc and it returns false (meaning we elide the 859 // allocation, use CleanupFn instead of DestroyFn). 860 DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn); 861 } 862 863 auto *DestroyAddr = Builder.CreateStructGEP( 864 Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy, 865 "destroy.addr"); 866 Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr); 867} 868 869static void postSplitCleanup(Function &F) { 870 removeUnreachableBlocks(F); 871 872 // For now, we do a mandatory verification step because we don't 873 // entirely trust this pass. Note that we don't want to add a verifier 874 // pass to FPM below because it will also verify all the global data. 875 verifyFunction(F); 876 877 legacy::FunctionPassManager FPM(F.getParent()); 878 879 FPM.add(createSCCPPass()); 880 FPM.add(createCFGSimplificationPass()); 881 FPM.add(createEarlyCSEPass()); 882 FPM.add(createCFGSimplificationPass()); 883 884 FPM.doInitialization(); 885 FPM.run(F); 886 FPM.doFinalization(); 887} 888 889// Assuming we arrived at the block NewBlock from Prev instruction, store 890// PHI's incoming values in the ResolvedValues map. 891static void 892scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock, 893 DenseMap<Value *, Value *> &ResolvedValues) { 894 auto *PrevBB = Prev->getParent(); 895 for (PHINode &PN : NewBlock->phis()) { 896 auto V = PN.getIncomingValueForBlock(PrevBB); 897 // See if we already resolved it. 898 auto VI = ResolvedValues.find(V); 899 if (VI != ResolvedValues.end()) 900 V = VI->second; 901 // Remember the value. 902 ResolvedValues[&PN] = V; 903 } 904} 905 906// Replace a sequence of branches leading to a ret, with a clone of a ret 907// instruction. Suspend instruction represented by a switch, track the PHI 908// values and select the correct case successor when possible. 909static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) { 910 DenseMap<Value *, Value *> ResolvedValues; 911 BasicBlock *UnconditionalSucc = nullptr; 912 913 Instruction *I = InitialInst; 914 while (I->isTerminator()) { 915 if (isa<ReturnInst>(I)) { 916 if (I != InitialInst) { 917 // If InitialInst is an unconditional branch, 918 // remove PHI values that come from basic block of InitialInst 919 if (UnconditionalSucc) 920 for (PHINode &PN : UnconditionalSucc->phis()) { 921 int idx = PN.getBasicBlockIndex(InitialInst->getParent()); 922 if (idx != -1) 923 PN.removeIncomingValue(idx); 924 } 925 ReplaceInstWithInst(InitialInst, I->clone()); 926 } 927 return true; 928 } 929 if (auto *BR = dyn_cast<BranchInst>(I)) { 930 if (BR->isUnconditional()) { 931 BasicBlock *BB = BR->getSuccessor(0); 932 if (I == InitialInst) 933 UnconditionalSucc = BB; 934 scanPHIsAndUpdateValueMap(I, BB, ResolvedValues); 935 I = BB->getFirstNonPHIOrDbgOrLifetime(); 936 continue; 937 } 938 } else if (auto *SI = dyn_cast<SwitchInst>(I)) { 939 Value *V = SI->getCondition(); 940 auto it = ResolvedValues.find(V); 941 if (it != ResolvedValues.end()) 942 V = it->second; 943 if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) { 944 BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor(); 945 scanPHIsAndUpdateValueMap(I, BB, ResolvedValues); 946 I = BB->getFirstNonPHIOrDbgOrLifetime(); 947 continue; 948 } 949 } 950 return false; 951 } 952 return false; 953} 954 955// Add musttail to any resume instructions that is immediately followed by a 956// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call 957// for symmetrical coroutine control transfer (C++ Coroutines TS extension). 958// This transformation is done only in the resume part of the coroutine that has 959// identical signature and calling convention as the coro.resume call. 960static void addMustTailToCoroResumes(Function &F) { 961 bool changed = false; 962 963 // Collect potential resume instructions. 964 SmallVector<CallInst *, 4> Resumes; 965 for (auto &I : instructions(F)) 966 if (auto *Call = dyn_cast<CallInst>(&I)) 967 if (auto *CalledValue = Call->getCalledValue()) 968 // CoroEarly pass replaced coro resumes with indirect calls to an 969 // address return by CoroSubFnInst intrinsic. See if it is one of those. 970 if (isa<CoroSubFnInst>(CalledValue->stripPointerCasts())) 971 Resumes.push_back(Call); 972 973 // Set musttail on those that are followed by a ret instruction. 974 for (CallInst *Call : Resumes) 975 if (simplifyTerminatorLeadingToRet(Call->getNextNode())) { 976 Call->setTailCallKind(CallInst::TCK_MustTail); 977 changed = true; 978 } 979 980 if (changed) 981 removeUnreachableBlocks(F); 982} 983 984// Coroutine has no suspend points. Remove heap allocation for the coroutine 985// frame if possible. 986static void handleNoSuspendCoroutine(coro::Shape &Shape) { 987 auto *CoroBegin = Shape.CoroBegin; 988 auto *CoroId = CoroBegin->getId(); 989 auto *AllocInst = CoroId->getCoroAlloc(); 990 switch (Shape.ABI) { 991 case coro::ABI::Switch: { 992 auto SwitchId = cast<CoroIdInst>(CoroId); 993 coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr); 994 if (AllocInst) { 995 IRBuilder<> Builder(AllocInst); 996 // FIXME: Need to handle overaligned members. 997 auto *Frame = Builder.CreateAlloca(Shape.FrameTy); 998 auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy()); 999 AllocInst->replaceAllUsesWith(Builder.getFalse()); 1000 AllocInst->eraseFromParent(); 1001 CoroBegin->replaceAllUsesWith(VFrame); 1002 } else { 1003 CoroBegin->replaceAllUsesWith(CoroBegin->getMem()); 1004 } 1005 break; 1006 } 1007 1008 case coro::ABI::Retcon: 1009 case coro::ABI::RetconOnce: 1010 CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType())); 1011 break; 1012 } 1013 1014 CoroBegin->eraseFromParent(); 1015} 1016 1017// SimplifySuspendPoint needs to check that there is no calls between 1018// coro_save and coro_suspend, since any of the calls may potentially resume 1019// the coroutine and if that is the case we cannot eliminate the suspend point. 1020static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) { 1021 for (Instruction *I = From; I != To; I = I->getNextNode()) { 1022 // Assume that no intrinsic can resume the coroutine. 1023 if (isa<IntrinsicInst>(I)) 1024 continue; 1025 1026 if (CallSite(I)) 1027 return true; 1028 } 1029 return false; 1030} 1031 1032static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) { 1033 SmallPtrSet<BasicBlock *, 8> Set; 1034 SmallVector<BasicBlock *, 8> Worklist; 1035 1036 Set.insert(SaveBB); 1037 Worklist.push_back(ResDesBB); 1038 1039 // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr 1040 // returns a token consumed by suspend instruction, all blocks in between 1041 // will have to eventually hit SaveBB when going backwards from ResDesBB. 1042 while (!Worklist.empty()) { 1043 auto *BB = Worklist.pop_back_val(); 1044 Set.insert(BB); 1045 for (auto *Pred : predecessors(BB)) 1046 if (Set.count(Pred) == 0) 1047 Worklist.push_back(Pred); 1048 } 1049 1050 // SaveBB and ResDesBB are checked separately in hasCallsBetween. 1051 Set.erase(SaveBB); 1052 Set.erase(ResDesBB); 1053 1054 for (auto *BB : Set) 1055 if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr)) 1056 return true; 1057 1058 return false; 1059} 1060 1061static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) { 1062 auto *SaveBB = Save->getParent(); 1063 auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent(); 1064 1065 if (SaveBB == ResumeOrDestroyBB) 1066 return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy); 1067 1068 // Any calls from Save to the end of the block? 1069 if (hasCallsInBlockBetween(Save->getNextNode(), nullptr)) 1070 return true; 1071 1072 // Any calls from begging of the block up to ResumeOrDestroy? 1073 if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(), 1074 ResumeOrDestroy)) 1075 return true; 1076 1077 // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB? 1078 if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB)) 1079 return true; 1080 1081 return false; 1082} 1083 1084// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the 1085// suspend point and replace it with nornal control flow. 1086static bool simplifySuspendPoint(CoroSuspendInst *Suspend, 1087 CoroBeginInst *CoroBegin) { 1088 Instruction *Prev = Suspend->getPrevNode(); 1089 if (!Prev) { 1090 auto *Pred = Suspend->getParent()->getSinglePredecessor(); 1091 if (!Pred) 1092 return false; 1093 Prev = Pred->getTerminator(); 1094 } 1095 1096 CallSite CS{Prev}; 1097 if (!CS) 1098 return false; 1099 1100 auto *CallInstr = CS.getInstruction(); 1101 1102 auto *Callee = CS.getCalledValue()->stripPointerCasts(); 1103 1104 // See if the callsite is for resumption or destruction of the coroutine. 1105 auto *SubFn = dyn_cast<CoroSubFnInst>(Callee); 1106 if (!SubFn) 1107 return false; 1108 1109 // Does not refer to the current coroutine, we cannot do anything with it. 1110 if (SubFn->getFrame() != CoroBegin) 1111 return false; 1112 1113 // See if the transformation is safe. Specifically, see if there are any 1114 // calls in between Save and CallInstr. They can potenitally resume the 1115 // coroutine rendering this optimization unsafe. 1116 auto *Save = Suspend->getCoroSave(); 1117 if (hasCallsBetween(Save, CallInstr)) 1118 return false; 1119 1120 // Replace llvm.coro.suspend with the value that results in resumption over 1121 // the resume or cleanup path. 1122 Suspend->replaceAllUsesWith(SubFn->getRawIndex()); 1123 Suspend->eraseFromParent(); 1124 Save->eraseFromParent(); 1125 1126 // No longer need a call to coro.resume or coro.destroy. 1127 if (auto *Invoke = dyn_cast<InvokeInst>(CallInstr)) { 1128 BranchInst::Create(Invoke->getNormalDest(), Invoke); 1129 } 1130 1131 // Grab the CalledValue from CS before erasing the CallInstr. 1132 auto *CalledValue = CS.getCalledValue(); 1133 CallInstr->eraseFromParent(); 1134 1135 // If no more users remove it. Usually it is a bitcast of SubFn. 1136 if (CalledValue != SubFn && CalledValue->user_empty()) 1137 if (auto *I = dyn_cast<Instruction>(CalledValue)) 1138 I->eraseFromParent(); 1139 1140 // Now we are good to remove SubFn. 1141 if (SubFn->user_empty()) 1142 SubFn->eraseFromParent(); 1143 1144 return true; 1145} 1146 1147// Remove suspend points that are simplified. 1148static void simplifySuspendPoints(coro::Shape &Shape) { 1149 // Currently, the only simplification we do is switch-lowering-specific. 1150 if (Shape.ABI != coro::ABI::Switch) 1151 return; 1152 1153 auto &S = Shape.CoroSuspends; 1154 size_t I = 0, N = S.size(); 1155 if (N == 0) 1156 return; 1157 while (true) { 1158 auto SI = cast<CoroSuspendInst>(S[I]); 1159 // Leave final.suspend to handleFinalSuspend since it is undefined behavior 1160 // to resume a coroutine suspended at the final suspend point. 1161 if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) { 1162 if (--N == I) 1163 break; 1164 std::swap(S[I], S[N]); 1165 continue; 1166 } 1167 if (++I == N) 1168 break; 1169 } 1170 S.resize(N); 1171} 1172 1173static void splitSwitchCoroutine(Function &F, coro::Shape &Shape, 1174 SmallVectorImpl<Function *> &Clones) { 1175 assert(Shape.ABI == coro::ABI::Switch); 1176 1177 createResumeEntryBlock(F, Shape); 1178 auto ResumeClone = createClone(F, ".resume", Shape, 1179 CoroCloner::Kind::SwitchResume); 1180 auto DestroyClone = createClone(F, ".destroy", Shape, 1181 CoroCloner::Kind::SwitchUnwind); 1182 auto CleanupClone = createClone(F, ".cleanup", Shape, 1183 CoroCloner::Kind::SwitchCleanup); 1184 1185 postSplitCleanup(*ResumeClone); 1186 postSplitCleanup(*DestroyClone); 1187 postSplitCleanup(*CleanupClone); 1188 1189 addMustTailToCoroResumes(*ResumeClone); 1190 1191 // Store addresses resume/destroy/cleanup functions in the coroutine frame. 1192 updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); 1193 1194 assert(Clones.empty()); 1195 Clones.push_back(ResumeClone); 1196 Clones.push_back(DestroyClone); 1197 Clones.push_back(CleanupClone); 1198 1199 // Create a constant array referring to resume/destroy/clone functions pointed 1200 // by the last argument of @llvm.coro.info, so that CoroElide pass can 1201 // determined correct function to call. 1202 setCoroInfo(F, Shape, Clones); 1203} 1204 1205static void splitRetconCoroutine(Function &F, coro::Shape &Shape, 1206 SmallVectorImpl<Function *> &Clones) { 1207 assert(Shape.ABI == coro::ABI::Retcon || 1208 Shape.ABI == coro::ABI::RetconOnce); 1209 assert(Clones.empty()); 1210 1211 // Reset various things that the optimizer might have decided it 1212 // "knows" about the coroutine function due to not seeing a return. 1213 F.removeFnAttr(Attribute::NoReturn); 1214 F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); 1215 F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull); 1216 1217 // Allocate the frame. 1218 auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId()); 1219 Value *RawFramePtr; 1220 if (Shape.RetconLowering.IsFrameInlineInStorage) { 1221 RawFramePtr = Id->getStorage(); 1222 } else { 1223 IRBuilder<> Builder(Id); 1224 1225 // Determine the size of the frame. 1226 const DataLayout &DL = F.getParent()->getDataLayout(); 1227 auto Size = DL.getTypeAllocSize(Shape.FrameTy); 1228 1229 // Allocate. We don't need to update the call graph node because we're 1230 // going to recompute it from scratch after splitting. 1231 RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr); 1232 RawFramePtr = 1233 Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType()); 1234 1235 // Stash the allocated frame pointer in the continuation storage. 1236 auto Dest = Builder.CreateBitCast(Id->getStorage(), 1237 RawFramePtr->getType()->getPointerTo()); 1238 Builder.CreateStore(RawFramePtr, Dest); 1239 } 1240 1241 // Map all uses of llvm.coro.begin to the allocated frame pointer. 1242 { 1243 // Make sure we don't invalidate Shape.FramePtr. 1244 TrackingVH<Instruction> Handle(Shape.FramePtr); 1245 Shape.CoroBegin->replaceAllUsesWith(RawFramePtr); 1246 Shape.FramePtr = Handle.getValPtr(); 1247 } 1248 1249 // Create a unique return block. 1250 BasicBlock *ReturnBB = nullptr; 1251 SmallVector<PHINode *, 4> ReturnPHIs; 1252 1253 // Create all the functions in order after the main function. 1254 auto NextF = std::next(F.getIterator()); 1255 1256 // Create a continuation function for each of the suspend points. 1257 Clones.reserve(Shape.CoroSuspends.size()); 1258 for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) { 1259 auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]); 1260 1261 // Create the clone declaration. 1262 auto Continuation = 1263 createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF); 1264 Clones.push_back(Continuation); 1265 1266 // Insert a branch to the unified return block immediately before 1267 // the suspend point. 1268 auto SuspendBB = Suspend->getParent(); 1269 auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend); 1270 auto Branch = cast<BranchInst>(SuspendBB->getTerminator()); 1271 1272 // Create the unified return block. 1273 if (!ReturnBB) { 1274 // Place it before the first suspend. 1275 ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F, 1276 NewSuspendBB); 1277 Shape.RetconLowering.ReturnBlock = ReturnBB; 1278 1279 IRBuilder<> Builder(ReturnBB); 1280 1281 // Create PHIs for all the return values. 1282 assert(ReturnPHIs.empty()); 1283 1284 // First, the continuation. 1285 ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(), 1286 Shape.CoroSuspends.size())); 1287 1288 // Next, all the directly-yielded values. 1289 for (auto ResultTy : Shape.getRetconResultTypes()) 1290 ReturnPHIs.push_back(Builder.CreatePHI(ResultTy, 1291 Shape.CoroSuspends.size())); 1292 1293 // Build the return value. 1294 auto RetTy = F.getReturnType(); 1295 1296 // Cast the continuation value if necessary. 1297 // We can't rely on the types matching up because that type would 1298 // have to be infinite. 1299 auto CastedContinuationTy = 1300 (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0)); 1301 auto *CastedContinuation = 1302 Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy); 1303 1304 Value *RetV; 1305 if (ReturnPHIs.size() == 1) { 1306 RetV = CastedContinuation; 1307 } else { 1308 RetV = UndefValue::get(RetTy); 1309 RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0); 1310 for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I) 1311 RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I); 1312 } 1313 1314 Builder.CreateRet(RetV); 1315 } 1316 1317 // Branch to the return block. 1318 Branch->setSuccessor(0, ReturnBB); 1319 ReturnPHIs[0]->addIncoming(Continuation, SuspendBB); 1320 size_t NextPHIIndex = 1; 1321 for (auto &VUse : Suspend->value_operands()) 1322 ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB); 1323 assert(NextPHIIndex == ReturnPHIs.size()); 1324 } 1325 1326 assert(Clones.size() == Shape.CoroSuspends.size()); 1327 for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) { 1328 auto Suspend = Shape.CoroSuspends[i]; 1329 auto Clone = Clones[i]; 1330 1331 CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create(); 1332 } 1333} 1334 1335namespace { 1336 class PrettyStackTraceFunction : public PrettyStackTraceEntry { 1337 Function &F; 1338 public: 1339 PrettyStackTraceFunction(Function &F) : F(F) {} 1340 void print(raw_ostream &OS) const override { 1341 OS << "While splitting coroutine "; 1342 F.printAsOperand(OS, /*print type*/ false, F.getParent()); 1343 OS << "\n"; 1344 } 1345 }; 1346} 1347 1348static void splitCoroutine(Function &F, coro::Shape &Shape, 1349 SmallVectorImpl<Function *> &Clones) { 1350 switch (Shape.ABI) { 1351 case coro::ABI::Switch: 1352 return splitSwitchCoroutine(F, Shape, Clones); 1353 case coro::ABI::Retcon: 1354 case coro::ABI::RetconOnce: 1355 return splitRetconCoroutine(F, Shape, Clones); 1356 } 1357 llvm_unreachable("bad ABI kind"); 1358} 1359 1360static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { 1361 PrettyStackTraceFunction prettyStackTrace(F); 1362 1363 // The suspend-crossing algorithm in buildCoroutineFrame get tripped 1364 // up by uses in unreachable blocks, so remove them as a first pass. 1365 removeUnreachableBlocks(F); 1366 1367 coro::Shape Shape(F); 1368 if (!Shape.CoroBegin) 1369 return; 1370 1371 simplifySuspendPoints(Shape); 1372 buildCoroutineFrame(F, Shape); 1373 replaceFrameSize(Shape); 1374 1375 SmallVector<Function*, 4> Clones; 1376 1377 // If there are no suspend points, no split required, just remove 1378 // the allocation and deallocation blocks, they are not needed. 1379 if (Shape.CoroSuspends.empty()) { 1380 handleNoSuspendCoroutine(Shape); 1381 } else { 1382 splitCoroutine(F, Shape, Clones); 1383 } 1384 1385 // Replace all the swifterror operations in the original function. 1386 // This invalidates SwiftErrorOps in the Shape. 1387 replaceSwiftErrorOps(F, Shape, nullptr); 1388 1389 removeCoroEnds(Shape, &CG); 1390 postSplitCleanup(F); 1391 1392 // Update call graph and add the functions we created to the SCC. 1393 coro::updateCallGraph(F, Clones, CG, SCC); 1394} 1395 1396// When we see the coroutine the first time, we insert an indirect call to a 1397// devirt trigger function and mark the coroutine that it is now ready for 1398// split. 1399static void prepareForSplit(Function &F, CallGraph &CG) { 1400 Module &M = *F.getParent(); 1401 LLVMContext &Context = F.getContext(); 1402#ifndef NDEBUG 1403 Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN); 1404 assert(DevirtFn && "coro.devirt.trigger function not found"); 1405#endif 1406 1407 F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); 1408 1409 // Insert an indirect call sequence that will be devirtualized by CoroElide 1410 // pass: 1411 // %0 = call i8* @llvm.coro.subfn.addr(i8* null, i8 -1) 1412 // %1 = bitcast i8* %0 to void(i8*)* 1413 // call void %1(i8* null) 1414 coro::LowererBase Lowerer(M); 1415 Instruction *InsertPt = F.getEntryBlock().getTerminator(); 1416 auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context)); 1417 auto *DevirtFnAddr = 1418 Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt); 1419 FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context), 1420 {Type::getInt8PtrTy(Context)}, false); 1421 auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt); 1422 1423 // Update CG graph with an indirect call we just added. 1424 CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode()); 1425} 1426 1427// Make sure that there is a devirtualization trigger function that the 1428// coro-split pass uses to force a restart of the CGSCC pipeline. If the devirt 1429// trigger function is not found, we will create one and add it to the current 1430// SCC. 1431static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) { 1432 Module &M = CG.getModule(); 1433 if (M.getFunction(CORO_DEVIRT_TRIGGER_FN)) 1434 return; 1435 1436 LLVMContext &C = M.getContext(); 1437 auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C), 1438 /*isVarArg=*/false); 1439 Function *DevirtFn = 1440 Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage, 1441 CORO_DEVIRT_TRIGGER_FN, &M); 1442 DevirtFn->addFnAttr(Attribute::AlwaysInline); 1443 auto *Entry = BasicBlock::Create(C, "entry", DevirtFn); 1444 ReturnInst::Create(C, Entry); 1445 1446 auto *Node = CG.getOrInsertFunction(DevirtFn); 1447 1448 SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end()); 1449 Nodes.push_back(Node); 1450 SCC.initialize(Nodes); 1451} 1452 1453/// Replace a call to llvm.coro.prepare.retcon. 1454static void replacePrepare(CallInst *Prepare, CallGraph &CG) { 1455 auto CastFn = Prepare->getArgOperand(0); // as an i8* 1456 auto Fn = CastFn->stripPointerCasts(); // as its original type 1457 1458 // Find call graph nodes for the preparation. 1459 CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr; 1460 if (auto ConcreteFn = dyn_cast<Function>(Fn)) { 1461 PrepareUserNode = CG[Prepare->getFunction()]; 1462 FnNode = CG[ConcreteFn]; 1463 } 1464 1465 // Attempt to peephole this pattern: 1466 // %0 = bitcast [[TYPE]] @some_function to i8* 1467 // %1 = call @llvm.coro.prepare.retcon(i8* %0) 1468 // %2 = bitcast %1 to [[TYPE]] 1469 // ==> 1470 // %2 = @some_function 1471 for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); 1472 UI != UE; ) { 1473 // Look for bitcasts back to the original function type. 1474 auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser()); 1475 if (!Cast || Cast->getType() != Fn->getType()) continue; 1476 1477 // Check whether the replacement will introduce new direct calls. 1478 // If so, we'll need to update the call graph. 1479 if (PrepareUserNode) { 1480 for (auto &Use : Cast->uses()) { 1481 if (auto *CB = dyn_cast<CallBase>(Use.getUser())) { 1482 if (!CB->isCallee(&Use)) 1483 continue; 1484 PrepareUserNode->removeCallEdgeFor(*CB); 1485 PrepareUserNode->addCalledFunction(CB, FnNode); 1486 } 1487 } 1488 } 1489 1490 // Replace and remove the cast. 1491 Cast->replaceAllUsesWith(Fn); 1492 Cast->eraseFromParent(); 1493 } 1494 1495 // Replace any remaining uses with the function as an i8*. 1496 // This can never directly be a callee, so we don't need to update CG. 1497 Prepare->replaceAllUsesWith(CastFn); 1498 Prepare->eraseFromParent(); 1499 1500 // Kill dead bitcasts. 1501 while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) { 1502 if (!Cast->use_empty()) break; 1503 CastFn = Cast->getOperand(0); 1504 Cast->eraseFromParent(); 1505 } 1506} 1507 1508/// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent 1509/// IPO from operating on calls to a retcon coroutine before it's been 1510/// split. This is only safe to do after we've split all retcon 1511/// coroutines in the module. We can do that this in this pass because 1512/// this pass does promise to split all retcon coroutines (as opposed to 1513/// switch coroutines, which are lowered in multiple stages). 1514static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) { 1515 bool Changed = false; 1516 for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); 1517 PI != PE; ) { 1518 // Intrinsics can only be used in calls. 1519 auto *Prepare = cast<CallInst>((PI++)->getUser()); 1520 replacePrepare(Prepare, CG); 1521 Changed = true; 1522 } 1523 1524 return Changed; 1525} 1526 1527//===----------------------------------------------------------------------===// 1528// Top Level Driver 1529//===----------------------------------------------------------------------===// 1530 1531namespace { 1532 1533struct CoroSplitLegacy : public CallGraphSCCPass { 1534 static char ID; // Pass identification, replacement for typeid 1535 1536 CoroSplitLegacy() : CallGraphSCCPass(ID) { 1537 initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry()); 1538 } 1539 1540 bool Run = false; 1541 1542 // A coroutine is identified by the presence of coro.begin intrinsic, if 1543 // we don't have any, this pass has nothing to do. 1544 bool doInitialization(CallGraph &CG) override { 1545 Run = coro::declaresIntrinsics(CG.getModule(), 1546 {"llvm.coro.begin", 1547 "llvm.coro.prepare.retcon"}); 1548 return CallGraphSCCPass::doInitialization(CG); 1549 } 1550 1551 bool runOnSCC(CallGraphSCC &SCC) override { 1552 if (!Run) 1553 return false; 1554 1555 // Check for uses of llvm.coro.prepare.retcon. 1556 auto PrepareFn = 1557 SCC.getCallGraph().getModule().getFunction("llvm.coro.prepare.retcon"); 1558 if (PrepareFn && PrepareFn->use_empty()) 1559 PrepareFn = nullptr; 1560 1561 // Find coroutines for processing. 1562 SmallVector<Function *, 4> Coroutines; 1563 for (CallGraphNode *CGN : SCC) 1564 if (auto *F = CGN->getFunction()) 1565 if (F->hasFnAttribute(CORO_PRESPLIT_ATTR)) 1566 Coroutines.push_back(F); 1567 1568 if (Coroutines.empty() && !PrepareFn) 1569 return false; 1570 1571 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 1572 1573 if (Coroutines.empty()) 1574 return replaceAllPrepares(PrepareFn, CG); 1575 1576 createDevirtTriggerFunc(CG, SCC); 1577 1578 // Split all the coroutines. 1579 for (Function *F : Coroutines) { 1580 Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR); 1581 StringRef Value = Attr.getValueAsString(); 1582 LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName() 1583 << "' state: " << Value << "\n"); 1584 if (Value == UNPREPARED_FOR_SPLIT) { 1585 prepareForSplit(*F, CG); 1586 continue; 1587 } 1588 F->removeFnAttr(CORO_PRESPLIT_ATTR); 1589 splitCoroutine(*F, CG, SCC); 1590 } 1591 1592 if (PrepareFn) 1593 replaceAllPrepares(PrepareFn, CG); 1594 1595 return true; 1596 } 1597 1598 void getAnalysisUsage(AnalysisUsage &AU) const override { 1599 CallGraphSCCPass::getAnalysisUsage(AU); 1600 } 1601 1602 StringRef getPassName() const override { return "Coroutine Splitting"; } 1603}; 1604 1605} // end anonymous namespace 1606 1607char CoroSplitLegacy::ID = 0; 1608 1609INITIALIZE_PASS_BEGIN( 1610 CoroSplitLegacy, "coro-split", 1611 "Split coroutine into a set of functions driving its state machine", false, 1612 false) 1613INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 1614INITIALIZE_PASS_END( 1615 CoroSplitLegacy, "coro-split", 1616 "Split coroutine into a set of functions driving its state machine", false, 1617 false) 1618 1619Pass *llvm::createCoroSplitLegacyPass() { return new CoroSplitLegacy(); } 1620