//===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
///
/// \file
/// \brief This implements the Relooper algorithm. This implementation includes
/// optimizations added since the original academic paper [1] was published.
///
/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
/// Proceedings of the ACM international conference companion on Object
/// oriented programming systems languages and applications companion
/// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
/// http://doi.acm.org/10.1145/2048147.2048224
///
//===-------------------------------------------------------------------===//

#include "Relooper.h"
#include "WebAssembly.h"

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#include <cstring>
#include <cstdlib>
#include <functional>
#include <list>
#include <stack>
#include <string>

#define DEBUG_TYPE "relooper"

using namespace llvm;
using namespace Relooper;

// Tuning knob: discount applied to total code size when deciding whether
// duplicating (splitting) a multiple-entry dead-end block is worthwhile.
static cl::opt<int> RelooperSplittingFactor(
    "relooper-splitting-factor",
    cl::desc(
        "How much to discount code size when deciding whether to split a node"),
    cl::init(5));

// Tuning knob: once a Multiple shape has at least this many entries, it is
// emitted as a switch rather than an if-else chain.
static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
    "relooper-multiple-switch-threshold",
    cl::desc(
        "How many entries to allow in a multiple before we use a switch"),
    cl::init(10));

// Tuning knob: upper bound on the nesting depth the post-optimizer will
// introduce while restructuring.
static cl::opt<unsigned> RelooperNestingLimit(
    "relooper-nesting-limit",
    cl::desc(
        "How much nesting is acceptable"),
    cl::init(20));


namespace {
///
/// Implements the relooper algorithm for a function's blocks.
///
/// Implementation details: The Relooper instance has
/// ownership of the blocks and shapes, and frees them when done.
///
struct RelooperAlgorithm {
  std::deque<Block *> Blocks; // All blocks; owned, deleted in the destructor.
  std::deque<Shape *> Shapes; // All shapes; owned, deleted in the destructor.
  Shape *Root;                // Root shape produced by Calculate().
  bool MinSize;               // When true, favor code size over speed.
  int BlockIdCounter;         // Next block id to hand out (0 is reserved).
  int ShapeIdCounter;         // Next shape id to hand out.

  RelooperAlgorithm();
  ~RelooperAlgorithm();

  // Registers New in Blocks (taking ownership) and assigns it the given Id,
  // or the next free id when Id == -1.
  void AddBlock(Block *New, int Id = -1);

  // Calculates the shapes
  void Calculate(Block *Entry);

  // Sets us to try to minimize size
  void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
};

// FunctionPass wrapper that runs the relooper over a function's CFG.
// It is a pure analysis: it preserves all other analyses and never
// modifies the IR (runOnFunction always returns false).
struct RelooperAnalysis final : public FunctionPass {
  static char ID;
  RelooperAnalysis() : FunctionPass(ID) {}
  const char *getPassName() const override { return "relooper"; }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }
  bool runOnFunction(Function &F) override;
};
} // end anonymous namespace

// RelooperAnalysis

char RelooperAnalysis::ID = 0;

// Factory used by the WebAssembly backend to instantiate the pass.
FunctionPass *llvm::createWebAssemblyRelooper() {
  return new RelooperAnalysis();
}

// Mirrors the function's CFG into relooper Blocks (one per BasicBlock, with
// one branch per CFG successor edge), then runs the relooper from the entry
// block. The two maps keep the LLVM<->relooper correspondence in both
// directions while edges are wired up.
bool RelooperAnalysis::runOnFunction(Function &F) {
  DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n");
  RelooperAlgorithm R; // Owns the Blocks created below; frees them on exit.
  // FIXME: remove duplication between relooper's and LLVM's BBs.
  std::map<const BasicBlock *, Block *> BB2B;
  std::map<const Block *, const BasicBlock *> B2BB;
  for (const BasicBlock &BB : F) {
    // FIXME: getName is wrong here, Code is meant to represent amount of code.
    // FIXME: use BranchVarInit for switch.
    Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr);
    R.AddBlock(B); // Transfers ownership of B to R.
    assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice");
    assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice");
    BB2B[&BB] = B;
    B2BB[B] = &BB;
  }
  // Second pass: all Blocks exist now, so successor edges can be resolved.
  for (Block *B : R.Blocks) {
    const BasicBlock *BB = B2BB[B];
    for (const BasicBlock *Successor : successors(BB))
      // FIXME: add branch's Condition and Code below.
      B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr);
  }
  R.Calculate(BB2B[&F.getEntryBlock()]);
  return false; // Analysis passes don't modify anything.
}

// Helpers

typedef MapVector<Block *, BlockSet> BlockBlockSetMap;
typedef std::list<Block *> BlockList;

// Generic membership test; works for any container exposing count()
// (std::map, SetVector-style sets, etc.).
template <class T, class U>
static bool contains(const T &container, const U &contained) {
  return container.count(contained);
}


// Branch

// Takes private copies of the condition/code strings (either may be null);
// the copies are owned by the Branch and released in the destructor.
Branch::Branch(const char *ConditionInit, const char *CodeInit)
    : Ancestor(nullptr), Labeled(true) {
  // FIXME: move from char* to LLVM data structures
  Condition = ConditionInit ? strdup(ConditionInit) : nullptr;
  Code = CodeInit ? strdup(CodeInit) : nullptr;
}

Branch::~Branch() {
  // FIXME: move from char* to LLVM data structures
  // free(nullptr) is a no-op, so unset members are safe to release.
  free(static_cast<void *>(const_cast<char *>(Condition)));
  free(static_cast<void *>(const_cast<char *>(Code)));
}

// Block

// CodeInit must be non-null (it is strdup'ed unconditionally);
// BranchVarInit is optional. Both copies are owned by the Block.
Block::Block(const char *CodeInit, const char *BranchVarInit)
    : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
  // FIXME: move from char* to LLVM data structures
  Code = strdup(CodeInit);
  BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
}

Block::~Block() {
  // FIXME: move from char* to LLVM data structures
  free(static_cast<void *>(const_cast<char *>(Code)));
  free(static_cast<void *>(const_cast<char *>(BranchVar)));
}

// Records an outgoing edge to Target; at most one branch per target is
// allowed, which the assert enforces.
void Block::AddBranchTo(Block *Target, const char *Condition,
                        const char *Code) {
  assert(!contains(BranchesOut, Target) &&
         "cannot add more than one branch to the same target");
  BranchesOut[Target] = make_unique<Branch>(Condition, Code);
}

// Relooper

RelooperAlgorithm::RelooperAlgorithm()
    : Root(nullptr), MinSize(false), BlockIdCounter(1),
      ShapeIdCounter(0) { // block ID 0 is reserved for clearings
}

// The algorithm owns every registered block and shape; release them here.
RelooperAlgorithm::~RelooperAlgorithm() {
  for (auto Curr : Blocks)
    delete Curr;
  for (auto Curr : Shapes)
    delete Curr;
}

void RelooperAlgorithm::AddBlock(Block *New, int Id) {
  // Id == -1 means "allocate the next free id".
  New->Id = Id == -1 ?
BlockIdCounter++ : Id; 199292915Sdim Blocks.push_back(New); 200292915Sdim} 201292915Sdim 202292915Sdimstruct RelooperRecursor { 203292915Sdim RelooperAlgorithm *Parent; 204292915Sdim RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} 205292915Sdim}; 206292915Sdim 207292915Sdimvoid RelooperAlgorithm::Calculate(Block *Entry) { 208292915Sdim // Scan and optimize the input 209292915Sdim struct PreOptimizer : public RelooperRecursor { 210292915Sdim PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} 211292915Sdim BlockSet Live; 212292915Sdim 213292915Sdim void FindLive(Block *Root) { 214292915Sdim BlockList ToInvestigate; 215292915Sdim ToInvestigate.push_back(Root); 216292915Sdim while (!ToInvestigate.empty()) { 217292915Sdim Block *Curr = ToInvestigate.front(); 218292915Sdim ToInvestigate.pop_front(); 219292915Sdim if (contains(Live, Curr)) 220292915Sdim continue; 221292915Sdim Live.insert(Curr); 222292915Sdim for (const auto &iter : Curr->BranchesOut) 223292915Sdim ToInvestigate.push_back(iter.first); 224292915Sdim } 225292915Sdim } 226292915Sdim 227292915Sdim // If a block has multiple entries but no exits, and it is small enough, it 228292915Sdim // is useful to split it. A common example is a C++ function where 229292915Sdim // everything ends up at a final exit block and does some RAII cleanup. 
230292915Sdim // Without splitting, we will be forced to introduce labelled loops to 231292915Sdim // allow reaching the final block 232292915Sdim void SplitDeadEnds() { 233292915Sdim unsigned TotalCodeSize = 0; 234292915Sdim for (const auto &Curr : Live) { 235292915Sdim TotalCodeSize += strlen(Curr->Code); 236292915Sdim } 237292915Sdim BlockSet Splits; 238292915Sdim BlockSet Removed; 239292915Sdim for (const auto &Original : Live) { 240292915Sdim if (Original->BranchesIn.size() <= 1 || 241292915Sdim !Original->BranchesOut.empty()) 242292915Sdim continue; // only dead ends, for now 243292915Sdim if (contains(Original->BranchesOut, Original)) 244292915Sdim continue; // cannot split a looping node 245292915Sdim if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) > 246292915Sdim TotalCodeSize / RelooperSplittingFactor) 247292915Sdim continue; // if splitting increases raw code size by a significant 248292915Sdim // amount, abort 249292915Sdim // Split the node (for simplicity, we replace all the blocks, even 250292915Sdim // though we could have reused the original) 251292915Sdim DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n"); 252292915Sdim for (const auto &Prior : Original->BranchesIn) { 253292915Sdim Block *Split = new Block(Original->Code, Original->BranchVar); 254292915Sdim Parent->AddBlock(Split, Original->Id); 255292915Sdim Split->BranchesIn.insert(Prior); 256292915Sdim std::unique_ptr<Branch> Details; 257292915Sdim Details.swap(Prior->BranchesOut[Original]); 258292915Sdim Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition, 259292915Sdim Details->Code); 260292915Sdim for (const auto &iter : Original->BranchesOut) { 261292915Sdim Block *Post = iter.first; 262292915Sdim Branch *Details = iter.second.get(); 263292915Sdim Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition, 264292915Sdim Details->Code); 265292915Sdim Post->BranchesIn.insert(Split); 266292915Sdim } 267292915Sdim Splits.insert(Split); 268292915Sdim 
Removed.insert(Original); 269292915Sdim } 270292915Sdim for (const auto &iter : Original->BranchesOut) { 271292915Sdim Block *Post = iter.first; 272292915Sdim Post->BranchesIn.remove(Original); 273292915Sdim } 274292915Sdim } 275292915Sdim for (const auto &iter : Splits) 276292915Sdim Live.insert(iter); 277292915Sdim for (const auto &iter : Removed) 278292915Sdim Live.remove(iter); 279292915Sdim } 280292915Sdim }; 281292915Sdim PreOptimizer Pre(this); 282292915Sdim Pre.FindLive(Entry); 283292915Sdim 284292915Sdim // Add incoming branches from live blocks, ignoring dead code 285292915Sdim for (unsigned i = 0; i < Blocks.size(); i++) { 286292915Sdim Block *Curr = Blocks[i]; 287292915Sdim if (!contains(Pre.Live, Curr)) 288292915Sdim continue; 289292915Sdim for (const auto &iter : Curr->BranchesOut) 290292915Sdim iter.first->BranchesIn.insert(Curr); 291292915Sdim } 292292915Sdim 293292915Sdim if (!MinSize) 294292915Sdim Pre.SplitDeadEnds(); 295292915Sdim 296292915Sdim // Recursively process the graph 297292915Sdim 298292915Sdim struct Analyzer : public RelooperRecursor { 299292915Sdim Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} 300292915Sdim 301292915Sdim // Add a shape to the list of shapes in this Relooper calculation 302292915Sdim void Notice(Shape *New) { 303292915Sdim New->Id = Parent->ShapeIdCounter++; 304292915Sdim Parent->Shapes.push_back(New); 305292915Sdim } 306292915Sdim 307292915Sdim // Create a list of entries from a block. 
If LimitTo is provided, only 308292915Sdim // results in that set will appear 309292915Sdim void GetBlocksOut(Block *Source, BlockSet &Entries, 310292915Sdim BlockSet *LimitTo = nullptr) { 311292915Sdim for (const auto &iter : Source->BranchesOut) 312292915Sdim if (!LimitTo || contains(*LimitTo, iter.first)) 313292915Sdim Entries.insert(iter.first); 314292915Sdim } 315292915Sdim 316292915Sdim // Converts/processes all branchings to a specific target 317292915Sdim void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, 318292915Sdim BlockSet &From) { 319292915Sdim DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type 320292915Sdim << "\n"); 321292915Sdim for (auto iter = Target->BranchesIn.begin(); 322292915Sdim iter != Target->BranchesIn.end();) { 323292915Sdim Block *Prior = *iter; 324292915Sdim if (!contains(From, Prior)) { 325292915Sdim iter++; 326292915Sdim continue; 327292915Sdim } 328292915Sdim std::unique_ptr<Branch> PriorOut; 329292915Sdim PriorOut.swap(Prior->BranchesOut[Target]); 330292915Sdim PriorOut->Ancestor = Ancestor; 331292915Sdim PriorOut->Type = Type; 332292915Sdim if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor)) 333292915Sdim Multiple->Breaks++; // We are breaking out of this Multiple, so need a 334292915Sdim // loop 335292915Sdim iter++; // carefully increment iter before erasing 336292915Sdim Target->BranchesIn.remove(Prior); 337292915Sdim Target->ProcessedBranchesIn.insert(Prior); 338292915Sdim Prior->ProcessedBranchesOut[Target].swap(PriorOut); 339292915Sdim } 340292915Sdim } 341292915Sdim 342292915Sdim Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { 343292915Sdim DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n"); 344292915Sdim SimpleShape *Simple = new SimpleShape; 345292915Sdim Notice(Simple); 346292915Sdim Simple->Inner = Inner; 347292915Sdim Inner->Parent = Simple; 348292915Sdim if (Blocks.size() > 1) { 349292915Sdim Blocks.remove(Inner); 
350292915Sdim GetBlocksOut(Inner, NextEntries, &Blocks); 351292915Sdim BlockSet JustInner; 352292915Sdim JustInner.insert(Inner); 353292915Sdim for (const auto &iter : NextEntries) 354292915Sdim Solipsize(iter, Branch::Direct, Simple, JustInner); 355292915Sdim } 356292915Sdim return Simple; 357292915Sdim } 358292915Sdim 359292915Sdim Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries, 360292915Sdim BlockSet &NextEntries) { 361292915Sdim // Find the inner blocks in this loop. Proceed backwards from the entries 362292915Sdim // until 363292915Sdim // you reach a seen block, collecting as you go. 364292915Sdim BlockSet InnerBlocks; 365292915Sdim BlockSet Queue = Entries; 366292915Sdim while (!Queue.empty()) { 367292915Sdim Block *Curr = *(Queue.begin()); 368292915Sdim Queue.remove(*Queue.begin()); 369292915Sdim if (!contains(InnerBlocks, Curr)) { 370292915Sdim // This element is new, mark it as inner and remove from outer 371292915Sdim InnerBlocks.insert(Curr); 372292915Sdim Blocks.remove(Curr); 373292915Sdim // Add the elements prior to it 374292915Sdim for (const auto &iter : Curr->BranchesIn) 375292915Sdim Queue.insert(iter); 376292915Sdim } 377292915Sdim } 378292915Sdim assert(!InnerBlocks.empty()); 379292915Sdim 380292915Sdim for (const auto &Curr : InnerBlocks) { 381292915Sdim for (const auto &iter : Curr->BranchesOut) { 382292915Sdim Block *Possible = iter.first; 383292915Sdim if (!contains(InnerBlocks, Possible)) 384292915Sdim NextEntries.insert(Possible); 385292915Sdim } 386292915Sdim } 387292915Sdim 388292915Sdim LoopShape *Loop = new LoopShape(); 389292915Sdim Notice(Loop); 390292915Sdim 391292915Sdim // Solipsize the loop, replacing with break/continue and marking branches 392292915Sdim // as Processed (will not affect later calculations) 393292915Sdim // A. Branches to the loop entries become a continue to this shape 394292915Sdim for (const auto &iter : Entries) 395292915Sdim Solipsize(iter, Branch::Continue, Loop, InnerBlocks); 396292915Sdim // B. 
Branches to outside the loop (a next entry) become breaks on this 397292915Sdim // shape 398292915Sdim for (const auto &iter : NextEntries) 399292915Sdim Solipsize(iter, Branch::Break, Loop, InnerBlocks); 400292915Sdim // Finish up 401292915Sdim Shape *Inner = Process(InnerBlocks, Entries, nullptr); 402292915Sdim Loop->Inner = Inner; 403292915Sdim return Loop; 404292915Sdim } 405292915Sdim 406292915Sdim // For each entry, find the independent group reachable by it. The 407292915Sdim // independent group is the entry itself, plus all the blocks it can 408292915Sdim // reach that cannot be directly reached by another entry. Note that we 409292915Sdim // ignore directly reaching the entry itself by another entry. 410292915Sdim // @param Ignore - previous blocks that are irrelevant 411292915Sdim void FindIndependentGroups(BlockSet &Entries, 412292915Sdim BlockBlockSetMap &IndependentGroups, 413292915Sdim BlockSet *Ignore = nullptr) { 414292915Sdim typedef std::map<Block *, Block *> BlockBlockMap; 415292915Sdim 416292915Sdim struct HelperClass { 417292915Sdim BlockBlockSetMap &IndependentGroups; 418292915Sdim BlockBlockMap Ownership; // For each block, which entry it belongs to. 419292915Sdim // We have reached it from there. 420292915Sdim 421292915Sdim HelperClass(BlockBlockSetMap &IndependentGroupsInit) 422292915Sdim : IndependentGroups(IndependentGroupsInit) {} 423292915Sdim void InvalidateWithChildren(Block *New) { 424292915Sdim // Being in the list means you need to be invalidated 425292915Sdim BlockList ToInvalidate; 426292915Sdim ToInvalidate.push_back(New); 427292915Sdim while (!ToInvalidate.empty()) { 428292915Sdim Block *Invalidatee = ToInvalidate.front(); 429292915Sdim ToInvalidate.pop_front(); 430292915Sdim Block *Owner = Ownership[Invalidatee]; 431292915Sdim // Owner may have been invalidated, do not add to 432292915Sdim // IndependentGroups! 
433292915Sdim if (contains(IndependentGroups, Owner)) 434292915Sdim IndependentGroups[Owner].remove(Invalidatee); 435292915Sdim if (Ownership[Invalidatee]) { // may have been seen before and 436292915Sdim // invalidated already 437292915Sdim Ownership[Invalidatee] = nullptr; 438292915Sdim for (const auto &iter : Invalidatee->BranchesOut) { 439292915Sdim Block *Target = iter.first; 440292915Sdim BlockBlockMap::iterator Known = Ownership.find(Target); 441292915Sdim if (Known != Ownership.end()) { 442292915Sdim Block *TargetOwner = Known->second; 443292915Sdim if (TargetOwner) 444292915Sdim ToInvalidate.push_back(Target); 445292915Sdim } 446292915Sdim } 447292915Sdim } 448292915Sdim } 449292915Sdim } 450292915Sdim }; 451292915Sdim HelperClass Helper(IndependentGroups); 452292915Sdim 453292915Sdim // We flow out from each of the entries, simultaneously. 454292915Sdim // When we reach a new block, we add it as belonging to the one we got to 455292915Sdim // it from. 456292915Sdim // If we reach a new block that is already marked as belonging to someone, 457292915Sdim // it is reachable by two entries and is not valid for any of them. 458292915Sdim // Remove it and all it can reach that have been visited. 
459292915Sdim 460292915Sdim // Being in the queue means we just added this item, and 461292915Sdim // we need to add its children 462292915Sdim BlockList Queue; 463292915Sdim for (const auto &Entry : Entries) { 464292915Sdim Helper.Ownership[Entry] = Entry; 465292915Sdim IndependentGroups[Entry].insert(Entry); 466292915Sdim Queue.push_back(Entry); 467292915Sdim } 468292915Sdim while (!Queue.empty()) { 469292915Sdim Block *Curr = Queue.front(); 470292915Sdim Queue.pop_front(); 471292915Sdim Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership 472292915Sdim // map if we are in the queue 473292915Sdim if (!Owner) 474292915Sdim continue; // we have been invalidated meanwhile after being reached 475292915Sdim // from two entries 476292915Sdim // Add all children 477292915Sdim for (const auto &iter : Curr->BranchesOut) { 478292915Sdim Block *New = iter.first; 479292915Sdim BlockBlockMap::iterator Known = Helper.Ownership.find(New); 480292915Sdim if (Known == Helper.Ownership.end()) { 481292915Sdim // New node. Add it, and put it in the queue 482292915Sdim Helper.Ownership[New] = Owner; 483292915Sdim IndependentGroups[Owner].insert(New); 484292915Sdim Queue.push_back(New); 485292915Sdim continue; 486292915Sdim } 487292915Sdim Block *NewOwner = Known->second; 488292915Sdim if (!NewOwner) 489292915Sdim continue; // We reached an invalidated node 490292915Sdim if (NewOwner != Owner) 491292915Sdim // Invalidate this and all reachable that we have seen - we reached 492292915Sdim // this from two locations 493292915Sdim Helper.InvalidateWithChildren(New); 494292915Sdim // otherwise, we have the same owner, so do nothing 495292915Sdim } 496292915Sdim } 497292915Sdim 498292915Sdim // Having processed all the interesting blocks, we remain with just one 499292915Sdim // potential issue: 500292915Sdim // If a->b, and a was invalidated, but then b was later reached by 501292915Sdim // someone else, we must invalidate b. 
To check for this, we go over all 502292915Sdim // elements in the independent groups, if an element has a parent which 503292915Sdim // does *not* have the same owner, we/ must remove it and all its 504292915Sdim // children. 505292915Sdim 506292915Sdim for (const auto &iter : Entries) { 507292915Sdim BlockSet &CurrGroup = IndependentGroups[iter]; 508292915Sdim BlockList ToInvalidate; 509292915Sdim for (const auto &iter : CurrGroup) { 510292915Sdim Block *Child = iter; 511292915Sdim for (const auto &iter : Child->BranchesIn) { 512292915Sdim Block *Parent = iter; 513292915Sdim if (Ignore && contains(*Ignore, Parent)) 514292915Sdim continue; 515292915Sdim if (Helper.Ownership[Parent] != Helper.Ownership[Child]) 516292915Sdim ToInvalidate.push_back(Child); 517292915Sdim } 518292915Sdim } 519292915Sdim while (!ToInvalidate.empty()) { 520292915Sdim Block *Invalidatee = ToInvalidate.front(); 521292915Sdim ToInvalidate.pop_front(); 522292915Sdim Helper.InvalidateWithChildren(Invalidatee); 523292915Sdim } 524292915Sdim } 525292915Sdim 526292915Sdim // Remove empty groups 527292915Sdim for (const auto &iter : Entries) 528292915Sdim if (IndependentGroups[iter].empty()) 529292915Sdim IndependentGroups.erase(iter); 530292915Sdim } 531292915Sdim 532292915Sdim Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries, 533292915Sdim BlockBlockSetMap &IndependentGroups, Shape *Prev, 534292915Sdim BlockSet &NextEntries) { 535292915Sdim bool Fused = isa<SimpleShape>(Prev); 536292915Sdim MultipleShape *Multiple = new MultipleShape(); 537292915Sdim Notice(Multiple); 538292915Sdim BlockSet CurrEntries; 539292915Sdim for (auto &iter : IndependentGroups) { 540292915Sdim Block *CurrEntry = iter.first; 541292915Sdim BlockSet &CurrBlocks = iter.second; 542292915Sdim // Create inner block 543292915Sdim CurrEntries.clear(); 544292915Sdim CurrEntries.insert(CurrEntry); 545292915Sdim for (const auto &CurrInner : CurrBlocks) { 546292915Sdim // Remove the block from the remaining blocks 
547292915Sdim Blocks.remove(CurrInner); 548292915Sdim // Find new next entries and fix branches to them 549292915Sdim for (auto iter = CurrInner->BranchesOut.begin(); 550292915Sdim iter != CurrInner->BranchesOut.end();) { 551292915Sdim Block *CurrTarget = iter->first; 552292915Sdim auto Next = iter; 553292915Sdim Next++; 554292915Sdim if (!contains(CurrBlocks, CurrTarget)) { 555292915Sdim NextEntries.insert(CurrTarget); 556292915Sdim Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); 557292915Sdim } 558292915Sdim iter = Next; // increment carefully because Solipsize can remove us 559292915Sdim } 560292915Sdim } 561292915Sdim Multiple->InnerMap[CurrEntry->Id] = 562292915Sdim Process(CurrBlocks, CurrEntries, nullptr); 563292915Sdim // If we are not fused, then our entries will actually be checked 564292915Sdim if (!Fused) 565292915Sdim CurrEntry->IsCheckedMultipleEntry = true; 566292915Sdim } 567292915Sdim // Add entries not handled as next entries, they are deferred 568292915Sdim for (const auto &Entry : Entries) 569292915Sdim if (!contains(IndependentGroups, Entry)) 570292915Sdim NextEntries.insert(Entry); 571292915Sdim // The multiple has been created, we can decide how to implement it 572292915Sdim if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) { 573292915Sdim Multiple->UseSwitch = true; 574292915Sdim Multiple->Breaks++; // switch captures breaks 575292915Sdim } 576292915Sdim return Multiple; 577292915Sdim } 578292915Sdim 579292915Sdim // Main function. 580292915Sdim // Process a set of blocks with specified entries, returns a shape 581292915Sdim // The Make* functions receive a NextEntries. If they fill it with data, 582292915Sdim // those are the entries for the ->Next block on them, and the blocks 583292915Sdim // are what remains in Blocks (which Make* modify). In this way 584292915Sdim // we avoid recursing on Next (imagine a long chain of Simples, if we 585292915Sdim // recursed we could blow the stack). 
586292915Sdim Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) { 587292915Sdim BlockSet *Entries = &InitialEntries; 588292915Sdim BlockSet TempEntries[2]; 589292915Sdim int CurrTempIndex = 0; 590292915Sdim BlockSet *NextEntries; 591292915Sdim Shape *Ret = nullptr; 592292915Sdim 593292915Sdim auto Make = [&](Shape *Temp) { 594292915Sdim if (Prev) 595292915Sdim Prev->Next = Temp; 596292915Sdim if (!Ret) 597292915Sdim Ret = Temp; 598292915Sdim Prev = Temp; 599292915Sdim Entries = NextEntries; 600292915Sdim }; 601292915Sdim 602292915Sdim while (1) { 603292915Sdim CurrTempIndex = 1 - CurrTempIndex; 604292915Sdim NextEntries = &TempEntries[CurrTempIndex]; 605292915Sdim NextEntries->clear(); 606292915Sdim 607292915Sdim if (Entries->empty()) 608292915Sdim return Ret; 609292915Sdim if (Entries->size() == 1) { 610292915Sdim Block *Curr = *(Entries->begin()); 611292915Sdim if (Curr->BranchesIn.empty()) { 612292915Sdim // One entry, no looping ==> Simple 613292915Sdim Make(MakeSimple(Blocks, Curr, *NextEntries)); 614292915Sdim if (NextEntries->empty()) 615292915Sdim return Ret; 616292915Sdim continue; 617292915Sdim } 618292915Sdim // One entry, looping ==> Loop 619292915Sdim Make(MakeLoop(Blocks, *Entries, *NextEntries)); 620292915Sdim if (NextEntries->empty()) 621292915Sdim return Ret; 622292915Sdim continue; 623292915Sdim } 624292915Sdim 625292915Sdim // More than one entry, try to eliminate through a Multiple groups of 626292915Sdim // independent blocks from an entry/ies. It is important to remove 627292915Sdim // through multiples as opposed to looping since the former is more 628292915Sdim // performant. 629292915Sdim BlockBlockSetMap IndependentGroups; 630292915Sdim FindIndependentGroups(*Entries, IndependentGroups); 631292915Sdim 632292915Sdim if (!IndependentGroups.empty()) { 633292915Sdim // We can handle a group in a multiple if its entry cannot be reached 634292915Sdim // by another group. 
635292915Sdim // Note that it might be reachable by itself - a loop. But that is 636292915Sdim // fine, we will create a loop inside the multiple block (which 637292915Sdim // is the performant order to do it). 638292915Sdim for (auto iter = IndependentGroups.begin(); 639292915Sdim iter != IndependentGroups.end();) { 640292915Sdim Block *Entry = iter->first; 641292915Sdim BlockSet &Group = iter->second; 642292915Sdim auto curr = iter++; // iterate carefully, we may delete 643292915Sdim for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); 644292915Sdim iterBranch != Entry->BranchesIn.end(); iterBranch++) { 645292915Sdim Block *Origin = *iterBranch; 646292915Sdim if (!contains(Group, Origin)) { 647292915Sdim // Reached from outside the group, so we cannot handle this 648292915Sdim IndependentGroups.erase(curr); 649292915Sdim break; 650292915Sdim } 651292915Sdim } 652292915Sdim } 653292915Sdim 654292915Sdim // As an optimization, if we have 2 independent groups, and one is a 655292915Sdim // small dead end, we can handle only that dead end. 656292915Sdim // The other then becomes a Next - without nesting in the code and 657292915Sdim // recursion in the analysis. 658292915Sdim // TODO: if the larger is the only dead end, handle that too 659292915Sdim // TODO: handle >2 groups 660292915Sdim // TODO: handle not just dead ends, but also that do not branch to the 661292915Sdim // NextEntries. However, must be careful there since we create a 662292915Sdim // Next, and that Next can prevent eliminating a break (since we no 663292915Sdim // longer naturally reach the same place), which may necessitate a 664292915Sdim // one-time loop, which makes the unnesting pointless. 
665292915Sdim if (IndependentGroups.size() == 2) { 666292915Sdim // Find the smaller one 667292915Sdim auto iter = IndependentGroups.begin(); 668292915Sdim Block *SmallEntry = iter->first; 669292915Sdim auto SmallSize = iter->second.size(); 670292915Sdim iter++; 671292915Sdim Block *LargeEntry = iter->first; 672292915Sdim auto LargeSize = iter->second.size(); 673292915Sdim if (SmallSize != LargeSize) { // ignore the case where they are 674292915Sdim // identical - keep things symmetrical 675292915Sdim // there 676292915Sdim if (SmallSize > LargeSize) { 677292915Sdim Block *Temp = SmallEntry; 678292915Sdim SmallEntry = LargeEntry; 679292915Sdim LargeEntry = Temp; // Note: we did not flip the Sizes too, they 680292915Sdim // are now invalid. TODO: use the smaller 681292915Sdim // size as a limit? 682292915Sdim } 683292915Sdim // Check if dead end 684292915Sdim bool DeadEnd = true; 685292915Sdim BlockSet &SmallGroup = IndependentGroups[SmallEntry]; 686292915Sdim for (const auto &Curr : SmallGroup) { 687292915Sdim for (const auto &iter : Curr->BranchesOut) { 688292915Sdim Block *Target = iter.first; 689292915Sdim if (!contains(SmallGroup, Target)) { 690292915Sdim DeadEnd = false; 691292915Sdim break; 692292915Sdim } 693292915Sdim } 694292915Sdim if (!DeadEnd) 695292915Sdim break; 696292915Sdim } 697292915Sdim if (DeadEnd) 698292915Sdim IndependentGroups.erase(LargeEntry); 699292915Sdim } 700292915Sdim } 701292915Sdim 702292915Sdim if (!IndependentGroups.empty()) 703292915Sdim // Some groups removable ==> Multiple 704292915Sdim Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, 705292915Sdim *NextEntries)); 706292915Sdim if (NextEntries->empty()) 707292915Sdim return Ret; 708292915Sdim continue; 709292915Sdim } 710292915Sdim // No independent groups, must be loopable ==> Loop 711292915Sdim Make(MakeLoop(Blocks, *Entries, *NextEntries)); 712292915Sdim if (NextEntries->empty()) 713292915Sdim return Ret; 714292915Sdim continue; 715292915Sdim } 716292915Sdim } 
717292915Sdim }; 718292915Sdim 719292915Sdim // Main 720292915Sdim 721292915Sdim BlockSet AllBlocks; 722292915Sdim for (const auto &Curr : Pre.Live) { 723292915Sdim AllBlocks.insert(Curr); 724292915Sdim } 725292915Sdim 726292915Sdim BlockSet Entries; 727292915Sdim Entries.insert(Entry); 728292915Sdim Root = Analyzer(this).Process(AllBlocks, Entries, nullptr); 729292915Sdim assert(Root); 730292915Sdim 731292915Sdim /// 732292915Sdim /// Relooper post-optimizer 733292915Sdim /// 734292915Sdim struct PostOptimizer { 735292915Sdim RelooperAlgorithm *Parent; 736292915Sdim std::stack<Shape *> LoopStack; 737292915Sdim 738292915Sdim PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} 739292915Sdim 740292915Sdim void ShapeSwitch(Shape* var, 741292915Sdim std::function<void (SimpleShape*)> simple, 742292915Sdim std::function<void (MultipleShape*)> multiple, 743292915Sdim std::function<void (LoopShape*)> loop) { 744292915Sdim switch (var->getKind()) { 745292915Sdim case Shape::SK_Simple: { 746292915Sdim simple(cast<SimpleShape>(var)); 747292915Sdim break; 748292915Sdim } 749292915Sdim case Shape::SK_Multiple: { 750292915Sdim multiple(cast<MultipleShape>(var)); 751292915Sdim break; 752292915Sdim } 753292915Sdim case Shape::SK_Loop: { 754292915Sdim loop(cast<LoopShape>(var)); 755292915Sdim break; 756292915Sdim } 757292915Sdim } 758292915Sdim } 759292915Sdim 760292915Sdim // Find the blocks that natural control flow can get us directly to, or 761292915Sdim // through a multiple that we ignore 762292915Sdim void FollowNaturalFlow(Shape *S, BlockSet &Out) { 763292915Sdim ShapeSwitch(S, [&](SimpleShape* Simple) { 764292915Sdim Out.insert(Simple->Inner); 765292915Sdim }, [&](MultipleShape* Multiple) { 766292915Sdim for (const auto &iter : Multiple->InnerMap) { 767292915Sdim FollowNaturalFlow(iter.second, Out); 768292915Sdim } 769292915Sdim FollowNaturalFlow(Multiple->Next, Out); 770292915Sdim }, [&](LoopShape* Loop) { 771292915Sdim FollowNaturalFlow(Loop->Inner, 
Out); 772292915Sdim }); 773292915Sdim } 774292915Sdim 775292915Sdim void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) { 776292915Sdim if (Root->Next) { 777292915Sdim Root->Natural = Root->Next; 778292915Sdim FindNaturals(Root->Next, Otherwise); 779292915Sdim } else { 780292915Sdim Root->Natural = Otherwise; 781292915Sdim } 782292915Sdim 783292915Sdim ShapeSwitch(Root, [](SimpleShape* Simple) { 784292915Sdim }, [&](MultipleShape* Multiple) { 785292915Sdim for (const auto &iter : Multiple->InnerMap) { 786292915Sdim FindNaturals(iter.second, Root->Natural); 787292915Sdim } 788292915Sdim }, [&](LoopShape* Loop){ 789292915Sdim FindNaturals(Loop->Inner, Loop->Inner); 790292915Sdim }); 791292915Sdim } 792292915Sdim 793292915Sdim // Remove unneeded breaks and continues. 794292915Sdim // A flow operation is trivially unneeded if the shape we naturally get to 795292915Sdim // by normal code execution is the same as the flow forces us to. 796292915Sdim void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr, 797292915Sdim LoopShape *LastLoop = nullptr, 798292915Sdim unsigned Depth = 0) { 799292915Sdim BlockSet NaturalBlocks; 800292915Sdim FollowNaturalFlow(Natural, NaturalBlocks); 801292915Sdim Shape *Next = Root; 802292915Sdim while (Next) { 803292915Sdim Root = Next; 804292915Sdim Next = nullptr; 805292915Sdim ShapeSwitch( 806292915Sdim Root, 807292915Sdim [&](SimpleShape* Simple) { 808292915Sdim if (Simple->Inner->BranchVar) 809292915Sdim LastLoop = 810292915Sdim nullptr; // a switch clears out the loop (TODO: only for 811292915Sdim // breaks, not continue) 812292915Sdim 813292915Sdim if (Simple->Next) { 814292915Sdim if (!Simple->Inner->BranchVar && 815292915Sdim Simple->Inner->ProcessedBranchesOut.size() == 2 && 816292915Sdim Depth < RelooperNestingLimit) { 817292915Sdim // If there is a next block, we already know at Simple 818292915Sdim // creation time to make direct branches, and we can do 819292915Sdim // nothing more in general. 
But, we try to optimize the 820292915Sdim // case of a break and a direct: This would normally be 821292915Sdim // if (break?) { break; } .. 822292915Sdim // but if we make sure to nest the else, we can save the 823292915Sdim // break, 824292915Sdim // if (!break?) { .. } 825292915Sdim // This is also better because the more canonical nested 826292915Sdim // form is easier to further optimize later. The 827292915Sdim // downside is more nesting, which adds to size in builds with 828292915Sdim // whitespace. 829292915Sdim // Note that we avoid switches, as it complicates control flow 830292915Sdim // and is not relevant for the common case we optimize here. 831292915Sdim bool Found = false; 832292915Sdim bool Abort = false; 833292915Sdim for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { 834292915Sdim Block *Target = iter.first; 835292915Sdim Branch *Details = iter.second.get(); 836292915Sdim if (Details->Type == Branch::Break) { 837292915Sdim Found = true; 838292915Sdim if (!contains(NaturalBlocks, Target)) 839292915Sdim Abort = true; 840292915Sdim } else if (Details->Type != Branch::Direct) 841292915Sdim Abort = true; 842292915Sdim } 843292915Sdim if (Found && !Abort) { 844292915Sdim for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { 845292915Sdim Branch *Details = iter.second.get(); 846292915Sdim if (Details->Type == Branch::Break) { 847292915Sdim Details->Type = Branch::Direct; 848292915Sdim if (MultipleShape *Multiple = 849292915Sdim dyn_cast<MultipleShape>(Details->Ancestor)) 850292915Sdim Multiple->Breaks--; 851292915Sdim } else { 852292915Sdim assert(Details->Type == Branch::Direct); 853292915Sdim Details->Type = Branch::Nested; 854292915Sdim } 855292915Sdim } 856292915Sdim } 857292915Sdim Depth++; // this optimization increases depth, for us and all 858292915Sdim // our next chain (i.e., until this call returns) 859292915Sdim } 860292915Sdim Next = Simple->Next; 861292915Sdim } else { 862292915Sdim // If there is no next then Natural 
is where we will 863292915Sdim // go to by doing nothing, so we can potentially optimize some 864292915Sdim // branches to direct. 865292915Sdim for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { 866292915Sdim Block *Target = iter.first; 867292915Sdim Branch *Details = iter.second.get(); 868292915Sdim if (Details->Type != Branch::Direct && 869292915Sdim contains(NaturalBlocks, 870292915Sdim Target)) { // note: cannot handle split blocks 871292915Sdim Details->Type = Branch::Direct; 872292915Sdim if (MultipleShape *Multiple = 873292915Sdim dyn_cast<MultipleShape>(Details->Ancestor)) 874292915Sdim Multiple->Breaks--; 875292915Sdim } else if (Details->Type == Branch::Break && LastLoop && 876292915Sdim LastLoop->Natural == Details->Ancestor->Natural) { 877292915Sdim // it is important to simplify breaks, as simpler breaks 878292915Sdim // enable other optimizations 879292915Sdim Details->Labeled = false; 880292915Sdim if (MultipleShape *Multiple = 881292915Sdim dyn_cast<MultipleShape>(Details->Ancestor)) 882292915Sdim Multiple->Breaks--; 883292915Sdim } 884292915Sdim } 885292915Sdim } 886292915Sdim }, [&](MultipleShape* Multiple) 887292915Sdim { 888292915Sdim for (const auto &iter : Multiple->InnerMap) { 889292915Sdim RemoveUnneededFlows(iter.second, Multiple->Next, 890292915Sdim Multiple->Breaks ? 
nullptr : LastLoop, 891292915Sdim Depth + 1); 892292915Sdim } 893292915Sdim Next = Multiple->Next; 894292915Sdim }, [&](LoopShape* Loop) 895292915Sdim { 896292915Sdim RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1); 897292915Sdim Next = Loop->Next; 898292915Sdim }); 899292915Sdim } 900292915Sdim } 901292915Sdim 902292915Sdim // After we know which loops exist, we can calculate which need to be 903292915Sdim // labeled 904292915Sdim void FindLabeledLoops(Shape *Root) { 905292915Sdim Shape *Next = Root; 906292915Sdim while (Next) { 907292915Sdim Root = Next; 908292915Sdim Next = nullptr; 909292915Sdim 910292915Sdim ShapeSwitch( 911292915Sdim Root, 912292915Sdim [&](SimpleShape *Simple) { 913292915Sdim MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next); 914292915Sdim // If we are fusing a Multiple with a loop into this Simple, then 915292915Sdim // visit it now 916292915Sdim if (Fused && Fused->Breaks) 917292915Sdim LoopStack.push(Fused); 918292915Sdim if (Simple->Inner->BranchVar) 919292915Sdim LoopStack.push(nullptr); // a switch means breaks are now useless, 920292915Sdim // push a dummy 921292915Sdim if (Fused) { 922292915Sdim if (Fused->UseSwitch) 923292915Sdim LoopStack.push(nullptr); // a switch means breaks are now 924292915Sdim // useless, push a dummy 925292915Sdim for (const auto &iter : Fused->InnerMap) { 926292915Sdim FindLabeledLoops(iter.second); 927292915Sdim } 928292915Sdim } 929292915Sdim for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { 930292915Sdim Branch *Details = iter.second.get(); 931292915Sdim if (Details->Type == Branch::Break || 932292915Sdim Details->Type == Branch::Continue) { 933292915Sdim assert(!LoopStack.empty()); 934292915Sdim if (Details->Ancestor != LoopStack.top() && Details->Labeled) { 935292915Sdim if (MultipleShape *Multiple = 936292915Sdim dyn_cast<MultipleShape>(Details->Ancestor)) { 937292915Sdim Multiple->Labeled = true; 938292915Sdim } else { 939292915Sdim LoopShape *Loop = 
cast<LoopShape>(Details->Ancestor); 940292915Sdim Loop->Labeled = true; 941292915Sdim } 942292915Sdim } else { 943292915Sdim Details->Labeled = false; 944292915Sdim } 945292915Sdim } 946292915Sdim if (Fused && Fused->UseSwitch) 947292915Sdim LoopStack.pop(); 948292915Sdim if (Simple->Inner->BranchVar) 949292915Sdim LoopStack.pop(); 950292915Sdim if (Fused && Fused->Breaks) 951292915Sdim LoopStack.pop(); 952292915Sdim if (Fused) 953292915Sdim Next = Fused->Next; 954292915Sdim else 955292915Sdim Next = Root->Next; 956292915Sdim } 957292915Sdim } 958292915Sdim , [&](MultipleShape* Multiple) { 959292915Sdim if (Multiple->Breaks) 960292915Sdim LoopStack.push(Multiple); 961292915Sdim for (const auto &iter : Multiple->InnerMap) 962292915Sdim FindLabeledLoops(iter.second); 963292915Sdim if (Multiple->Breaks) 964292915Sdim LoopStack.pop(); 965292915Sdim Next = Root->Next; 966292915Sdim } 967292915Sdim , [&](LoopShape* Loop) { 968292915Sdim LoopStack.push(Loop); 969292915Sdim FindLabeledLoops(Loop->Inner); 970292915Sdim LoopStack.pop(); 971292915Sdim Next = Root->Next; 972292915Sdim }); 973292915Sdim } 974292915Sdim } 975292915Sdim 976292915Sdim void Process(Shape * Root) { 977292915Sdim FindNaturals(Root); 978292915Sdim RemoveUnneededFlows(Root); 979292915Sdim FindLabeledLoops(Root); 980292915Sdim } 981292915Sdim }; 982292915Sdim 983292915Sdim PostOptimizer(this).Process(Root); 984292915Sdim} 985