1//===-- Relooper.cpp - Top-level interface for WebAssembly  ----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===---------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This implements the Relooper algorithm. This implementation includes
12/// optimizations added since the original academic paper [1] was published.
13///
14/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
15/// Proceedings of the ACM international conference companion on Object
16/// oriented programming systems languages and applications companion
17/// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
18/// http://doi.acm.org/10.1145/2048147.2048224
19///
20//===-------------------------------------------------------------------===//
21
22#include "Relooper.h"
23#include "WebAssembly.h"
24
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/IR/CFG.h"
27#include "llvm/IR/Function.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/raw_ostream.h"
32
33#include <cstring>
34#include <cstdlib>
35#include <functional>
36#include <list>
37#include <stack>
38#include <string>
39
40#define DEBUG_TYPE "relooper"
41
42using namespace llvm;
43using namespace Relooper;
44
45static cl::opt<int> RelooperSplittingFactor(
46    "relooper-splitting-factor",
47    cl::desc(
48        "How much to discount code size when deciding whether to split a node"),
49    cl::init(5));
50
51static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
52    "relooper-multiple-switch-threshold",
53    cl::desc(
54        "How many entries to allow in a multiple before we use a switch"),
55    cl::init(10));
56
57static cl::opt<unsigned> RelooperNestingLimit(
58    "relooper-nesting-limit",
59    cl::desc(
60        "How much nesting is acceptable"),
61    cl::init(20));
62
63
64namespace {
65///
66/// Implements the relooper algorithm for a function's blocks.
67///
68/// Implementation details: The Relooper instance has
69/// ownership of the blocks and shapes, and frees them when done.
70///
71struct RelooperAlgorithm {
72  std::deque<Block *> Blocks;
73  std::deque<Shape *> Shapes;
74  Shape *Root;
75  bool MinSize;
76  int BlockIdCounter;
77  int ShapeIdCounter;
78
79  RelooperAlgorithm();
80  ~RelooperAlgorithm();
81
82  void AddBlock(Block *New, int Id = -1);
83
84  // Calculates the shapes
85  void Calculate(Block *Entry);
86
87  // Sets us to try to minimize size
88  void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
89};
90
91struct RelooperAnalysis final : public FunctionPass {
92  static char ID;
93  RelooperAnalysis() : FunctionPass(ID) {}
94  const char *getPassName() const override { return "relooper"; }
95  void getAnalysisUsage(AnalysisUsage &AU) const override {
96    AU.setPreservesAll();
97  }
98  bool runOnFunction(Function &F) override;
99};
100}
101
102// RelooperAnalysis
103
104char RelooperAnalysis::ID = 0;
105FunctionPass *llvm::createWebAssemblyRelooper() {
106  return new RelooperAnalysis();
107}
108
109bool RelooperAnalysis::runOnFunction(Function &F) {
110  DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n");
111  RelooperAlgorithm R;
112  // FIXME: remove duplication between relooper's and LLVM's BBs.
113  std::map<const BasicBlock *, Block *> BB2B;
114  std::map<const Block *, const BasicBlock *> B2BB;
115  for (const BasicBlock &BB : F) {
116    // FIXME: getName is wrong here, Code is meant to represent amount of code.
117    // FIXME: use BranchVarInit for switch.
118    Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr);
119    R.AddBlock(B);
120    assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice");
121    assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice");
122    BB2B[&BB] = B;
123    B2BB[B] = &BB;
124  }
125  for (Block *B : R.Blocks) {
126    const BasicBlock *BB = B2BB[B];
127    for (const BasicBlock *Successor : successors(BB))
128      // FIXME: add branch's Condition and Code below.
129      B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr);
130  }
131  R.Calculate(BB2B[&F.getEntryBlock()]);
132  return false; // Analysis passes don't modify anything.
133}
134
135// Helpers
136
137typedef MapVector<Block *, BlockSet> BlockBlockSetMap;
138typedef std::list<Block *> BlockList;
139
140template <class T, class U>
141static bool contains(const T &container, const U &contained) {
142  return container.count(contained);
143}
144
145
146// Branch
147
148Branch::Branch(const char *ConditionInit, const char *CodeInit)
149    : Ancestor(nullptr), Labeled(true) {
150  // FIXME: move from char* to LLVM data structures
151  Condition = ConditionInit ? strdup(ConditionInit) : nullptr;
152  Code = CodeInit ? strdup(CodeInit) : nullptr;
153}
154
155Branch::~Branch() {
156  // FIXME: move from char* to LLVM data structures
157  free(static_cast<void *>(const_cast<char *>(Condition)));
158  free(static_cast<void *>(const_cast<char *>(Code)));
159}
160
161// Block
162
163Block::Block(const char *CodeInit, const char *BranchVarInit)
164    : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
165  // FIXME: move from char* to LLVM data structures
166  Code = strdup(CodeInit);
167  BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
168}
169
170Block::~Block() {
171  // FIXME: move from char* to LLVM data structures
172  free(static_cast<void *>(const_cast<char *>(Code)));
173  free(static_cast<void *>(const_cast<char *>(BranchVar)));
174}
175
176void Block::AddBranchTo(Block *Target, const char *Condition,
177                        const char *Code) {
178  assert(!contains(BranchesOut, Target) &&
179         "cannot add more than one branch to the same target");
180  BranchesOut[Target] = make_unique<Branch>(Condition, Code);
181}
182
183// Relooper
184
185RelooperAlgorithm::RelooperAlgorithm()
186    : Root(nullptr), MinSize(false), BlockIdCounter(1),
187      ShapeIdCounter(0) { // block ID 0 is reserved for clearings
188}
189
190RelooperAlgorithm::~RelooperAlgorithm() {
191  for (auto Curr : Blocks)
192    delete Curr;
193  for (auto Curr : Shapes)
194    delete Curr;
195}
196
197void RelooperAlgorithm::AddBlock(Block *New, int Id) {
198  New->Id = Id == -1 ? BlockIdCounter++ : Id;
199  Blocks.push_back(New);
200}
201
202struct RelooperRecursor {
203  RelooperAlgorithm *Parent;
204  RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
205};
206
207void RelooperAlgorithm::Calculate(Block *Entry) {
208  // Scan and optimize the input
209  struct PreOptimizer : public RelooperRecursor {
210    PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
211    BlockSet Live;
212
213    void FindLive(Block *Root) {
214      BlockList ToInvestigate;
215      ToInvestigate.push_back(Root);
216      while (!ToInvestigate.empty()) {
217        Block *Curr = ToInvestigate.front();
218        ToInvestigate.pop_front();
219        if (contains(Live, Curr))
220          continue;
221        Live.insert(Curr);
222        for (const auto &iter : Curr->BranchesOut)
223          ToInvestigate.push_back(iter.first);
224      }
225    }
226
227    // If a block has multiple entries but no exits, and it is small enough, it
228    // is useful to split it. A common example is a C++ function where
229    // everything ends up at a final exit block and does some RAII cleanup.
230    // Without splitting, we will be forced to introduce labelled loops to
231    // allow reaching the final block
232    void SplitDeadEnds() {
233      unsigned TotalCodeSize = 0;
234      for (const auto &Curr : Live) {
235        TotalCodeSize += strlen(Curr->Code);
236      }
237      BlockSet Splits;
238      BlockSet Removed;
239      for (const auto &Original : Live) {
240        if (Original->BranchesIn.size() <= 1 ||
241            !Original->BranchesOut.empty())
242          continue; // only dead ends, for now
243        if (contains(Original->BranchesOut, Original))
244          continue; // cannot split a looping node
245        if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
246            TotalCodeSize / RelooperSplittingFactor)
247          continue; // if splitting increases raw code size by a significant
248                    // amount, abort
249        // Split the node (for simplicity, we replace all the blocks, even
250        // though we could have reused the original)
251        DEBUG(dbgs() << "  Splitting '" << Original->Code << "'\n");
252        for (const auto &Prior : Original->BranchesIn) {
253          Block *Split = new Block(Original->Code, Original->BranchVar);
254          Parent->AddBlock(Split, Original->Id);
255          Split->BranchesIn.insert(Prior);
256          std::unique_ptr<Branch> Details;
257          Details.swap(Prior->BranchesOut[Original]);
258          Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
259                                                          Details->Code);
260          for (const auto &iter : Original->BranchesOut) {
261            Block *Post = iter.first;
262            Branch *Details = iter.second.get();
263            Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
264                                                           Details->Code);
265            Post->BranchesIn.insert(Split);
266          }
267          Splits.insert(Split);
268          Removed.insert(Original);
269        }
270        for (const auto &iter : Original->BranchesOut) {
271          Block *Post = iter.first;
272          Post->BranchesIn.remove(Original);
273        }
274      }
275      for (const auto &iter : Splits)
276        Live.insert(iter);
277      for (const auto &iter : Removed)
278        Live.remove(iter);
279    }
280  };
281  PreOptimizer Pre(this);
282  Pre.FindLive(Entry);
283
284  // Add incoming branches from live blocks, ignoring dead code
285  for (unsigned i = 0; i < Blocks.size(); i++) {
286    Block *Curr = Blocks[i];
287    if (!contains(Pre.Live, Curr))
288      continue;
289    for (const auto &iter : Curr->BranchesOut)
290      iter.first->BranchesIn.insert(Curr);
291  }
292
293  if (!MinSize)
294    Pre.SplitDeadEnds();
295
296  // Recursively process the graph
297
298  struct Analyzer : public RelooperRecursor {
299    Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
300
301    // Add a shape to the list of shapes in this Relooper calculation
302    void Notice(Shape *New) {
303      New->Id = Parent->ShapeIdCounter++;
304      Parent->Shapes.push_back(New);
305    }
306
307    // Create a list of entries from a block. If LimitTo is provided, only
308    // results in that set will appear
309    void GetBlocksOut(Block *Source, BlockSet &Entries,
310                      BlockSet *LimitTo = nullptr) {
311      for (const auto &iter : Source->BranchesOut)
312        if (!LimitTo || contains(*LimitTo, iter.first))
313          Entries.insert(iter.first);
314    }
315
316    // Converts/processes all branchings to a specific target
317    void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
318                   BlockSet &From) {
319      DEBUG(dbgs() << "  Solipsize '" << Target->Code << "' type " << Type
320                   << "\n");
321      for (auto iter = Target->BranchesIn.begin();
322           iter != Target->BranchesIn.end();) {
323        Block *Prior = *iter;
324        if (!contains(From, Prior)) {
325          iter++;
326          continue;
327        }
328        std::unique_ptr<Branch> PriorOut;
329        PriorOut.swap(Prior->BranchesOut[Target]);
330        PriorOut->Ancestor = Ancestor;
331        PriorOut->Type = Type;
332        if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
333          Multiple->Breaks++; // We are breaking out of this Multiple, so need a
334                              // loop
335        iter++; // carefully increment iter before erasing
336        Target->BranchesIn.remove(Prior);
337        Target->ProcessedBranchesIn.insert(Prior);
338        Prior->ProcessedBranchesOut[Target].swap(PriorOut);
339      }
340    }
341
342    Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
343      DEBUG(dbgs() << "  MakeSimple inner block '" << Inner->Code << "'\n");
344      SimpleShape *Simple = new SimpleShape;
345      Notice(Simple);
346      Simple->Inner = Inner;
347      Inner->Parent = Simple;
348      if (Blocks.size() > 1) {
349        Blocks.remove(Inner);
350        GetBlocksOut(Inner, NextEntries, &Blocks);
351        BlockSet JustInner;
352        JustInner.insert(Inner);
353        for (const auto &iter : NextEntries)
354          Solipsize(iter, Branch::Direct, Simple, JustInner);
355      }
356      return Simple;
357    }
358
359    Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
360                    BlockSet &NextEntries) {
361      // Find the inner blocks in this loop. Proceed backwards from the entries
362      // until
363      // you reach a seen block, collecting as you go.
364      BlockSet InnerBlocks;
365      BlockSet Queue = Entries;
366      while (!Queue.empty()) {
367        Block *Curr = *(Queue.begin());
368        Queue.remove(*Queue.begin());
369        if (!contains(InnerBlocks, Curr)) {
370          // This element is new, mark it as inner and remove from outer
371          InnerBlocks.insert(Curr);
372          Blocks.remove(Curr);
373          // Add the elements prior to it
374          for (const auto &iter : Curr->BranchesIn)
375            Queue.insert(iter);
376        }
377      }
378      assert(!InnerBlocks.empty());
379
380      for (const auto &Curr : InnerBlocks) {
381        for (const auto &iter : Curr->BranchesOut) {
382          Block *Possible = iter.first;
383          if (!contains(InnerBlocks, Possible))
384            NextEntries.insert(Possible);
385        }
386      }
387
388      LoopShape *Loop = new LoopShape();
389      Notice(Loop);
390
391      // Solipsize the loop, replacing with break/continue and marking branches
392      // as Processed (will not affect later calculations)
393      // A. Branches to the loop entries become a continue to this shape
394      for (const auto &iter : Entries)
395        Solipsize(iter, Branch::Continue, Loop, InnerBlocks);
396      // B. Branches to outside the loop (a next entry) become breaks on this
397      // shape
398      for (const auto &iter : NextEntries)
399        Solipsize(iter, Branch::Break, Loop, InnerBlocks);
400      // Finish up
401      Shape *Inner = Process(InnerBlocks, Entries, nullptr);
402      Loop->Inner = Inner;
403      return Loop;
404    }
405
406    // For each entry, find the independent group reachable by it. The
407    // independent group is the entry itself, plus all the blocks it can
408    // reach that cannot be directly reached by another entry. Note that we
409    // ignore directly reaching the entry itself by another entry.
410    //   @param Ignore - previous blocks that are irrelevant
411    void FindIndependentGroups(BlockSet &Entries,
412                               BlockBlockSetMap &IndependentGroups,
413                               BlockSet *Ignore = nullptr) {
414      typedef std::map<Block *, Block *> BlockBlockMap;
415
416      struct HelperClass {
417        BlockBlockSetMap &IndependentGroups;
418        BlockBlockMap Ownership; // For each block, which entry it belongs to.
419                                 // We have reached it from there.
420
421        HelperClass(BlockBlockSetMap &IndependentGroupsInit)
422            : IndependentGroups(IndependentGroupsInit) {}
423        void InvalidateWithChildren(Block *New) {
424          // Being in the list means you need to be invalidated
425          BlockList ToInvalidate;
426          ToInvalidate.push_back(New);
427          while (!ToInvalidate.empty()) {
428            Block *Invalidatee = ToInvalidate.front();
429            ToInvalidate.pop_front();
430            Block *Owner = Ownership[Invalidatee];
431            // Owner may have been invalidated, do not add to
432            // IndependentGroups!
433            if (contains(IndependentGroups, Owner))
434              IndependentGroups[Owner].remove(Invalidatee);
435            if (Ownership[Invalidatee]) { // may have been seen before and
436                                          // invalidated already
437              Ownership[Invalidatee] = nullptr;
438              for (const auto &iter : Invalidatee->BranchesOut) {
439                Block *Target = iter.first;
440                BlockBlockMap::iterator Known = Ownership.find(Target);
441                if (Known != Ownership.end()) {
442                  Block *TargetOwner = Known->second;
443                  if (TargetOwner)
444                    ToInvalidate.push_back(Target);
445                }
446              }
447            }
448          }
449        }
450      };
451      HelperClass Helper(IndependentGroups);
452
453      // We flow out from each of the entries, simultaneously.
454      // When we reach a new block, we add it as belonging to the one we got to
455      // it from.
456      // If we reach a new block that is already marked as belonging to someone,
457      // it is reachable by two entries and is not valid for any of them.
458      // Remove it and all it can reach that have been visited.
459
460      // Being in the queue means we just added this item, and
461      // we need to add its children
462      BlockList Queue;
463      for (const auto &Entry : Entries) {
464        Helper.Ownership[Entry] = Entry;
465        IndependentGroups[Entry].insert(Entry);
466        Queue.push_back(Entry);
467      }
468      while (!Queue.empty()) {
469        Block *Curr = Queue.front();
470        Queue.pop_front();
471        Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
472                                               // map if we are in the queue
473        if (!Owner)
474          continue; // we have been invalidated meanwhile after being reached
475                    // from two entries
476        // Add all children
477        for (const auto &iter : Curr->BranchesOut) {
478          Block *New = iter.first;
479          BlockBlockMap::iterator Known = Helper.Ownership.find(New);
480          if (Known == Helper.Ownership.end()) {
481            // New node. Add it, and put it in the queue
482            Helper.Ownership[New] = Owner;
483            IndependentGroups[Owner].insert(New);
484            Queue.push_back(New);
485            continue;
486          }
487          Block *NewOwner = Known->second;
488          if (!NewOwner)
489            continue; // We reached an invalidated node
490          if (NewOwner != Owner)
491            // Invalidate this and all reachable that we have seen - we reached
492            // this from two locations
493            Helper.InvalidateWithChildren(New);
494          // otherwise, we have the same owner, so do nothing
495        }
496      }
497
498      // Having processed all the interesting blocks, we remain with just one
499      // potential issue:
500      // If a->b, and a was invalidated, but then b was later reached by
501      // someone else, we must invalidate b. To check for this, we go over all
502      // elements in the independent groups, if an element has a parent which
503      // does *not* have the same owner, we/ must remove it and all its
504      // children.
505
506      for (const auto &iter : Entries) {
507        BlockSet &CurrGroup = IndependentGroups[iter];
508        BlockList ToInvalidate;
509        for (const auto &iter : CurrGroup) {
510          Block *Child = iter;
511          for (const auto &iter : Child->BranchesIn) {
512            Block *Parent = iter;
513            if (Ignore && contains(*Ignore, Parent))
514              continue;
515            if (Helper.Ownership[Parent] != Helper.Ownership[Child])
516              ToInvalidate.push_back(Child);
517          }
518        }
519        while (!ToInvalidate.empty()) {
520          Block *Invalidatee = ToInvalidate.front();
521          ToInvalidate.pop_front();
522          Helper.InvalidateWithChildren(Invalidatee);
523        }
524      }
525
526      // Remove empty groups
527      for (const auto &iter : Entries)
528        if (IndependentGroups[iter].empty())
529          IndependentGroups.erase(iter);
530    }
531
532    Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
533                        BlockBlockSetMap &IndependentGroups, Shape *Prev,
534                        BlockSet &NextEntries) {
535      bool Fused = isa<SimpleShape>(Prev);
536      MultipleShape *Multiple = new MultipleShape();
537      Notice(Multiple);
538      BlockSet CurrEntries;
539      for (auto &iter : IndependentGroups) {
540        Block *CurrEntry = iter.first;
541        BlockSet &CurrBlocks = iter.second;
542        // Create inner block
543        CurrEntries.clear();
544        CurrEntries.insert(CurrEntry);
545        for (const auto &CurrInner : CurrBlocks) {
546          // Remove the block from the remaining blocks
547          Blocks.remove(CurrInner);
548          // Find new next entries and fix branches to them
549          for (auto iter = CurrInner->BranchesOut.begin();
550               iter != CurrInner->BranchesOut.end();) {
551            Block *CurrTarget = iter->first;
552            auto Next = iter;
553            Next++;
554            if (!contains(CurrBlocks, CurrTarget)) {
555              NextEntries.insert(CurrTarget);
556              Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
557            }
558            iter = Next; // increment carefully because Solipsize can remove us
559          }
560        }
561        Multiple->InnerMap[CurrEntry->Id] =
562            Process(CurrBlocks, CurrEntries, nullptr);
563        // If we are not fused, then our entries will actually be checked
564        if (!Fused)
565          CurrEntry->IsCheckedMultipleEntry = true;
566      }
567      // Add entries not handled as next entries, they are deferred
568      for (const auto &Entry : Entries)
569        if (!contains(IndependentGroups, Entry))
570          NextEntries.insert(Entry);
571      // The multiple has been created, we can decide how to implement it
572      if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
573        Multiple->UseSwitch = true;
574        Multiple->Breaks++; // switch captures breaks
575      }
576      return Multiple;
577    }
578
579    // Main function.
580    // Process a set of blocks with specified entries, returns a shape
581    // The Make* functions receive a NextEntries. If they fill it with data,
582    // those are the entries for the ->Next block on them, and the blocks
583    // are what remains in Blocks (which Make* modify). In this way
584    // we avoid recursing on Next (imagine a long chain of Simples, if we
585    // recursed we could blow the stack).
586    Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
587      BlockSet *Entries = &InitialEntries;
588      BlockSet TempEntries[2];
589      int CurrTempIndex = 0;
590      BlockSet *NextEntries;
591      Shape *Ret = nullptr;
592
593      auto Make = [&](Shape *Temp) {
594        if (Prev)
595          Prev->Next = Temp;
596        if (!Ret)
597          Ret = Temp;
598        Prev = Temp;
599        Entries = NextEntries;
600      };
601
602      while (1) {
603        CurrTempIndex = 1 - CurrTempIndex;
604        NextEntries = &TempEntries[CurrTempIndex];
605        NextEntries->clear();
606
607        if (Entries->empty())
608          return Ret;
609        if (Entries->size() == 1) {
610          Block *Curr = *(Entries->begin());
611          if (Curr->BranchesIn.empty()) {
612            // One entry, no looping ==> Simple
613            Make(MakeSimple(Blocks, Curr, *NextEntries));
614            if (NextEntries->empty())
615              return Ret;
616            continue;
617          }
618          // One entry, looping ==> Loop
619          Make(MakeLoop(Blocks, *Entries, *NextEntries));
620          if (NextEntries->empty())
621            return Ret;
622          continue;
623        }
624
625        // More than one entry, try to eliminate through a Multiple groups of
626        // independent blocks from an entry/ies. It is important to remove
627        // through multiples as opposed to looping since the former is more
628        // performant.
629        BlockBlockSetMap IndependentGroups;
630        FindIndependentGroups(*Entries, IndependentGroups);
631
632        if (!IndependentGroups.empty()) {
633          // We can handle a group in a multiple if its entry cannot be reached
634          // by another group.
635          // Note that it might be reachable by itself - a loop. But that is
636          // fine, we will create a loop inside the multiple block (which
637          // is the performant order to do it).
638          for (auto iter = IndependentGroups.begin();
639               iter != IndependentGroups.end();) {
640            Block *Entry = iter->first;
641            BlockSet &Group = iter->second;
642            auto curr = iter++; // iterate carefully, we may delete
643            for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin();
644                 iterBranch != Entry->BranchesIn.end(); iterBranch++) {
645              Block *Origin = *iterBranch;
646              if (!contains(Group, Origin)) {
647                // Reached from outside the group, so we cannot handle this
648                IndependentGroups.erase(curr);
649                break;
650              }
651            }
652          }
653
654          // As an optimization, if we have 2 independent groups, and one is a
655          // small dead end, we can handle only that dead end.
656          // The other then becomes a Next - without nesting in the code and
657          // recursion in the analysis.
658          // TODO: if the larger is the only dead end, handle that too
659          // TODO: handle >2 groups
660          // TODO: handle not just dead ends, but also that do not branch to the
661          // NextEntries. However, must be careful there since we create a
662          // Next, and that Next can prevent eliminating a break (since we no
663          // longer naturally reach the same place), which may necessitate a
664          // one-time loop, which makes the unnesting pointless.
665          if (IndependentGroups.size() == 2) {
666            // Find the smaller one
667            auto iter = IndependentGroups.begin();
668            Block *SmallEntry = iter->first;
669            auto SmallSize = iter->second.size();
670            iter++;
671            Block *LargeEntry = iter->first;
672            auto LargeSize = iter->second.size();
673            if (SmallSize != LargeSize) { // ignore the case where they are
674                                          // identical - keep things symmetrical
675                                          // there
676              if (SmallSize > LargeSize) {
677                Block *Temp = SmallEntry;
678                SmallEntry = LargeEntry;
679                LargeEntry = Temp; // Note: we did not flip the Sizes too, they
680                                   // are now invalid. TODO: use the smaller
681                                   // size as a limit?
682              }
683              // Check if dead end
684              bool DeadEnd = true;
685              BlockSet &SmallGroup = IndependentGroups[SmallEntry];
686              for (const auto &Curr : SmallGroup) {
687                for (const auto &iter : Curr->BranchesOut) {
688                  Block *Target = iter.first;
689                  if (!contains(SmallGroup, Target)) {
690                    DeadEnd = false;
691                    break;
692                  }
693                }
694                if (!DeadEnd)
695                  break;
696              }
697              if (DeadEnd)
698                IndependentGroups.erase(LargeEntry);
699            }
700          }
701
702          if (!IndependentGroups.empty())
703            // Some groups removable ==> Multiple
704            Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
705                              *NextEntries));
706            if (NextEntries->empty())
707              return Ret;
708            continue;
709        }
710        // No independent groups, must be loopable ==> Loop
711        Make(MakeLoop(Blocks, *Entries, *NextEntries));
712        if (NextEntries->empty())
713          return Ret;
714        continue;
715      }
716    }
717  };
718
719  // Main
720
721  BlockSet AllBlocks;
722  for (const auto &Curr : Pre.Live) {
723    AllBlocks.insert(Curr);
724  }
725
726  BlockSet Entries;
727  Entries.insert(Entry);
728  Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);
729  assert(Root);
730
731  ///
732  /// Relooper post-optimizer
733  ///
734  struct PostOptimizer {
735    RelooperAlgorithm *Parent;
736    std::stack<Shape *> LoopStack;
737
738    PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
739
740    void ShapeSwitch(Shape* var,
741                     std::function<void (SimpleShape*)> simple,
742                     std::function<void (MultipleShape*)> multiple,
743                     std::function<void (LoopShape*)> loop) {
744      switch (var->getKind()) {
745        case Shape::SK_Simple: {
746          simple(cast<SimpleShape>(var));
747          break;
748        }
749        case Shape::SK_Multiple: {
750          multiple(cast<MultipleShape>(var));
751          break;
752        }
753        case Shape::SK_Loop: {
754          loop(cast<LoopShape>(var));
755          break;
756        }
757      }
758    }
759
760    // Find the blocks that natural control flow can get us directly to, or
761    // through a multiple that we ignore
762    void FollowNaturalFlow(Shape *S, BlockSet &Out) {
763      ShapeSwitch(S, [&](SimpleShape* Simple) {
764        Out.insert(Simple->Inner);
765      }, [&](MultipleShape* Multiple) {
766        for (const auto &iter : Multiple->InnerMap) {
767          FollowNaturalFlow(iter.second, Out);
768        }
769        FollowNaturalFlow(Multiple->Next, Out);
770      }, [&](LoopShape* Loop) {
771        FollowNaturalFlow(Loop->Inner, Out);
772      });
773    }
774
775    void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
776      if (Root->Next) {
777        Root->Natural = Root->Next;
778        FindNaturals(Root->Next, Otherwise);
779      } else {
780        Root->Natural = Otherwise;
781      }
782
783      ShapeSwitch(Root, [](SimpleShape* Simple) {
784      }, [&](MultipleShape* Multiple) {
785        for (const auto &iter : Multiple->InnerMap) {
786          FindNaturals(iter.second, Root->Natural);
787        }
788      }, [&](LoopShape* Loop){
789        FindNaturals(Loop->Inner, Loop->Inner);
790      });
791    }
792
793    // Remove unneeded breaks and continues.
794    // A flow operation is trivially unneeded if the shape we naturally get to
795    // by normal code execution is the same as the flow forces us to.
796    void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
797                             LoopShape *LastLoop = nullptr,
798                             unsigned Depth = 0) {
799      BlockSet NaturalBlocks;
800      FollowNaturalFlow(Natural, NaturalBlocks);
801      Shape *Next = Root;
802      while (Next) {
803        Root = Next;
804        Next = nullptr;
805        ShapeSwitch(
806            Root,
807            [&](SimpleShape* Simple) {
808              if (Simple->Inner->BranchVar)
809                LastLoop =
810                    nullptr; // a switch clears out the loop (TODO: only for
811                             // breaks, not continue)
812
813              if (Simple->Next) {
814                if (!Simple->Inner->BranchVar &&
815                    Simple->Inner->ProcessedBranchesOut.size() == 2 &&
816                    Depth < RelooperNestingLimit) {
817                  // If there is a next block, we already know at Simple
818                  // creation time to make direct branches, and we can do
819                  // nothing more in general. But, we try to optimize the
820                  // case of a break and a direct: This would normally be
821                  //   if (break?) { break; } ..
822                  // but if we make sure to nest the else, we can save the
823                  // break,
824                  //   if (!break?) { .. }
825                  // This is also better because the more canonical nested
826                  // form is easier to further optimize later. The
827                  // downside is more nesting, which adds to size in builds with
828                  // whitespace.
829                  // Note that we avoid switches, as it complicates control flow
830                  // and is not relevant for the common case we optimize here.
831                  bool Found = false;
832                  bool Abort = false;
833                  for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
834                    Block *Target = iter.first;
835                    Branch *Details = iter.second.get();
836                    if (Details->Type == Branch::Break) {
837                      Found = true;
838                      if (!contains(NaturalBlocks, Target))
839                        Abort = true;
840                    } else if (Details->Type != Branch::Direct)
841                      Abort = true;
842                  }
843                  if (Found && !Abort) {
844                    for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
845                      Branch *Details = iter.second.get();
846                      if (Details->Type == Branch::Break) {
847                        Details->Type = Branch::Direct;
848                        if (MultipleShape *Multiple =
849                                dyn_cast<MultipleShape>(Details->Ancestor))
850                          Multiple->Breaks--;
851                      } else {
852                        assert(Details->Type == Branch::Direct);
853                        Details->Type = Branch::Nested;
854                      }
855                    }
856                  }
857                  Depth++; // this optimization increases depth, for us and all
858                           // our next chain (i.e., until this call returns)
859                }
860                Next = Simple->Next;
861              } else {
862                // If there is no next then Natural is where we will
863                // go to by doing nothing, so we can potentially optimize some
864                // branches to direct.
865                for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
866                  Block *Target = iter.first;
867                  Branch *Details = iter.second.get();
868                  if (Details->Type != Branch::Direct &&
869                      contains(NaturalBlocks,
870                               Target)) { // note: cannot handle split blocks
871                    Details->Type = Branch::Direct;
872                    if (MultipleShape *Multiple =
873                            dyn_cast<MultipleShape>(Details->Ancestor))
874                      Multiple->Breaks--;
875                  } else if (Details->Type == Branch::Break && LastLoop &&
876                             LastLoop->Natural == Details->Ancestor->Natural) {
877                    // it is important to simplify breaks, as simpler breaks
878                    // enable other optimizations
879                    Details->Labeled = false;
880                    if (MultipleShape *Multiple =
881                            dyn_cast<MultipleShape>(Details->Ancestor))
882                      Multiple->Breaks--;
883                  }
884                }
885              }
886            }, [&](MultipleShape* Multiple)
887            {
888              for (const auto &iter : Multiple->InnerMap) {
889                RemoveUnneededFlows(iter.second, Multiple->Next,
890                                    Multiple->Breaks ? nullptr : LastLoop,
891                                    Depth + 1);
892              }
893              Next = Multiple->Next;
894            }, [&](LoopShape* Loop)
895            {
896              RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
897              Next = Loop->Next;
898            });
899      }
900    }
901
902    // After we know which loops exist, we can calculate which need to be
903    // labeled
904    void FindLabeledLoops(Shape *Root) {
905      Shape *Next = Root;
906      while (Next) {
907        Root = Next;
908        Next = nullptr;
909
910        ShapeSwitch(
911            Root,
912            [&](SimpleShape *Simple) {
913          MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next);
914          // If we are fusing a Multiple with a loop into this Simple, then
915          // visit it now
916          if (Fused && Fused->Breaks)
917            LoopStack.push(Fused);
918          if (Simple->Inner->BranchVar)
919            LoopStack.push(nullptr); // a switch means breaks are now useless,
920                                     // push a dummy
921          if (Fused) {
922            if (Fused->UseSwitch)
923              LoopStack.push(nullptr); // a switch means breaks are now
924                                       // useless, push a dummy
925            for (const auto &iter : Fused->InnerMap) {
926              FindLabeledLoops(iter.second);
927            }
928          }
929          for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
930            Branch *Details = iter.second.get();
931            if (Details->Type == Branch::Break ||
932                Details->Type == Branch::Continue) {
933              assert(!LoopStack.empty());
934              if (Details->Ancestor != LoopStack.top() && Details->Labeled) {
935                if (MultipleShape *Multiple =
936                        dyn_cast<MultipleShape>(Details->Ancestor)) {
937                  Multiple->Labeled = true;
938                } else {
939                  LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
940                  Loop->Labeled = true;
941                }
942              } else {
943                Details->Labeled = false;
944              }
945            }
946            if (Fused && Fused->UseSwitch)
947              LoopStack.pop();
948            if (Simple->Inner->BranchVar)
949              LoopStack.pop();
950            if (Fused && Fused->Breaks)
951              LoopStack.pop();
952            if (Fused)
953              Next = Fused->Next;
954            else
955              Next = Root->Next;
956          }
957          }
958          , [&](MultipleShape* Multiple) {
959            if (Multiple->Breaks)
960              LoopStack.push(Multiple);
961            for (const auto &iter : Multiple->InnerMap)
962              FindLabeledLoops(iter.second);
963            if (Multiple->Breaks)
964              LoopStack.pop();
965            Next = Root->Next;
966          }
967          , [&](LoopShape* Loop) {
968            LoopStack.push(Loop);
969            FindLabeledLoops(Loop->Inner);
970            LoopStack.pop();
971            Next = Root->Next;
972          });
973      }
974    }
975
976    void Process(Shape * Root) {
977      FindNaturals(Root);
978      RemoveUnneededFlows(Root);
979      FindLabeledLoops(Root);
980    }
981  };
982
983  PostOptimizer(this).Process(Root);
984}
985