WebAssemblyCFGStackify.cpp revision 294024
1//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a CFG stacking pass.
12///
13/// This pass reorders the blocks in a function to put them into a reverse
14/// post-order [0], with special care to keep the order as similar as possible
15/// to the original order, and to keep loops contiguous even in the case of
16/// split backedges.
17///
18/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since
19/// scope boundaries serve as the labels for WebAssembly's control transfers.
20///
21/// This is sufficient to convert arbitrary CFGs into a form that works on
22/// WebAssembly, provided that all loops are single-entry.
23///
24/// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings
25///
26//===----------------------------------------------------------------------===//
27
28#include "WebAssembly.h"
29#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
30#include "WebAssemblySubtarget.h"
31#include "llvm/ADT/SCCIterator.h"
32#include "llvm/ADT/SetVector.h"
33#include "llvm/CodeGen/MachineDominators.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineLoopInfo.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/Passes.h"
39#include "llvm/Support/Debug.h"
40#include "llvm/Support/raw_ostream.h"
41using namespace llvm;
42
43#define DEBUG_TYPE "wasm-cfg-stackify"
44
45namespace {
46class WebAssemblyCFGStackify final : public MachineFunctionPass {
47  const char *getPassName() const override {
48    return "WebAssembly CFG Stackify";
49  }
50
51  void getAnalysisUsage(AnalysisUsage &AU) const override {
52    AU.setPreservesCFG();
53    AU.addRequired<MachineDominatorTree>();
54    AU.addPreserved<MachineDominatorTree>();
55    AU.addRequired<MachineLoopInfo>();
56    AU.addPreserved<MachineLoopInfo>();
57    MachineFunctionPass::getAnalysisUsage(AU);
58  }
59
60  bool runOnMachineFunction(MachineFunction &MF) override;
61
62public:
63  static char ID; // Pass identification, replacement for typeid
64  WebAssemblyCFGStackify() : MachineFunctionPass(ID) {}
65};
66} // end anonymous namespace
67
68char WebAssemblyCFGStackify::ID = 0;
69FunctionPass *llvm::createWebAssemblyCFGStackify() {
70  return new WebAssemblyCFGStackify();
71}
72
73static void EliminateMultipleEntryLoops(MachineFunction &MF,
74                                        const MachineLoopInfo &MLI) {
75  SmallPtrSet<MachineBasicBlock *, 8> InSet;
76  for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF);
77       I != E; ++I) {
78    const std::vector<MachineBasicBlock *> &CurrentSCC = *I;
79
80    // Skip trivial SCCs.
81    if (CurrentSCC.size() == 1)
82      continue;
83
84    InSet.insert(CurrentSCC.begin(), CurrentSCC.end());
85    MachineBasicBlock *Header = nullptr;
86    for (MachineBasicBlock *MBB : CurrentSCC) {
87      for (MachineBasicBlock *Pred : MBB->predecessors()) {
88        if (InSet.count(Pred))
89          continue;
90        if (!Header) {
91          Header = MBB;
92          break;
93        }
94        // TODO: Implement multiple-entry loops.
95        report_fatal_error("multiple-entry loops are not supported yet");
96      }
97    }
98    assert(MLI.isLoopHeader(Header));
99
100    InSet.clear();
101  }
102}
103
104namespace {
105/// Post-order traversal stack entry.
106struct POStackEntry {
107  MachineBasicBlock *MBB;
108  SmallVector<MachineBasicBlock *, 0> Succs;
109
110  POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF,
111               const MachineLoopInfo &MLI);
112};
113} // end anonymous namespace
114
115static bool LoopContains(const MachineLoop *Loop,
116                         const MachineBasicBlock *MBB) {
117  return Loop ? Loop->contains(MBB) : true;
118}
119
120POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF,
121                           const MachineLoopInfo &MLI)
122    : MBB(MBB), Succs(MBB->successors()) {
123  // RPO is not a unique form, since at every basic block with multiple
124  // successors, the DFS has to pick which order to visit the successors in.
125  // Sort them strategically (see below).
126  MachineLoop *Loop = MLI.getLoopFor(MBB);
127  MachineFunction::iterator Next = next(MachineFunction::iterator(MBB));
128  MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next;
129  std::stable_sort(
130      Succs.begin(), Succs.end(),
131      [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) {
132        if (A == B)
133          return false;
134
135        // Keep loops contiguous by preferring the block that's in the same
136        // loop.
137        bool LoopContainsA = LoopContains(Loop, A);
138        bool LoopContainsB = LoopContains(Loop, B);
139        if (LoopContainsA && !LoopContainsB)
140          return true;
141        if (!LoopContainsA && LoopContainsB)
142          return false;
143
144        // Minimize perturbation by preferring the block which is the immediate
145        // layout successor.
146        if (A == LayoutSucc)
147          return true;
148        if (B == LayoutSucc)
149          return false;
150
151        // TODO: More sophisticated orderings may be profitable here.
152
153        return false;
154      });
155}
156
157/// Return the "bottom" block of a loop. This differs from
158/// MachineLoop::getBottomBlock in that it works even if the loop is
159/// discontiguous.
160static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) {
161  MachineBasicBlock *Bottom = Loop->getHeader();
162  for (MachineBasicBlock *MBB : Loop->blocks())
163    if (MBB->getNumber() > Bottom->getNumber())
164      Bottom = MBB;
165  return Bottom;
166}
167
168/// Sort the blocks in RPO, taking special care to make sure that loops are
169/// contiguous even in the case of split backedges.
170///
171/// TODO: Determine whether RPO is actually worthwhile, or whether we should
172/// move to just a stable-topological-sort-based approach that would preserve
173/// more of the original order.
174static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) {
175  // Note that we do our own RPO rather than using
176  // "llvm/ADT/PostOrderIterator.h" because we want control over the order that
177  // successors are visited in (see above). Also, we can sort the blocks in the
178  // MachineFunction as we go.
179  SmallPtrSet<MachineBasicBlock *, 16> Visited;
180  SmallVector<POStackEntry, 16> Stack;
181
182  MachineBasicBlock *EntryBlock = &*MF.begin();
183  Visited.insert(EntryBlock);
184  Stack.push_back(POStackEntry(EntryBlock, MF, MLI));
185
186  for (;;) {
187    POStackEntry &Entry = Stack.back();
188    SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs;
189    if (!Succs.empty()) {
190      MachineBasicBlock *Succ = Succs.pop_back_val();
191      if (Visited.insert(Succ).second)
192        Stack.push_back(POStackEntry(Succ, MF, MLI));
193      continue;
194    }
195
196    // Put the block in its position in the MachineFunction.
197    MachineBasicBlock &MBB = *Entry.MBB;
198    MBB.moveBefore(&*MF.begin());
199
200    // Branch instructions may utilize a fallthrough, so update them if a
201    // fallthrough has been added or removed.
202    if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() &&
203        !MBB.back().isBarrier())
204      report_fatal_error(
205          "Non-branch terminator with fallthrough cannot yet be rewritten");
206    if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch())
207      MBB.updateTerminator();
208
209    Stack.pop_back();
210    if (Stack.empty())
211      break;
212  }
213
214  // Now that we've sorted the blocks in RPO, renumber them.
215  MF.RenumberBlocks();
216
217#ifndef NDEBUG
218  SmallSetVector<MachineLoop *, 8> OnStack;
219
220  // Insert a sentinel representing the degenerate loop that starts at the
221  // function entry block and includes the entire function as a "loop" that
222  // executes once.
223  OnStack.insert(nullptr);
224
225  for (auto &MBB : MF) {
226    assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
227
228    MachineLoop *Loop = MLI.getLoopFor(&MBB);
229    if (Loop && &MBB == Loop->getHeader()) {
230      // Loop header. The loop predecessor should be sorted above, and the other
231      // predecessors should be backedges below.
232      for (auto Pred : MBB.predecessors())
233        assert(
234            (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
235            "Loop header predecessors must be loop predecessors or backedges");
236      assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
237    } else {
238      // Not a loop header. All predecessors should be sorted above.
239      for (auto Pred : MBB.predecessors())
240        assert(Pred->getNumber() < MBB.getNumber() &&
241               "Non-loop-header predecessors should be topologically sorted");
242      assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
243             "Blocks must be nested in their loops");
244    }
245    while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back()))
246      OnStack.pop_back();
247  }
248  assert(OnStack.pop_back_val() == nullptr &&
249         "The function entry block shouldn't actually be a loop header");
250  assert(OnStack.empty() &&
251         "Control flow stack pushes and pops should be balanced.");
252#endif
253}
254
255/// Test whether Pred has any terminators explicitly branching to MBB, as
256/// opposed to falling through. Note that it's possible (eg. in unoptimized
257/// code) for a branch instruction to both branch to a block and fallthrough
258/// to it, so we check the actual branch operands to see if there are any
259/// explicit mentions.
260static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
261                                 MachineBasicBlock *MBB) {
262  for (MachineInstr &MI : Pred->terminators())
263    for (MachineOperand &MO : MI.explicit_operands())
264      if (MO.isMBB() && MO.getMBB() == MBB)
265        return true;
266  return false;
267}
268
269/// Insert a BLOCK marker for branches to MBB (if needed).
270static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
271                             SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
272                             const WebAssemblyInstrInfo &TII,
273                             const MachineLoopInfo &MLI,
274                             MachineDominatorTree &MDT) {
275  // First compute the nearest common dominator of all forward non-fallthrough
276  // predecessors so that we minimize the time that the BLOCK is on the stack,
277  // which reduces overall stack height.
278  MachineBasicBlock *Header = nullptr;
279  bool IsBranchedTo = false;
280  int MBBNumber = MBB.getNumber();
281  for (MachineBasicBlock *Pred : MBB.predecessors())
282    if (Pred->getNumber() < MBBNumber) {
283      Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
284      if (ExplicitlyBranchesTo(Pred, &MBB))
285        IsBranchedTo = true;
286    }
287  if (!Header)
288    return;
289  if (!IsBranchedTo)
290    return;
291
292  assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors");
293  MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB));
294
295  // If the nearest common dominator is inside a more deeply nested context,
296  // walk out to the nearest scope which isn't more deeply nested.
297  for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) {
298    if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
299      if (ScopeTop->getNumber() > Header->getNumber()) {
300        // Skip over an intervening scope.
301        I = next(MachineFunction::iterator(ScopeTop));
302      } else {
303        // We found a scope level at an appropriate depth.
304        Header = ScopeTop;
305        break;
306      }
307    }
308  }
309
310  // If there's a loop which ends just before MBB which contains Header, we can
311  // reuse its label instead of inserting a new BLOCK.
312  for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred);
313       Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop())
314    if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header))
315      return;
316
317  // Decide where in Header to put the BLOCK.
318  MachineBasicBlock::iterator InsertPos;
319  MachineLoop *HeaderLoop = MLI.getLoopFor(Header);
320  if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) {
321    // Header is the header of a loop that does not lexically contain MBB, so
322    // the BLOCK needs to be above the LOOP.
323    InsertPos = Header->begin();
324  } else {
325    // Otherwise, insert the BLOCK as late in Header as we can, but before the
326    // beginning of the local expression tree and any nested BLOCKs.
327    InsertPos = Header->getFirstTerminator();
328    while (InsertPos != Header->begin() &&
329           prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) &&
330           prev(InsertPos)->getOpcode() != WebAssembly::LOOP &&
331           prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK &&
332           prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP)
333      --InsertPos;
334  }
335
336  // Add the BLOCK.
337  BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK));
338
339  // Mark the end of the block.
340  InsertPos = MBB.begin();
341  while (InsertPos != MBB.end() &&
342         InsertPos->getOpcode() == WebAssembly::END_LOOP)
343    ++InsertPos;
344  BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK));
345
346  // Track the farthest-spanning scope that ends at this point.
347  int Number = MBB.getNumber();
348  if (!ScopeTops[Number] ||
349      ScopeTops[Number]->getNumber() > Header->getNumber())
350    ScopeTops[Number] = Header;
351}
352
353/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header).
354static void PlaceLoopMarker(
355    MachineBasicBlock &MBB, MachineFunction &MF,
356    SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
357    DenseMap<const MachineInstr *, const MachineBasicBlock *> &LoopTops,
358    const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) {
359  MachineLoop *Loop = MLI.getLoopFor(&MBB);
360  if (!Loop || Loop->getHeader() != &MBB)
361    return;
362
363  // The operand of a LOOP is the first block after the loop. If the loop is the
364  // bottom of the function, insert a dummy block at the end.
365  MachineBasicBlock *Bottom = LoopBottom(Loop);
366  auto Iter = next(MachineFunction::iterator(Bottom));
367  if (Iter == MF.end()) {
368    MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
369    // Give it a fake predecessor so that AsmPrinter prints its label.
370    Label->addSuccessor(Label);
371    MF.push_back(Label);
372    Iter = next(MachineFunction::iterator(Bottom));
373  }
374  MachineBasicBlock *AfterLoop = &*Iter;
375
376  // Mark the beginning of the loop (after the end of any existing loop that
377  // ends here).
378  auto InsertPos = MBB.begin();
379  while (InsertPos != MBB.end() &&
380         InsertPos->getOpcode() == WebAssembly::END_LOOP)
381    ++InsertPos;
382  BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::LOOP));
383
384  // Mark the end of the loop.
385  MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), DebugLoc(),
386                              TII.get(WebAssembly::END_LOOP));
387  LoopTops[End] = &MBB;
388
389  assert((!ScopeTops[AfterLoop->getNumber()] ||
390          ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
391         "With RPO we should visit the outer-most loop for a block first.");
392  if (!ScopeTops[AfterLoop->getNumber()])
393    ScopeTops[AfterLoop->getNumber()] = &MBB;
394}
395
396static unsigned
397GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
398         const MachineBasicBlock *MBB) {
399  unsigned Depth = 0;
400  for (auto X : reverse(Stack)) {
401    if (X == MBB)
402      break;
403    ++Depth;
404  }
405  assert(Depth < Stack.size() && "Branch destination should be in scope");
406  return Depth;
407}
408
409/// Insert LOOP and BLOCK markers at appropriate places.
410static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
411                         const WebAssemblyInstrInfo &TII,
412                         MachineDominatorTree &MDT) {
413  // For each block whose label represents the end of a scope, record the block
414  // which holds the beginning of the scope. This will allow us to quickly skip
415  // over scoped regions when walking blocks. We allocate one more than the
416  // number of blocks in the function to accommodate for the possible fake block
417  // we may insert at the end.
418  SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
419
420  // For eacn LOOP_END, the corresponding LOOP.
421  DenseMap<const MachineInstr *, const MachineBasicBlock *> LoopTops;
422
423  for (auto &MBB : MF) {
424    // Place the LOOP for MBB if MBB is the header of a loop.
425    PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI);
426
427    // Place the BLOCK for MBB if MBB is branched to from above.
428    PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT);
429  }
430
431  // Now rewrite references to basic blocks to be depth immediates.
432  SmallVector<const MachineBasicBlock *, 8> Stack;
433  for (auto &MBB : reverse(MF)) {
434    for (auto &MI : reverse(MBB)) {
435      switch (MI.getOpcode()) {
436      case WebAssembly::BLOCK:
437        assert(ScopeTops[Stack.back()->getNumber()] == &MBB &&
438               "Block should be balanced");
439        Stack.pop_back();
440        break;
441      case WebAssembly::LOOP:
442        assert(Stack.back() == &MBB && "Loop top should be balanced");
443        Stack.pop_back();
444        Stack.pop_back();
445        break;
446      case WebAssembly::END_BLOCK:
447        Stack.push_back(&MBB);
448        break;
449      case WebAssembly::END_LOOP:
450        Stack.push_back(&MBB);
451        Stack.push_back(LoopTops[&MI]);
452        break;
453      default:
454        if (MI.isTerminator()) {
455          // Rewrite MBB operands to be depth immediates.
456          SmallVector<MachineOperand, 4> Ops(MI.operands());
457          while (MI.getNumOperands() > 0)
458            MI.RemoveOperand(MI.getNumOperands() - 1);
459          for (auto MO : Ops) {
460            if (MO.isMBB())
461              MO = MachineOperand::CreateImm(GetDepth(Stack, MO.getMBB()));
462            MI.addOperand(MF, MO);
463          }
464        }
465        break;
466      }
467    }
468  }
469  assert(Stack.empty() && "Control flow should be balanced");
470}
471
472bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
473  DEBUG(dbgs() << "********** CFG Stackifying **********\n"
474                  "********** Function: "
475               << MF.getName() << '\n');
476
477  const auto &MLI = getAnalysis<MachineLoopInfo>();
478  auto &MDT = getAnalysis<MachineDominatorTree>();
479  // Liveness is not tracked for EXPR_STACK physreg.
480  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
481  MF.getRegInfo().invalidateLiveness();
482
483  // RPO sorting needs all loops to be single-entry.
484  EliminateMultipleEntryLoops(MF, MLI);
485
486  // Sort the blocks in RPO, with contiguous loops.
487  SortBlocks(MF, MLI);
488
489  // Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
490  PlaceMarkers(MF, MLI, TII, MDT);
491
492  return true;
493}
494