1//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a pass that removes irreducible control flow.
11/// Irreducible control flow means multiple-entry loops, which this pass
12/// transforms to have a single entry.
13///
14/// Note that LLVM has a generic pass that lowers irreducible control flow, but
15/// it linearizes control flow, turning diamonds into two triangles, which is
16/// both unnecessary and undesirable for WebAssembly.
17///
18/// The big picture: We recursively process each "region", defined as a group
19/// of blocks with a single entry and no branches back to that entry. A region
20/// may be the entire function body, or the inner part of a loop, i.e., the
21/// loop's body without branches back to the loop entry. In each region we fix
22/// up multi-entry loops by adding a new block that can dispatch to each of the
23/// loop entries, based on the value of a label "helper" variable, and we
24/// replace direct branches to the entries with assignments to the label
25/// variable and a branch to the dispatch block. Then the dispatch block is the
26/// single entry in the loop containing the previous multiple entries. After
27/// ensuring all the loops in a region are reducible, we recurse into them. The
28/// total time complexity of this pass is:
29///
30///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31///     NumLoops * NumLoops)
32///
33/// This pass is similar to what the Relooper [1] does. Both identify looping
34/// code that requires multiple entries, and resolve it in a similar way (in
35/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36/// also that like the Relooper, we implement a "minimal" intervention: we only
37/// use the "label" helper for the blocks we absolutely must and no others. We
38/// also prioritize code size and do not duplicate code in order to resolve
39/// irreducibility. The graph algorithms for finding loops and entries and so
40/// forth are also similar to the Relooper. The main differences between this
41/// pass and the Relooper are:
42///
43///  * We just care about irreducibility, so we just look at loops.
44///  * The Relooper emits structured control flow (with ifs etc.), while we
45///    emit a CFG.
46///
47/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48/// Proceedings of the ACM international conference companion on Object oriented
49/// programming systems languages and applications companion (SPLASH '11). ACM,
50/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51/// http://doi.acm.org/10.1145/2048147.2048224
52///
53//===----------------------------------------------------------------------===//
54
55#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
56#include "WebAssembly.h"
57#include "WebAssemblySubtarget.h"
58#include "llvm/CodeGen/MachineInstrBuilder.h"
59#include "llvm/Support/Debug.h"
60using namespace llvm;
61
62#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
63
64namespace {
65
// Ordered list of machine basic blocks; used for work lists and for the
// sorted entry list.
using BlockVector = SmallVector<MachineBasicBlock *, 4>;
// Set of machine basic blocks; used for regions, loop entries and the like.
using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
68
69// Calculates reachability in a region. Ignores branches to blocks outside of
70// the region, and ignores branches to the region entry (for the case where
71// the region is the inner part of a loop).
72class ReachabilityGraph {
73public:
74  ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
75      : Entry(Entry), Blocks(Blocks) {
76#ifndef NDEBUG
77    // The region must have a single entry.
78    for (auto *MBB : Blocks) {
79      if (MBB != Entry) {
80        for (auto *Pred : MBB->predecessors()) {
81          assert(inRegion(Pred));
82        }
83      }
84    }
85#endif
86    calculate();
87  }
88
89  bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
90    assert(inRegion(From) && inRegion(To));
91    auto I = Reachable.find(From);
92    if (I == Reachable.end())
93      return false;
94    return I->second.count(To);
95  }
96
97  // "Loopers" are blocks that are in a loop. We detect these by finding blocks
98  // that can reach themselves.
99  const BlockSet &getLoopers() const { return Loopers; }
100
101  // Get all blocks that are loop entries.
102  const BlockSet &getLoopEntries() const { return LoopEntries; }
103
104  // Get all blocks that enter a particular loop from outside.
105  const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
106    assert(inRegion(LoopEntry));
107    auto I = LoopEnterers.find(LoopEntry);
108    assert(I != LoopEnterers.end());
109    return I->second;
110  }
111
112private:
113  MachineBasicBlock *Entry;
114  const BlockSet &Blocks;
115
116  BlockSet Loopers, LoopEntries;
117  DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
118
119  bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
120
121  // Maps a block to all the other blocks it can reach.
122  DenseMap<MachineBasicBlock *, BlockSet> Reachable;
123
124  void calculate() {
125    // Reachability computation work list. Contains pairs of recent additions
126    // (A, B) where we just added a link A => B.
127    using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
128    SmallVector<BlockPair, 4> WorkList;
129
130    // Add all relevant direct branches.
131    for (auto *MBB : Blocks) {
132      for (auto *Succ : MBB->successors()) {
133        if (Succ != Entry && inRegion(Succ)) {
134          Reachable[MBB].insert(Succ);
135          WorkList.emplace_back(MBB, Succ);
136        }
137      }
138    }
139
140    while (!WorkList.empty()) {
141      MachineBasicBlock *MBB, *Succ;
142      std::tie(MBB, Succ) = WorkList.pop_back_val();
143      assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
144      if (MBB != Entry) {
145        // We recently added MBB => Succ, and that means we may have enabled
146        // Pred => MBB => Succ.
147        for (auto *Pred : MBB->predecessors()) {
148          if (Reachable[Pred].insert(Succ).second) {
149            WorkList.emplace_back(Pred, Succ);
150          }
151        }
152      }
153    }
154
155    // Blocks that can return to themselves are in a loop.
156    for (auto *MBB : Blocks) {
157      if (canReach(MBB, MBB)) {
158        Loopers.insert(MBB);
159      }
160    }
161    assert(!Loopers.count(Entry));
162
163    // Find the loop entries - loopers reachable from blocks not in that loop -
164    // and those outside blocks that reach them, the "loop enterers".
165    for (auto *Looper : Loopers) {
166      for (auto *Pred : Looper->predecessors()) {
167        // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
168        // otherwise, it is a block that enters into the loop.
169        if (!canReach(Looper, Pred)) {
170          LoopEntries.insert(Looper);
171          LoopEnterers[Looper].insert(Pred);
172        }
173      }
174    }
175  }
176};
177
178// Finds the blocks in a single-entry loop, given the loop entry and the
179// list of blocks that enter the loop.
180class LoopBlocks {
181public:
182  LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
183      : Entry(Entry), Enterers(Enterers) {
184    calculate();
185  }
186
187  BlockSet &getBlocks() { return Blocks; }
188
189private:
190  MachineBasicBlock *Entry;
191  const BlockSet &Enterers;
192
193  BlockSet Blocks;
194
195  void calculate() {
196    // Going backwards from the loop entry, if we ignore the blocks entering
197    // from outside, we will traverse all the blocks in the loop.
198    BlockVector WorkList;
199    BlockSet AddedToWorkList;
200    Blocks.insert(Entry);
201    for (auto *Pred : Entry->predecessors()) {
202      if (!Enterers.count(Pred)) {
203        WorkList.push_back(Pred);
204        AddedToWorkList.insert(Pred);
205      }
206    }
207
208    while (!WorkList.empty()) {
209      auto *MBB = WorkList.pop_back_val();
210      assert(!Enterers.count(MBB));
211      if (Blocks.insert(MBB).second) {
212        for (auto *Pred : MBB->predecessors()) {
213          if (!AddedToWorkList.count(Pred)) {
214            WorkList.push_back(Pred);
215            AddedToWorkList.insert(Pred);
216          }
217        }
218      }
219    }
220  }
221};
222
223class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
224  StringRef getPassName() const override {
225    return "WebAssembly Fix Irreducible Control Flow";
226  }
227
228  bool runOnMachineFunction(MachineFunction &MF) override;
229
230  bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
231                     MachineFunction &MF);
232
233  void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
234                           MachineFunction &MF, const ReachabilityGraph &Graph);
235
236public:
237  static char ID; // Pass identification, replacement for typeid
238  WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
239};
240
241bool WebAssemblyFixIrreducibleControlFlow::processRegion(
242    MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
243  bool Changed = false;
244
245  // Remove irreducibility before processing child loops, which may take
246  // multiple iterations.
247  while (true) {
248    ReachabilityGraph Graph(Entry, Blocks);
249
250    bool FoundIrreducibility = false;
251
252    for (auto *LoopEntry : Graph.getLoopEntries()) {
253      // Find mutual entries - all entries which can reach this one, and
254      // are reached by it (that always includes LoopEntry itself). All mutual
255      // entries must be in the same loop, so if we have more than one, then we
256      // have irreducible control flow.
257      //
258      // Note that irreducibility may involve inner loops, e.g. imagine A
259      // starts one loop, and it has B inside it which starts an inner loop.
260      // If we add a branch from all the way on the outside to B, then in a
261      // sense B is no longer an "inner" loop, semantically speaking. We will
262      // fix that irreducibility by adding a block that dispatches to either
263      // either A or B, so B will no longer be an inner loop in our output.
264      // (A fancier approach might try to keep it as such.)
265      //
266      // Note that we still need to recurse into inner loops later, to handle
267      // the case where the irreducibility is entirely nested - we would not
268      // be able to identify that at this point, since the enclosing loop is
269      // a group of blocks all of whom can reach each other. (We'll see the
270      // irreducibility after removing branches to the top of that enclosing
271      // loop.)
272      BlockSet MutualLoopEntries;
273      MutualLoopEntries.insert(LoopEntry);
274      for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
275        if (OtherLoopEntry != LoopEntry &&
276            Graph.canReach(LoopEntry, OtherLoopEntry) &&
277            Graph.canReach(OtherLoopEntry, LoopEntry)) {
278          MutualLoopEntries.insert(OtherLoopEntry);
279        }
280      }
281
282      if (MutualLoopEntries.size() > 1) {
283        makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
284        FoundIrreducibility = true;
285        Changed = true;
286        break;
287      }
288    }
289    // Only go on to actually process the inner loops when we are done
290    // removing irreducible control flow and changing the graph. Modifying
291    // the graph as we go is possible, and that might let us avoid looking at
292    // the already-fixed loops again if we are careful, but all that is
293    // complex and bug-prone. Since irreducible loops are rare, just starting
294    // another iteration is best.
295    if (FoundIrreducibility) {
296      continue;
297    }
298
299    for (auto *LoopEntry : Graph.getLoopEntries()) {
300      LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
301      // Each of these calls to processRegion may change the graph, but are
302      // guaranteed not to interfere with each other. The only changes we make
303      // to the graph are to add blocks on the way to a loop entry. As the
304      // loops are disjoint, that means we may only alter branches that exit
305      // another loop, which are ignored when recursing into that other loop
306      // anyhow.
307      if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
308        Changed = true;
309      }
310    }
311
312    return Changed;
313  }
314}
315
316// Given a set of entries to a single loop, create a single entry for that
317// loop by creating a dispatch block for them, routing control flow using
318// a helper variable. Also updates Blocks with any new blocks created, so
319// that we properly track all the blocks in the region. But this does not update
320// ReachabilityGraph; this will be updated in the caller of this function as
321// needed.
322void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
323    BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
324    const ReachabilityGraph &Graph) {
325  assert(Entries.size() >= 2);
326
327  // Sort the entries to ensure a deterministic build.
328  BlockVector SortedEntries(Entries.begin(), Entries.end());
329  llvm::sort(SortedEntries,
330             [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
331               auto ANum = A->getNumber();
332               auto BNum = B->getNumber();
333               return ANum < BNum;
334             });
335
336#ifndef NDEBUG
337  for (auto Block : SortedEntries)
338    assert(Block->getNumber() != -1);
339  if (SortedEntries.size() > 1) {
340    for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
341         ++I) {
342      auto ANum = (*I)->getNumber();
343      auto BNum = (*(std::next(I)))->getNumber();
344      assert(ANum != BNum);
345    }
346  }
347#endif
348
349  // Create a dispatch block which will contain a jump table to the entries.
350  MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
351  MF.insert(MF.end(), Dispatch);
352  Blocks.insert(Dispatch);
353
354  // Add the jump table.
355  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
356  MachineInstrBuilder MIB =
357      BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
358
359  // Add the register which will be used to tell the jump table which block to
360  // jump to.
361  MachineRegisterInfo &MRI = MF.getRegInfo();
362  Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
363  MIB.addReg(Reg);
364
365  // Compute the indices in the superheader, one for each bad block, and
366  // add them as successors.
367  DenseMap<MachineBasicBlock *, unsigned> Indices;
368  for (auto *Entry : SortedEntries) {
369    auto Pair = Indices.insert(std::make_pair(Entry, 0));
370    assert(Pair.second);
371
372    unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
373    Pair.first->second = Index;
374
375    MIB.addMBB(Entry);
376    Dispatch->addSuccessor(Entry);
377  }
378
379  // Rewrite the problematic successors for every block that wants to reach
380  // the bad blocks. For simplicity, we just introduce a new block for every
381  // edge we need to rewrite. (Fancier things are possible.)
382
383  BlockVector AllPreds;
384  for (auto *Entry : SortedEntries) {
385    for (auto *Pred : Entry->predecessors()) {
386      if (Pred != Dispatch) {
387        AllPreds.push_back(Pred);
388      }
389    }
390  }
391
392  // This set stores predecessors within this loop.
393  DenseSet<MachineBasicBlock *> InLoop;
394  for (auto *Pred : AllPreds) {
395    for (auto *Entry : Pred->successors()) {
396      if (!Entries.count(Entry))
397        continue;
398      if (Graph.canReach(Entry, Pred)) {
399        InLoop.insert(Pred);
400        break;
401      }
402    }
403  }
404
405  // Record if each entry has a layout predecessor. This map stores
406  // <<Predecessor is within the loop?, loop entry>, layout predecessor>
407  std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *>
408      EntryToLayoutPred;
409  for (auto *Pred : AllPreds)
410    for (auto *Entry : Pred->successors())
411      if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
412        EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred;
413
414  // We need to create at most two routing blocks per entry: one for
415  // predecessors outside the loop and one for predecessors inside the loop.
416  // This map stores
417  // <<Predecessor is within the loop?, loop entry>, routing block>
418  std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map;
419  for (auto *Pred : AllPreds) {
420    bool PredInLoop = InLoop.count(Pred);
421    for (auto *Entry : Pred->successors()) {
422      if (!Entries.count(Entry) ||
423          Map.count(std::make_pair(InLoop.count(Pred), Entry)))
424        continue;
425      // If there exists a layout predecessor of this entry and this predecessor
426      // is not that, we rather create a routing block after that layout
427      // predecessor to save a branch.
428      if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) &&
429          EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred)
430        continue;
431
432      // This is a successor we need to rewrite.
433      MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
434      MF.insert(Pred->isLayoutSuccessor(Entry)
435                    ? MachineFunction::iterator(Entry)
436                    : MF.end(),
437                Routing);
438      Blocks.insert(Routing);
439
440      // Set the jump table's register of the index of the block we wish to
441      // jump to, and jump to the jump table.
442      BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
443          .addImm(Indices[Entry]);
444      BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
445      Routing->addSuccessor(Dispatch);
446      Map[std::make_pair(PredInLoop, Entry)] = Routing;
447    }
448  }
449
450  for (auto *Pred : AllPreds) {
451    bool PredInLoop = InLoop.count(Pred);
452    // Remap the terminator operands and the successor list.
453    for (MachineInstr &Term : Pred->terminators())
454      for (auto &Op : Term.explicit_uses())
455        if (Op.isMBB() && Indices.count(Op.getMBB()))
456          Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]);
457
458    for (auto *Succ : Pred->successors()) {
459      if (!Entries.count(Succ))
460        continue;
461      auto *Routing = Map[std::make_pair(PredInLoop, Succ)];
462      Pred->replaceSuccessor(Succ, Routing);
463    }
464  }
465
466  // Create a fake default label, because br_table requires one.
467  MIB.addMBB(MIB.getInstr()
468                 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
469                 .getMBB());
470}
471
472} // end anonymous namespace
473
// Definition of the static pass identifier declared in the pass class.
char WebAssemblyFixIrreducibleControlFlow::ID = 0;
// Registers the pass under the DEBUG_TYPE name with a short description.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags - confirm against INITIALIZE_PASS.
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
                "Removes irreducible control flow", false, false)
477
// Factory for the pass: returns a newly allocated instance. The returned
// object is created with `new`; ownership passes to the caller.
FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
  return new WebAssemblyFixIrreducibleControlFlow();
}
481
482bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
483    MachineFunction &MF) {
484  LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
485                       "********** Function: "
486                    << MF.getName() << '\n');
487
488  // Start the recursive process on the entire function body.
489  BlockSet AllBlocks;
490  for (auto &MBB : MF) {
491    AllBlocks.insert(&MBB);
492  }
493
494  if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
495    // We rewrote part of the function; recompute relevant things.
496    MF.getRegInfo().invalidateLiveness();
497    MF.RenumberBlocks();
498    return true;
499  }
500
501  return false;
502}
503