WebAssemblyLateEHPrepare.cpp revision 344779
1//=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Does various transformations for exception handling.
12///
13//===----------------------------------------------------------------------===//
14
15#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16#include "WebAssembly.h"
17#include "WebAssemblySubtarget.h"
18#include "WebAssemblyUtilities.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/WasmEHFuncInfo.h"
21#include "llvm/MC/MCAsmInfo.h"
22using namespace llvm;
23
24#define DEBUG_TYPE "wasm-exception-prepare"
25
26namespace {
27class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
28  StringRef getPassName() const override {
29    return "WebAssembly Prepare Exception";
30  }
31
32  bool runOnMachineFunction(MachineFunction &MF) override;
33
34  bool removeUnnecessaryUnreachables(MachineFunction &MF);
35  bool replaceFuncletReturns(MachineFunction &MF);
36  bool hoistCatches(MachineFunction &MF);
37  bool addCatchAlls(MachineFunction &MF);
38  bool addRethrows(MachineFunction &MF);
39  bool ensureSingleBBTermPads(MachineFunction &MF);
40  bool mergeTerminatePads(MachineFunction &MF);
41  bool addCatchAllTerminatePads(MachineFunction &MF);
42
43public:
44  static char ID; // Pass identification, replacement for typeid
45  WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {}
46};
47} // end anonymous namespace
48
49char WebAssemblyLateEHPrepare::ID = 0;
50INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE,
51                "WebAssembly Late Exception Preparation", false, false)
52
53FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
54  return new WebAssemblyLateEHPrepare();
55}
56
57// Returns the nearest EH pad that dominates this instruction. This does not use
58// dominator analysis; it just does BFS on its predecessors until arriving at an
59// EH pad. This assumes valid EH scopes so the first EH pad it arrives in all
60// possible search paths should be the same.
61// Returns nullptr in case it does not find any EH pad in the search, or finds
62// multiple different EH pads.
63static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) {
64  MachineFunction *MF = MI->getParent()->getParent();
65  SmallVector<MachineBasicBlock *, 2> WL;
66  SmallPtrSet<MachineBasicBlock *, 2> Visited;
67  WL.push_back(MI->getParent());
68  MachineBasicBlock *EHPad = nullptr;
69  while (!WL.empty()) {
70    MachineBasicBlock *MBB = WL.pop_back_val();
71    if (Visited.count(MBB))
72      continue;
73    Visited.insert(MBB);
74    if (MBB->isEHPad()) {
75      if (EHPad && EHPad != MBB)
76        return nullptr;
77      EHPad = MBB;
78      continue;
79    }
80    if (MBB == &MF->front())
81      return nullptr;
82    WL.append(MBB->pred_begin(), MBB->pred_end());
83  }
84  return EHPad;
85}
86
87// Erase the specified BBs if the BB does not have any remaining predecessors,
88// and also all its dead children.
89template <typename Container>
90static void eraseDeadBBsAndChildren(const Container &MBBs) {
91  SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end());
92  while (!WL.empty()) {
93    MachineBasicBlock *MBB = WL.pop_back_val();
94    if (!MBB->pred_empty())
95      continue;
96    SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(),
97                                              MBB->succ_end());
98    WL.append(MBB->succ_begin(), MBB->succ_end());
99    for (auto *Succ : Succs)
100      MBB->removeSuccessor(Succ);
101    MBB->eraseFromParent();
102  }
103}
104
105bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
106  LLVM_DEBUG(dbgs() << "********** Late EH Prepare **********\n"
107                       "********** Function: "
108                    << MF.getName() << '\n');
109
110  if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() !=
111      ExceptionHandling::Wasm)
112    return false;
113
114  bool Changed = false;
115  Changed |= removeUnnecessaryUnreachables(MF);
116  Changed |= addRethrows(MF);
117  if (!MF.getFunction().hasPersonalityFn())
118    return Changed;
119  Changed |= replaceFuncletReturns(MF);
120  Changed |= hoistCatches(MF);
121  Changed |= addCatchAlls(MF);
122  Changed |= ensureSingleBBTermPads(MF);
123  Changed |= mergeTerminatePads(MF);
124  Changed |= addCatchAllTerminatePads(MF);
125  return Changed;
126}
127
128bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
129    MachineFunction &MF) {
130  bool Changed = false;
131  for (auto &MBB : MF) {
132    for (auto &MI : MBB) {
133      if (!WebAssembly::isThrow(MI))
134        continue;
135      Changed = true;
136
137      // The instruction after the throw should be an unreachable or a branch to
138      // another BB that should eventually lead to an unreachable. Delete it
139      // because throw itself is a terminator, and also delete successors if
140      // any.
141      MBB.erase(std::next(MachineBasicBlock::iterator(MI)), MBB.end());
142      SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
143                                                MBB.succ_end());
144      for (auto *Succ : Succs)
145        MBB.removeSuccessor(Succ);
146      eraseDeadBBsAndChildren(Succs);
147    }
148  }
149
150  return Changed;
151}
152
153bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
154  bool Changed = false;
155  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
156  auto *EHInfo = MF.getWasmEHFuncInfo();
157
158  for (auto &MBB : MF) {
159    auto Pos = MBB.getFirstTerminator();
160    if (Pos == MBB.end())
161      continue;
162    MachineInstr *TI = &*Pos;
163
164    switch (TI->getOpcode()) {
165    case WebAssembly::CATCHRET: {
166      // Replace a catchret with a branch
167      MachineBasicBlock *TBB = TI->getOperand(0).getMBB();
168      if (!MBB.isLayoutSuccessor(TBB))
169        BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR))
170            .addMBB(TBB);
171      TI->eraseFromParent();
172      Changed = true;
173      break;
174    }
175    case WebAssembly::CLEANUPRET: {
176      // Replace a cleanupret with a rethrow
177      if (EHInfo->hasThrowUnwindDest(&MBB))
178        BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
179            .addMBB(EHInfo->getThrowUnwindDest(&MBB));
180      else
181        BuildMI(MBB, TI, TI->getDebugLoc(),
182                TII.get(WebAssembly::RETHROW_TO_CALLER));
183
184      TI->eraseFromParent();
185      Changed = true;
186      break;
187    }
188    }
189  }
190  return Changed;
191}
192
193// Hoist catch instructions to the beginning of their matching EH pad BBs in
194// case,
195// (1) catch instruction is not the first instruction in EH pad.
196// ehpad:
197//   some_other_instruction
198//   ...
199//   %exn = catch 0
200// (2) catch instruction is in a non-EH pad BB. For example,
201// ehpad:
202//   br bb0
203// bb0:
204//   %exn = catch 0
205bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
206  bool Changed = false;
207  SmallVector<MachineInstr *, 16> Catches;
208  for (auto &MBB : MF)
209    for (auto &MI : MBB)
210      if (WebAssembly::isCatch(MI))
211        Catches.push_back(&MI);
212
213  for (auto *Catch : Catches) {
214    MachineBasicBlock *EHPad = getMatchingEHPad(Catch);
215    assert(EHPad && "No matching EH pad for catch");
216    if (EHPad->begin() == Catch)
217      continue;
218    Changed = true;
219    EHPad->insert(EHPad->begin(), Catch->removeFromParent());
220  }
221  return Changed;
222}
223
224// Add catch_all to beginning of cleanup pads.
225bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) {
226  bool Changed = false;
227  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
228
229  for (auto &MBB : MF) {
230    if (!MBB.isEHPad())
231      continue;
232    // This runs after hoistCatches(), so we assume that if there is a catch,
233    // that should be the first instruction in an EH pad.
234    if (!WebAssembly::isCatch(*MBB.begin())) {
235      Changed = true;
236      BuildMI(MBB, MBB.begin(), MBB.begin()->getDebugLoc(),
237              TII.get(WebAssembly::CATCH_ALL));
238    }
239  }
240  return Changed;
241}
242
243// Add a 'rethrow' instruction after __cxa_rethrow() call
244bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
245  bool Changed = false;
246  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
247  auto *EHInfo = MF.getWasmEHFuncInfo();
248
249  for (auto &MBB : MF)
250    for (auto &MI : MBB) {
251      // Check if it is a call to __cxa_rethrow()
252      if (!MI.isCall())
253        continue;
254      MachineOperand &CalleeOp = MI.getOperand(0);
255      if (!CalleeOp.isGlobal() ||
256          CalleeOp.getGlobal()->getName() != WebAssembly::CxaRethrowFn)
257        continue;
258
259      // Now we have __cxa_rethrow() call
260      Changed = true;
261      auto InsertPt = std::next(MachineBasicBlock::iterator(MI));
262      while (InsertPt != MBB.end() && InsertPt->isLabel()) // Skip EH_LABELs
263        ++InsertPt;
264      MachineInstr *Rethrow = nullptr;
265      if (EHInfo->hasThrowUnwindDest(&MBB))
266        Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
267                          TII.get(WebAssembly::RETHROW))
268                      .addMBB(EHInfo->getThrowUnwindDest(&MBB));
269      else
270        Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
271                          TII.get(WebAssembly::RETHROW_TO_CALLER));
272
273      // Because __cxa_rethrow does not return, the instruction after the
274      // rethrow should be an unreachable or a branch to another BB that should
275      // eventually lead to an unreachable. Delete it because rethrow itself is
276      // a terminator, and also delete non-EH pad successors if any.
277      MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end());
278      SmallVector<MachineBasicBlock *, 8> NonPadSuccessors;
279      for (auto *Succ : MBB.successors())
280        if (!Succ->isEHPad())
281          NonPadSuccessors.push_back(Succ);
282      for (auto *Succ : NonPadSuccessors)
283        MBB.removeSuccessor(Succ);
284      eraseDeadBBsAndChildren(NonPadSuccessors);
285    }
286  return Changed;
287}
288
289// Terminate pads are an single-BB EH pad in the form of
290// termpad:
291//   %exn = catch 0
292//   call @__clang_call_terminate(%exn)
293//   unreachable
294// (There can be local.set and local.gets before the call if we didn't run
295// RegStackify)
296// But code transformations can change or add more control flow, so the call to
297// __clang_call_terminate() function may not be in the original EH pad anymore.
298// This ensures every terminate pad is a single BB in the form illustrated
299// above.
300bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
301  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
302
303  // Find calls to __clang_call_terminate()
304  SmallVector<MachineInstr *, 8> ClangCallTerminateCalls;
305  for (auto &MBB : MF)
306    for (auto &MI : MBB)
307      if (MI.isCall()) {
308        const MachineOperand &CalleeOp = MI.getOperand(0);
309        if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() ==
310                                       WebAssembly::ClangCallTerminateFn)
311          ClangCallTerminateCalls.push_back(&MI);
312      }
313
314  bool Changed = false;
315  for (auto *Call : ClangCallTerminateCalls) {
316    MachineBasicBlock *EHPad = getMatchingEHPad(Call);
317    assert(EHPad && "No matching EH pad for catch");
318
319    // If it is already the form we want, skip it
320    if (Call->getParent() == EHPad &&
321        Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE)
322      continue;
323
324    // In case the __clang_call_terminate() call is not in its matching EH pad,
325    // move the call to the end of EH pad and add an unreachable instruction
326    // after that. Delete all successors and their children if any, because here
327    // the program terminates.
328    Changed = true;
329    MachineInstr *Catch = &*EHPad->begin();
330    // This runs after hoistCatches(), so catch instruction should be at the top
331    assert(WebAssembly::isCatch(*Catch));
332    // Takes the result register of the catch instruction as argument. There may
333    // have been some other local.set/local.gets in between, but at this point
334    // we don't care.
335    Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
336    auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
337    EHPad->insert(InsertPos, Call->removeFromParent());
338    BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
339            TII.get(WebAssembly::UNREACHABLE));
340    EHPad->erase(InsertPos, EHPad->end());
341    SmallVector<MachineBasicBlock *, 8> Succs(EHPad->succ_begin(),
342                                              EHPad->succ_end());
343    for (auto *Succ : Succs)
344      EHPad->removeSuccessor(Succ);
345    eraseDeadBBsAndChildren(Succs);
346  }
347  return Changed;
348}
349
350// In case there are multiple terminate pads, merge them into one for code size.
351// This runs after ensureSingleBBTermPads() and assumes every terminate pad is a
352// single BB.
353// In principle this violates EH scope relationship because it can merge
354// multiple inner EH scopes, each of which is in different outer EH scope. But
355// getEHScopeMembership() function will not be called after this, so it is fine.
356bool WebAssemblyLateEHPrepare::mergeTerminatePads(MachineFunction &MF) {
357  SmallVector<MachineBasicBlock *, 8> TermPads;
358  for (auto &MBB : MF)
359    if (WebAssembly::isCatchTerminatePad(MBB))
360      TermPads.push_back(&MBB);
361  if (TermPads.empty())
362    return false;
363
364  MachineBasicBlock *UniqueTermPad = TermPads.front();
365  for (auto *TermPad :
366       llvm::make_range(std::next(TermPads.begin()), TermPads.end())) {
367    SmallVector<MachineBasicBlock *, 2> Preds(TermPad->pred_begin(),
368                                              TermPad->pred_end());
369    for (auto *Pred : Preds)
370      Pred->replaceSuccessor(TermPad, UniqueTermPad);
371    TermPad->eraseFromParent();
372  }
373  return true;
374}
375
376// Terminate pads are cleanup pads, so they should start with a 'catch_all'
377// instruction. But in the Itanium model, when we have a C++ exception object,
378// we pass them to __clang_call_terminate function, which calls __cxa_end_catch
379// with the passed exception pointer and then std::terminate. This is the reason
380// that terminate pads are generated with not a catch_all but a catch
381// instruction in clang and earlier llvm passes. Here we append a terminate pad
382// with a catch_all after each existing terminate pad so we can also catch
383// foreign exceptions. For every terminate pad:
384//   %exn = catch 0
385//   call @__clang_call_terminate(%exn)
386//   unreachable
387// We append this BB right after that:
388//   catch_all
389//   call @std::terminate()
390//   unreachable
391bool WebAssemblyLateEHPrepare::addCatchAllTerminatePads(MachineFunction &MF) {
392  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
393  SmallVector<MachineBasicBlock *, 8> TermPads;
394  for (auto &MBB : MF)
395    if (WebAssembly::isCatchTerminatePad(MBB))
396      TermPads.push_back(&MBB);
397  if (TermPads.empty())
398    return false;
399
400  Function *StdTerminateFn =
401      MF.getFunction().getParent()->getFunction(WebAssembly::StdTerminateFn);
402  assert(StdTerminateFn && "There is no std::terminate() function");
403  for (auto *CatchTermPad : TermPads) {
404    DebugLoc DL = CatchTermPad->findDebugLoc(CatchTermPad->begin());
405    auto *CatchAllTermPad = MF.CreateMachineBasicBlock();
406    MF.insert(std::next(MachineFunction::iterator(CatchTermPad)),
407              CatchAllTermPad);
408    CatchAllTermPad->setIsEHPad();
409    BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CATCH_ALL));
410    BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CALL_VOID))
411        .addGlobalAddress(StdTerminateFn);
412    BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::UNREACHABLE));
413
414    // Actually this CatchAllTermPad (new terminate pad with a catch_all) is not
415    // a successor of an existing terminate pad. CatchAllTermPad should have all
416    // predecessors CatchTermPad has instead. This is a hack to force
417    // CatchAllTermPad be always sorted right after CatchTermPad; the correct
418    // predecessor-successor relationships will be restored in CFGStackify pass.
419    CatchTermPad->addSuccessor(CatchAllTermPad);
420  }
421  return true;
422}
423