WebAssemblyLateEHPrepare.cpp revision 341825
1//=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Does various transformations for exception handling.
12///
13//===----------------------------------------------------------------------===//
14
15#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16#include "WebAssembly.h"
17#include "WebAssemblySubtarget.h"
18#include "WebAssemblyUtilities.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/WasmEHFuncInfo.h"
21#include "llvm/MC/MCAsmInfo.h"
22using namespace llvm;
23
24#define DEBUG_TYPE "wasm-exception-prepare"
25
26namespace {
27class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
28  StringRef getPassName() const override {
29    return "WebAssembly Prepare Exception";
30  }
31
32  bool runOnMachineFunction(MachineFunction &MF) override;
33
34  bool replaceFuncletReturns(MachineFunction &MF);
35  bool hoistCatches(MachineFunction &MF);
36  bool addCatchAlls(MachineFunction &MF);
37  bool addRethrows(MachineFunction &MF);
38  bool ensureSingleBBTermPads(MachineFunction &MF);
39  bool mergeTerminatePads(MachineFunction &MF);
40  bool addCatchAllTerminatePads(MachineFunction &MF);
41
42public:
43  static char ID; // Pass identification, replacement for typeid
44  WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {}
45};
46} // end anonymous namespace
47
48char WebAssemblyLateEHPrepare::ID = 0;
49INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE,
50                "WebAssembly Exception Preparation", false, false)
51
52FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
53  return new WebAssemblyLateEHPrepare();
54}
55
56// Returns the nearest EH pad that dominates this instruction. This does not use
57// dominator analysis; it just does BFS on its predecessors until arriving at an
58// EH pad. This assumes valid EH scopes so the first EH pad it arrives in all
59// possible search paths should be the same.
60// Returns nullptr in case it does not find any EH pad in the search, or finds
61// multiple different EH pads.
62MachineBasicBlock *GetMatchingEHPad(MachineInstr *MI) {
63  MachineFunction *MF = MI->getParent()->getParent();
64  SmallVector<MachineBasicBlock *, 2> WL;
65  SmallPtrSet<MachineBasicBlock *, 2> Visited;
66  WL.push_back(MI->getParent());
67  MachineBasicBlock *EHPad = nullptr;
68  while (!WL.empty()) {
69    MachineBasicBlock *MBB = WL.pop_back_val();
70    if (Visited.count(MBB))
71      continue;
72    Visited.insert(MBB);
73    if (MBB->isEHPad()) {
74      if (EHPad && EHPad != MBB)
75        return nullptr;
76      EHPad = MBB;
77      continue;
78    }
79    if (MBB == &MF->front())
80      return nullptr;
81    WL.append(MBB->pred_begin(), MBB->pred_end());
82  }
83  return EHPad;
84}
85
86// Erases the given BB and all its children from the function. If other BBs have
87// this BB as a successor, the successor relationships will be deleted as well.
88static void EraseBBAndChildren(MachineBasicBlock *MBB) {
89  SmallVector<MachineBasicBlock *, 8> WL;
90  WL.push_back(MBB);
91  while (!WL.empty()) {
92    MachineBasicBlock *MBB = WL.pop_back_val();
93    for (auto *Pred : MBB->predecessors())
94      Pred->removeSuccessor(MBB);
95    for (auto *Succ : MBB->successors()) {
96      WL.push_back(Succ);
97      MBB->removeSuccessor(Succ);
98    }
99    MBB->eraseFromParent();
100  }
101}
102
103bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
104  if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() !=
105      ExceptionHandling::Wasm)
106    return false;
107
108  bool Changed = false;
109  Changed |= addRethrows(MF);
110  if (!MF.getFunction().hasPersonalityFn())
111    return Changed;
112  Changed |= replaceFuncletReturns(MF);
113  Changed |= hoistCatches(MF);
114  Changed |= addCatchAlls(MF);
115  Changed |= ensureSingleBBTermPads(MF);
116  Changed |= mergeTerminatePads(MF);
117  Changed |= addCatchAllTerminatePads(MF);
118  return Changed;
119}
120
121bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
122  bool Changed = false;
123  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
124  auto *EHInfo = MF.getWasmEHFuncInfo();
125
126  for (auto &MBB : MF) {
127    auto Pos = MBB.getFirstTerminator();
128    if (Pos == MBB.end())
129      continue;
130    MachineInstr *TI = &*Pos;
131
132    switch (TI->getOpcode()) {
133    case WebAssembly::CATCHRET: {
134      // Replace a catchret with a branch
135      MachineBasicBlock *TBB = TI->getOperand(0).getMBB();
136      if (!MBB.isLayoutSuccessor(TBB))
137        BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR))
138            .addMBB(TBB);
139      TI->eraseFromParent();
140      Changed = true;
141      break;
142    }
143    case WebAssembly::CLEANUPRET: {
144      // Replace a cleanupret with a rethrow
145      if (EHInfo->hasThrowUnwindDest(&MBB))
146        BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
147            .addMBB(EHInfo->getThrowUnwindDest(&MBB));
148      else
149        BuildMI(MBB, TI, TI->getDebugLoc(),
150                TII.get(WebAssembly::RETHROW_TO_CALLER));
151
152      TI->eraseFromParent();
153      Changed = true;
154      break;
155    }
156    }
157  }
158  return Changed;
159}
160
161// Hoist catch instructions to the beginning of their matching EH pad BBs in
162// case,
163// (1) catch instruction is not the first instruction in EH pad.
164// ehpad:
165//   some_other_instruction
166//   ...
167//   %exn = catch 0
168// (2) catch instruction is in a non-EH pad BB. For example,
169// ehpad:
170//   br bb0
171// bb0:
172//   %exn = catch 0
173bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
174  bool Changed = false;
175  SmallVector<MachineInstr *, 16> Catches;
176  for (auto &MBB : MF)
177    for (auto &MI : MBB)
178      if (WebAssembly::isCatch(MI))
179        Catches.push_back(&MI);
180
181  for (auto *Catch : Catches) {
182    MachineBasicBlock *EHPad = GetMatchingEHPad(Catch);
183    assert(EHPad && "No matching EH pad for catch");
184    if (EHPad->begin() == Catch)
185      continue;
186    Changed = true;
187    EHPad->insert(EHPad->begin(), Catch->removeFromParent());
188  }
189  return Changed;
190}
191
192// Add catch_all to beginning of cleanup pads.
193bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) {
194  bool Changed = false;
195  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
196
197  for (auto &MBB : MF) {
198    if (!MBB.isEHPad())
199      continue;
200    // This runs after hoistCatches(), so we assume that if there is a catch,
201    // that should be the first instruction in an EH pad.
202    if (!WebAssembly::isCatch(*MBB.begin())) {
203      Changed = true;
204      BuildMI(MBB, MBB.begin(), MBB.begin()->getDebugLoc(),
205              TII.get(WebAssembly::CATCH_ALL));
206    }
207  }
208  return Changed;
209}
210
211// Add a 'rethrow' instruction after __cxa_rethrow() call
212bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
213  bool Changed = false;
214  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
215  auto *EHInfo = MF.getWasmEHFuncInfo();
216
217  for (auto &MBB : MF)
218    for (auto &MI : MBB) {
219      // Check if it is a call to __cxa_rethrow()
220      if (!MI.isCall())
221        continue;
222      MachineOperand &CalleeOp = MI.getOperand(0);
223      if (!CalleeOp.isGlobal() ||
224          CalleeOp.getGlobal()->getName() != WebAssembly::CxaRethrowFn)
225        continue;
226
227      // Now we have __cxa_rethrow() call
228      Changed = true;
229      auto InsertPt = std::next(MachineBasicBlock::iterator(MI));
230      while (InsertPt != MBB.end() && InsertPt->isLabel()) // Skip EH_LABELs
231        ++InsertPt;
232      MachineInstr *Rethrow = nullptr;
233      if (EHInfo->hasThrowUnwindDest(&MBB))
234        Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
235                          TII.get(WebAssembly::RETHROW))
236                      .addMBB(EHInfo->getThrowUnwindDest(&MBB));
237      else
238        Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
239                          TII.get(WebAssembly::RETHROW_TO_CALLER));
240
241      // Becasue __cxa_rethrow does not return, the instruction after the
242      // rethrow should be an unreachable or a branch to another BB that should
243      // eventually lead to an unreachable. Delete it because rethrow itself is
244      // a terminator, and also delete non-EH pad successors if any.
245      MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end());
246      for (auto *Succ : MBB.successors())
247        if (!Succ->isEHPad())
248          EraseBBAndChildren(Succ);
249    }
250  return Changed;
251}
252
253// Terminate pads are an single-BB EH pad in the form of
254// termpad:
255//   %exn = catch 0
256//   call @__clang_call_terminate(%exn)
257//   unreachable
258// (There can be set_local and get_locals before the call if we didn't run
259// RegStackify)
260// But code transformations can change or add more control flow, so the call to
261// __clang_call_terminate() function may not be in the original EH pad anymore.
262// This ensures every terminate pad is a single BB in the form illustrated
263// above.
264bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
265  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
266
267  // Find calls to __clang_call_terminate()
268  SmallVector<MachineInstr *, 8> ClangCallTerminateCalls;
269  for (auto &MBB : MF)
270    for (auto &MI : MBB)
271      if (MI.isCall()) {
272        const MachineOperand &CalleeOp = MI.getOperand(0);
273        if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() ==
274                                       WebAssembly::ClangCallTerminateFn)
275          ClangCallTerminateCalls.push_back(&MI);
276      }
277
278  bool Changed = false;
279  for (auto *Call : ClangCallTerminateCalls) {
280    MachineBasicBlock *EHPad = GetMatchingEHPad(Call);
281    assert(EHPad && "No matching EH pad for catch");
282
283    // If it is already the form we want, skip it
284    if (Call->getParent() == EHPad &&
285        Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE)
286      continue;
287
288    // In case the __clang_call_terminate() call is not in its matching EH pad,
289    // move the call to the end of EH pad and add an unreachable instruction
290    // after that. Delete all successors and their children if any, because here
291    // the program terminates.
292    Changed = true;
293    MachineInstr *Catch = &*EHPad->begin();
294    // This runs after hoistCatches(), so catch instruction should be at the top
295    assert(WebAssembly::isCatch(*Catch));
296    // Takes the result register of the catch instruction as argument. There may
297    // have been some other set_local/get_locals in between, but at this point
298    // we don't care.
299    Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
300    auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
301    EHPad->insert(InsertPos, Call->removeFromParent());
302    BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
303            TII.get(WebAssembly::UNREACHABLE));
304    EHPad->erase(InsertPos, EHPad->end());
305    for (auto *Succ : EHPad->successors())
306      EraseBBAndChildren(Succ);
307  }
308  return Changed;
309}
310
311// In case there are multiple terminate pads, merge them into one for code size.
312// This runs after ensureSingleBBTermPads() and assumes every terminate pad is a
313// single BB.
314// In principle this violates EH scope relationship because it can merge
315// multiple inner EH scopes, each of which is in different outer EH scope. But
316// getEHScopeMembership() function will not be called after this, so it is fine.
317bool WebAssemblyLateEHPrepare::mergeTerminatePads(MachineFunction &MF) {
318  SmallVector<MachineBasicBlock *, 8> TermPads;
319  for (auto &MBB : MF)
320    if (WebAssembly::isCatchTerminatePad(MBB))
321      TermPads.push_back(&MBB);
322  if (TermPads.empty())
323    return false;
324
325  MachineBasicBlock *UniqueTermPad = TermPads.front();
326  for (auto *TermPad :
327       llvm::make_range(std::next(TermPads.begin()), TermPads.end())) {
328    SmallVector<MachineBasicBlock *, 2> Preds(TermPad->pred_begin(),
329                                              TermPad->pred_end());
330    for (auto *Pred : Preds)
331      Pred->replaceSuccessor(TermPad, UniqueTermPad);
332    TermPad->eraseFromParent();
333  }
334  return true;
335}
336
337// Terminate pads are cleanup pads, so they should start with a 'catch_all'
338// instruction. But in the Itanium model, when we have a C++ exception object,
339// we pass them to __clang_call_terminate function, which calls __cxa_end_catch
340// with the passed exception pointer and then std::terminate. This is the reason
341// that terminate pads are generated with not a catch_all but a catch
342// instruction in clang and earlier llvm passes. Here we append a terminate pad
343// with a catch_all after each existing terminate pad so we can also catch
344// foreign exceptions. For every terminate pad:
345//   %exn = catch 0
346//   call @__clang_call_terminate(%exn)
347//   unreachable
348// We append this BB right after that:
349//   catch_all
350//   call @std::terminate()
351//   unreachable
352bool WebAssemblyLateEHPrepare::addCatchAllTerminatePads(MachineFunction &MF) {
353  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
354  SmallVector<MachineBasicBlock *, 8> TermPads;
355  for (auto &MBB : MF)
356    if (WebAssembly::isCatchTerminatePad(MBB))
357      TermPads.push_back(&MBB);
358  if (TermPads.empty())
359    return false;
360
361  Function *StdTerminateFn =
362      MF.getFunction().getParent()->getFunction(WebAssembly::StdTerminateFn);
363  assert(StdTerminateFn && "There is no std::terminate() function");
364  for (auto *CatchTermPad : TermPads) {
365    DebugLoc DL = CatchTermPad->findDebugLoc(CatchTermPad->begin());
366    auto *CatchAllTermPad = MF.CreateMachineBasicBlock();
367    MF.insert(std::next(MachineFunction::iterator(CatchTermPad)),
368              CatchAllTermPad);
369    CatchAllTermPad->setIsEHPad();
370    BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CATCH_ALL));
371    BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CALL_VOID))
372        .addGlobalAddress(StdTerminateFn);
373    BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::UNREACHABLE));
374
375    // Actually this CatchAllTermPad (new terminate pad with a catch_all) is not
376    // a successor of an existing terminate pad. CatchAllTermPad should have all
377    // predecessors CatchTermPad has instead. This is a hack to force
378    // CatchAllTermPad be always sorted right after CatchTermPad; the correct
379    // predecessor-successor relationships will be restored in CFGStackify pass.
380    CatchTermPad->addSuccessor(CatchAllTermPad);
381  }
382  return true;
383}
384