WebAssemblyLateEHPrepare.cpp revision 344779
1//=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief Does various transformations for exception handling. 12/// 13//===----------------------------------------------------------------------===// 14 15#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 16#include "WebAssembly.h" 17#include "WebAssemblySubtarget.h" 18#include "WebAssemblyUtilities.h" 19#include "llvm/CodeGen/MachineInstrBuilder.h" 20#include "llvm/CodeGen/WasmEHFuncInfo.h" 21#include "llvm/MC/MCAsmInfo.h" 22using namespace llvm; 23 24#define DEBUG_TYPE "wasm-exception-prepare" 25 26namespace { 27class WebAssemblyLateEHPrepare final : public MachineFunctionPass { 28 StringRef getPassName() const override { 29 return "WebAssembly Prepare Exception"; 30 } 31 32 bool runOnMachineFunction(MachineFunction &MF) override; 33 34 bool removeUnnecessaryUnreachables(MachineFunction &MF); 35 bool replaceFuncletReturns(MachineFunction &MF); 36 bool hoistCatches(MachineFunction &MF); 37 bool addCatchAlls(MachineFunction &MF); 38 bool addRethrows(MachineFunction &MF); 39 bool ensureSingleBBTermPads(MachineFunction &MF); 40 bool mergeTerminatePads(MachineFunction &MF); 41 bool addCatchAllTerminatePads(MachineFunction &MF); 42 43public: 44 static char ID; // Pass identification, replacement for typeid 45 WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {} 46}; 47} // end anonymous namespace 48 49char WebAssemblyLateEHPrepare::ID = 0; 50INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE, 51 "WebAssembly Late Exception Preparation", false, false) 52 53FunctionPass *llvm::createWebAssemblyLateEHPrepare() { 54 return new WebAssemblyLateEHPrepare(); 55} 56 57// Returns the nearest EH pad that dominates this instruction. This does not use 58// dominator analysis; it just does BFS on its predecessors until arriving at an 59// EH pad. This assumes valid EH scopes so the first EH pad it arrives in all 60// possible search paths should be the same. 61// Returns nullptr in case it does not find any EH pad in the search, or finds 62// multiple different EH pads. 63static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) { 64 MachineFunction *MF = MI->getParent()->getParent(); 65 SmallVector<MachineBasicBlock *, 2> WL; 66 SmallPtrSet<MachineBasicBlock *, 2> Visited; 67 WL.push_back(MI->getParent()); 68 MachineBasicBlock *EHPad = nullptr; 69 while (!WL.empty()) { 70 MachineBasicBlock *MBB = WL.pop_back_val(); 71 if (Visited.count(MBB)) 72 continue; 73 Visited.insert(MBB); 74 if (MBB->isEHPad()) { 75 if (EHPad && EHPad != MBB) 76 return nullptr; 77 EHPad = MBB; 78 continue; 79 } 80 if (MBB == &MF->front()) 81 return nullptr; 82 WL.append(MBB->pred_begin(), MBB->pred_end()); 83 } 84 return EHPad; 85} 86 87// Erase the specified BBs if the BB does not have any remaining predecessors, 88// and also all its dead children. 89template <typename Container> 90static void eraseDeadBBsAndChildren(const Container &MBBs) { 91 SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end()); 92 while (!WL.empty()) { 93 MachineBasicBlock *MBB = WL.pop_back_val(); 94 if (!MBB->pred_empty()) 95 continue; 96 SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(), 97 MBB->succ_end()); 98 WL.append(MBB->succ_begin(), MBB->succ_end()); 99 for (auto *Succ : Succs) 100 MBB->removeSuccessor(Succ); 101 MBB->eraseFromParent(); 102 } 103} 104 105bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) { 106 LLVM_DEBUG(dbgs() << "********** Late EH Prepare **********\n" 107 "********** Function: " 108 << MF.getName() << '\n'); 109 110 if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() != 111 ExceptionHandling::Wasm) 112 return false; 113 114 bool Changed = false; 115 Changed |= removeUnnecessaryUnreachables(MF); 116 Changed |= addRethrows(MF); 117 if (!MF.getFunction().hasPersonalityFn()) 118 return Changed; 119 Changed |= replaceFuncletReturns(MF); 120 Changed |= hoistCatches(MF); 121 Changed |= addCatchAlls(MF); 122 Changed |= ensureSingleBBTermPads(MF); 123 Changed |= mergeTerminatePads(MF); 124 Changed |= addCatchAllTerminatePads(MF); 125 return Changed; 126} 127 128bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( 129 MachineFunction &MF) { 130 bool Changed = false; 131 for (auto &MBB : MF) { 132 for (auto &MI : MBB) { 133 if (!WebAssembly::isThrow(MI)) 134 continue; 135 Changed = true; 136 137 // The instruction after the throw should be an unreachable or a branch to 138 // another BB that should eventually lead to an unreachable. Delete it 139 // because throw itself is a terminator, and also delete successors if 140 // any. 141 MBB.erase(std::next(MachineBasicBlock::iterator(MI)), MBB.end()); 142 SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(), 143 MBB.succ_end()); 144 for (auto *Succ : Succs) 145 MBB.removeSuccessor(Succ); 146 eraseDeadBBsAndChildren(Succs); 147 } 148 } 149 150 return Changed; 151} 152 153bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) { 154 bool Changed = false; 155 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 156 auto *EHInfo = MF.getWasmEHFuncInfo(); 157 158 for (auto &MBB : MF) { 159 auto Pos = MBB.getFirstTerminator(); 160 if (Pos == MBB.end()) 161 continue; 162 MachineInstr *TI = &*Pos; 163 164 switch (TI->getOpcode()) { 165 case WebAssembly::CATCHRET: { 166 // Replace a catchret with a branch 167 MachineBasicBlock *TBB = TI->getOperand(0).getMBB(); 168 if (!MBB.isLayoutSuccessor(TBB)) 169 BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR)) 170 .addMBB(TBB); 171 TI->eraseFromParent(); 172 Changed = true; 173 break; 174 } 175 case WebAssembly::CLEANUPRET: { 176 // Replace a cleanupret with a rethrow 177 if (EHInfo->hasThrowUnwindDest(&MBB)) 178 BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW)) 179 .addMBB(EHInfo->getThrowUnwindDest(&MBB)); 180 else 181 BuildMI(MBB, TI, TI->getDebugLoc(), 182 TII.get(WebAssembly::RETHROW_TO_CALLER)); 183 184 TI->eraseFromParent(); 185 Changed = true; 186 break; 187 } 188 } 189 } 190 return Changed; 191} 192 193// Hoist catch instructions to the beginning of their matching EH pad BBs in 194// case, 195// (1) catch instruction is not the first instruction in EH pad. 196// ehpad: 197// some_other_instruction 198// ... 199// %exn = catch 0 200// (2) catch instruction is in a non-EH pad BB. For example, 201// ehpad: 202// br bb0 203// bb0: 204// %exn = catch 0 205bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) { 206 bool Changed = false; 207 SmallVector<MachineInstr *, 16> Catches; 208 for (auto &MBB : MF) 209 for (auto &MI : MBB) 210 if (WebAssembly::isCatch(MI)) 211 Catches.push_back(&MI); 212 213 for (auto *Catch : Catches) { 214 MachineBasicBlock *EHPad = getMatchingEHPad(Catch); 215 assert(EHPad && "No matching EH pad for catch"); 216 if (EHPad->begin() == Catch) 217 continue; 218 Changed = true; 219 EHPad->insert(EHPad->begin(), Catch->removeFromParent()); 220 } 221 return Changed; 222} 223 224// Add catch_all to beginning of cleanup pads. 225bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) { 226 bool Changed = false; 227 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 228 229 for (auto &MBB : MF) { 230 if (!MBB.isEHPad()) 231 continue; 232 // This runs after hoistCatches(), so we assume that if there is a catch, 233 // that should be the first instruction in an EH pad. 234 if (!WebAssembly::isCatch(*MBB.begin())) { 235 Changed = true; 236 BuildMI(MBB, MBB.begin(), MBB.begin()->getDebugLoc(), 237 TII.get(WebAssembly::CATCH_ALL)); 238 } 239 } 240 return Changed; 241} 242 243// Add a 'rethrow' instruction after __cxa_rethrow() call 244bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) { 245 bool Changed = false; 246 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 247 auto *EHInfo = MF.getWasmEHFuncInfo(); 248 249 for (auto &MBB : MF) 250 for (auto &MI : MBB) { 251 // Check if it is a call to __cxa_rethrow() 252 if (!MI.isCall()) 253 continue; 254 MachineOperand &CalleeOp = MI.getOperand(0); 255 if (!CalleeOp.isGlobal() || 256 CalleeOp.getGlobal()->getName() != WebAssembly::CxaRethrowFn) 257 continue; 258 259 // Now we have __cxa_rethrow() call 260 Changed = true; 261 auto InsertPt = std::next(MachineBasicBlock::iterator(MI)); 262 while (InsertPt != MBB.end() && InsertPt->isLabel()) // Skip EH_LABELs 263 ++InsertPt; 264 MachineInstr *Rethrow = nullptr; 265 if (EHInfo->hasThrowUnwindDest(&MBB)) 266 Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(), 267 TII.get(WebAssembly::RETHROW)) 268 .addMBB(EHInfo->getThrowUnwindDest(&MBB)); 269 else 270 Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(), 271 TII.get(WebAssembly::RETHROW_TO_CALLER)); 272 273 // Because __cxa_rethrow does not return, the instruction after the 274 // rethrow should be an unreachable or a branch to another BB that should 275 // eventually lead to an unreachable. Delete it because rethrow itself is 276 // a terminator, and also delete non-EH pad successors if any. 277 MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end()); 278 SmallVector<MachineBasicBlock *, 8> NonPadSuccessors; 279 for (auto *Succ : MBB.successors()) 280 if (!Succ->isEHPad()) 281 NonPadSuccessors.push_back(Succ); 282 for (auto *Succ : NonPadSuccessors) 283 MBB.removeSuccessor(Succ); 284 eraseDeadBBsAndChildren(NonPadSuccessors); 285 } 286 return Changed; 287} 288 289// Terminate pads are an single-BB EH pad in the form of 290// termpad: 291// %exn = catch 0 292// call @__clang_call_terminate(%exn) 293// unreachable 294// (There can be local.set and local.gets before the call if we didn't run 295// RegStackify) 296// But code transformations can change or add more control flow, so the call to 297// __clang_call_terminate() function may not be in the original EH pad anymore. 298// This ensures every terminate pad is a single BB in the form illustrated 299// above. 300bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) { 301 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 302 303 // Find calls to __clang_call_terminate() 304 SmallVector<MachineInstr *, 8> ClangCallTerminateCalls; 305 for (auto &MBB : MF) 306 for (auto &MI : MBB) 307 if (MI.isCall()) { 308 const MachineOperand &CalleeOp = MI.getOperand(0); 309 if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() == 310 WebAssembly::ClangCallTerminateFn) 311 ClangCallTerminateCalls.push_back(&MI); 312 } 313 314 bool Changed = false; 315 for (auto *Call : ClangCallTerminateCalls) { 316 MachineBasicBlock *EHPad = getMatchingEHPad(Call); 317 assert(EHPad && "No matching EH pad for catch"); 318 319 // If it is already the form we want, skip it 320 if (Call->getParent() == EHPad && 321 Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE) 322 continue; 323 324 // In case the __clang_call_terminate() call is not in its matching EH pad, 325 // move the call to the end of EH pad and add an unreachable instruction 326 // after that. Delete all successors and their children if any, because here 327 // the program terminates. 328 Changed = true; 329 MachineInstr *Catch = &*EHPad->begin(); 330 // This runs after hoistCatches(), so catch instruction should be at the top 331 assert(WebAssembly::isCatch(*Catch)); 332 // Takes the result register of the catch instruction as argument. There may 333 // have been some other local.set/local.gets in between, but at this point 334 // we don't care. 335 Call->getOperand(1).setReg(Catch->getOperand(0).getReg()); 336 auto InsertPos = std::next(MachineBasicBlock::iterator(Catch)); 337 EHPad->insert(InsertPos, Call->removeFromParent()); 338 BuildMI(*EHPad, InsertPos, Call->getDebugLoc(), 339 TII.get(WebAssembly::UNREACHABLE)); 340 EHPad->erase(InsertPos, EHPad->end()); 341 SmallVector<MachineBasicBlock *, 8> Succs(EHPad->succ_begin(), 342 EHPad->succ_end()); 343 for (auto *Succ : Succs) 344 EHPad->removeSuccessor(Succ); 345 eraseDeadBBsAndChildren(Succs); 346 } 347 return Changed; 348} 349 350// In case there are multiple terminate pads, merge them into one for code size. 351// This runs after ensureSingleBBTermPads() and assumes every terminate pad is a 352// single BB. 353// In principle this violates EH scope relationship because it can merge 354// multiple inner EH scopes, each of which is in different outer EH scope. But 355// getEHScopeMembership() function will not be called after this, so it is fine. 356bool WebAssemblyLateEHPrepare::mergeTerminatePads(MachineFunction &MF) { 357 SmallVector<MachineBasicBlock *, 8> TermPads; 358 for (auto &MBB : MF) 359 if (WebAssembly::isCatchTerminatePad(MBB)) 360 TermPads.push_back(&MBB); 361 if (TermPads.empty()) 362 return false; 363 364 MachineBasicBlock *UniqueTermPad = TermPads.front(); 365 for (auto *TermPad : 366 llvm::make_range(std::next(TermPads.begin()), TermPads.end())) { 367 SmallVector<MachineBasicBlock *, 2> Preds(TermPad->pred_begin(), 368 TermPad->pred_end()); 369 for (auto *Pred : Preds) 370 Pred->replaceSuccessor(TermPad, UniqueTermPad); 371 TermPad->eraseFromParent(); 372 } 373 return true; 374} 375 376// Terminate pads are cleanup pads, so they should start with a 'catch_all' 377// instruction. But in the Itanium model, when we have a C++ exception object, 378// we pass them to __clang_call_terminate function, which calls __cxa_end_catch 379// with the passed exception pointer and then std::terminate. This is the reason 380// that terminate pads are generated with not a catch_all but a catch 381// instruction in clang and earlier llvm passes. Here we append a terminate pad 382// with a catch_all after each existing terminate pad so we can also catch 383// foreign exceptions. For every terminate pad: 384// %exn = catch 0 385// call @__clang_call_terminate(%exn) 386// unreachable 387// We append this BB right after that: 388// catch_all 389// call @std::terminate() 390// unreachable 391bool WebAssemblyLateEHPrepare::addCatchAllTerminatePads(MachineFunction &MF) { 392 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 393 SmallVector<MachineBasicBlock *, 8> TermPads; 394 for (auto &MBB : MF) 395 if (WebAssembly::isCatchTerminatePad(MBB)) 396 TermPads.push_back(&MBB); 397 if (TermPads.empty()) 398 return false; 399 400 Function *StdTerminateFn = 401 MF.getFunction().getParent()->getFunction(WebAssembly::StdTerminateFn); 402 assert(StdTerminateFn && "There is no std::terminate() function"); 403 for (auto *CatchTermPad : TermPads) { 404 DebugLoc DL = CatchTermPad->findDebugLoc(CatchTermPad->begin()); 405 auto *CatchAllTermPad = MF.CreateMachineBasicBlock(); 406 MF.insert(std::next(MachineFunction::iterator(CatchTermPad)), 407 CatchAllTermPad); 408 CatchAllTermPad->setIsEHPad(); 409 BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CATCH_ALL)); 410 BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CALL_VOID)) 411 .addGlobalAddress(StdTerminateFn); 412 BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::UNREACHABLE)); 413 414 // Actually this CatchAllTermPad (new terminate pad with a catch_all) is not 415 // a successor of an existing terminate pad. CatchAllTermPad should have all 416 // predecessors CatchTermPad has instead. This is a hack to force 417 // CatchAllTermPad be always sorted right after CatchTermPad; the correct 418 // predecessor-successor relationships will be restored in CFGStackify pass. 419 CatchTermPad->addSuccessor(CatchAllTermPad); 420 } 421 return true; 422} 423