WebAssemblyCFGStackify.cpp revision 294024
1//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements a CFG stacking pass. 12/// 13/// This pass reorders the blocks in a function to put them into a reverse 14/// post-order [0], with special care to keep the order as similar as possible 15/// to the original order, and to keep loops contiguous even in the case of 16/// split backedges. 17/// 18/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since 19/// scope boundaries serve as the labels for WebAssembly's control transfers. 20/// 21/// This is sufficient to convert arbitrary CFGs into a form that works on 22/// WebAssembly, provided that all loops are single-entry. 23/// 24/// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings 25/// 26//===----------------------------------------------------------------------===// 27 28#include "WebAssembly.h" 29#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 30#include "WebAssemblySubtarget.h" 31#include "llvm/ADT/SCCIterator.h" 32#include "llvm/ADT/SetVector.h" 33#include "llvm/CodeGen/MachineDominators.h" 34#include "llvm/CodeGen/MachineFunction.h" 35#include "llvm/CodeGen/MachineInstrBuilder.h" 36#include "llvm/CodeGen/MachineLoopInfo.h" 37#include "llvm/CodeGen/MachineRegisterInfo.h" 38#include "llvm/CodeGen/Passes.h" 39#include "llvm/Support/Debug.h" 40#include "llvm/Support/raw_ostream.h" 41using namespace llvm; 42 43#define DEBUG_TYPE "wasm-cfg-stackify" 44 45namespace { 46class WebAssemblyCFGStackify final : public MachineFunctionPass { 47 const char *getPassName() const override { 48 return "WebAssembly CFG Stackify"; 49 } 50 51 void getAnalysisUsage(AnalysisUsage &AU) const override { 52 AU.setPreservesCFG(); 53 AU.addRequired<MachineDominatorTree>(); 54 AU.addPreserved<MachineDominatorTree>(); 55 AU.addRequired<MachineLoopInfo>(); 56 AU.addPreserved<MachineLoopInfo>(); 57 MachineFunctionPass::getAnalysisUsage(AU); 58 } 59 60 bool runOnMachineFunction(MachineFunction &MF) override; 61 62public: 63 static char ID; // Pass identification, replacement for typeid 64 WebAssemblyCFGStackify() : MachineFunctionPass(ID) {} 65}; 66} // end anonymous namespace 67 68char WebAssemblyCFGStackify::ID = 0; 69FunctionPass *llvm::createWebAssemblyCFGStackify() { 70 return new WebAssemblyCFGStackify(); 71} 72 73static void EliminateMultipleEntryLoops(MachineFunction &MF, 74 const MachineLoopInfo &MLI) { 75 SmallPtrSet<MachineBasicBlock *, 8> InSet; 76 for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF); 77 I != E; ++I) { 78 const std::vector<MachineBasicBlock *> &CurrentSCC = *I; 79 80 // Skip trivial SCCs. 81 if (CurrentSCC.size() == 1) 82 continue; 83 84 InSet.insert(CurrentSCC.begin(), CurrentSCC.end()); 85 MachineBasicBlock *Header = nullptr; 86 for (MachineBasicBlock *MBB : CurrentSCC) { 87 for (MachineBasicBlock *Pred : MBB->predecessors()) { 88 if (InSet.count(Pred)) 89 continue; 90 if (!Header) { 91 Header = MBB; 92 break; 93 } 94 // TODO: Implement multiple-entry loops. 95 report_fatal_error("multiple-entry loops are not supported yet"); 96 } 97 } 98 assert(MLI.isLoopHeader(Header)); 99 100 InSet.clear(); 101 } 102} 103 104namespace { 105/// Post-order traversal stack entry. 106struct POStackEntry { 107 MachineBasicBlock *MBB; 108 SmallVector<MachineBasicBlock *, 0> Succs; 109 110 POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, 111 const MachineLoopInfo &MLI); 112}; 113} // end anonymous namespace 114 115static bool LoopContains(const MachineLoop *Loop, 116 const MachineBasicBlock *MBB) { 117 return Loop ? Loop->contains(MBB) : true; 118} 119 120POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, 121 const MachineLoopInfo &MLI) 122 : MBB(MBB), Succs(MBB->successors()) { 123 // RPO is not a unique form, since at every basic block with multiple 124 // successors, the DFS has to pick which order to visit the successors in. 125 // Sort them strategically (see below). 126 MachineLoop *Loop = MLI.getLoopFor(MBB); 127 MachineFunction::iterator Next = next(MachineFunction::iterator(MBB)); 128 MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next; 129 std::stable_sort( 130 Succs.begin(), Succs.end(), 131 [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) { 132 if (A == B) 133 return false; 134 135 // Keep loops contiguous by preferring the block that's in the same 136 // loop. 137 bool LoopContainsA = LoopContains(Loop, A); 138 bool LoopContainsB = LoopContains(Loop, B); 139 if (LoopContainsA && !LoopContainsB) 140 return true; 141 if (!LoopContainsA && LoopContainsB) 142 return false; 143 144 // Minimize perturbation by preferring the block which is the immediate 145 // layout successor. 146 if (A == LayoutSucc) 147 return true; 148 if (B == LayoutSucc) 149 return false; 150 151 // TODO: More sophisticated orderings may be profitable here. 152 153 return false; 154 }); 155} 156 157/// Return the "bottom" block of a loop. This differs from 158/// MachineLoop::getBottomBlock in that it works even if the loop is 159/// discontiguous. 160static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) { 161 MachineBasicBlock *Bottom = Loop->getHeader(); 162 for (MachineBasicBlock *MBB : Loop->blocks()) 163 if (MBB->getNumber() > Bottom->getNumber()) 164 Bottom = MBB; 165 return Bottom; 166} 167 168/// Sort the blocks in RPO, taking special care to make sure that loops are 169/// contiguous even in the case of split backedges. 170/// 171/// TODO: Determine whether RPO is actually worthwhile, or whether we should 172/// move to just a stable-topological-sort-based approach that would preserve 173/// more of the original order. 174static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { 175 // Note that we do our own RPO rather than using 176 // "llvm/ADT/PostOrderIterator.h" because we want control over the order that 177 // successors are visited in (see above). Also, we can sort the blocks in the 178 // MachineFunction as we go. 179 SmallPtrSet<MachineBasicBlock *, 16> Visited; 180 SmallVector<POStackEntry, 16> Stack; 181 182 MachineBasicBlock *EntryBlock = &*MF.begin(); 183 Visited.insert(EntryBlock); 184 Stack.push_back(POStackEntry(EntryBlock, MF, MLI)); 185 186 for (;;) { 187 POStackEntry &Entry = Stack.back(); 188 SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs; 189 if (!Succs.empty()) { 190 MachineBasicBlock *Succ = Succs.pop_back_val(); 191 if (Visited.insert(Succ).second) 192 Stack.push_back(POStackEntry(Succ, MF, MLI)); 193 continue; 194 } 195 196 // Put the block in its position in the MachineFunction. 197 MachineBasicBlock &MBB = *Entry.MBB; 198 MBB.moveBefore(&*MF.begin()); 199 200 // Branch instructions may utilize a fallthrough, so update them if a 201 // fallthrough has been added or removed. 202 if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() && 203 !MBB.back().isBarrier()) 204 report_fatal_error( 205 "Non-branch terminator with fallthrough cannot yet be rewritten"); 206 if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch()) 207 MBB.updateTerminator(); 208 209 Stack.pop_back(); 210 if (Stack.empty()) 211 break; 212 } 213 214 // Now that we've sorted the blocks in RPO, renumber them. 215 MF.RenumberBlocks(); 216 217#ifndef NDEBUG 218 SmallSetVector<MachineLoop *, 8> OnStack; 219 220 // Insert a sentinel representing the degenerate loop that starts at the 221 // function entry block and includes the entire function as a "loop" that 222 // executes once. 223 OnStack.insert(nullptr); 224 225 for (auto &MBB : MF) { 226 assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative."); 227 228 MachineLoop *Loop = MLI.getLoopFor(&MBB); 229 if (Loop && &MBB == Loop->getHeader()) { 230 // Loop header. The loop predecessor should be sorted above, and the other 231 // predecessors should be backedges below. 232 for (auto Pred : MBB.predecessors()) 233 assert( 234 (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) && 235 "Loop header predecessors must be loop predecessors or backedges"); 236 assert(OnStack.insert(Loop) && "Loops should be declared at most once."); 237 } else { 238 // Not a loop header. All predecessors should be sorted above. 239 for (auto Pred : MBB.predecessors()) 240 assert(Pred->getNumber() < MBB.getNumber() && 241 "Non-loop-header predecessors should be topologically sorted"); 242 assert(OnStack.count(MLI.getLoopFor(&MBB)) && 243 "Blocks must be nested in their loops"); 244 } 245 while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back())) 246 OnStack.pop_back(); 247 } 248 assert(OnStack.pop_back_val() == nullptr && 249 "The function entry block shouldn't actually be a loop header"); 250 assert(OnStack.empty() && 251 "Control flow stack pushes and pops should be balanced."); 252#endif 253} 254 255/// Test whether Pred has any terminators explicitly branching to MBB, as 256/// opposed to falling through. Note that it's possible (eg. in unoptimized 257/// code) for a branch instruction to both branch to a block and fallthrough 258/// to it, so we check the actual branch operands to see if there are any 259/// explicit mentions. 260static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, 261 MachineBasicBlock *MBB) { 262 for (MachineInstr &MI : Pred->terminators()) 263 for (MachineOperand &MO : MI.explicit_operands()) 264 if (MO.isMBB() && MO.getMBB() == MBB) 265 return true; 266 return false; 267} 268 269/// Insert a BLOCK marker for branches to MBB (if needed). 270static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, 271 SmallVectorImpl<MachineBasicBlock *> &ScopeTops, 272 const WebAssemblyInstrInfo &TII, 273 const MachineLoopInfo &MLI, 274 MachineDominatorTree &MDT) { 275 // First compute the nearest common dominator of all forward non-fallthrough 276 // predecessors so that we minimize the time that the BLOCK is on the stack, 277 // which reduces overall stack height. 278 MachineBasicBlock *Header = nullptr; 279 bool IsBranchedTo = false; 280 int MBBNumber = MBB.getNumber(); 281 for (MachineBasicBlock *Pred : MBB.predecessors()) 282 if (Pred->getNumber() < MBBNumber) { 283 Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred; 284 if (ExplicitlyBranchesTo(Pred, &MBB)) 285 IsBranchedTo = true; 286 } 287 if (!Header) 288 return; 289 if (!IsBranchedTo) 290 return; 291 292 assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors"); 293 MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB)); 294 295 // If the nearest common dominator is inside a more deeply nested context, 296 // walk out to the nearest scope which isn't more deeply nested. 297 for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) { 298 if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) { 299 if (ScopeTop->getNumber() > Header->getNumber()) { 300 // Skip over an intervening scope. 301 I = next(MachineFunction::iterator(ScopeTop)); 302 } else { 303 // We found a scope level at an appropriate depth. 304 Header = ScopeTop; 305 break; 306 } 307 } 308 } 309 310 // If there's a loop which ends just before MBB which contains Header, we can 311 // reuse its label instead of inserting a new BLOCK. 312 for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred); 313 Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop()) 314 if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header)) 315 return; 316 317 // Decide where in Header to put the BLOCK. 318 MachineBasicBlock::iterator InsertPos; 319 MachineLoop *HeaderLoop = MLI.getLoopFor(Header); 320 if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) { 321 // Header is the header of a loop that does not lexically contain MBB, so 322 // the BLOCK needs to be above the LOOP. 323 InsertPos = Header->begin(); 324 } else { 325 // Otherwise, insert the BLOCK as late in Header as we can, but before the 326 // beginning of the local expression tree and any nested BLOCKs. 327 InsertPos = Header->getFirstTerminator(); 328 while (InsertPos != Header->begin() && 329 prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) && 330 prev(InsertPos)->getOpcode() != WebAssembly::LOOP && 331 prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK && 332 prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP) 333 --InsertPos; 334 } 335 336 // Add the BLOCK. 337 BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)); 338 339 // Mark the end of the block. 340 InsertPos = MBB.begin(); 341 while (InsertPos != MBB.end() && 342 InsertPos->getOpcode() == WebAssembly::END_LOOP) 343 ++InsertPos; 344 BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK)); 345 346 // Track the farthest-spanning scope that ends at this point. 347 int Number = MBB.getNumber(); 348 if (!ScopeTops[Number] || 349 ScopeTops[Number]->getNumber() > Header->getNumber()) 350 ScopeTops[Number] = Header; 351} 352 353/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header). 354static void PlaceLoopMarker( 355 MachineBasicBlock &MBB, MachineFunction &MF, 356 SmallVectorImpl<MachineBasicBlock *> &ScopeTops, 357 DenseMap<const MachineInstr *, const MachineBasicBlock *> &LoopTops, 358 const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) { 359 MachineLoop *Loop = MLI.getLoopFor(&MBB); 360 if (!Loop || Loop->getHeader() != &MBB) 361 return; 362 363 // The operand of a LOOP is the first block after the loop. If the loop is the 364 // bottom of the function, insert a dummy block at the end. 365 MachineBasicBlock *Bottom = LoopBottom(Loop); 366 auto Iter = next(MachineFunction::iterator(Bottom)); 367 if (Iter == MF.end()) { 368 MachineBasicBlock *Label = MF.CreateMachineBasicBlock(); 369 // Give it a fake predecessor so that AsmPrinter prints its label. 370 Label->addSuccessor(Label); 371 MF.push_back(Label); 372 Iter = next(MachineFunction::iterator(Bottom)); 373 } 374 MachineBasicBlock *AfterLoop = &*Iter; 375 376 // Mark the beginning of the loop (after the end of any existing loop that 377 // ends here). 378 auto InsertPos = MBB.begin(); 379 while (InsertPos != MBB.end() && 380 InsertPos->getOpcode() == WebAssembly::END_LOOP) 381 ++InsertPos; 382 BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::LOOP)); 383 384 // Mark the end of the loop. 385 MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), DebugLoc(), 386 TII.get(WebAssembly::END_LOOP)); 387 LoopTops[End] = &MBB; 388 389 assert((!ScopeTops[AfterLoop->getNumber()] || 390 ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && 391 "With RPO we should visit the outer-most loop for a block first."); 392 if (!ScopeTops[AfterLoop->getNumber()]) 393 ScopeTops[AfterLoop->getNumber()] = &MBB; 394} 395 396static unsigned 397GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack, 398 const MachineBasicBlock *MBB) { 399 unsigned Depth = 0; 400 for (auto X : reverse(Stack)) { 401 if (X == MBB) 402 break; 403 ++Depth; 404 } 405 assert(Depth < Stack.size() && "Branch destination should be in scope"); 406 return Depth; 407} 408 409/// Insert LOOP and BLOCK markers at appropriate places. 410static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, 411 const WebAssemblyInstrInfo &TII, 412 MachineDominatorTree &MDT) { 413 // For each block whose label represents the end of a scope, record the block 414 // which holds the beginning of the scope. This will allow us to quickly skip 415 // over scoped regions when walking blocks. We allocate one more than the 416 // number of blocks in the function to accommodate for the possible fake block 417 // we may insert at the end. 418 SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1); 419 420 // For eacn LOOP_END, the corresponding LOOP. 421 DenseMap<const MachineInstr *, const MachineBasicBlock *> LoopTops; 422 423 for (auto &MBB : MF) { 424 // Place the LOOP for MBB if MBB is the header of a loop. 425 PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI); 426 427 // Place the BLOCK for MBB if MBB is branched to from above. 428 PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT); 429 } 430 431 // Now rewrite references to basic blocks to be depth immediates. 432 SmallVector<const MachineBasicBlock *, 8> Stack; 433 for (auto &MBB : reverse(MF)) { 434 for (auto &MI : reverse(MBB)) { 435 switch (MI.getOpcode()) { 436 case WebAssembly::BLOCK: 437 assert(ScopeTops[Stack.back()->getNumber()] == &MBB && 438 "Block should be balanced"); 439 Stack.pop_back(); 440 break; 441 case WebAssembly::LOOP: 442 assert(Stack.back() == &MBB && "Loop top should be balanced"); 443 Stack.pop_back(); 444 Stack.pop_back(); 445 break; 446 case WebAssembly::END_BLOCK: 447 Stack.push_back(&MBB); 448 break; 449 case WebAssembly::END_LOOP: 450 Stack.push_back(&MBB); 451 Stack.push_back(LoopTops[&MI]); 452 break; 453 default: 454 if (MI.isTerminator()) { 455 // Rewrite MBB operands to be depth immediates. 456 SmallVector<MachineOperand, 4> Ops(MI.operands()); 457 while (MI.getNumOperands() > 0) 458 MI.RemoveOperand(MI.getNumOperands() - 1); 459 for (auto MO : Ops) { 460 if (MO.isMBB()) 461 MO = MachineOperand::CreateImm(GetDepth(Stack, MO.getMBB())); 462 MI.addOperand(MF, MO); 463 } 464 } 465 break; 466 } 467 } 468 } 469 assert(Stack.empty() && "Control flow should be balanced"); 470} 471 472bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { 473 DEBUG(dbgs() << "********** CFG Stackifying **********\n" 474 "********** Function: " 475 << MF.getName() << '\n'); 476 477 const auto &MLI = getAnalysis<MachineLoopInfo>(); 478 auto &MDT = getAnalysis<MachineDominatorTree>(); 479 // Liveness is not tracked for EXPR_STACK physreg. 480 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 481 MF.getRegInfo().invalidateLiveness(); 482 483 // RPO sorting needs all loops to be single-entry. 484 EliminateMultipleEntryLoops(MF, MLI); 485 486 // Sort the blocks in RPO, with contiguous loops. 487 SortBlocks(MF, MLI); 488 489 // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. 490 PlaceMarkers(MF, MLI, TII, MDT); 491 492 return true; 493} 494