1//===- SpeculateAroundPHIs.cpp --------------------------------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h" 10#include "llvm/ADT/PostOrderIterator.h" 11#include "llvm/ADT/Sequence.h" 12#include "llvm/ADT/SetVector.h" 13#include "llvm/ADT/Statistic.h" 14#include "llvm/Analysis/TargetTransformInfo.h" 15#include "llvm/Analysis/ValueTracking.h" 16#include "llvm/IR/BasicBlock.h" 17#include "llvm/IR/IRBuilder.h" 18#include "llvm/IR/Instructions.h" 19#include "llvm/IR/IntrinsicInst.h" 20#include "llvm/Support/Debug.h" 21#include "llvm/Transforms/Utils/BasicBlockUtils.h" 22 23using namespace llvm; 24 25#define DEBUG_TYPE "spec-phis" 26 27STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around"); 28STATISTIC(NumEdgesSplit, 29 "Number of critical edges which were split for speculation"); 30STATISTIC(NumSpeculatedInstructions, 31 "Number of instructions we speculated around the PHI nodes"); 32STATISTIC(NumNewRedundantInstructions, 33 "Number of new, redundant instructions inserted"); 34 35/// Check whether speculating the users of a PHI node around the PHI 36/// will be safe. 37/// 38/// This checks both that all of the users are safe and also that all of their 39/// operands are either recursively safe or already available along an incoming 40/// edge to the PHI. 41/// 42/// This routine caches both all the safe nodes explored in `PotentialSpecSet` 43/// and the chain of nodes that definitively reach any unsafe node in 44/// `UnsafeSet`. By preserving these between repeated calls to this routine for 45/// PHIs in the same basic block, the exploration here can be reused. However, 46/// these caches must no be reused for PHIs in a different basic block as they 47/// reflect what is available along incoming edges. 48static bool 49isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT, 50 SmallPtrSetImpl<Instruction *> &PotentialSpecSet, 51 SmallPtrSetImpl<Instruction *> &UnsafeSet) { 52 auto *PhiBB = PN.getParent(); 53 SmallPtrSet<Instruction *, 4> Visited; 54 SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack; 55 56 // Walk each user of the PHI node. 57 for (Use &U : PN.uses()) { 58 auto *UI = cast<Instruction>(U.getUser()); 59 60 // Ensure the use post-dominates the PHI node. This ensures that, in the 61 // absence of unwinding, the use will actually be reached. 62 // FIXME: We use a blunt hammer of requiring them to be in the same basic 63 // block. We should consider using actual post-dominance here in the 64 // future. 65 if (UI->getParent() != PhiBB) { 66 LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI << "\n"); 67 return false; 68 } 69 70 if (const auto *CS = dyn_cast<CallBase>(UI)) { 71 if (CS->isConvergent() || CS->cannotDuplicate()) { 72 LLVM_DEBUG(dbgs() << " Unsafe: convergent " 73 "callsite cannot de duplicated: " << *UI << '\n'); 74 return false; 75 } 76 } 77 78 // FIXME: This check is much too conservative. We're not going to move these 79 // instructions onto new dynamic paths through the program unless there is 80 // a call instruction between the use and the PHI node. And memory isn't 81 // changing unless there is a store in that same sequence. We should 82 // probably change this to do at least a limited scan of the intervening 83 // instructions and allow handling stores in easily proven safe cases. 84 if (mayBeMemoryDependent(*UI)) { 85 LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI << "\n"); 86 return false; 87 } 88 89 // Now do a depth-first search of everything these users depend on to make 90 // sure they are transitively safe. This is a depth-first search, but we 91 // check nodes in preorder to minimize the amount of checking. 92 Visited.insert(UI); 93 DFSStack.push_back({UI, UI->value_op_begin()}); 94 do { 95 User::value_op_iterator OpIt; 96 std::tie(UI, OpIt) = DFSStack.pop_back_val(); 97 98 while (OpIt != UI->value_op_end()) { 99 auto *OpI = dyn_cast<Instruction>(*OpIt); 100 // Increment to the next operand for whenever we continue. 101 ++OpIt; 102 // No need to visit non-instructions, which can't form dependencies. 103 if (!OpI) 104 continue; 105 106 // Now do the main pre-order checks that this operand is a viable 107 // dependency of something we want to speculate. 108 109 // First do a few checks for instructions that won't require 110 // speculation at all because they are trivially available on the 111 // incoming edge (either through dominance or through an incoming value 112 // to a PHI). 113 // 114 // The cases in the current block will be trivially dominated by the 115 // edge. 116 auto *ParentBB = OpI->getParent(); 117 if (ParentBB == PhiBB) { 118 if (isa<PHINode>(OpI)) { 119 // We can trivially map through phi nodes in the same block. 120 continue; 121 } 122 } else if (DT.dominates(ParentBB, PhiBB)) { 123 // Instructions from dominating blocks are already available. 124 continue; 125 } 126 127 // Once we know that we're considering speculating the operand, check 128 // if we've already explored this subgraph and found it to be safe. 129 if (PotentialSpecSet.count(OpI)) 130 continue; 131 132 // If we've already explored this subgraph and found it unsafe, bail. 133 // If when we directly test whether this is safe it fails, bail. 134 if (UnsafeSet.count(OpI) || ParentBB != PhiBB || 135 mayBeMemoryDependent(*OpI)) { 136 LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: " 137 << *OpI << "\n"); 138 // Record the stack of instructions which reach this node as unsafe 139 // so we prune subsequent searches. 140 UnsafeSet.insert(OpI); 141 for (auto &StackPair : DFSStack) { 142 Instruction *I = StackPair.first; 143 UnsafeSet.insert(I); 144 } 145 return false; 146 } 147 148 // Skip any operands we're already recursively checking. 149 if (!Visited.insert(OpI).second) 150 continue; 151 152 // Push onto the stack and descend. We can directly continue this 153 // loop when ascending. 154 DFSStack.push_back({UI, OpIt}); 155 UI = OpI; 156 OpIt = OpI->value_op_begin(); 157 } 158 159 // This node and all its operands are safe. Go ahead and cache that for 160 // reuse later. 161 PotentialSpecSet.insert(UI); 162 163 // Continue with the next node on the stack. 164 } while (!DFSStack.empty()); 165 } 166 167#ifndef NDEBUG 168 // Every visited operand should have been marked as safe for speculation at 169 // this point. Verify this and return success. 170 for (auto *I : Visited) 171 assert(PotentialSpecSet.count(I) && 172 "Failed to mark a visited instruction as safe!"); 173#endif 174 return true; 175} 176 177/// Check whether, in isolation, a given PHI node is both safe and profitable 178/// to speculate users around. 179/// 180/// This handles checking whether there are any constant operands to a PHI 181/// which could represent a useful speculation candidate, whether the users of 182/// the PHI are safe to speculate including all their transitive dependencies, 183/// and whether after speculation there will be some cost savings (profit) to 184/// folding the operands into the users of the PHI node. Returns true if both 185/// safe and profitable with relevant cost savings updated in the map and with 186/// an update to the `PotentialSpecSet`. Returns false if either safety or 187/// profitability are absent. Some new entries may be made to the 188/// `PotentialSpecSet` even when this routine returns false, but they remain 189/// conservatively correct. 190/// 191/// The profitability check here is a local one, but it checks this in an 192/// interesting way. Beyond checking that the total cost of materializing the 193/// constants will be less than the cost of folding them into their users, it 194/// also checks that no one incoming constant will have a higher cost when 195/// folded into its users rather than materialized. This higher cost could 196/// result in a dynamic *path* that is more expensive even when the total cost 197/// is lower. Currently, all of the interesting cases where this optimization 198/// should fire are ones where it is a no-loss operation in this sense. If we 199/// ever want to be more aggressive here, we would need to balance the 200/// different incoming edges' cost by looking at their respective 201/// probabilities. 202static bool isSafeAndProfitableToSpeculateAroundPHI( 203 PHINode &PN, SmallDenseMap<PHINode *, InstructionCost, 16> &CostSavingsMap, 204 SmallPtrSetImpl<Instruction *> &PotentialSpecSet, 205 SmallPtrSetImpl<Instruction *> &UnsafeSet, DominatorTree &DT, 206 TargetTransformInfo &TTI) { 207 // First see whether there is any cost savings to speculating around this 208 // PHI, and build up a map of the constant inputs to how many times they 209 // occur. 210 bool NonFreeMat = false; 211 struct CostsAndCount { 212 InstructionCost MatCost = TargetTransformInfo::TCC_Free; 213 InstructionCost FoldedCost = TargetTransformInfo::TCC_Free; 214 int Count = 0; 215 }; 216 SmallDenseMap<ConstantInt *, CostsAndCount, 16> CostsAndCounts; 217 SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks; 218 for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) { 219 auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i)); 220 if (!IncomingC) 221 continue; 222 223 // Only visit each incoming edge with a constant input once. 224 if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second) 225 continue; 226 227 auto InsertResult = CostsAndCounts.insert({IncomingC, {}}); 228 // Count how many edges share a given incoming costant. 229 ++InsertResult.first->second.Count; 230 // Only compute the cost the first time we see a particular constant. 231 if (!InsertResult.second) 232 continue; 233 234 InstructionCost &MatCost = InsertResult.first->second.MatCost; 235 MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType(), 236 TargetTransformInfo::TCK_SizeAndLatency); 237 NonFreeMat |= MatCost != TTI.TCC_Free; 238 } 239 if (!NonFreeMat) { 240 LLVM_DEBUG(dbgs() << " Free: " << PN << "\n"); 241 // No profit in free materialization. 242 return false; 243 } 244 245 // Now check that the uses of this PHI can actually be speculated, 246 // otherwise we'll still have to materialize the PHI value. 247 if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) { 248 LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN << "\n"); 249 return false; 250 } 251 252 // Compute how much (if any) savings are available by speculating around this 253 // PHI. 254 for (Use &U : PN.uses()) { 255 auto *UserI = cast<Instruction>(U.getUser()); 256 // Now check whether there is any savings to folding the incoming constants 257 // into this use. 258 unsigned Idx = U.getOperandNo(); 259 260 // If we have a binary operator that is commutative, an actual constant 261 // operand would end up on the RHS, so pretend the use of the PHI is on the 262 // RHS. 263 // 264 // Technically, this is a bit weird if *both* operands are PHIs we're 265 // speculating. But if that is the case, giving an "optimistic" cost isn't 266 // a bad thing because after speculation it will constant fold. And 267 // moreover, such cases should likely have been constant folded already by 268 // some other pass, so we shouldn't worry about "modeling" them terribly 269 // accurately here. Similarly, if the other operand is a constant, it still 270 // seems fine to be "optimistic" in our cost modeling, because when the 271 // incoming operand from the PHI node is also a constant, we will end up 272 // constant folding. 273 if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1) 274 // Assume we will commute the constant to the RHS to be canonical. 275 Idx = 1; 276 277 // Get the intrinsic ID if this user is an intrinsic. 278 Intrinsic::ID IID = Intrinsic::not_intrinsic; 279 if (auto *UserII = dyn_cast<IntrinsicInst>(UserI)) 280 IID = UserII->getIntrinsicID(); 281 282 for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) { 283 ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first; 284 InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost; 285 InstructionCost &FoldedCost = 286 IncomingConstantAndCostsAndCount.second.FoldedCost; 287 if (IID) 288 FoldedCost += 289 TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(), 290 IncomingC->getType(), 291 TargetTransformInfo::TCK_SizeAndLatency); 292 else 293 FoldedCost += 294 TTI.getIntImmCostInst(UserI->getOpcode(), Idx, 295 IncomingC->getValue(), IncomingC->getType(), 296 TargetTransformInfo::TCK_SizeAndLatency); 297 298 // If we accumulate more folded cost for this incoming constant than 299 // materialized cost, then we'll regress any edge with this constant so 300 // just bail. We're only interested in cases where folding the incoming 301 // constants is at least break-even on all paths. 302 if (FoldedCost > MatCost) { 303 LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC 304 << "\n" 305 " Materializing cost: " 306 << MatCost 307 << "\n" 308 " Accumulated folded cost: " 309 << FoldedCost << "\n"); 310 return false; 311 } 312 } 313 } 314 315 // Compute the total cost savings afforded by this PHI node. 316 InstructionCost TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free; 317 for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) { 318 InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost; 319 InstructionCost FoldedCost = 320 IncomingConstantAndCostsAndCount.second.FoldedCost; 321 int Count = IncomingConstantAndCostsAndCount.second.Count; 322 323 TotalMatCost += MatCost * Count; 324 TotalFoldedCost += FoldedCost * Count; 325 } 326 assert(TotalMatCost.isValid() && "Constants must be materializable"); 327 assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is " 328 "less that its materialized cost, " 329 "the sum must be as well."); 330 LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost - TotalFoldedCost) 331 << ": " << PN << "\n"); 332 CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost; 333 return true; 334} 335 336/// Simple helper to walk all the users of a list of phis depth first, and call 337/// a visit function on each one in post-order. 338/// 339/// All of the PHIs should be in the same basic block, and this is primarily 340/// used to make a single depth-first walk across their collective users 341/// without revisiting any subgraphs. Callers should provide a fast, idempotent 342/// callable to test whether a node has been visited and the more important 343/// callable to actually visit a particular node. 344/// 345/// Depth-first and postorder here refer to the *operand* graph -- we start 346/// from a collection of users of PHI nodes and walk "up" the operands 347/// depth-first. 348template <typename IsVisitedT, typename VisitT> 349static void visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode *> PNs, 350 IsVisitedT IsVisited, 351 VisitT Visit) { 352 SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack; 353 for (auto *PN : PNs) 354 for (Use &U : PN->uses()) { 355 auto *UI = cast<Instruction>(U.getUser()); 356 if (IsVisited(UI)) 357 // Already visited this user, continue across the roots. 358 continue; 359 360 // Otherwise, walk the operand graph depth-first and visit each 361 // dependency in postorder. 362 DFSStack.push_back({UI, UI->value_op_begin()}); 363 do { 364 User::value_op_iterator OpIt; 365 std::tie(UI, OpIt) = DFSStack.pop_back_val(); 366 while (OpIt != UI->value_op_end()) { 367 auto *OpI = dyn_cast<Instruction>(*OpIt); 368 // Increment to the next operand for whenever we continue. 369 ++OpIt; 370 // No need to visit non-instructions, which can't form dependencies, 371 // or instructions outside of our potential dependency set that we 372 // were given. Finally, if we've already visited the node, continue 373 // to the next. 374 if (!OpI || IsVisited(OpI)) 375 continue; 376 377 // Push onto the stack and descend. We can directly continue this 378 // loop when ascending. 379 DFSStack.push_back({UI, OpIt}); 380 UI = OpI; 381 OpIt = OpI->value_op_begin(); 382 } 383 384 // Finished visiting children, visit this node. 385 assert(!IsVisited(UI) && "Should not have already visited a node!"); 386 Visit(UI); 387 } while (!DFSStack.empty()); 388 } 389} 390 391/// Find profitable PHIs to speculate. 392/// 393/// For a PHI node to be profitable, we need the cost of speculating its users 394/// (and their dependencies) to not exceed the savings of folding the PHI's 395/// constant operands into the speculated users. 396/// 397/// Computing this is surprisingly challenging. Because users of two different 398/// PHI nodes can depend on each other or on common other instructions, it may 399/// be profitable to speculate two PHI nodes together even though neither one 400/// in isolation is profitable. The straightforward way to find all the 401/// profitable PHIs would be to check each combination of PHIs' cost, but this 402/// is exponential in complexity. 403/// 404/// Even if we assume that we only care about cases where we can consider each 405/// PHI node in isolation (rather than considering cases where none are 406/// profitable in isolation but some subset are profitable as a set), we still 407/// have a challenge. The obvious way to find all individually profitable PHIs 408/// is to iterate until reaching a fixed point, but this will be quadratic in 409/// complexity. =/ 410/// 411/// This code currently uses a linear-to-compute order for a greedy approach. 412/// It won't find cases where a set of PHIs must be considered together, but it 413/// handles most cases of order dependence without quadratic iteration. The 414/// specific order used is the post-order across the operand DAG. When the last 415/// user of a PHI is visited in this postorder walk, we check it for 416/// profitability. 417/// 418/// There is an orthogonal extra complexity to all of this: computing the cost 419/// itself can easily become a linear computation making everything again (at 420/// best) quadratic. Using a postorder over the operand graph makes it 421/// particularly easy to avoid this through dynamic programming. As we do the 422/// postorder walk, we build the transitive cost of that subgraph. It is also 423/// straightforward to then update these costs when we mark a PHI for 424/// speculation so that subsequent PHIs don't re-pay the cost of already 425/// speculated instructions. 426static SmallVector<PHINode *, 16> findProfitablePHIs( 427 ArrayRef<PHINode *> PNs, 428 const SmallDenseMap<PHINode *, InstructionCost, 16> &CostSavingsMap, 429 const SmallPtrSetImpl<Instruction *> &PotentialSpecSet, int NumPreds, 430 DominatorTree &DT, TargetTransformInfo &TTI) { 431 SmallVector<PHINode *, 16> SpecPNs; 432 433 // First, establish a reverse mapping from immediate users of the PHI nodes 434 // to the nodes themselves, and count how many users each PHI node has in 435 // a way we can update while processing them. 436 SmallDenseMap<Instruction *, TinyPtrVector<PHINode *>, 16> UserToPNMap; 437 SmallDenseMap<PHINode *, int, 16> PNUserCountMap; 438 SmallPtrSet<Instruction *, 16> UserSet; 439 for (auto *PN : PNs) { 440 assert(UserSet.empty() && "Must start with an empty user set!"); 441 for (Use &U : PN->uses()) 442 UserSet.insert(cast<Instruction>(U.getUser())); 443 PNUserCountMap[PN] = UserSet.size(); 444 for (auto *UI : UserSet) 445 UserToPNMap.insert({UI, {}}).first->second.push_back(PN); 446 UserSet.clear(); 447 } 448 449 // Now do a DFS across the operand graph of the users, computing cost as we 450 // go and when all costs for a given PHI are known, checking that PHI for 451 // profitability. 452 SmallDenseMap<Instruction *, InstructionCost, 16> SpecCostMap; 453 visitPHIUsersAndDepsInPostOrder( 454 PNs, 455 /*IsVisited*/ 456 [&](Instruction *I) { 457 // We consider anything that isn't potentially speculated to be 458 // "visited" as it is already handled. Similarly, anything that *is* 459 // potentially speculated but for which we have an entry in our cost 460 // map, we're done. 461 return !PotentialSpecSet.count(I) || SpecCostMap.count(I); 462 }, 463 /*Visit*/ 464 [&](Instruction *I) { 465 // We've fully visited the operands, so sum their cost with this node 466 // and update the cost map. 467 InstructionCost Cost = TTI.TCC_Free; 468 for (Value *OpV : I->operand_values()) 469 if (auto *OpI = dyn_cast<Instruction>(OpV)) { 470 auto CostMapIt = SpecCostMap.find(OpI); 471 if (CostMapIt != SpecCostMap.end()) 472 Cost += CostMapIt->second; 473 } 474 Cost += TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); 475 bool Inserted = SpecCostMap.insert({I, Cost}).second; 476 (void)Inserted; 477 assert(Inserted && "Must not re-insert a cost during the DFS!"); 478 479 // Now check if this node had a corresponding PHI node using it. If so, 480 // we need to decrement the outstanding user count for it. 481 auto UserPNsIt = UserToPNMap.find(I); 482 if (UserPNsIt == UserToPNMap.end()) 483 return; 484 auto &UserPNs = UserPNsIt->second; 485 auto UserPNsSplitIt = std::stable_partition( 486 UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) { 487 int &PNUserCount = PNUserCountMap.find(UserPN)->second; 488 assert( 489 PNUserCount > 0 && 490 "Should never re-visit a PN after its user count hits zero!"); 491 --PNUserCount; 492 return PNUserCount != 0; 493 }); 494 495 // FIXME: Rather than one at a time, we should sum the savings as the 496 // cost will be completely shared. 497 SmallVector<Instruction *, 16> SpecWorklist; 498 for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) { 499 InstructionCost SpecCost = TTI.TCC_Free; 500 for (Use &U : PN->uses()) 501 SpecCost += 502 SpecCostMap.find(cast<Instruction>(U.getUser()))->second; 503 SpecCost *= (NumPreds - 1); 504 // When the user count of a PHI node hits zero, we should check its 505 // profitability. If profitable, we should mark it for speculation 506 // and zero out the cost of everything it depends on. 507 InstructionCost CostSavings = CostSavingsMap.find(PN)->second; 508 if (SpecCost > CostSavings) { 509 LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN 510 << "\n" 511 " Cost savings: " 512 << CostSavings 513 << "\n" 514 " Speculation cost: " 515 << SpecCost << "\n"); 516 continue; 517 } 518 519 // We're going to speculate this user-associated PHI. Copy it out and 520 // add its users to the worklist to update their cost. 521 SpecPNs.push_back(PN); 522 for (Use &U : PN->uses()) { 523 auto *UI = cast<Instruction>(U.getUser()); 524 auto CostMapIt = SpecCostMap.find(UI); 525 if (CostMapIt->second == 0) 526 continue; 527 // Zero out this cost entry to avoid duplicates. 528 CostMapIt->second = 0; 529 SpecWorklist.push_back(UI); 530 } 531 } 532 533 // Now walk all the operands of the users in the worklist transitively 534 // to zero out all the memoized costs. 535 while (!SpecWorklist.empty()) { 536 Instruction *SpecI = SpecWorklist.pop_back_val(); 537 assert(SpecCostMap.find(SpecI)->second == 0 && 538 "Didn't zero out a cost!"); 539 540 // Walk the operands recursively to zero out their cost as well. 541 for (auto *OpV : SpecI->operand_values()) { 542 auto *OpI = dyn_cast<Instruction>(OpV); 543 if (!OpI) 544 continue; 545 auto CostMapIt = SpecCostMap.find(OpI); 546 if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0) 547 continue; 548 CostMapIt->second = 0; 549 SpecWorklist.push_back(OpI); 550 } 551 } 552 }); 553 554 return SpecPNs; 555} 556 557/// Speculate users around a set of PHI nodes. 558/// 559/// This routine does the actual speculation around a set of PHI nodes where we 560/// have determined this to be both safe and profitable. 561/// 562/// This routine handles any spliting of critical edges necessary to create 563/// a safe block to speculate into as well as cloning the instructions and 564/// rewriting all uses. 565static void speculatePHIs(ArrayRef<PHINode *> SpecPNs, 566 SmallPtrSetImpl<Instruction *> &PotentialSpecSet, 567 SmallSetVector<BasicBlock *, 16> &PredSet, 568 DominatorTree &DT) { 569 LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs.size() << " PHIs!\n"); 570 NumPHIsSpeculated += SpecPNs.size(); 571 572 // Split any critical edges so that we have a block to hoist into. 573 auto *ParentBB = SpecPNs[0]->getParent(); 574 SmallVector<BasicBlock *, 16> SpecPreds; 575 SpecPreds.reserve(PredSet.size()); 576 for (auto *PredBB : PredSet) { 577 auto *NewPredBB = SplitCriticalEdge( 578 PredBB, ParentBB, 579 CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges()); 580 if (NewPredBB) { 581 ++NumEdgesSplit; 582 LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB->getName() 583 << "\n"); 584 SpecPreds.push_back(NewPredBB); 585 } else { 586 assert(PredBB->getSingleSuccessor() == ParentBB && 587 "We need a non-critical predecessor to speculate into."); 588 assert(!isa<InvokeInst>(PredBB->getTerminator()) && 589 "Cannot have a non-critical invoke!"); 590 591 // Already non-critical, use existing pred. 592 SpecPreds.push_back(PredBB); 593 } 594 } 595 596 SmallPtrSet<Instruction *, 16> SpecSet; 597 SmallVector<Instruction *, 16> SpecList; 598 visitPHIUsersAndDepsInPostOrder(SpecPNs, 599 /*IsVisited*/ 600 [&](Instruction *I) { 601 // This is visited if we don't need to 602 // speculate it or we already have 603 // speculated it. 604 return !PotentialSpecSet.count(I) || 605 SpecSet.count(I); 606 }, 607 /*Visit*/ 608 [&](Instruction *I) { 609 // All operands scheduled, schedule this 610 // node. 611 SpecSet.insert(I); 612 SpecList.push_back(I); 613 }); 614 615 int NumSpecInsts = SpecList.size() * SpecPreds.size(); 616 int NumRedundantInsts = NumSpecInsts - SpecList.size(); 617 LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts 618 << " speculated instructions, " << NumRedundantInsts 619 << " redundancies\n"); 620 NumSpeculatedInstructions += NumSpecInsts; 621 NumNewRedundantInstructions += NumRedundantInsts; 622 623 // Each predecessor is numbered by its index in `SpecPreds`, so for each 624 // instruction we speculate, the speculated instruction is stored in that 625 // index of the vector associated with the original instruction. We also 626 // store the incoming values for each predecessor from any PHIs used. 627 SmallDenseMap<Instruction *, SmallVector<Value *, 2>, 16> SpeculatedValueMap; 628 629 // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming 630 // value. This handles both the PHIs we are speculating around and any other 631 // PHIs that happen to be used. 632 for (auto *OrigI : SpecList) 633 for (auto *OpV : OrigI->operand_values()) { 634 auto *OpPN = dyn_cast<PHINode>(OpV); 635 if (!OpPN || OpPN->getParent() != ParentBB) 636 continue; 637 638 auto InsertResult = SpeculatedValueMap.insert({OpPN, {}}); 639 if (!InsertResult.second) 640 continue; 641 642 auto &SpeculatedVals = InsertResult.first->second; 643 644 // Populating our structure for mapping is particularly annoying because 645 // finding an incoming value for a particular predecessor block in a PHI 646 // node is a linear time operation! To avoid quadratic behavior, we build 647 // a map for this PHI node's incoming values and then translate it into 648 // the more compact representation used below. 649 SmallDenseMap<BasicBlock *, Value *, 16> IncomingValueMap; 650 for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues())) 651 IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i); 652 653 for (auto *PredBB : SpecPreds) 654 SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second); 655 } 656 657 // Speculate into each predecessor. 658 for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) { 659 auto *PredBB = SpecPreds[PredIdx]; 660 assert(PredBB->getSingleSuccessor() == ParentBB && 661 "We need a non-critical predecessor to speculate into."); 662 663 for (auto *OrigI : SpecList) { 664 auto *NewI = OrigI->clone(); 665 NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx)); 666 NewI->insertBefore(PredBB->getTerminator()); 667 668 // Rewrite all the operands to the previously speculated instructions. 669 // Because we're walking in-order, the defs must precede the uses and we 670 // should already have these mappings. 671 for (Use &U : NewI->operands()) { 672 auto *OpI = dyn_cast<Instruction>(U.get()); 673 if (!OpI) 674 continue; 675 auto MapIt = SpeculatedValueMap.find(OpI); 676 if (MapIt == SpeculatedValueMap.end()) 677 continue; 678 const auto &SpeculatedVals = MapIt->second; 679 assert(SpeculatedVals[PredIdx] && 680 "Must have a speculated value for this predecessor!"); 681 assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() && 682 "Speculated value has the wrong type!"); 683 684 // Rewrite the use to this predecessor's speculated instruction. 685 U.set(SpeculatedVals[PredIdx]); 686 } 687 688 // Commute instructions which now have a constant in the LHS but not the 689 // RHS. 690 if (NewI->isBinaryOp() && NewI->isCommutative() && 691 isa<Constant>(NewI->getOperand(0)) && 692 !isa<Constant>(NewI->getOperand(1))) 693 NewI->getOperandUse(0).swap(NewI->getOperandUse(1)); 694 695 SpeculatedValueMap[OrigI].push_back(NewI); 696 assert(SpeculatedValueMap[OrigI][PredIdx] == NewI && 697 "Mismatched speculated instruction index!"); 698 } 699 } 700 701 // Walk the speculated instruction list and if they have uses, insert a PHI 702 // for them from the speculated versions, and replace the uses with the PHI. 703 // Then erase the instructions as they have been fully speculated. The walk 704 // needs to be in reverse so that we don't think there are users when we'll 705 // actually eventually remove them later. 706 IRBuilder<> IRB(SpecPNs[0]); 707 for (auto *OrigI : llvm::reverse(SpecList)) { 708 // Check if we need a PHI for any remaining users and if so, insert it. 709 if (!OrigI->use_empty()) { 710 auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(), 711 Twine(OrigI->getName()) + ".phi"); 712 // Add the incoming values we speculated. 713 auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second; 714 for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) 715 SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]); 716 717 // And replace the uses with the PHI node. 718 OrigI->replaceAllUsesWith(SpecIPN); 719 } 720 721 // It is important to immediately erase this so that it stops using other 722 // instructions. This avoids inserting needless PHIs of them. 723 OrigI->eraseFromParent(); 724 } 725 726 // All of the uses of the speculated phi nodes should be removed at this 727 // point, so erase them. 728 for (auto *SpecPN : SpecPNs) { 729 assert(SpecPN->use_empty() && "All users should have been speculated!"); 730 SpecPN->eraseFromParent(); 731 } 732} 733 734/// Try to speculate around a series of PHIs from a single basic block. 735/// 736/// This routine checks whether any of these PHIs are profitable to speculate 737/// users around. If safe and profitable, it does the speculation. It returns 738/// true when at least some speculation occurs. 739static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs, 740 DominatorTree &DT, TargetTransformInfo &TTI) { 741 LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n"); 742 743 // Savings in cost from speculating around a PHI node. 744 SmallDenseMap<PHINode *, InstructionCost, 16> CostSavingsMap; 745 746 // Remember the set of instructions that are candidates for speculation so 747 // that we can quickly walk things within that space. This prunes out 748 // instructions already available along edges, etc. 749 SmallPtrSet<Instruction *, 16> PotentialSpecSet; 750 751 // Remember the set of instructions that are (transitively) unsafe to 752 // speculate into the incoming edges of this basic block. This avoids 753 // recomputing them for each PHI node we check. This set is specific to this 754 // block though as things are pruned out of it based on what is available 755 // along incoming edges. 756 SmallPtrSet<Instruction *, 16> UnsafeSet; 757 758 // For each PHI node in this block, check whether there are immediate folding 759 // opportunities from speculation, and whether that speculation will be 760 // valid. This determise the set of safe PHIs to speculate. 761 llvm::erase_if(PNs, [&](PHINode *PN) { 762 return !isSafeAndProfitableToSpeculateAroundPHI( 763 *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI); 764 }); 765 // If no PHIs were profitable, skip. 766 if (PNs.empty()) { 767 LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n"); 768 return false; 769 } 770 771 // We need to know how much speculation will cost which is determined by how 772 // many incoming edges will need a copy of each speculated instruction. 773 SmallSetVector<BasicBlock *, 16> PredSet; 774 for (auto *PredBB : PNs[0]->blocks()) { 775 if (!PredSet.insert(PredBB)) 776 continue; 777 778 // We cannot speculate when a predecessor is an indirect branch. 779 // FIXME: We also can't reliably create a non-critical edge block for 780 // speculation if the predecessor is an invoke. This doesn't seem 781 // fundamental and we should probably be splitting critical edges 782 // differently. 783 const auto *TermInst = PredBB->getTerminator(); 784 if (isa<IndirectBrInst>(TermInst) || 785 isa<InvokeInst>(TermInst) || 786 isa<CallBrInst>(TermInst)) { 787 LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: " 788 << PredBB->getName() << "\n"); 789 return false; 790 } 791 } 792 if (PredSet.size() < 2) { 793 LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n"); 794 return false; 795 } 796 797 SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs( 798 PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI); 799 if (SpecPNs.empty()) 800 // Nothing to do. 801 return false; 802 803 speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT); 804 return true; 805} 806 807PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F, 808 FunctionAnalysisManager &AM) { 809 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 810 auto &TTI = AM.getResult<TargetIRAnalysis>(F); 811 812 bool Changed = false; 813 for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) { 814 SmallVector<PHINode *, 16> PNs; 815 auto BBI = BB->begin(); 816 while (auto *PN = dyn_cast<PHINode>(&*BBI)) { 817 PNs.push_back(PN); 818 ++BBI; 819 } 820 821 if (PNs.empty()) 822 continue; 823 824 Changed |= tryToSpeculatePHIs(PNs, DT, TTI); 825 } 826 827 if (!Changed) 828 return PreservedAnalyses::all(); 829 830 PreservedAnalyses PA; 831 return PA; 832} 833