1//===- SpeculateAroundPHIs.cpp --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
10#include "llvm/ADT/PostOrderIterator.h"
11#include "llvm/ADT/Sequence.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/Statistic.h"
14#include "llvm/Analysis/TargetTransformInfo.h"
15#include "llvm/Analysis/ValueTracking.h"
16#include "llvm/IR/BasicBlock.h"
17#include "llvm/IR/IRBuilder.h"
18#include "llvm/IR/Instructions.h"
19#include "llvm/IR/IntrinsicInst.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/Transforms/Utils/BasicBlockUtils.h"
22
23using namespace llvm;
24
25#define DEBUG_TYPE "spec-phis"
26
27STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around");
28STATISTIC(NumEdgesSplit,
29          "Number of critical edges which were split for speculation");
30STATISTIC(NumSpeculatedInstructions,
31          "Number of instructions we speculated around the PHI nodes");
32STATISTIC(NumNewRedundantInstructions,
33          "Number of new, redundant instructions inserted");
34
35/// Check whether speculating the users of a PHI node around the PHI
36/// will be safe.
37///
38/// This checks both that all of the users are safe and also that all of their
39/// operands are either recursively safe or already available along an incoming
40/// edge to the PHI.
41///
42/// This routine caches both all the safe nodes explored in `PotentialSpecSet`
43/// and the chain of nodes that definitively reach any unsafe node in
44/// `UnsafeSet`. By preserving these between repeated calls to this routine for
45/// PHIs in the same basic block, the exploration here can be reused. However,
46/// these caches must no be reused for PHIs in a different basic block as they
47/// reflect what is available along incoming edges.
48static bool
49isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
50                          SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
51                          SmallPtrSetImpl<Instruction *> &UnsafeSet) {
52  auto *PhiBB = PN.getParent();
53  SmallPtrSet<Instruction *, 4> Visited;
54  SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
55
56  // Walk each user of the PHI node.
57  for (Use &U : PN.uses()) {
58    auto *UI = cast<Instruction>(U.getUser());
59
60    // Ensure the use post-dominates the PHI node. This ensures that, in the
61    // absence of unwinding, the use will actually be reached.
62    // FIXME: We use a blunt hammer of requiring them to be in the same basic
63    // block. We should consider using actual post-dominance here in the
64    // future.
65    if (UI->getParent() != PhiBB) {
66      LLVM_DEBUG(dbgs() << "  Unsafe: use in a different BB: " << *UI << "\n");
67      return false;
68    }
69
70    if (const auto *CS = dyn_cast<CallBase>(UI)) {
71      if (CS->isConvergent() || CS->cannotDuplicate()) {
72        LLVM_DEBUG(dbgs() << "  Unsafe: convergent "
73                   "callsite cannot de duplicated: " << *UI << '\n');
74        return false;
75      }
76    }
77
78    // FIXME: This check is much too conservative. We're not going to move these
79    // instructions onto new dynamic paths through the program unless there is
80    // a call instruction between the use and the PHI node. And memory isn't
81    // changing unless there is a store in that same sequence. We should
82    // probably change this to do at least a limited scan of the intervening
83    // instructions and allow handling stores in easily proven safe cases.
84    if (mayBeMemoryDependent(*UI)) {
85      LLVM_DEBUG(dbgs() << "  Unsafe: can't speculate use: " << *UI << "\n");
86      return false;
87    }
88
89    // Now do a depth-first search of everything these users depend on to make
90    // sure they are transitively safe. This is a depth-first search, but we
91    // check nodes in preorder to minimize the amount of checking.
92    Visited.insert(UI);
93    DFSStack.push_back({UI, UI->value_op_begin()});
94    do {
95      User::value_op_iterator OpIt;
96      std::tie(UI, OpIt) = DFSStack.pop_back_val();
97
98      while (OpIt != UI->value_op_end()) {
99        auto *OpI = dyn_cast<Instruction>(*OpIt);
100        // Increment to the next operand for whenever we continue.
101        ++OpIt;
102        // No need to visit non-instructions, which can't form dependencies.
103        if (!OpI)
104          continue;
105
106        // Now do the main pre-order checks that this operand is a viable
107        // dependency of something we want to speculate.
108
109        // First do a few checks for instructions that won't require
110        // speculation at all because they are trivially available on the
111        // incoming edge (either through dominance or through an incoming value
112        // to a PHI).
113        //
114        // The cases in the current block will be trivially dominated by the
115        // edge.
116        auto *ParentBB = OpI->getParent();
117        if (ParentBB == PhiBB) {
118          if (isa<PHINode>(OpI)) {
119            // We can trivially map through phi nodes in the same block.
120            continue;
121          }
122        } else if (DT.dominates(ParentBB, PhiBB)) {
123          // Instructions from dominating blocks are already available.
124          continue;
125        }
126
127        // Once we know that we're considering speculating the operand, check
128        // if we've already explored this subgraph and found it to be safe.
129        if (PotentialSpecSet.count(OpI))
130          continue;
131
132        // If we've already explored this subgraph and found it unsafe, bail.
133        // If when we directly test whether this is safe it fails, bail.
134        if (UnsafeSet.count(OpI) || ParentBB != PhiBB ||
135            mayBeMemoryDependent(*OpI)) {
136          LLVM_DEBUG(dbgs() << "  Unsafe: can't speculate transitive use: "
137                            << *OpI << "\n");
138          // Record the stack of instructions which reach this node as unsafe
139          // so we prune subsequent searches.
140          UnsafeSet.insert(OpI);
141          for (auto &StackPair : DFSStack) {
142            Instruction *I = StackPair.first;
143            UnsafeSet.insert(I);
144          }
145          return false;
146        }
147
148        // Skip any operands we're already recursively checking.
149        if (!Visited.insert(OpI).second)
150          continue;
151
152        // Push onto the stack and descend. We can directly continue this
153        // loop when ascending.
154        DFSStack.push_back({UI, OpIt});
155        UI = OpI;
156        OpIt = OpI->value_op_begin();
157      }
158
159      // This node and all its operands are safe. Go ahead and cache that for
160      // reuse later.
161      PotentialSpecSet.insert(UI);
162
163      // Continue with the next node on the stack.
164    } while (!DFSStack.empty());
165  }
166
167#ifndef NDEBUG
168  // Every visited operand should have been marked as safe for speculation at
169  // this point. Verify this and return success.
170  for (auto *I : Visited)
171    assert(PotentialSpecSet.count(I) &&
172           "Failed to mark a visited instruction as safe!");
173#endif
174  return true;
175}
176
177/// Check whether, in isolation, a given PHI node is both safe and profitable
178/// to speculate users around.
179///
180/// This handles checking whether there are any constant operands to a PHI
181/// which could represent a useful speculation candidate, whether the users of
182/// the PHI are safe to speculate including all their transitive dependencies,
183/// and whether after speculation there will be some cost savings (profit) to
184/// folding the operands into the users of the PHI node. Returns true if both
185/// safe and profitable with relevant cost savings updated in the map and with
186/// an update to the `PotentialSpecSet`. Returns false if either safety or
187/// profitability are absent. Some new entries may be made to the
188/// `PotentialSpecSet` even when this routine returns false, but they remain
189/// conservatively correct.
190///
191/// The profitability check here is a local one, but it checks this in an
192/// interesting way. Beyond checking that the total cost of materializing the
193/// constants will be less than the cost of folding them into their users, it
194/// also checks that no one incoming constant will have a higher cost when
195/// folded into its users rather than materialized. This higher cost could
196/// result in a dynamic *path* that is more expensive even when the total cost
197/// is lower. Currently, all of the interesting cases where this optimization
198/// should fire are ones where it is a no-loss operation in this sense. If we
199/// ever want to be more aggressive here, we would need to balance the
200/// different incoming edges' cost by looking at their respective
201/// probabilities.
202static bool isSafeAndProfitableToSpeculateAroundPHI(
203    PHINode &PN, SmallDenseMap<PHINode *, InstructionCost, 16> &CostSavingsMap,
204    SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
205    SmallPtrSetImpl<Instruction *> &UnsafeSet, DominatorTree &DT,
206    TargetTransformInfo &TTI) {
207  // First see whether there is any cost savings to speculating around this
208  // PHI, and build up a map of the constant inputs to how many times they
209  // occur.
210  bool NonFreeMat = false;
211  struct CostsAndCount {
212    InstructionCost MatCost = TargetTransformInfo::TCC_Free;
213    InstructionCost FoldedCost = TargetTransformInfo::TCC_Free;
214    int Count = 0;
215  };
216  SmallDenseMap<ConstantInt *, CostsAndCount, 16> CostsAndCounts;
217  SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks;
218  for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) {
219    auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i));
220    if (!IncomingC)
221      continue;
222
223    // Only visit each incoming edge with a constant input once.
224    if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second)
225      continue;
226
227    auto InsertResult = CostsAndCounts.insert({IncomingC, {}});
228    // Count how many edges share a given incoming costant.
229    ++InsertResult.first->second.Count;
230    // Only compute the cost the first time we see a particular constant.
231    if (!InsertResult.second)
232      continue;
233
234    InstructionCost &MatCost = InsertResult.first->second.MatCost;
235    MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType(),
236                                TargetTransformInfo::TCK_SizeAndLatency);
237    NonFreeMat |= MatCost != TTI.TCC_Free;
238  }
239  if (!NonFreeMat) {
240    LLVM_DEBUG(dbgs() << "    Free: " << PN << "\n");
241    // No profit in free materialization.
242    return false;
243  }
244
245  // Now check that the uses of this PHI can actually be speculated,
246  // otherwise we'll still have to materialize the PHI value.
247  if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) {
248    LLVM_DEBUG(dbgs() << "    Unsafe PHI: " << PN << "\n");
249    return false;
250  }
251
252  // Compute how much (if any) savings are available by speculating around this
253  // PHI.
254  for (Use &U : PN.uses()) {
255    auto *UserI = cast<Instruction>(U.getUser());
256    // Now check whether there is any savings to folding the incoming constants
257    // into this use.
258    unsigned Idx = U.getOperandNo();
259
260    // If we have a binary operator that is commutative, an actual constant
261    // operand would end up on the RHS, so pretend the use of the PHI is on the
262    // RHS.
263    //
264    // Technically, this is a bit weird if *both* operands are PHIs we're
265    // speculating. But if that is the case, giving an "optimistic" cost isn't
266    // a bad thing because after speculation it will constant fold. And
267    // moreover, such cases should likely have been constant folded already by
268    // some other pass, so we shouldn't worry about "modeling" them terribly
269    // accurately here. Similarly, if the other operand is a constant, it still
270    // seems fine to be "optimistic" in our cost modeling, because when the
271    // incoming operand from the PHI node is also a constant, we will end up
272    // constant folding.
273    if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1)
274      // Assume we will commute the constant to the RHS to be canonical.
275      Idx = 1;
276
277    // Get the intrinsic ID if this user is an intrinsic.
278    Intrinsic::ID IID = Intrinsic::not_intrinsic;
279    if (auto *UserII = dyn_cast<IntrinsicInst>(UserI))
280      IID = UserII->getIntrinsicID();
281
282    for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) {
283      ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first;
284      InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
285      InstructionCost &FoldedCost =
286          IncomingConstantAndCostsAndCount.second.FoldedCost;
287      if (IID)
288        FoldedCost +=
289          TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(),
290                                  IncomingC->getType(),
291                                  TargetTransformInfo::TCK_SizeAndLatency);
292      else
293        FoldedCost +=
294            TTI.getIntImmCostInst(UserI->getOpcode(), Idx,
295                                  IncomingC->getValue(), IncomingC->getType(),
296                                  TargetTransformInfo::TCK_SizeAndLatency);
297
298      // If we accumulate more folded cost for this incoming constant than
299      // materialized cost, then we'll regress any edge with this constant so
300      // just bail. We're only interested in cases where folding the incoming
301      // constants is at least break-even on all paths.
302      if (FoldedCost > MatCost) {
303        LLVM_DEBUG(dbgs() << "  Not profitable to fold imm: " << *IncomingC
304                          << "\n"
305                             "    Materializing cost:    "
306                          << MatCost
307                          << "\n"
308                             "    Accumulated folded cost: "
309                          << FoldedCost << "\n");
310        return false;
311      }
312    }
313  }
314
315  // Compute the total cost savings afforded by this PHI node.
316  InstructionCost TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free;
317  for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) {
318    InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
319    InstructionCost FoldedCost =
320        IncomingConstantAndCostsAndCount.second.FoldedCost;
321    int Count = IncomingConstantAndCostsAndCount.second.Count;
322
323    TotalMatCost += MatCost * Count;
324    TotalFoldedCost += FoldedCost * Count;
325  }
326  assert(TotalMatCost.isValid() && "Constants must be  materializable");
327  assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is "
328                                            "less that its materialized cost, "
329                                            "the sum must be as well.");
330  LLVM_DEBUG(dbgs() << "    Cost savings " << (TotalMatCost - TotalFoldedCost)
331                    << ": " << PN << "\n");
332  CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost;
333  return true;
334}
335
336/// Simple helper to walk all the users of a list of phis depth first, and call
337/// a visit function on each one in post-order.
338///
339/// All of the PHIs should be in the same basic block, and this is primarily
340/// used to make a single depth-first walk across their collective users
341/// without revisiting any subgraphs. Callers should provide a fast, idempotent
342/// callable to test whether a node has been visited and the more important
343/// callable to actually visit a particular node.
344///
345/// Depth-first and postorder here refer to the *operand* graph -- we start
346/// from a collection of users of PHI nodes and walk "up" the operands
347/// depth-first.
348template <typename IsVisitedT, typename VisitT>
349static void visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode *> PNs,
350                                            IsVisitedT IsVisited,
351                                            VisitT Visit) {
352  SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
353  for (auto *PN : PNs)
354    for (Use &U : PN->uses()) {
355      auto *UI = cast<Instruction>(U.getUser());
356      if (IsVisited(UI))
357        // Already visited this user, continue across the roots.
358        continue;
359
360      // Otherwise, walk the operand graph depth-first and visit each
361      // dependency in postorder.
362      DFSStack.push_back({UI, UI->value_op_begin()});
363      do {
364        User::value_op_iterator OpIt;
365        std::tie(UI, OpIt) = DFSStack.pop_back_val();
366        while (OpIt != UI->value_op_end()) {
367          auto *OpI = dyn_cast<Instruction>(*OpIt);
368          // Increment to the next operand for whenever we continue.
369          ++OpIt;
370          // No need to visit non-instructions, which can't form dependencies,
371          // or instructions outside of our potential dependency set that we
372          // were given. Finally, if we've already visited the node, continue
373          // to the next.
374          if (!OpI || IsVisited(OpI))
375            continue;
376
377          // Push onto the stack and descend. We can directly continue this
378          // loop when ascending.
379          DFSStack.push_back({UI, OpIt});
380          UI = OpI;
381          OpIt = OpI->value_op_begin();
382        }
383
384        // Finished visiting children, visit this node.
385        assert(!IsVisited(UI) && "Should not have already visited a node!");
386        Visit(UI);
387      } while (!DFSStack.empty());
388    }
389}
390
391/// Find profitable PHIs to speculate.
392///
393/// For a PHI node to be profitable, we need the cost of speculating its users
394/// (and their dependencies) to not exceed the savings of folding the PHI's
395/// constant operands into the speculated users.
396///
397/// Computing this is surprisingly challenging. Because users of two different
398/// PHI nodes can depend on each other or on common other instructions, it may
399/// be profitable to speculate two PHI nodes together even though neither one
400/// in isolation is profitable. The straightforward way to find all the
401/// profitable PHIs would be to check each combination of PHIs' cost, but this
402/// is exponential in complexity.
403///
404/// Even if we assume that we only care about cases where we can consider each
405/// PHI node in isolation (rather than considering cases where none are
406/// profitable in isolation but some subset are profitable as a set), we still
407/// have a challenge. The obvious way to find all individually profitable PHIs
408/// is to iterate until reaching a fixed point, but this will be quadratic in
409/// complexity. =/
410///
411/// This code currently uses a linear-to-compute order for a greedy approach.
412/// It won't find cases where a set of PHIs must be considered together, but it
413/// handles most cases of order dependence without quadratic iteration. The
414/// specific order used is the post-order across the operand DAG. When the last
415/// user of a PHI is visited in this postorder walk, we check it for
416/// profitability.
417///
418/// There is an orthogonal extra complexity to all of this: computing the cost
419/// itself can easily become a linear computation making everything again (at
420/// best) quadratic. Using a postorder over the operand graph makes it
421/// particularly easy to avoid this through dynamic programming. As we do the
422/// postorder walk, we build the transitive cost of that subgraph. It is also
423/// straightforward to then update these costs when we mark a PHI for
424/// speculation so that subsequent PHIs don't re-pay the cost of already
425/// speculated instructions.
426static SmallVector<PHINode *, 16> findProfitablePHIs(
427    ArrayRef<PHINode *> PNs,
428    const SmallDenseMap<PHINode *, InstructionCost, 16> &CostSavingsMap,
429    const SmallPtrSetImpl<Instruction *> &PotentialSpecSet, int NumPreds,
430    DominatorTree &DT, TargetTransformInfo &TTI) {
431  SmallVector<PHINode *, 16> SpecPNs;
432
433  // First, establish a reverse mapping from immediate users of the PHI nodes
434  // to the nodes themselves, and count how many users each PHI node has in
435  // a way we can update while processing them.
436  SmallDenseMap<Instruction *, TinyPtrVector<PHINode *>, 16> UserToPNMap;
437  SmallDenseMap<PHINode *, int, 16> PNUserCountMap;
438  SmallPtrSet<Instruction *, 16> UserSet;
439  for (auto *PN : PNs) {
440    assert(UserSet.empty() && "Must start with an empty user set!");
441    for (Use &U : PN->uses())
442      UserSet.insert(cast<Instruction>(U.getUser()));
443    PNUserCountMap[PN] = UserSet.size();
444    for (auto *UI : UserSet)
445      UserToPNMap.insert({UI, {}}).first->second.push_back(PN);
446    UserSet.clear();
447  }
448
449  // Now do a DFS across the operand graph of the users, computing cost as we
450  // go and when all costs for a given PHI are known, checking that PHI for
451  // profitability.
452  SmallDenseMap<Instruction *, InstructionCost, 16> SpecCostMap;
453  visitPHIUsersAndDepsInPostOrder(
454      PNs,
455      /*IsVisited*/
456      [&](Instruction *I) {
457        // We consider anything that isn't potentially speculated to be
458        // "visited" as it is already handled. Similarly, anything that *is*
459        // potentially speculated but for which we have an entry in our cost
460        // map, we're done.
461        return !PotentialSpecSet.count(I) || SpecCostMap.count(I);
462      },
463      /*Visit*/
464      [&](Instruction *I) {
465        // We've fully visited the operands, so sum their cost with this node
466        // and update the cost map.
467        InstructionCost Cost = TTI.TCC_Free;
468        for (Value *OpV : I->operand_values())
469          if (auto *OpI = dyn_cast<Instruction>(OpV)) {
470            auto CostMapIt = SpecCostMap.find(OpI);
471            if (CostMapIt != SpecCostMap.end())
472              Cost += CostMapIt->second;
473          }
474        Cost += TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
475        bool Inserted = SpecCostMap.insert({I, Cost}).second;
476        (void)Inserted;
477        assert(Inserted && "Must not re-insert a cost during the DFS!");
478
479        // Now check if this node had a corresponding PHI node using it. If so,
480        // we need to decrement the outstanding user count for it.
481        auto UserPNsIt = UserToPNMap.find(I);
482        if (UserPNsIt == UserToPNMap.end())
483          return;
484        auto &UserPNs = UserPNsIt->second;
485        auto UserPNsSplitIt = std::stable_partition(
486            UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) {
487              int &PNUserCount = PNUserCountMap.find(UserPN)->second;
488              assert(
489                  PNUserCount > 0 &&
490                  "Should never re-visit a PN after its user count hits zero!");
491              --PNUserCount;
492              return PNUserCount != 0;
493            });
494
495        // FIXME: Rather than one at a time, we should sum the savings as the
496        // cost will be completely shared.
497        SmallVector<Instruction *, 16> SpecWorklist;
498        for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) {
499          InstructionCost SpecCost = TTI.TCC_Free;
500          for (Use &U : PN->uses())
501            SpecCost +=
502                SpecCostMap.find(cast<Instruction>(U.getUser()))->second;
503          SpecCost *= (NumPreds - 1);
504          // When the user count of a PHI node hits zero, we should check its
505          // profitability. If profitable, we should mark it for speculation
506          // and zero out the cost of everything it depends on.
507          InstructionCost CostSavings = CostSavingsMap.find(PN)->second;
508          if (SpecCost > CostSavings) {
509            LLVM_DEBUG(dbgs() << "  Not profitable, speculation cost: " << *PN
510                              << "\n"
511                                 "    Cost savings:     "
512                              << CostSavings
513                              << "\n"
514                                 "    Speculation cost: "
515                              << SpecCost << "\n");
516            continue;
517          }
518
519          // We're going to speculate this user-associated PHI. Copy it out and
520          // add its users to the worklist to update their cost.
521          SpecPNs.push_back(PN);
522          for (Use &U : PN->uses()) {
523            auto *UI = cast<Instruction>(U.getUser());
524            auto CostMapIt = SpecCostMap.find(UI);
525            if (CostMapIt->second == 0)
526              continue;
527            // Zero out this cost entry to avoid duplicates.
528            CostMapIt->second = 0;
529            SpecWorklist.push_back(UI);
530          }
531        }
532
533        // Now walk all the operands of the users in the worklist transitively
534        // to zero out all the memoized costs.
535        while (!SpecWorklist.empty()) {
536          Instruction *SpecI = SpecWorklist.pop_back_val();
537          assert(SpecCostMap.find(SpecI)->second == 0 &&
538                 "Didn't zero out a cost!");
539
540          // Walk the operands recursively to zero out their cost as well.
541          for (auto *OpV : SpecI->operand_values()) {
542            auto *OpI = dyn_cast<Instruction>(OpV);
543            if (!OpI)
544              continue;
545            auto CostMapIt = SpecCostMap.find(OpI);
546            if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0)
547              continue;
548            CostMapIt->second = 0;
549            SpecWorklist.push_back(OpI);
550          }
551        }
552      });
553
554  return SpecPNs;
555}
556
557/// Speculate users around a set of PHI nodes.
558///
559/// This routine does the actual speculation around a set of PHI nodes where we
560/// have determined this to be both safe and profitable.
561///
562/// This routine handles any spliting of critical edges necessary to create
563/// a safe block to speculate into as well as cloning the instructions and
564/// rewriting all uses.
565static void speculatePHIs(ArrayRef<PHINode *> SpecPNs,
566                          SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
567                          SmallSetVector<BasicBlock *, 16> &PredSet,
568                          DominatorTree &DT) {
569  LLVM_DEBUG(dbgs() << "  Speculating around " << SpecPNs.size() << " PHIs!\n");
570  NumPHIsSpeculated += SpecPNs.size();
571
572  // Split any critical edges so that we have a block to hoist into.
573  auto *ParentBB = SpecPNs[0]->getParent();
574  SmallVector<BasicBlock *, 16> SpecPreds;
575  SpecPreds.reserve(PredSet.size());
576  for (auto *PredBB : PredSet) {
577    auto *NewPredBB = SplitCriticalEdge(
578        PredBB, ParentBB,
579        CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges());
580    if (NewPredBB) {
581      ++NumEdgesSplit;
582      LLVM_DEBUG(dbgs() << "  Split critical edge from: " << PredBB->getName()
583                        << "\n");
584      SpecPreds.push_back(NewPredBB);
585    } else {
586      assert(PredBB->getSingleSuccessor() == ParentBB &&
587             "We need a non-critical predecessor to speculate into.");
588      assert(!isa<InvokeInst>(PredBB->getTerminator()) &&
589             "Cannot have a non-critical invoke!");
590
591      // Already non-critical, use existing pred.
592      SpecPreds.push_back(PredBB);
593    }
594  }
595
596  SmallPtrSet<Instruction *, 16> SpecSet;
597  SmallVector<Instruction *, 16> SpecList;
598  visitPHIUsersAndDepsInPostOrder(SpecPNs,
599                                  /*IsVisited*/
600                                  [&](Instruction *I) {
601                                    // This is visited if we don't need to
602                                    // speculate it or we already have
603                                    // speculated it.
604                                    return !PotentialSpecSet.count(I) ||
605                                           SpecSet.count(I);
606                                  },
607                                  /*Visit*/
608                                  [&](Instruction *I) {
609                                    // All operands scheduled, schedule this
610                                    // node.
611                                    SpecSet.insert(I);
612                                    SpecList.push_back(I);
613                                  });
614
615  int NumSpecInsts = SpecList.size() * SpecPreds.size();
616  int NumRedundantInsts = NumSpecInsts - SpecList.size();
617  LLVM_DEBUG(dbgs() << "  Inserting " << NumSpecInsts
618                    << " speculated instructions, " << NumRedundantInsts
619                    << " redundancies\n");
620  NumSpeculatedInstructions += NumSpecInsts;
621  NumNewRedundantInstructions += NumRedundantInsts;
622
623  // Each predecessor is numbered by its index in `SpecPreds`, so for each
624  // instruction we speculate, the speculated instruction is stored in that
625  // index of the vector associated with the original instruction. We also
626  // store the incoming values for each predecessor from any PHIs used.
627  SmallDenseMap<Instruction *, SmallVector<Value *, 2>, 16> SpeculatedValueMap;
628
629  // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
630  // value. This handles both the PHIs we are speculating around and any other
631  // PHIs that happen to be used.
632  for (auto *OrigI : SpecList)
633    for (auto *OpV : OrigI->operand_values()) {
634      auto *OpPN = dyn_cast<PHINode>(OpV);
635      if (!OpPN || OpPN->getParent() != ParentBB)
636        continue;
637
638      auto InsertResult = SpeculatedValueMap.insert({OpPN, {}});
639      if (!InsertResult.second)
640        continue;
641
642      auto &SpeculatedVals = InsertResult.first->second;
643
644      // Populating our structure for mapping is particularly annoying because
645      // finding an incoming value for a particular predecessor block in a PHI
646      // node is a linear time operation! To avoid quadratic behavior, we build
647      // a map for this PHI node's incoming values and then translate it into
648      // the more compact representation used below.
649      SmallDenseMap<BasicBlock *, Value *, 16> IncomingValueMap;
650      for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues()))
651        IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i);
652
653      for (auto *PredBB : SpecPreds)
654        SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second);
655    }
656
657  // Speculate into each predecessor.
658  for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) {
659    auto *PredBB = SpecPreds[PredIdx];
660    assert(PredBB->getSingleSuccessor() == ParentBB &&
661           "We need a non-critical predecessor to speculate into.");
662
663    for (auto *OrigI : SpecList) {
664      auto *NewI = OrigI->clone();
665      NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx));
666      NewI->insertBefore(PredBB->getTerminator());
667
668      // Rewrite all the operands to the previously speculated instructions.
669      // Because we're walking in-order, the defs must precede the uses and we
670      // should already have these mappings.
671      for (Use &U : NewI->operands()) {
672        auto *OpI = dyn_cast<Instruction>(U.get());
673        if (!OpI)
674          continue;
675        auto MapIt = SpeculatedValueMap.find(OpI);
676        if (MapIt == SpeculatedValueMap.end())
677          continue;
678        const auto &SpeculatedVals = MapIt->second;
679        assert(SpeculatedVals[PredIdx] &&
680               "Must have a speculated value for this predecessor!");
681        assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() &&
682               "Speculated value has the wrong type!");
683
684        // Rewrite the use to this predecessor's speculated instruction.
685        U.set(SpeculatedVals[PredIdx]);
686      }
687
688      // Commute instructions which now have a constant in the LHS but not the
689      // RHS.
690      if (NewI->isBinaryOp() && NewI->isCommutative() &&
691          isa<Constant>(NewI->getOperand(0)) &&
692          !isa<Constant>(NewI->getOperand(1)))
693        NewI->getOperandUse(0).swap(NewI->getOperandUse(1));
694
695      SpeculatedValueMap[OrigI].push_back(NewI);
696      assert(SpeculatedValueMap[OrigI][PredIdx] == NewI &&
697             "Mismatched speculated instruction index!");
698    }
699  }
700
701  // Walk the speculated instruction list and if they have uses, insert a PHI
702  // for them from the speculated versions, and replace the uses with the PHI.
703  // Then erase the instructions as they have been fully speculated. The walk
704  // needs to be in reverse so that we don't think there are users when we'll
705  // actually eventually remove them later.
706  IRBuilder<> IRB(SpecPNs[0]);
707  for (auto *OrigI : llvm::reverse(SpecList)) {
708    // Check if we need a PHI for any remaining users and if so, insert it.
709    if (!OrigI->use_empty()) {
710      auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(),
711                                    Twine(OrigI->getName()) + ".phi");
712      // Add the incoming values we speculated.
713      auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second;
714      for (int PredIdx : llvm::seq<int>(0, SpecPreds.size()))
715        SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]);
716
717      // And replace the uses with the PHI node.
718      OrigI->replaceAllUsesWith(SpecIPN);
719    }
720
721    // It is important to immediately erase this so that it stops using other
722    // instructions. This avoids inserting needless PHIs of them.
723    OrigI->eraseFromParent();
724  }
725
726  // All of the uses of the speculated phi nodes should be removed at this
727  // point, so erase them.
728  for (auto *SpecPN : SpecPNs) {
729    assert(SpecPN->use_empty() && "All users should have been speculated!");
730    SpecPN->eraseFromParent();
731  }
732}
733
734/// Try to speculate around a series of PHIs from a single basic block.
735///
736/// This routine checks whether any of these PHIs are profitable to speculate
737/// users around. If safe and profitable, it does the speculation. It returns
738/// true when at least some speculation occurs.
739static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
740                               DominatorTree &DT, TargetTransformInfo &TTI) {
741  LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
742
743  // Savings in cost from speculating around a PHI node.
744  SmallDenseMap<PHINode *, InstructionCost, 16> CostSavingsMap;
745
746  // Remember the set of instructions that are candidates for speculation so
747  // that we can quickly walk things within that space. This prunes out
748  // instructions already available along edges, etc.
749  SmallPtrSet<Instruction *, 16> PotentialSpecSet;
750
751  // Remember the set of instructions that are (transitively) unsafe to
752  // speculate into the incoming edges of this basic block. This avoids
753  // recomputing them for each PHI node we check. This set is specific to this
754  // block though as things are pruned out of it based on what is available
755  // along incoming edges.
756  SmallPtrSet<Instruction *, 16> UnsafeSet;
757
758  // For each PHI node in this block, check whether there are immediate folding
759  // opportunities from speculation, and whether that speculation will be
760  // valid. This determise the set of safe PHIs to speculate.
761  llvm::erase_if(PNs, [&](PHINode *PN) {
762    return !isSafeAndProfitableToSpeculateAroundPHI(
763        *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI);
764  });
765  // If no PHIs were profitable, skip.
766  if (PNs.empty()) {
767    LLVM_DEBUG(dbgs() << "  No safe and profitable PHIs found!\n");
768    return false;
769  }
770
771  // We need to know how much speculation will cost which is determined by how
772  // many incoming edges will need a copy of each speculated instruction.
773  SmallSetVector<BasicBlock *, 16> PredSet;
774  for (auto *PredBB : PNs[0]->blocks()) {
775    if (!PredSet.insert(PredBB))
776      continue;
777
778    // We cannot speculate when a predecessor is an indirect branch.
779    // FIXME: We also can't reliably create a non-critical edge block for
780    // speculation if the predecessor is an invoke. This doesn't seem
781    // fundamental and we should probably be splitting critical edges
782    // differently.
783    const auto *TermInst = PredBB->getTerminator();
784    if (isa<IndirectBrInst>(TermInst) ||
785        isa<InvokeInst>(TermInst) ||
786        isa<CallBrInst>(TermInst)) {
787      LLVM_DEBUG(dbgs() << "  Invalid: predecessor terminator: "
788                        << PredBB->getName() << "\n");
789      return false;
790    }
791  }
792  if (PredSet.size() < 2) {
793    LLVM_DEBUG(dbgs() << "  Unimportant: phi with only one predecessor\n");
794    return false;
795  }
796
797  SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs(
798      PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI);
799  if (SpecPNs.empty())
800    // Nothing to do.
801    return false;
802
803  speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT);
804  return true;
805}
806
807PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F,
808                                               FunctionAnalysisManager &AM) {
809  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
810  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
811
812  bool Changed = false;
813  for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) {
814    SmallVector<PHINode *, 16> PNs;
815    auto BBI = BB->begin();
816    while (auto *PN = dyn_cast<PHINode>(&*BBI)) {
817      PNs.push_back(PN);
818      ++BBI;
819    }
820
821    if (PNs.empty())
822      continue;
823
824    Changed |= tryToSpeculatePHIs(PNs, DT, TTI);
825  }
826
827  if (!Changed)
828    return PreservedAnalyses::all();
829
830  PreservedAnalyses PA;
831  return PA;
832}
833