1//===- DependenceGraphBuilder.cpp ------------------------------------------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file implements common steps of the build algorithm for construction
9// of dependence graphs such as DDG and PDG.
10//===----------------------------------------------------------------------===//
11
12#include "llvm/Analysis/DependenceGraphBuilder.h"
13#include "llvm/ADT/DepthFirstIterator.h"
14#include "llvm/ADT/EnumeratedArray.h"
15#include "llvm/ADT/PostOrderIterator.h"
16#include "llvm/ADT/SCCIterator.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/DDG.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "dgb"
23
24STATISTIC(TotalGraphs, "Number of dependence graphs created.");
25STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
26STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
27STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
28STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created.");
29STATISTIC(TotalConfusedEdges,
30          "Number of confused memory dependencies between two nodes.");
31STATISTIC(TotalEdgeReversals,
32          "Number of times the source and sink of dependence was reversed to "
33          "expose cycles in the graph.");
34
35using InstructionListType = SmallVector<Instruction *, 2>;
36
37//===--------------------------------------------------------------------===//
38// AbstractDependenceGraphBuilder implementation
39//===--------------------------------------------------------------------===//
40
41template <class G>
42void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() {
43  // The BBList is expected to be in program order.
44  size_t NextOrdinal = 1;
45  for (auto *BB : BBList)
46    for (auto &I : *BB)
47      InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
48}
49
50template <class G>
51void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
52  ++TotalGraphs;
53  assert(IMap.empty() && "Expected empty instruction map at start");
54  for (BasicBlock *BB : BBList)
55    for (Instruction &I : *BB) {
56      auto &NewNode = createFineGrainedNode(I);
57      IMap.insert(std::make_pair(&I, &NewNode));
58      NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
59      ++TotalFineGrainedNodes;
60    }
61}
62
63template <class G>
64void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
65  // Create a root node that connects to every connected component of the graph.
66  // This is done to allow graph iterators to visit all the disjoint components
67  // of the graph, in a single walk.
68  //
69  // This algorithm works by going through each node of the graph and for each
70  // node N, do a DFS starting from N. A rooted edge is established between the
71  // root node and N (if N is not yet visited). All the nodes reachable from N
72  // are marked as visited and are skipped in the DFS of subsequent nodes.
73  //
74  // Note: This algorithm tries to limit the number of edges out of the root
75  // node to some extent, but there may be redundant edges created depending on
76  // the iteration order. For example for a graph {A -> B}, an edge from the
77  // root node is added to both nodes if B is visited before A. While it does
78  // not result in minimal number of edges, this approach saves compile-time
79  // while keeping the number of edges in check.
80  auto &RootNode = createRootNode();
81  df_iterator_default_set<const NodeType *, 4> Visited;
82  for (auto *N : Graph) {
83    if (*N == RootNode)
84      continue;
85    for (auto I : depth_first_ext(N, Visited))
86      if (I == N)
87        createRootedEdge(RootNode, *N);
88  }
89}
90
91template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() {
92  if (!shouldCreatePiBlocks())
93    return;
94
95  LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
96
97  // The overall algorithm is as follows:
98  // 1. Identify SCCs and for each SCC create a pi-block node containing all
99  //    the nodes in that SCC.
100  // 2. Identify incoming edges incident to the nodes inside of the SCC and
101  //    reconnect them to the pi-block node.
102  // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
103  //    outside of it and reconnect them so that the edges are coming out of the
104  //    SCC node instead.
105
106  // Adding nodes as we iterate through the SCCs cause the SCC
107  // iterators to get invalidated. To prevent this invalidation, we first
108  // collect a list of nodes that are part of an SCC, and then iterate over
109  // those lists to create the pi-block nodes. Each element of the list is a
110  // list of nodes in an SCC. Note: trivial SCCs containing a single node are
111  // ignored.
112  SmallVector<NodeListType, 4> ListOfSCCs;
113  for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
114    if (SCC.size() > 1)
115      ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
116  }
117
118  for (NodeListType &NL : ListOfSCCs) {
119    LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
120                      << " nodes in it.\n");
121
122    // SCC iterator may put the nodes in an order that's different from the
123    // program order. To preserve original program order, we sort the list of
124    // nodes based on ordinal numbers computed earlier.
125    llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
126      return getOrdinal(*LHS) < getOrdinal(*RHS);
127    });
128
129    NodeType &PiNode = createPiBlock(NL);
130    ++TotalPiBlockNodes;
131
132    // Build a set to speed up the lookup for edges whose targets
133    // are inside the SCC.
134    SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end());
135
136    // We have the set of nodes in the SCC. We go through the set of nodes
137    // that are outside of the SCC and look for edges that cross the two sets.
138    for (NodeType *N : Graph) {
139
140      // Skip the SCC node and all the nodes inside of it.
141      if (*N == PiNode || NodesInSCC.count(N))
142        continue;
143
144      enum Direction {
145        Incoming,      // Incoming edges to the SCC
146        Outgoing,      // Edges going ot of the SCC
147        DirectionCount // To make the enum usable as an array index.
148      };
149
150      // Use these flags to help us avoid creating redundant edges. If there
151      // are more than one edges from an outside node to inside nodes, we only
152      // keep one edge from that node to the pi-block node. Similarly, if
153      // there are more than one edges from inside nodes to an outside node,
154      // we only keep one edge from the pi-block node to the outside node.
155      // There is a flag defined for each direction (incoming vs outgoing) and
156      // for each type of edge supported, using a two-dimensional boolean
157      // array.
158      using EdgeKind = typename EdgeType::EdgeKind;
159      EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{false,
160                                                                         false};
161
162      auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
163                                     const EdgeKind K) {
164        switch (K) {
165        case EdgeKind::RegisterDefUse:
166          createDefUseEdge(Src, Dst);
167          break;
168        case EdgeKind::MemoryDependence:
169          createMemoryEdge(Src, Dst);
170          break;
171        case EdgeKind::Rooted:
172          createRootedEdge(Src, Dst);
173          break;
174        default:
175          llvm_unreachable("Unsupported type of edge.");
176        }
177      };
178
179      auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
180                                const Direction Dir) {
181        if (!Src->hasEdgeTo(*Dst))
182          return;
183        LLVM_DEBUG(
184            dbgs() << "reconnecting("
185                   << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
186                   << ":\nSrc:" << *Src << "\nDst:" << *Dst << "\nNew:" << *New
187                   << "\n");
188        assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
189               "Invalid direction.");
190
191        SmallVector<EdgeType *, 10> EL;
192        Src->findEdgesTo(*Dst, EL);
193        for (EdgeType *OldEdge : EL) {
194          EdgeKind Kind = OldEdge->getKind();
195          if (!EdgeAlreadyCreated[Dir][Kind]) {
196            if (Dir == Direction::Incoming) {
197              createEdgeOfKind(*Src, *New, Kind);
198              LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
199            } else if (Dir == Direction::Outgoing) {
200              createEdgeOfKind(*New, *Dst, Kind);
201              LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
202            }
203            EdgeAlreadyCreated[Dir][Kind] = true;
204          }
205          Src->removeEdge(*OldEdge);
206          destroyEdge(*OldEdge);
207          LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
208        }
209      };
210
211      for (NodeType *SCCNode : NL) {
212        // Process incoming edges incident to the pi-block node.
213        reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
214
215        // Process edges that are coming out of the pi-block node.
216        reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
217      }
218    }
219  }
220
221  // Ordinal maps are no longer needed.
222  InstOrdinalMap.clear();
223  NodeOrdinalMap.clear();
224
225  LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
226}
227
228template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
229  for (NodeType *N : Graph) {
230    InstructionListType SrcIList;
231    N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
232
233    // Use a set to mark the targets that we link to N, so we don't add
234    // duplicate def-use edges when more than one instruction in a target node
235    // use results of instructions that are contained in N.
236    SmallPtrSet<NodeType *, 4> VisitedTargets;
237
238    for (Instruction *II : SrcIList) {
239      for (User *U : II->users()) {
240        Instruction *UI = dyn_cast<Instruction>(U);
241        if (!UI)
242          continue;
243        NodeType *DstNode = nullptr;
244        if (IMap.find(UI) != IMap.end())
245          DstNode = IMap.find(UI)->second;
246
247        // In the case of loops, the scope of the subgraph is all the
248        // basic blocks (and instructions within them) belonging to the loop. We
249        // simply ignore all the edges coming from (or going into) instructions
250        // or basic blocks outside of this range.
251        if (!DstNode) {
252          LLVM_DEBUG(
253              dbgs()
254              << "skipped def-use edge since the sink" << *UI
255              << " is outside the range of instructions being considered.\n");
256          continue;
257        }
258
259        // Self dependencies are ignored because they are redundant and
260        // uninteresting.
261        if (DstNode == N) {
262          LLVM_DEBUG(dbgs()
263                     << "skipped def-use edge since the sink and the source ("
264                     << N << ") are the same.\n");
265          continue;
266        }
267
268        if (VisitedTargets.insert(DstNode).second) {
269          createDefUseEdge(*N, *DstNode);
270          ++TotalDefUseEdges;
271        }
272      }
273    }
274  }
275}
276
277template <class G>
278void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
279  using DGIterator = typename G::iterator;
280  auto isMemoryAccess = [](const Instruction *I) {
281    return I->mayReadOrWriteMemory();
282  };
283  for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
284    InstructionListType SrcIList;
285    (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
286    if (SrcIList.empty())
287      continue;
288
289    for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
290      if (**SrcIt == **DstIt)
291        continue;
292      InstructionListType DstIList;
293      (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
294      if (DstIList.empty())
295        continue;
296      bool ForwardEdgeCreated = false;
297      bool BackwardEdgeCreated = false;
298      for (Instruction *ISrc : SrcIList) {
299        for (Instruction *IDst : DstIList) {
300          auto D = DI.depends(ISrc, IDst, true);
301          if (!D)
302            continue;
303
304          // If we have a dependence with its left-most non-'=' direction
305          // being '>' we need to reverse the direction of the edge, because
306          // the source of the dependence cannot occur after the sink. For
307          // confused dependencies, we will create edges in both directions to
308          // represent the possibility of a cycle.
309
310          auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
311            if (!ForwardEdgeCreated) {
312              createMemoryEdge(Src, Dst);
313              ++TotalMemoryEdges;
314            }
315            if (!BackwardEdgeCreated) {
316              createMemoryEdge(Dst, Src);
317              ++TotalMemoryEdges;
318            }
319            ForwardEdgeCreated = BackwardEdgeCreated = true;
320            ++TotalConfusedEdges;
321          };
322
323          auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
324            if (!ForwardEdgeCreated) {
325              createMemoryEdge(Src, Dst);
326              ++TotalMemoryEdges;
327            }
328            ForwardEdgeCreated = true;
329          };
330
331          auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
332            if (!BackwardEdgeCreated) {
333              createMemoryEdge(Dst, Src);
334              ++TotalMemoryEdges;
335            }
336            BackwardEdgeCreated = true;
337          };
338
339          if (D->isConfused())
340            createConfusedEdges(**SrcIt, **DstIt);
341          else if (D->isOrdered() && !D->isLoopIndependent()) {
342            bool ReversedEdge = false;
343            for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
344              if (D->getDirection(Level) == Dependence::DVEntry::EQ)
345                continue;
346              else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
347                createBackwardEdge(**SrcIt, **DstIt);
348                ReversedEdge = true;
349                ++TotalEdgeReversals;
350                break;
351              } else if (D->getDirection(Level) == Dependence::DVEntry::LT)
352                break;
353              else {
354                createConfusedEdges(**SrcIt, **DstIt);
355                break;
356              }
357            }
358            if (!ReversedEdge)
359              createForwardEdge(**SrcIt, **DstIt);
360          } else
361            createForwardEdge(**SrcIt, **DstIt);
362
363          // Avoid creating duplicate edges.
364          if (ForwardEdgeCreated && BackwardEdgeCreated)
365            break;
366        }
367
368        // If we've created edges in both directions, there is no more
369        // unique edge that we can create between these two nodes, so we
370        // can exit early.
371        if (ForwardEdgeCreated && BackwardEdgeCreated)
372          break;
373      }
374    }
375  }
376}
377
378template <class G> void AbstractDependenceGraphBuilder<G>::simplify() {
379  if (!shouldSimplify())
380    return;
381  LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
382
383  // This algorithm works by first collecting a set of candidate nodes that have
384  // an out-degree of one (in terms of def-use edges), and then ignoring those
385  // whose targets have an in-degree more than one. Each node in the resulting
386  // set can then be merged with its corresponding target and put back into the
387  // worklist until no further merge candidates are available.
388  SmallPtrSet<NodeType *, 32> CandidateSourceNodes;
389
390  // A mapping between nodes and their in-degree. To save space, this map
391  // only contains nodes that are targets of nodes in the CandidateSourceNodes.
392  DenseMap<NodeType *, unsigned> TargetInDegreeMap;
393
394  for (NodeType *N : Graph) {
395    if (N->getEdges().size() != 1)
396      continue;
397    EdgeType &Edge = N->back();
398    if (!Edge.isDefUse())
399      continue;
400    CandidateSourceNodes.insert(N);
401
402    // Insert an element into the in-degree map and initialize to zero. The
403    // count will get updated in the next step.
404    TargetInDegreeMap.insert({&Edge.getTargetNode(), 0});
405  }
406
407  LLVM_DEBUG({
408    dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size()
409           << "\nNode with single outgoing def-use edge:\n";
410    for (NodeType *N : CandidateSourceNodes) {
411      dbgs() << N << "\n";
412    }
413  });
414
415  for (NodeType *N : Graph) {
416    for (EdgeType *E : *N) {
417      NodeType *Tgt = &E->getTargetNode();
418      auto TgtIT = TargetInDegreeMap.find(Tgt);
419      if (TgtIT != TargetInDegreeMap.end())
420        ++(TgtIT->second);
421    }
422  }
423
424  LLVM_DEBUG({
425    dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size()
426           << "\nContent of in-degree map:\n";
427    for (auto &I : TargetInDegreeMap) {
428      dbgs() << I.first << " --> " << I.second << "\n";
429    }
430  });
431
432  SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(),
433                                       CandidateSourceNodes.end());
434  while (!Worklist.empty()) {
435    NodeType &Src = *Worklist.pop_back_val();
436    // As nodes get merged, we need to skip any node that has been removed from
437    // the candidate set (see below).
438    if (!CandidateSourceNodes.erase(&Src))
439      continue;
440
441    assert(Src.getEdges().size() == 1 &&
442           "Expected a single edge from the candidate src node.");
443    NodeType &Tgt = Src.back().getTargetNode();
444    assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() &&
445           "Expected target to be in the in-degree map.");
446
447    if (TargetInDegreeMap[&Tgt] != 1)
448      continue;
449
450    if (!areNodesMergeable(Src, Tgt))
451      continue;
452
453    // Do not merge if there is also an edge from target to src (immediate
454    // cycle).
455    if (Tgt.hasEdgeTo(Src))
456      continue;
457
458    LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n");
459
460    mergeNodes(Src, Tgt);
461
462    // If the target node is in the candidate set itself, we need to put the
463    // src node back into the worklist again so it gives the target a chance
464    // to get merged into it. For example if we have:
465    // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a},
466    // then after merging (a) and (b) together, we need to put (a,b) back in
467    // the worklist so that (c) can get merged in as well resulting in
468    // {(a,b,c) -> d}
469    // We also need to remove the old target (b), from the worklist. We first
470    // remove it from the candidate set here, and skip any item from the
471    // worklist that is not in the set.
472    if (CandidateSourceNodes.erase(&Tgt)) {
473      Worklist.push_back(&Src);
474      CandidateSourceNodes.insert(&Src);
475      LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n");
476    }
477  }
478  LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
479}
480
481template <class G>
482void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
483
484  // If we don't create pi-blocks, then we may not have a DAG.
485  if (!shouldCreatePiBlocks())
486    return;
487
488  SmallVector<NodeType *, 64> NodesInPO;
489  using NodeKind = typename NodeType::NodeKind;
490  for (NodeType *N : post_order(&Graph)) {
491    if (N->getKind() == NodeKind::PiBlock) {
492      // Put members of the pi-block right after the pi-block itself, for
493      // convenience.
494      const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
495      llvm::append_range(NodesInPO, PiBlockMembers);
496    }
497    NodesInPO.push_back(N);
498  }
499
500  size_t OldSize = Graph.Nodes.size();
501  Graph.Nodes.clear();
502  append_range(Graph.Nodes, reverse(NodesInPO));
503  if (Graph.Nodes.size() != OldSize)
504    assert(false &&
505           "Expected the number of nodes to stay the same after the sort");
506}
507
508template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
509template class llvm::DependenceGraphInfo<DDGNode>;
510