1//===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Loops should be simplified before this analysis.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
15#include "llvm/ADT/SCCIterator.h"
16#include "llvm/Support/raw_ostream.h"
17#include <numeric>
18
19using namespace llvm;
20using namespace llvm::bfi_detail;
21
22#define DEBUG_TYPE "block-freq"
23
24ScaledNumber<uint64_t> BlockMass::toScaled() const {
25  if (isFull())
26    return ScaledNumber<uint64_t>(1, 0);
27  return ScaledNumber<uint64_t>(getMass() + 1, -64);
28}
29
30void BlockMass::dump() const { print(dbgs()); }
31
32static char getHexDigit(int N) {
33  assert(N < 16);
34  if (N < 10)
35    return '0' + N;
36  return 'a' + N - 10;
37}
38raw_ostream &BlockMass::print(raw_ostream &OS) const {
39  for (int Digits = 0; Digits < 16; ++Digits)
40    OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
41  return OS;
42}
43
44namespace {
45
46typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
47typedef BlockFrequencyInfoImplBase::Distribution Distribution;
48typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
49typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64;
50typedef BlockFrequencyInfoImplBase::LoopData LoopData;
51typedef BlockFrequencyInfoImplBase::Weight Weight;
52typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
53
54/// \brief Dithering mass distributer.
55///
56/// This class splits up a single mass into portions by weight, dithering to
57/// spread out error.  No mass is lost.  The dithering precision depends on the
58/// precision of the product of \a BlockMass and \a BranchProbability.
59///
60/// The distribution algorithm follows.
61///
62///  1. Initialize by saving the sum of the weights in \a RemWeight and the
63///     mass to distribute in \a RemMass.
64///
65///  2. For each portion:
66///
67///      1. Construct a branch probability, P, as the portion's weight divided
68///         by the current value of \a RemWeight.
69///      2. Calculate the portion's mass as \a RemMass times P.
70///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
71///         the current portion's weight and mass.
72struct DitheringDistributer {
73  uint32_t RemWeight;
74  BlockMass RemMass;
75
76  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
77
78  BlockMass takeMass(uint32_t Weight);
79};
80
81} // end namespace
82
83DitheringDistributer::DitheringDistributer(Distribution &Dist,
84                                           const BlockMass &Mass) {
85  Dist.normalize();
86  RemWeight = Dist.Total;
87  RemMass = Mass;
88}
89
90BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
91  assert(Weight && "invalid weight");
92  assert(Weight <= RemWeight);
93  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
94
95  // Decrement totals (dither).
96  RemWeight -= Weight;
97  RemMass -= Mass;
98  return Mass;
99}
100
101void Distribution::add(const BlockNode &Node, uint64_t Amount,
102                       Weight::DistType Type) {
103  assert(Amount && "invalid weight of 0");
104  uint64_t NewTotal = Total + Amount;
105
106  // Check for overflow.  It should be impossible to overflow twice.
107  bool IsOverflow = NewTotal < Total;
108  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
109  DidOverflow |= IsOverflow;
110
111  // Update the total.
112  Total = NewTotal;
113
114  // Save the weight.
115  Weights.push_back(Weight(Type, Node, Amount));
116}
117
118static void combineWeight(Weight &W, const Weight &OtherW) {
119  assert(OtherW.TargetNode.isValid());
120  if (!W.Amount) {
121    W = OtherW;
122    return;
123  }
124  assert(W.Type == OtherW.Type);
125  assert(W.TargetNode == OtherW.TargetNode);
126  assert(OtherW.Amount && "Expected non-zero weight");
127  if (W.Amount > W.Amount + OtherW.Amount)
128    // Saturate on overflow.
129    W.Amount = UINT64_MAX;
130  else
131    W.Amount += OtherW.Amount;
132}
133static void combineWeightsBySorting(WeightList &Weights) {
134  // Sort so edges to the same node are adjacent.
135  std::sort(Weights.begin(), Weights.end(),
136            [](const Weight &L,
137               const Weight &R) { return L.TargetNode < R.TargetNode; });
138
139  // Combine adjacent edges.
140  WeightList::iterator O = Weights.begin();
141  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
142       ++O, (I = L)) {
143    *O = *I;
144
145    // Find the adjacent weights to the same node.
146    for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
147      combineWeight(*O, *L);
148  }
149
150  // Erase extra entries.
151  Weights.erase(O, Weights.end());
152  return;
153}
154static void combineWeightsByHashing(WeightList &Weights) {
155  // Collect weights into a DenseMap.
156  typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
157  HashTable Combined(NextPowerOf2(2 * Weights.size()));
158  for (const Weight &W : Weights)
159    combineWeight(Combined[W.TargetNode.Index], W);
160
161  // Check whether anything changed.
162  if (Weights.size() == Combined.size())
163    return;
164
165  // Fill in the new weights.
166  Weights.clear();
167  Weights.reserve(Combined.size());
168  for (const auto &I : Combined)
169    Weights.push_back(I.second);
170}
171static void combineWeights(WeightList &Weights) {
172  // Use a hash table for many successors to keep this linear.
173  if (Weights.size() > 128) {
174    combineWeightsByHashing(Weights);
175    return;
176  }
177
178  combineWeightsBySorting(Weights);
179}
180static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
181  assert(Shift >= 0);
182  assert(Shift < 64);
183  if (!Shift)
184    return N;
185  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
186}
187void Distribution::normalize() {
188  // Early exit for termination nodes.
189  if (Weights.empty())
190    return;
191
192  // Only bother if there are multiple successors.
193  if (Weights.size() > 1)
194    combineWeights(Weights);
195
196  // Early exit when combined into a single successor.
197  if (Weights.size() == 1) {
198    Total = 1;
199    Weights.front().Amount = 1;
200    return;
201  }
202
203  // Determine how much to shift right so that the total fits into 32-bits.
204  //
205  // If we shift at all, shift by 1 extra.  Otherwise, the lower limit of 1
206  // for each weight can cause a 32-bit overflow.
207  int Shift = 0;
208  if (DidOverflow)
209    Shift = 33;
210  else if (Total > UINT32_MAX)
211    Shift = 33 - countLeadingZeros(Total);
212
213  // Early exit if nothing needs to be scaled.
214  if (!Shift) {
215    // If we didn't overflow then combineWeights() shouldn't have changed the
216    // sum of the weights, but let's double-check.
217    assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
218                                    [](uint64_t Sum, const Weight &W) {
219                      return Sum + W.Amount;
220                    }) &&
221           "Expected total to be correct");
222    return;
223  }
224
225  // Recompute the total through accumulation (rather than shifting it) so that
226  // it's accurate after shifting and any changes combineWeights() made above.
227  Total = 0;
228
229  // Sum the weights to each node and shift right if necessary.
230  for (Weight &W : Weights) {
231    // Scale down below UINT32_MAX.  Since Shift is larger than necessary, we
232    // can round here without concern about overflow.
233    assert(W.TargetNode.isValid());
234    W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
235    assert(W.Amount <= UINT32_MAX);
236
237    // Update the total.
238    Total += W.Amount;
239  }
240  assert(Total <= UINT32_MAX);
241}
242
243void BlockFrequencyInfoImplBase::clear() {
244  // Swap with a default-constructed std::vector, since std::vector<>::clear()
245  // does not actually clear heap storage.
246  std::vector<FrequencyData>().swap(Freqs);
247  std::vector<WorkingData>().swap(Working);
248  Loops.clear();
249}
250
251/// \brief Clear all memory not needed downstream.
252///
253/// Releases all memory not used downstream.  In particular, saves Freqs.
254static void cleanup(BlockFrequencyInfoImplBase &BFI) {
255  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
256  BFI.clear();
257  BFI.Freqs = std::move(SavedFreqs);
258}
259
260bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
261                                           const LoopData *OuterLoop,
262                                           const BlockNode &Pred,
263                                           const BlockNode &Succ,
264                                           uint64_t Weight) {
265  if (!Weight)
266    Weight = 1;
267
268  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
269    return OuterLoop && OuterLoop->isHeader(Node);
270  };
271
272  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
273
274#ifndef NDEBUG
275  auto debugSuccessor = [&](const char *Type) {
276    dbgs() << "  =>"
277           << " [" << Type << "] weight = " << Weight;
278    if (!isLoopHeader(Resolved))
279      dbgs() << ", succ = " << getBlockName(Succ);
280    if (Resolved != Succ)
281      dbgs() << ", resolved = " << getBlockName(Resolved);
282    dbgs() << "\n";
283  };
284  (void)debugSuccessor;
285#endif
286
287  if (isLoopHeader(Resolved)) {
288    DEBUG(debugSuccessor("backedge"));
289    Dist.addBackedge(Resolved, Weight);
290    return true;
291  }
292
293  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
294    DEBUG(debugSuccessor("  exit  "));
295    Dist.addExit(Resolved, Weight);
296    return true;
297  }
298
299  if (Resolved < Pred) {
300    if (!isLoopHeader(Pred)) {
301      // If OuterLoop is an irreducible loop, we can't actually handle this.
302      assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
303             "unhandled irreducible control flow");
304
305      // Irreducible backedge.  Abort.
306      DEBUG(debugSuccessor("abort!!!"));
307      return false;
308    }
309
310    // If "Pred" is a loop header, then this isn't really a backedge; rather,
311    // OuterLoop must be irreducible.  These false backedges can come only from
312    // secondary loop headers.
313    assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
314           "unhandled irreducible control flow");
315  }
316
317  DEBUG(debugSuccessor(" local  "));
318  Dist.addLocal(Resolved, Weight);
319  return true;
320}
321
322bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
323    const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
324  // Copy the exit map into Dist.
325  for (const auto &I : Loop.Exits)
326    if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
327                   I.second.getMass()))
328      // Irreducible backedge.
329      return false;
330
331  return true;
332}
333
334/// \brief Compute the loop scale for a loop.
335void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
336  // Compute loop scale.
337  DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
338
339  // Infinite loops need special handling. If we give the back edge an infinite
340  // mass, they may saturate all the other scales in the function down to 1,
341  // making all the other region temperatures look exactly the same. Choose an
342  // arbitrary scale to avoid these issues.
343  //
344  // FIXME: An alternate way would be to select a symbolic scale which is later
345  // replaced to be the maximum of all computed scales plus 1. This would
346  // appropriately describe the loop as having a large scale, without skewing
347  // the final frequency computation.
348  const Scaled64 InifiniteLoopScale(1, 12);
349
350  // LoopScale == 1 / ExitMass
351  // ExitMass == HeadMass - BackedgeMass
352  BlockMass TotalBackedgeMass;
353  for (auto &Mass : Loop.BackedgeMass)
354    TotalBackedgeMass += Mass;
355  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
356
357  // Block scale stores the inverse of the scale. If this is an infinite loop,
358  // its exit mass will be zero. In this case, use an arbitrary scale for the
359  // loop scale.
360  Loop.Scale =
361      ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse();
362
363  DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
364               << " - " << TotalBackedgeMass << ")\n"
365               << " - scale = " << Loop.Scale << "\n");
366}
367
368/// \brief Package up a loop.
369void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
370  DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
371
372  // Clear the subloop exits to prevent quadratic memory usage.
373  for (const BlockNode &M : Loop.Nodes) {
374    if (auto *Loop = Working[M.Index].getPackagedLoop())
375      Loop->Exits.clear();
376    DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
377  }
378  Loop.IsPackaged = true;
379}
380
381#ifndef NDEBUG
382static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
383                        const DitheringDistributer &D, const BlockNode &T,
384                        const BlockMass &M, const char *Desc) {
385  dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
386  if (Desc)
387    dbgs() << " [" << Desc << "]";
388  if (T.isValid())
389    dbgs() << " to " << BFI.getBlockName(T);
390  dbgs() << "\n";
391}
392#endif
393
394void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
395                                                LoopData *OuterLoop,
396                                                Distribution &Dist) {
397  BlockMass Mass = Working[Source.Index].getMass();
398  DEBUG(dbgs() << "  => mass:  " << Mass << "\n");
399
400  // Distribute mass to successors as laid out in Dist.
401  DitheringDistributer D(Dist, Mass);
402
403  for (const Weight &W : Dist.Weights) {
404    // Check for a local edge (non-backedge and non-exit).
405    BlockMass Taken = D.takeMass(W.Amount);
406    if (W.Type == Weight::Local) {
407      Working[W.TargetNode.Index].getMass() += Taken;
408      DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
409      continue;
410    }
411
412    // Backedges and exits only make sense if we're processing a loop.
413    assert(OuterLoop && "backedge or exit outside of loop");
414
415    // Check for a backedge.
416    if (W.Type == Weight::Backedge) {
417      OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
418      DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
419      continue;
420    }
421
422    // This must be an exit.
423    assert(W.Type == Weight::Exit);
424    OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
425    DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
426  }
427}
428
429static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
430                                     const Scaled64 &Min, const Scaled64 &Max) {
431  // Scale the Factor to a size that creates integers.  Ideally, integers would
432  // be scaled so that Max == UINT64_MAX so that they can be best
433  // differentiated.  However, in the presence of large frequency values, small
434  // frequencies are scaled down to 1, making it impossible to differentiate
435  // small, unequal numbers. When the spread between Min and Max frequencies
436  // fits well within MaxBits, we make the scale be at least 8.
437  const unsigned MaxBits = 64;
438  const unsigned SpreadBits = (Max / Min).lg();
439  Scaled64 ScalingFactor;
440  if (SpreadBits <= MaxBits - 3) {
441    // If the values are small enough, make the scaling factor at least 8 to
442    // allow distinguishing small values.
443    ScalingFactor = Min.inverse();
444    ScalingFactor <<= 3;
445  } else {
446    // If the values need more than MaxBits to be represented, saturate small
447    // frequency values down to 1 by using a scaling factor that benefits large
448    // frequency values.
449    ScalingFactor = Scaled64(1, MaxBits) / Max;
450  }
451
452  // Translate the floats to integers.
453  DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
454               << ", factor = " << ScalingFactor << "\n");
455  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
456    Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
457    BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
458    DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
459                 << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
460                 << ", int = " << BFI.Freqs[Index].Integer << "\n");
461  }
462}
463
464/// \brief Unwrap a loop package.
465///
466/// Visits all the members of a loop, adjusting their BlockData according to
467/// the loop's pseudo-node.
468static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
469  DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
470               << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
471               << "\n");
472  Loop.Scale *= Loop.Mass.toScaled();
473  Loop.IsPackaged = false;
474  DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");
475
476  // Propagate the head scale through the loop.  Since members are visited in
477  // RPO, the head scale will be updated by the loop scale first, and then the
478  // final head scale will be used for updated the rest of the members.
479  for (const BlockNode &N : Loop.Nodes) {
480    const auto &Working = BFI.Working[N.Index];
481    Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
482                                       : BFI.Freqs[N.Index].Scaled;
483    Scaled64 New = Loop.Scale * F;
484    DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
485                 << "\n");
486    F = New;
487  }
488}
489
490void BlockFrequencyInfoImplBase::unwrapLoops() {
491  // Set initial frequencies from loop-local masses.
492  for (size_t Index = 0; Index < Working.size(); ++Index)
493    Freqs[Index].Scaled = Working[Index].Mass.toScaled();
494
495  for (LoopData &Loop : Loops)
496    unwrapLoop(*this, Loop);
497}
498
499void BlockFrequencyInfoImplBase::finalizeMetrics() {
500  // Unwrap loop packages in reverse post-order, tracking min and max
501  // frequencies.
502  auto Min = Scaled64::getLargest();
503  auto Max = Scaled64::getZero();
504  for (size_t Index = 0; Index < Working.size(); ++Index) {
505    // Update min/max scale.
506    Min = std::min(Min, Freqs[Index].Scaled);
507    Max = std::max(Max, Freqs[Index].Scaled);
508  }
509
510  // Convert to integers.
511  convertFloatingToInteger(*this, Min, Max);
512
513  // Clean up data structures.
514  cleanup(*this);
515
516  // Print out the final stats.
517  DEBUG(dump());
518}
519
520BlockFrequency
521BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
522  if (!Node.isValid())
523    return 0;
524  return Freqs[Node.Index].Integer;
525}
526Scaled64
527BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
528  if (!Node.isValid())
529    return Scaled64::getZero();
530  return Freqs[Node.Index].Scaled;
531}
532
533void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
534                                              uint64_t Freq) {
535  assert(Node.isValid() && "Expected valid node");
536  assert(Node.Index < Freqs.size() && "Expected legal index");
537  Freqs[Node.Index].Integer = Freq;
538}
539
540std::string
541BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
542  return std::string();
543}
544std::string
545BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
546  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
547}
548
549raw_ostream &
550BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
551                                           const BlockNode &Node) const {
552  return OS << getFloatingBlockFreq(Node);
553}
554
555raw_ostream &
556BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
557                                           const BlockFrequency &Freq) const {
558  Scaled64 Block(Freq.getFrequency(), 0);
559  Scaled64 Entry(getEntryFreq(), 0);
560
561  return OS << Block / Entry;
562}
563
564void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
565  Start = OuterLoop.getHeader();
566  Nodes.reserve(OuterLoop.Nodes.size());
567  for (auto N : OuterLoop.Nodes)
568    addNode(N);
569  indexNodes();
570}
571void IrreducibleGraph::addNodesInFunction() {
572  Start = 0;
573  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
574    if (!BFI.Working[Index].isPackaged())
575      addNode(Index);
576  indexNodes();
577}
578void IrreducibleGraph::indexNodes() {
579  for (auto &I : Nodes)
580    Lookup[I.Node.Index] = &I;
581}
582void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
583                               const BFIBase::LoopData *OuterLoop) {
584  if (OuterLoop && OuterLoop->isHeader(Succ))
585    return;
586  auto L = Lookup.find(Succ.Index);
587  if (L == Lookup.end())
588    return;
589  IrrNode &SuccIrr = *L->second;
590  Irr.Edges.push_back(&SuccIrr);
591  SuccIrr.Edges.push_front(&Irr);
592  ++SuccIrr.NumIn;
593}
594
595namespace llvm {
596template <> struct GraphTraits<IrreducibleGraph> {
597  typedef bfi_detail::IrreducibleGraph GraphT;
598
599  typedef const GraphT::IrrNode NodeType;
600  typedef GraphT::IrrNode::iterator ChildIteratorType;
601
602  static const NodeType *getEntryNode(const GraphT &G) {
603    return G.StartIrr;
604  }
605  static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
606  static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
607};
608}
609
610/// \brief Find extra irreducible headers.
611///
612/// Find entry blocks and other blocks with backedges, which exist when \c G
613/// contains irreducible sub-SCCs.
614static void findIrreducibleHeaders(
615    const BlockFrequencyInfoImplBase &BFI,
616    const IrreducibleGraph &G,
617    const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
618    LoopData::NodeList &Headers, LoopData::NodeList &Others) {
619  // Map from nodes in the SCC to whether it's an entry block.
620  SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
621
622  // InSCC also acts the set of nodes in the graph.  Seed it.
623  for (const auto *I : SCC)
624    InSCC[I] = false;
625
626  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
627    auto &Irr = *I->first;
628    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
629      if (InSCC.count(P))
630        continue;
631
632      // This is an entry block.
633      I->second = true;
634      Headers.push_back(Irr.Node);
635      DEBUG(dbgs() << "  => entry = " << BFI.getBlockName(Irr.Node) << "\n");
636      break;
637    }
638  }
639  assert(Headers.size() >= 2 &&
640         "Expected irreducible CFG; -loop-info is likely invalid");
641  if (Headers.size() == InSCC.size()) {
642    // Every block is a header.
643    std::sort(Headers.begin(), Headers.end());
644    return;
645  }
646
647  // Look for extra headers from irreducible sub-SCCs.
648  for (const auto &I : InSCC) {
649    // Entry blocks are already headers.
650    if (I.second)
651      continue;
652
653    auto &Irr = *I.first;
654    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
655      // Skip forward edges.
656      if (P->Node < Irr.Node)
657        continue;
658
659      // Skip predecessors from entry blocks.  These can have inverted
660      // ordering.
661      if (InSCC.lookup(P))
662        continue;
663
664      // Store the extra header.
665      Headers.push_back(Irr.Node);
666      DEBUG(dbgs() << "  => extra = " << BFI.getBlockName(Irr.Node) << "\n");
667      break;
668    }
669    if (Headers.back() == Irr.Node)
670      // Added this as a header.
671      continue;
672
673    // This is not a header.
674    Others.push_back(Irr.Node);
675    DEBUG(dbgs() << "  => other = " << BFI.getBlockName(Irr.Node) << "\n");
676  }
677  std::sort(Headers.begin(), Headers.end());
678  std::sort(Others.begin(), Others.end());
679}
680
681static void createIrreducibleLoop(
682    BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
683    LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
684    const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
685  // Translate the SCC into RPO.
686  DEBUG(dbgs() << " - found-scc\n");
687
688  LoopData::NodeList Headers;
689  LoopData::NodeList Others;
690  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
691
692  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
693                                Headers.end(), Others.begin(), Others.end());
694
695  // Update loop hierarchy.
696  for (const auto &N : Loop->Nodes)
697    if (BFI.Working[N.Index].isLoopHeader())
698      BFI.Working[N.Index].Loop->Parent = &*Loop;
699    else
700      BFI.Working[N.Index].Loop = &*Loop;
701}
702
703iterator_range<std::list<LoopData>::iterator>
704BlockFrequencyInfoImplBase::analyzeIrreducible(
705    const IrreducibleGraph &G, LoopData *OuterLoop,
706    std::list<LoopData>::iterator Insert) {
707  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
708  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
709
710  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
711    if (I->size() < 2)
712      continue;
713
714    // Translate the SCC into RPO.
715    createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
716  }
717
718  if (OuterLoop)
719    return make_range(std::next(Prev), Insert);
720  return make_range(Loops.begin(), Insert);
721}
722
723void
724BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
725  OuterLoop.Exits.clear();
726  for (auto &Mass : OuterLoop.BackedgeMass)
727    Mass = BlockMass::getEmpty();
728  auto O = OuterLoop.Nodes.begin() + 1;
729  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
730    if (!Working[I->Index].isPackaged())
731      *O++ = *I;
732  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
733}
734
735void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
736  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
737
738  // Since the loop has more than one header block, the mass flowing back into
739  // each header will be different. Adjust the mass in each header loop to
740  // reflect the masses flowing through back edges.
741  //
742  // To do this, we distribute the initial mass using the backedge masses
743  // as weights for the distribution.
744  BlockMass LoopMass = BlockMass::getFull();
745  Distribution Dist;
746
747  DEBUG(dbgs() << "adjust-loop-header-mass:\n");
748  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
749    auto &HeaderNode = Loop.Nodes[H];
750    auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
751    DEBUG(dbgs() << " - Add back edge mass for node "
752                 << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
753    if (BackedgeMass.getMass() > 0)
754      Dist.addLocal(HeaderNode, BackedgeMass.getMass());
755    else
756      DEBUG(dbgs() << "   Nothing added. Back edge mass is zero\n");
757  }
758
759  DitheringDistributer D(Dist, LoopMass);
760
761  DEBUG(dbgs() << " Distribute loop mass " << LoopMass
762               << " to headers using above weights\n");
763  for (const Weight &W : Dist.Weights) {
764    BlockMass Taken = D.takeMass(W.Amount);
765    assert(W.Type == Weight::Local && "all weights should be local");
766    Working[W.TargetNode.Index].getMass() = Taken;
767    DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
768  }
769}
770