1//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass removes the computation of provably redundant expressions that have
10// been computed earlier in a previous iteration. It relies on the use of PHIs
11// to identify loop carried dependences. This is scalar replacement for vector
12// types.
13//
14//-----------------------------------------------------------------------------
15// Motivation: Consider the case where we have the following loop structure.
16//
17// Loop:
18//  t0 = a[i];
19//  t1 = f(t0);
20//  t2 = g(t1);
21//  ...
22//  t3 = a[i+1];
23//  t4 = f(t3);
24//  t5 = g(t4);
25//  t6 = op(t2, t5)
26//  cond_branch <Loop>
27//
28// This can be converted to
29//  t00 = a[0];
30//  t10 = f(t00);
31//  t20 = g(t10);
32// Loop:
33//  t2 = t20;
34//  t3 = a[i+1];
35//  t4 = f(t3);
36//  t5 = g(t4);
37//  t6 = op(t2, t5)
38//  t20 = t5
39//  cond_branch <Loop>
40//
41// SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
42// Such a loop comes to this pass in the following form.
43//
44// LoopPreheader:
45//  X0 = a[0];
46// Loop:
47//  X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
48//  t1 = f(X2)   <-- I1
49//  t2 = g(t1)
50//  ...
51//  X1 = a[i+1]
52//  t4 = f(X1)   <-- I2
53//  t5 = g(t4)
54//  t6 = op(t2, t5)
55//  cond_branch <Loop>
56//
57// In this pass, we look for PHIs such as X2 whose incoming values come only
58// from the Loop Preheader and over the backedge and additionaly, both these
59// values are the results of the same operation in terms of opcode. We call such
60// a PHI node a dependence chain or DepChain. In this case, the dependence of X2
61// over X1 is carried over only one iteration and so the DepChain is only one
62// PHI node long.
63//
64// Then, we traverse the uses of the PHI (X2) and the uses of the value of the
65// PHI coming  over the backedge (X1). We stop at the first pair of such users
66// I1 (of X2) and I2 (of X1) that meet the following conditions.
67// 1. I1 and I2 are the same operation, but with different operands.
68// 2. X2 and X1 are used at the same operand number in the two instructions.
69// 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a
70//    a DepChain from Op1 to Op2 of the same length as that between X2 and X1.
71//
72// We then make the following transformation
73// LoopPreheader:
74//  X0 = a[0];
75//  Y0 = f(X0);
76// Loop:
77//  X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
78//  Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
79//  t1 = f(X2)   <-- Will be removed by DCE.
80//  t2 = g(Y2)
81//  ...
82//  X1 = a[i+1]
83//  t4 = f(X1)
84//  t5 = g(t4)
85//  t6 = op(t2, t5)
86//  cond_branch <Loop>
87//
88// We proceed until we cannot find any more such instructions I1 and I2.
89//
90// --- DepChains & Loop carried dependences ---
91// Consider a single basic block loop such as
92//
93// LoopPreheader:
94//  X0 = ...
95//  Y0 = ...
96// Loop:
97//  X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
98//  Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
99//  ...
100//  X1 = ...
101//  ...
102//  cond_branch <Loop>
103//
104// Then there is a dependence between X2 and X1 that goes back one iteration,
105// i.e. X1 is used as X2 in the very next iteration. We represent this as a
106// DepChain from X2 to X1 (X2->X1).
107// Similarly, there is a dependence between Y2 and X1 that goes back two
108// iterations. X1 is used as Y2 two iterations after it is computed. This is
109// represented by a DepChain as (Y2->X2->X1).
110//
111// A DepChain has the following properties.
112// 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of
113//    iterations of carried dependence + 1.
114// 2. All instructions in the DepChain except the last are PHIs.
115//
116//===----------------------------------------------------------------------===//
117
118#include "llvm/ADT/SetVector.h"
119#include "llvm/ADT/SmallVector.h"
120#include "llvm/ADT/Statistic.h"
121#include "llvm/Analysis/LoopInfo.h"
122#include "llvm/Analysis/LoopPass.h"
123#include "llvm/IR/BasicBlock.h"
124#include "llvm/IR/DerivedTypes.h"
125#include "llvm/IR/IRBuilder.h"
126#include "llvm/IR/Instruction.h"
127#include "llvm/IR/Instructions.h"
128#include "llvm/IR/IntrinsicInst.h"
129#include "llvm/IR/Intrinsics.h"
130#include "llvm/IR/IntrinsicsHexagon.h"
131#include "llvm/IR/Use.h"
132#include "llvm/IR/User.h"
133#include "llvm/IR/Value.h"
134#include "llvm/InitializePasses.h"
135#include "llvm/Pass.h"
136#include "llvm/Support/Casting.h"
137#include "llvm/Support/CommandLine.h"
138#include "llvm/Support/Compiler.h"
139#include "llvm/Support/Debug.h"
140#include "llvm/Support/raw_ostream.h"
141#include "llvm/Transforms/Scalar.h"
142#include "llvm/Transforms/Utils.h"
143#include <algorithm>
144#include <cassert>
145#include <cstddef>
146#include <map>
147#include <memory>
148#include <set>
149
150using namespace llvm;
151
152#define DEBUG_TYPE "hexagon-vlcr"
153
154STATISTIC(HexagonNumVectorLoopCarriedReuse,
155          "Number of values that were reused from a previous iteration.");
156
157static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
158    cl::Hidden,
159    cl::desc("Maximum distance of loop carried dependences that are handled"),
160    cl::init(2), cl::ZeroOrMore);
161
162namespace llvm {
163
164void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
165Pass *createHexagonVectorLoopCarriedReusePass();
166
167} // end namespace llvm
168
169namespace {
170
171  // See info about DepChain in the comments at the top of this file.
172  using ChainOfDependences = SmallVector<Instruction *, 4>;
173
174  class DepChain {
175    ChainOfDependences Chain;
176
177  public:
178    bool isIdentical(DepChain &Other) const {
179      if (Other.size() != size())
180        return false;
181      ChainOfDependences &OtherChain = Other.getChain();
182      for (int i = 0; i < size(); ++i) {
183        if (Chain[i] != OtherChain[i])
184          return false;
185      }
186      return true;
187    }
188
189    ChainOfDependences &getChain() {
190      return Chain;
191    }
192
193    int size() const {
194      return Chain.size();
195    }
196
197    void clear() {
198      Chain.clear();
199    }
200
201    void push_back(Instruction *I) {
202      Chain.push_back(I);
203    }
204
205    int iterations() const {
206      return size() - 1;
207    }
208
209    Instruction *front() const {
210      return Chain.front();
211    }
212
213    Instruction *back() const {
214      return Chain.back();
215    }
216
217    Instruction *&operator[](const int index) {
218      return Chain[index];
219    }
220
221   friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
222  };
223
224  LLVM_ATTRIBUTE_UNUSED
225  raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
226    const ChainOfDependences &CD = D.Chain;
227    int ChainSize = CD.size();
228    OS << "**DepChain Start::**\n";
229    for (int i = 0; i < ChainSize -1; ++i) {
230      OS << *(CD[i]) << " -->\n";
231    }
232    OS << *CD[ChainSize-1] << "\n";
233    return OS;
234  }
235
236  struct ReuseValue {
237    Instruction *Inst2Replace = nullptr;
238
239    // In the new PHI node that we'll construct this is the value that'll be
240    // used over the backedge. This is the value that gets reused from a
241    // previous iteration.
242    Instruction *BackedgeInst = nullptr;
243    std::map<Instruction *, DepChain *> DepChains;
244    int Iterations = -1;
245
246    ReuseValue() = default;
247
248    void reset() {
249      Inst2Replace = nullptr;
250      BackedgeInst = nullptr;
251      DepChains.clear();
252      Iterations = -1;
253    }
254    bool isDefined() { return Inst2Replace != nullptr; }
255  };
256
257  LLVM_ATTRIBUTE_UNUSED
258  raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
259    OS << "** ReuseValue ***\n";
260    OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
261    OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
262    return OS;
263  }
264
265  class HexagonVectorLoopCarriedReuse : public LoopPass {
266  public:
267    static char ID;
268
269    explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
270      PassRegistry *PR = PassRegistry::getPassRegistry();
271      initializeHexagonVectorLoopCarriedReusePass(*PR);
272    }
273
274    StringRef getPassName() const override {
275      return "Hexagon-specific loop carried reuse for HVX vectors";
276    }
277
278    void getAnalysisUsage(AnalysisUsage &AU) const override {
279      AU.addRequired<LoopInfoWrapperPass>();
280      AU.addRequiredID(LoopSimplifyID);
281      AU.addRequiredID(LCSSAID);
282      AU.addPreservedID(LCSSAID);
283      AU.setPreservesCFG();
284    }
285
286    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
287
288  private:
289    SetVector<DepChain *> Dependences;
290    std::set<Instruction *> ReplacedInsts;
291    Loop *CurLoop;
292    ReuseValue ReuseCandidate;
293
294    bool doVLCR();
295    void findLoopCarriedDeps();
296    void findValueToReuse();
297    void findDepChainFromPHI(Instruction *I, DepChain &D);
298    void reuseValue();
299    Value *findValueInBlock(Value *Op, BasicBlock *BB);
300    DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
301    bool isEquivalentOperation(Instruction *I1, Instruction *I2);
302    bool canReplace(Instruction *I);
303    bool isCallInstCommutative(CallInst *C);
304  };
305
306} // end anonymous namespace
307
308char HexagonVectorLoopCarriedReuse::ID = 0;
309
310INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
311    "Hexagon-specific predictive commoning for HVX vectors", false, false)
312INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
313INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
314INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
315INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
316    "Hexagon-specific predictive commoning for HVX vectors", false, false)
317
318bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
319  if (skipLoop(L))
320    return false;
321
322  if (!L->getLoopPreheader())
323    return false;
324
325  // Work only on innermost loops.
326  if (!L->getSubLoops().empty())
327    return false;
328
329  // Work only on single basic blocks loops.
330  if (L->getNumBlocks() != 1)
331    return false;
332
333  CurLoop = L;
334
335  return doVLCR();
336}
337
338bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
339  switch (C->getCalledFunction()->getIntrinsicID()) {
340    case Intrinsic::hexagon_V6_vaddb:
341    case Intrinsic::hexagon_V6_vaddb_128B:
342    case Intrinsic::hexagon_V6_vaddh:
343    case Intrinsic::hexagon_V6_vaddh_128B:
344    case Intrinsic::hexagon_V6_vaddw:
345    case Intrinsic::hexagon_V6_vaddw_128B:
346    case Intrinsic::hexagon_V6_vaddubh:
347    case Intrinsic::hexagon_V6_vaddubh_128B:
348    case Intrinsic::hexagon_V6_vadduhw:
349    case Intrinsic::hexagon_V6_vadduhw_128B:
350    case Intrinsic::hexagon_V6_vaddhw:
351    case Intrinsic::hexagon_V6_vaddhw_128B:
352    case Intrinsic::hexagon_V6_vmaxb:
353    case Intrinsic::hexagon_V6_vmaxb_128B:
354    case Intrinsic::hexagon_V6_vmaxh:
355    case Intrinsic::hexagon_V6_vmaxh_128B:
356    case Intrinsic::hexagon_V6_vmaxw:
357    case Intrinsic::hexagon_V6_vmaxw_128B:
358    case Intrinsic::hexagon_V6_vmaxub:
359    case Intrinsic::hexagon_V6_vmaxub_128B:
360    case Intrinsic::hexagon_V6_vmaxuh:
361    case Intrinsic::hexagon_V6_vmaxuh_128B:
362    case Intrinsic::hexagon_V6_vminub:
363    case Intrinsic::hexagon_V6_vminub_128B:
364    case Intrinsic::hexagon_V6_vminuh:
365    case Intrinsic::hexagon_V6_vminuh_128B:
366    case Intrinsic::hexagon_V6_vminb:
367    case Intrinsic::hexagon_V6_vminb_128B:
368    case Intrinsic::hexagon_V6_vminh:
369    case Intrinsic::hexagon_V6_vminh_128B:
370    case Intrinsic::hexagon_V6_vminw:
371    case Intrinsic::hexagon_V6_vminw_128B:
372    case Intrinsic::hexagon_V6_vmpyub:
373    case Intrinsic::hexagon_V6_vmpyub_128B:
374    case Intrinsic::hexagon_V6_vmpyuh:
375    case Intrinsic::hexagon_V6_vmpyuh_128B:
376    case Intrinsic::hexagon_V6_vavgub:
377    case Intrinsic::hexagon_V6_vavgub_128B:
378    case Intrinsic::hexagon_V6_vavgh:
379    case Intrinsic::hexagon_V6_vavgh_128B:
380    case Intrinsic::hexagon_V6_vavguh:
381    case Intrinsic::hexagon_V6_vavguh_128B:
382    case Intrinsic::hexagon_V6_vavgw:
383    case Intrinsic::hexagon_V6_vavgw_128B:
384    case Intrinsic::hexagon_V6_vavgb:
385    case Intrinsic::hexagon_V6_vavgb_128B:
386    case Intrinsic::hexagon_V6_vavguw:
387    case Intrinsic::hexagon_V6_vavguw_128B:
388    case Intrinsic::hexagon_V6_vabsdiffh:
389    case Intrinsic::hexagon_V6_vabsdiffh_128B:
390    case Intrinsic::hexagon_V6_vabsdiffub:
391    case Intrinsic::hexagon_V6_vabsdiffub_128B:
392    case Intrinsic::hexagon_V6_vabsdiffuh:
393    case Intrinsic::hexagon_V6_vabsdiffuh_128B:
394    case Intrinsic::hexagon_V6_vabsdiffw:
395    case Intrinsic::hexagon_V6_vabsdiffw_128B:
396      return true;
397    default:
398      return false;
399  }
400}
401
402bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
403                                                          Instruction *I2) {
404  if (!I1->isSameOperationAs(I2))
405    return false;
406  // This check is in place specifically for intrinsics. isSameOperationAs will
407  // return two for any two hexagon intrinsics because they are essentially the
408  // same instruciton (CallInst). We need to scratch the surface to see if they
409  // are calls to the same function.
410  if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
411    if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
412      if (C1->getCalledFunction() != C2->getCalledFunction())
413        return false;
414    }
415  }
416
417  // If both the Instructions are of Vector Type and any of the element
418  // is integer constant, check their values too for equivalence.
419  if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
420    unsigned NumOperands = I1->getNumOperands();
421    for (unsigned i = 0; i < NumOperands; ++i) {
422      ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
423      ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
424      if(!C1) continue;
425      assert(C2);
426      if (C1->getSExtValue() != C2->getSExtValue())
427        return false;
428    }
429  }
430
431  return true;
432}
433
434bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
435  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
436  if (!II)
437    return true;
438
439  switch (II->getIntrinsicID()) {
440  case Intrinsic::hexagon_V6_hi:
441  case Intrinsic::hexagon_V6_lo:
442  case Intrinsic::hexagon_V6_hi_128B:
443  case Intrinsic::hexagon_V6_lo_128B:
444    LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
445    return false;
446  default:
447    return true;
448  }
449}
450void HexagonVectorLoopCarriedReuse::findValueToReuse() {
451  for (auto *D : Dependences) {
452    LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
453    if (D->iterations() > HexagonVLCRIterationLim) {
454      LLVM_DEBUG(
455          dbgs()
456          << ".. Skipping because number of iterations > than the limit\n");
457      continue;
458    }
459
460    PHINode *PN = cast<PHINode>(D->front());
461    Instruction *BEInst = D->back();
462    int Iters = D->iterations();
463    BasicBlock *BB = PN->getParent();
464    LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
465                      << " can be reused\n");
466
467    SmallVector<Instruction *, 4> PNUsers;
468    for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
469      Use &U = *UI;
470      Instruction *User = cast<Instruction>(U.getUser());
471
472      if (User->getParent() != BB)
473        continue;
474      if (ReplacedInsts.count(User)) {
475        LLVM_DEBUG(dbgs() << *User
476                          << " has already been replaced. Skipping...\n");
477        continue;
478      }
479      if (isa<PHINode>(User))
480        continue;
481      if (User->mayHaveSideEffects())
482        continue;
483      if (!canReplace(User))
484        continue;
485
486      PNUsers.push_back(User);
487    }
488    LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
489
490    // For each interesting use I of PN, find an Instruction BEUser that
491    // performs the same operation as I on BEInst and whose other operands,
492    // if any, can also be rematerialized in OtherBB. We stop when we find the
493    // first such Instruction BEUser. This is because once BEUser is
494    // rematerialized in OtherBB, we may find more such "fixup" opportunities
495    // in this block. So, we'll start over again.
496    for (Instruction *I : PNUsers) {
497      for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
498           ++UI) {
499        Use &U = *UI;
500        Instruction *BEUser = cast<Instruction>(U.getUser());
501
502        if (BEUser->getParent() != BB)
503          continue;
504        if (!isEquivalentOperation(I, BEUser))
505          continue;
506
507        int NumOperands = I->getNumOperands();
508
509        // Take operands of each PNUser one by one and try to find DepChain
510        // with every operand of the BEUser. If any of the operands of BEUser
511        // has DepChain with current operand of the PNUser, break the matcher
512        // loop. Keep doing this for Every PNUser operand. If PNUser operand
513        // does not have DepChain with any of the BEUser operand, break the
514        // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
515        // This ensures that DepChain exist for all the PNUser operand with
516        // BEUser operand. This also ensures that DepChains are independent of
517        // the positions in PNUser and BEUser.
518        std::map<Instruction *, DepChain *> DepChains;
519        CallInst *C1 = dyn_cast<CallInst>(I);
520        if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
521          bool Found = false;
522          for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
523            Value *Op = I->getOperand(OpNo);
524            Instruction *OpInst = dyn_cast<Instruction>(Op);
525            Found = false;
526            for (int T = 0; T < NumOperands; ++T) {
527              Value *BEOp = BEUser->getOperand(T);
528              Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
529              if (!OpInst && !BEOpInst) {
530                if (Op == BEOp) {
531                  Found = true;
532                  break;
533                }
534              }
535
536              if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
537                continue;
538
539              DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
540
541              if (D) {
542                Found = true;
543                DepChains[OpInst] = D;
544                break;
545              }
546            }
547            if (!Found) {
548              BEUser = nullptr;
549              break;
550            }
551          }
552        } else {
553
554          for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
555            Value *Op = I->getOperand(OpNo);
556            Value *BEOp = BEUser->getOperand(OpNo);
557
558            Instruction *OpInst = dyn_cast<Instruction>(Op);
559            if (!OpInst) {
560              if (Op == BEOp)
561                continue;
562              // Do not allow reuse to occur when the operands may be different
563              // values.
564              BEUser = nullptr;
565              break;
566            }
567
568            Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
569            DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
570
571            if (D) {
572              DepChains[OpInst] = D;
573            } else {
574              BEUser = nullptr;
575              break;
576            }
577          }
578        }
579        if (BEUser) {
580          LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
581          ReuseCandidate.Inst2Replace = I;
582          ReuseCandidate.BackedgeInst = BEUser;
583          ReuseCandidate.DepChains = DepChains;
584          ReuseCandidate.Iterations = Iters;
585          return;
586        }
587        ReuseCandidate.reset();
588      }
589    }
590  }
591  ReuseCandidate.reset();
592}
593
594Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
595                                                       BasicBlock *BB) {
596  PHINode *PN = dyn_cast<PHINode>(Op);
597  assert(PN);
598  Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
599  return ValueInBlock;
600}
601
602void HexagonVectorLoopCarriedReuse::reuseValue() {
603  LLVM_DEBUG(dbgs() << ReuseCandidate);
604  Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
605  Instruction *BEInst = ReuseCandidate.BackedgeInst;
606  int NumOperands = Inst2Replace->getNumOperands();
607  std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
608  int Iterations = ReuseCandidate.Iterations;
609  BasicBlock *LoopPH = CurLoop->getLoopPreheader();
610  assert(!DepChains.empty() && "No DepChains");
611  LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
612
613  SmallVector<Instruction *, 4> InstsInPreheader;
614  for (int i = 0; i < Iterations; ++i) {
615    Instruction *InstInPreheader = Inst2Replace->clone();
616    SmallVector<Value *, 4> Ops;
617    for (int j = 0; j < NumOperands; ++j) {
618      Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
619      if (!I)
620        continue;
621      // Get the DepChain corresponding to this operand.
622      DepChain &D = *DepChains[I];
623      // Get the PHI for the iteration number and find
624      // the incoming value from the Loop Preheader for
625      // that PHI.
626      Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
627      InstInPreheader->setOperand(j, ValInPreheader);
628    }
629    InstsInPreheader.push_back(InstInPreheader);
630    InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
631    InstInPreheader->insertBefore(LoopPH->getTerminator());
632    LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
633                      << LoopPH->getName() << "\n");
634  }
635  BasicBlock *BB = BEInst->getParent();
636  IRBuilder<> IRB(BB);
637  IRB.SetInsertPoint(BB->getFirstNonPHI());
638  Value *BEVal = BEInst;
639  PHINode *NewPhi;
640  for (int i = Iterations-1; i >=0 ; --i) {
641    Instruction *InstInPreheader = InstsInPreheader[i];
642    NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
643    NewPhi->addIncoming(InstInPreheader, LoopPH);
644    NewPhi->addIncoming(BEVal, BB);
645    LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
646                      << "\n");
647    BEVal = NewPhi;
648  }
649  // We are in LCSSA form. So, a value defined inside the Loop is used only
650  // inside the loop. So, the following is safe.
651  Inst2Replace->replaceAllUsesWith(NewPhi);
652  ReplacedInsts.insert(Inst2Replace);
653  ++HexagonNumVectorLoopCarriedReuse;
654}
655
656bool HexagonVectorLoopCarriedReuse::doVLCR() {
657  assert(CurLoop->getSubLoops().empty() &&
658         "Can do VLCR on the innermost loop only");
659  assert((CurLoop->getNumBlocks() == 1) &&
660         "Can do VLCR only on single block loops");
661
662  bool Changed = false;
663  bool Continue;
664
665  LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
666  do {
667    // Reset datastructures.
668    Dependences.clear();
669    Continue = false;
670
671    findLoopCarriedDeps();
672    findValueToReuse();
673    if (ReuseCandidate.isDefined()) {
674      reuseValue();
675      Changed = true;
676      Continue = true;
677    }
678    llvm::for_each(Dependences, std::default_delete<DepChain>());
679  } while (Continue);
680  return Changed;
681}
682
683void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
684                                                        DepChain &D) {
685  PHINode *PN = dyn_cast<PHINode>(I);
686  if (!PN) {
687    D.push_back(I);
688    return;
689  } else {
690    auto NumIncomingValues = PN->getNumIncomingValues();
691    if (NumIncomingValues != 2) {
692      D.clear();
693      return;
694    }
695
696    BasicBlock *BB = PN->getParent();
697    if (BB != CurLoop->getHeader()) {
698      D.clear();
699      return;
700    }
701
702    Value *BEVal = PN->getIncomingValueForBlock(BB);
703    Instruction *BEInst = dyn_cast<Instruction>(BEVal);
704    // This is a single block loop with a preheader, so at least
705    // one value should come over the backedge.
706    assert(BEInst && "There should be a value over the backedge");
707
708    Value *PreHdrVal =
709      PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
710    if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
711      D.clear();
712      return;
713    }
714    D.push_back(PN);
715    findDepChainFromPHI(BEInst, D);
716  }
717}
718
719DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
720                                                         Instruction *I2,
721                                                         int Iters) {
722  for (auto *D : Dependences) {
723    if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
724      return D;
725  }
726  return nullptr;
727}
728
729void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
730  BasicBlock *BB = CurLoop->getHeader();
731  for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
732    auto *PN = cast<PHINode>(I);
733    if (!isa<VectorType>(PN->getType()))
734      continue;
735
736    DepChain *D = new DepChain();
737    findDepChainFromPHI(PN, *D);
738    if (D->size() != 0)
739      Dependences.insert(D);
740    else
741      delete D;
742  }
743  LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
744  LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
745                  ++i) { dbgs() << *Dependences[i] << "\n"; });
746}
747
748Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
749  return new HexagonVectorLoopCarriedReuse();
750}
751