1//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Performs general IR level optimizations on SVE intrinsics.
11//
12// The main goal of this pass is to remove unnecessary reinterpret
13// intrinsics (llvm.aarch64.sve.convert.[to|from].svbool), e.g:
14//
15//   %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
16//   %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
17//
18// This pass also looks for ptest intrinsics & phi instructions where the
19// operands are being needlessly converted to and from svbool_t.
20//
21//===----------------------------------------------------------------------===//
22
23#include "Utils/AArch64BaseInfo.h"
24#include "llvm/ADT/PostOrderIterator.h"
25#include "llvm/ADT/SetVector.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/Instructions.h"
30#include "llvm/IR/IntrinsicInst.h"
31#include "llvm/IR/IntrinsicsAArch64.h"
32#include "llvm/IR/LLVMContext.h"
33#include "llvm/IR/PatternMatch.h"
34#include "llvm/InitializePasses.h"
35#include "llvm/Support/Debug.h"
36
37using namespace llvm;
38using namespace llvm::PatternMatch;
39
40#define DEBUG_TYPE "sve-intrinsic-opts"
41
42namespace llvm {
43void initializeSVEIntrinsicOptsPass(PassRegistry &);
44}
45
46namespace {
47struct SVEIntrinsicOpts : public ModulePass {
48  static char ID; // Pass identification, replacement for typeid
49  SVEIntrinsicOpts() : ModulePass(ID) {
50    initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry());
51  }
52
53  bool runOnModule(Module &M) override;
54  void getAnalysisUsage(AnalysisUsage &AU) const override;
55
56private:
57  static IntrinsicInst *isReinterpretToSVBool(Value *V);
58
59  static bool optimizeIntrinsic(Instruction *I);
60
61  bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions);
62
63  static bool optimizeConvertFromSVBool(IntrinsicInst *I);
64  static bool optimizePTest(IntrinsicInst *I);
65
66  static bool processPhiNode(IntrinsicInst *I);
67};
68} // end anonymous namespace
69
70void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const {
71  AU.addRequired<DominatorTreeWrapperPass>();
72  AU.setPreservesCFG();
73}
74
75char SVEIntrinsicOpts::ID = 0;
76static const char *name = "SVE intrinsics optimizations";
77INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
78INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
79INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
80
81namespace llvm {
82ModulePass *createSVEIntrinsicOptsPass() { return new SVEIntrinsicOpts(); }
83} // namespace llvm
84
85/// Returns V if it's a cast from <n x 16 x i1> (aka svbool_t), nullptr
86/// otherwise.
87IntrinsicInst *SVEIntrinsicOpts::isReinterpretToSVBool(Value *V) {
88  IntrinsicInst *I = dyn_cast<IntrinsicInst>(V);
89  if (!I)
90    return nullptr;
91
92  if (I->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
93    return nullptr;
94
95  return I;
96}
97
98/// The function will remove redundant reinterprets casting in the presence
99/// of the control flow
100bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) {
101
102  SmallVector<Instruction *, 32> Worklist;
103  auto RequiredType = X->getType();
104
105  auto *PN = dyn_cast<PHINode>(X->getArgOperand(0));
106  assert(PN && "Expected Phi Node!");
107
108  // Don't create a new Phi unless we can remove the old one.
109  if (!PN->hasOneUse())
110    return false;
111
112  for (Value *IncValPhi : PN->incoming_values()) {
113    auto *Reinterpret = isReinterpretToSVBool(IncValPhi);
114    if (!Reinterpret ||
115        RequiredType != Reinterpret->getArgOperand(0)->getType())
116      return false;
117  }
118
119  // Create the new Phi
120  LLVMContext &Ctx = PN->getContext();
121  IRBuilder<> Builder(Ctx);
122  Builder.SetInsertPoint(PN);
123  PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues());
124  Worklist.push_back(PN);
125
126  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
127    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
128    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
129    Worklist.push_back(Reinterpret);
130  }
131
132  // Cleanup Phi Node and reinterprets
133  X->replaceAllUsesWith(NPN);
134  X->eraseFromParent();
135
136  for (auto &I : Worklist)
137    if (I->use_empty())
138      I->eraseFromParent();
139
140  return true;
141}
142
143bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
144  IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0));
145  IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(I->getArgOperand(1));
146
147  if (Op1 && Op2 &&
148      Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
149      Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
150      Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) {
151
152    Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)};
153    Type *Tys[] = {Op1->getArgOperand(0)->getType()};
154    Module *M = I->getParent()->getParent()->getParent();
155
156    auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys);
157    auto CI = CallInst::Create(Fn, Ops, I->getName(), I);
158
159    I->replaceAllUsesWith(CI);
160    I->eraseFromParent();
161    if (Op1->use_empty())
162      Op1->eraseFromParent();
163    if (Op1 != Op2 && Op2->use_empty())
164      Op2->eraseFromParent();
165
166    return true;
167  }
168
169  return false;
170}
171
172bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
173  assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool &&
174         "Unexpected opcode");
175
176  // If the reinterpret instruction operand is a PHI Node
177  if (isa<PHINode>(I->getArgOperand(0)))
178    return processPhiNode(I);
179
180  // If we have a reinterpret intrinsic I of type A which is converting from
181  // another reinterpret Y of type B, and the source type of Y is A, then we can
182  // elide away both reinterprets if there are no other users of Y.
183  auto *Y = isReinterpretToSVBool(I->getArgOperand(0));
184  if (!Y)
185    return false;
186
187  Value *SourceVal = Y->getArgOperand(0);
188  if (I->getType() != SourceVal->getType())
189    return false;
190
191  I->replaceAllUsesWith(SourceVal);
192  I->eraseFromParent();
193  if (Y->use_empty())
194    Y->eraseFromParent();
195
196  return true;
197}
198
199bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) {
200  IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I);
201  if (!IntrI)
202    return false;
203
204  switch (IntrI->getIntrinsicID()) {
205  case Intrinsic::aarch64_sve_convert_from_svbool:
206    return optimizeConvertFromSVBool(IntrI);
207  case Intrinsic::aarch64_sve_ptest_any:
208  case Intrinsic::aarch64_sve_ptest_first:
209  case Intrinsic::aarch64_sve_ptest_last:
210    return optimizePTest(IntrI);
211  default:
212    return false;
213  }
214
215  return true;
216}
217
218bool SVEIntrinsicOpts::optimizeFunctions(
219    SmallSetVector<Function *, 4> &Functions) {
220  bool Changed = false;
221  for (auto *F : Functions) {
222    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
223
224    // Traverse the DT with an rpo walk so we see defs before uses, allowing
225    // simplification to be done incrementally.
226    BasicBlock *Root = DT->getRoot();
227    ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
228    for (auto *BB : RPOT)
229      for (Instruction &I : make_early_inc_range(*BB))
230        Changed |= optimizeIntrinsic(&I);
231  }
232  return Changed;
233}
234
235bool SVEIntrinsicOpts::runOnModule(Module &M) {
236  bool Changed = false;
237  SmallSetVector<Function *, 4> Functions;
238
239  // Check for SVE intrinsic declarations first so that we only iterate over
240  // relevant functions. Where an appropriate declaration is found, store the
241  // function(s) where it is used so we can target these only.
242  for (auto &F : M.getFunctionList()) {
243    if (!F.isDeclaration())
244      continue;
245
246    switch (F.getIntrinsicID()) {
247    case Intrinsic::aarch64_sve_convert_from_svbool:
248    case Intrinsic::aarch64_sve_ptest_any:
249    case Intrinsic::aarch64_sve_ptest_first:
250    case Intrinsic::aarch64_sve_ptest_last:
251      for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
252        auto *Inst = dyn_cast<Instruction>(*I++);
253        Functions.insert(Inst->getFunction());
254      }
255      break;
256    default:
257      break;
258    }
259  }
260
261  if (!Functions.empty())
262    Changed |= optimizeFunctions(Functions);
263
264  return Changed;
265}
266