1//===- AMDGPULibCalls.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file does AMD library function optimizations.
11//
12//===----------------------------------------------------------------------===//
13
#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
25
26#define DEBUG_TYPE "amdgpu-simplifylib"
27
28using namespace llvm;
29
30static cl::opt<bool> EnablePreLink("amdgpu-prelink",
31  cl::desc("Enable pre-link mode optimizations"),
32  cl::init(false),
33  cl::Hidden);
34
35static cl::list<std::string> UseNative("amdgpu-use-native",
36  cl::desc("Comma separated list of functions to replace with native, or all"),
37  cl::CommaSeparated, cl::ValueOptional,
38  cl::Hidden);
39
40#define MATH_PI      numbers::pi
41#define MATH_E       numbers::e
42#define MATH_SQRT2   numbers::sqrt2
43#define MATH_SQRT1_2 numbers::inv_sqrt2
44
45namespace llvm {
46
47class AMDGPULibCalls {
48private:
49
50  typedef llvm::AMDGPULibFunc FuncInfo;
51
52  const TargetMachine *TM;
53
54  // -fuse-native.
55  bool AllNative = false;
56
57  bool useNativeFunc(const StringRef F) const;
58
59  // Return a pointer (pointer expr) to the function if function definition with
60  // "FuncName" exists. It may create a new function prototype in pre-link mode.
61  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
62
63  bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
64
65  bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
66
67  /* Specialized optimizations */
68
69  // recip (half or native)
70  bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
71
72  // divide (half or native)
73  bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
74
75  // pow/powr/pown
76  bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
77
78  // rootn
79  bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
80
81  // fma/mad
82  bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
83
84  // -fuse-native for sincos
85  bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
86
87  // evaluate calls if calls' arguments are constants.
88  bool evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0,
89    double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
90  bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
91
92  // sqrt
93  bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
94
95  // sin/cos
96  bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
97
98  // __read_pipe/__write_pipe
99  bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
100                            const FuncInfo &FInfo);
101
102  // llvm.amdgcn.wavefrontsize
103  bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
104
105  // Get insertion point at entry.
106  BasicBlock::iterator getEntryIns(CallInst * UI);
107  // Insert an Alloc instruction.
108  AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
109  // Get a scalar native builtin single argument FP function
110  FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
111
112protected:
113  CallInst *CI;
114
115  bool isUnsafeMath(const CallInst *CI) const;
116
117  void replaceCall(Value *With) {
118    CI->replaceAllUsesWith(With);
119    CI->eraseFromParent();
120  }
121
122public:
123  AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
124
125  bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
126
127  void initNativeFuncs();
128
129  // Replace a normal math function call with that native version
130  bool useNative(CallInst *CI);
131};
132
133} // end llvm namespace
134
135namespace {
136
137  class AMDGPUSimplifyLibCalls : public FunctionPass {
138
139  AMDGPULibCalls Simplifier;
140
141  public:
142    static char ID; // Pass identification
143
144    AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
145      : FunctionPass(ID), Simplifier(TM) {
146      initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
147    }
148
149    void getAnalysisUsage(AnalysisUsage &AU) const override {
150      AU.addRequired<AAResultsWrapperPass>();
151    }
152
153    bool runOnFunction(Function &M) override;
154  };
155
156  class AMDGPUUseNativeCalls : public FunctionPass {
157
158  AMDGPULibCalls Simplifier;
159
160  public:
161    static char ID; // Pass identification
162
163    AMDGPUUseNativeCalls() : FunctionPass(ID) {
164      initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
165      Simplifier.initNativeFuncs();
166    }
167
168    bool runOnFunction(Function &F) override;
169  };
170
171} // end anonymous namespace.
172
173char AMDGPUSimplifyLibCalls::ID = 0;
174char AMDGPUUseNativeCalls::ID = 0;
175
176INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
177                      "Simplify well-known AMD library calls", false, false)
178INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
179INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
180                    "Simplify well-known AMD library calls", false, false)
181
182INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
183                "Replace builtin math calls with that native versions.",
184                false, false)
185
186template <typename IRB>
187static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
188                              const Twine &Name = "") {
189  CallInst *R = B.CreateCall(Callee, Arg, Name);
190  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
191    R->setCallingConv(F->getCallingConv());
192  return R;
193}
194
195template <typename IRB>
196static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
197                               Value *Arg2, const Twine &Name = "") {
198  CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
199  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
200    R->setCallingConv(F->getCallingConv());
201  return R;
202}
203
204//  Data structures for table-driven optimizations.
205//  FuncTbl works for both f32 and f64 functions with 1 input argument
206
/// One row of a fold table: calling the function on `input` yields `result`.
/// Note the member order: the result comes first.
struct TableEntry {
  double result; // value the call folds to
  double input;  // constant argument that triggers the fold
};
211
212/* a list of {result, input} */
213static const TableEntry tbl_acos[] = {
214  {MATH_PI / 2.0, 0.0},
215  {MATH_PI / 2.0, -0.0},
216  {0.0, 1.0},
217  {MATH_PI, -1.0}
218};
219static const TableEntry tbl_acosh[] = {
220  {0.0, 1.0}
221};
222static const TableEntry tbl_acospi[] = {
223  {0.5, 0.0},
224  {0.5, -0.0},
225  {0.0, 1.0},
226  {1.0, -1.0}
227};
228static const TableEntry tbl_asin[] = {
229  {0.0, 0.0},
230  {-0.0, -0.0},
231  {MATH_PI / 2.0, 1.0},
232  {-MATH_PI / 2.0, -1.0}
233};
234static const TableEntry tbl_asinh[] = {
235  {0.0, 0.0},
236  {-0.0, -0.0}
237};
238static const TableEntry tbl_asinpi[] = {
239  {0.0, 0.0},
240  {-0.0, -0.0},
241  {0.5, 1.0},
242  {-0.5, -1.0}
243};
244static const TableEntry tbl_atan[] = {
245  {0.0, 0.0},
246  {-0.0, -0.0},
247  {MATH_PI / 4.0, 1.0},
248  {-MATH_PI / 4.0, -1.0}
249};
250static const TableEntry tbl_atanh[] = {
251  {0.0, 0.0},
252  {-0.0, -0.0}
253};
254static const TableEntry tbl_atanpi[] = {
255  {0.0, 0.0},
256  {-0.0, -0.0},
257  {0.25, 1.0},
258  {-0.25, -1.0}
259};
260static const TableEntry tbl_cbrt[] = {
261  {0.0, 0.0},
262  {-0.0, -0.0},
263  {1.0, 1.0},
264  {-1.0, -1.0},
265};
266static const TableEntry tbl_cos[] = {
267  {1.0, 0.0},
268  {1.0, -0.0}
269};
270static const TableEntry tbl_cosh[] = {
271  {1.0, 0.0},
272  {1.0, -0.0}
273};
274static const TableEntry tbl_cospi[] = {
275  {1.0, 0.0},
276  {1.0, -0.0}
277};
278static const TableEntry tbl_erfc[] = {
279  {1.0, 0.0},
280  {1.0, -0.0}
281};
282static const TableEntry tbl_erf[] = {
283  {0.0, 0.0},
284  {-0.0, -0.0}
285};
286static const TableEntry tbl_exp[] = {
287  {1.0, 0.0},
288  {1.0, -0.0},
289  {MATH_E, 1.0}
290};
291static const TableEntry tbl_exp2[] = {
292  {1.0, 0.0},
293  {1.0, -0.0},
294  {2.0, 1.0}
295};
296static const TableEntry tbl_exp10[] = {
297  {1.0, 0.0},
298  {1.0, -0.0},
299  {10.0, 1.0}
300};
301static const TableEntry tbl_expm1[] = {
302  {0.0, 0.0},
303  {-0.0, -0.0}
304};
305static const TableEntry tbl_log[] = {
306  {0.0, 1.0},
307  {1.0, MATH_E}
308};
309static const TableEntry tbl_log2[] = {
310  {0.0, 1.0},
311  {1.0, 2.0}
312};
313static const TableEntry tbl_log10[] = {
314  {0.0, 1.0},
315  {1.0, 10.0}
316};
317static const TableEntry tbl_rsqrt[] = {
318  {1.0, 1.0},
319  {MATH_SQRT1_2, 2.0}
320};
321static const TableEntry tbl_sin[] = {
322  {0.0, 0.0},
323  {-0.0, -0.0}
324};
325static const TableEntry tbl_sinh[] = {
326  {0.0, 0.0},
327  {-0.0, -0.0}
328};
329static const TableEntry tbl_sinpi[] = {
330  {0.0, 0.0},
331  {-0.0, -0.0}
332};
333static const TableEntry tbl_sqrt[] = {
334  {0.0, 0.0},
335  {1.0, 1.0},
336  {MATH_SQRT2, 2.0}
337};
338static const TableEntry tbl_tan[] = {
339  {0.0, 0.0},
340  {-0.0, -0.0}
341};
342static const TableEntry tbl_tanh[] = {
343  {0.0, 0.0},
344  {-0.0, -0.0}
345};
346static const TableEntry tbl_tanpi[] = {
347  {0.0, 0.0},
348  {-0.0, -0.0}
349};
350static const TableEntry tbl_tgamma[] = {
351  {1.0, 1.0},
352  {1.0, 2.0},
353  {2.0, 3.0},
354  {6.0, 4.0}
355};
356
357static bool HasNative(AMDGPULibFunc::EFuncId id) {
358  switch(id) {
359  case AMDGPULibFunc::EI_DIVIDE:
360  case AMDGPULibFunc::EI_COS:
361  case AMDGPULibFunc::EI_EXP:
362  case AMDGPULibFunc::EI_EXP2:
363  case AMDGPULibFunc::EI_EXP10:
364  case AMDGPULibFunc::EI_LOG:
365  case AMDGPULibFunc::EI_LOG2:
366  case AMDGPULibFunc::EI_LOG10:
367  case AMDGPULibFunc::EI_POWR:
368  case AMDGPULibFunc::EI_RECIP:
369  case AMDGPULibFunc::EI_RSQRT:
370  case AMDGPULibFunc::EI_SIN:
371  case AMDGPULibFunc::EI_SINCOS:
372  case AMDGPULibFunc::EI_SQRT:
373  case AMDGPULibFunc::EI_TAN:
374    return true;
375  default:;
376  }
377  return false;
378}
379
380using TableRef = ArrayRef<TableEntry>;
381
382static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
383  switch(id) {
384  case AMDGPULibFunc::EI_ACOS:    return TableRef(tbl_acos);
385  case AMDGPULibFunc::EI_ACOSH:   return TableRef(tbl_acosh);
386  case AMDGPULibFunc::EI_ACOSPI:  return TableRef(tbl_acospi);
387  case AMDGPULibFunc::EI_ASIN:    return TableRef(tbl_asin);
388  case AMDGPULibFunc::EI_ASINH:   return TableRef(tbl_asinh);
389  case AMDGPULibFunc::EI_ASINPI:  return TableRef(tbl_asinpi);
390  case AMDGPULibFunc::EI_ATAN:    return TableRef(tbl_atan);
391  case AMDGPULibFunc::EI_ATANH:   return TableRef(tbl_atanh);
392  case AMDGPULibFunc::EI_ATANPI:  return TableRef(tbl_atanpi);
393  case AMDGPULibFunc::EI_CBRT:    return TableRef(tbl_cbrt);
394  case AMDGPULibFunc::EI_NCOS:
395  case AMDGPULibFunc::EI_COS:     return TableRef(tbl_cos);
396  case AMDGPULibFunc::EI_COSH:    return TableRef(tbl_cosh);
397  case AMDGPULibFunc::EI_COSPI:   return TableRef(tbl_cospi);
398  case AMDGPULibFunc::EI_ERFC:    return TableRef(tbl_erfc);
399  case AMDGPULibFunc::EI_ERF:     return TableRef(tbl_erf);
400  case AMDGPULibFunc::EI_EXP:     return TableRef(tbl_exp);
401  case AMDGPULibFunc::EI_NEXP2:
402  case AMDGPULibFunc::EI_EXP2:    return TableRef(tbl_exp2);
403  case AMDGPULibFunc::EI_EXP10:   return TableRef(tbl_exp10);
404  case AMDGPULibFunc::EI_EXPM1:   return TableRef(tbl_expm1);
405  case AMDGPULibFunc::EI_LOG:     return TableRef(tbl_log);
406  case AMDGPULibFunc::EI_NLOG2:
407  case AMDGPULibFunc::EI_LOG2:    return TableRef(tbl_log2);
408  case AMDGPULibFunc::EI_LOG10:   return TableRef(tbl_log10);
409  case AMDGPULibFunc::EI_NRSQRT:
410  case AMDGPULibFunc::EI_RSQRT:   return TableRef(tbl_rsqrt);
411  case AMDGPULibFunc::EI_NSIN:
412  case AMDGPULibFunc::EI_SIN:     return TableRef(tbl_sin);
413  case AMDGPULibFunc::EI_SINH:    return TableRef(tbl_sinh);
414  case AMDGPULibFunc::EI_SINPI:   return TableRef(tbl_sinpi);
415  case AMDGPULibFunc::EI_NSQRT:
416  case AMDGPULibFunc::EI_SQRT:    return TableRef(tbl_sqrt);
417  case AMDGPULibFunc::EI_TAN:     return TableRef(tbl_tan);
418  case AMDGPULibFunc::EI_TANH:    return TableRef(tbl_tanh);
419  case AMDGPULibFunc::EI_TANPI:   return TableRef(tbl_tanpi);
420  case AMDGPULibFunc::EI_TGAMMA:  return TableRef(tbl_tgamma);
421  default:;
422  }
423  return TableRef();
424}
425
426static inline int getVecSize(const AMDGPULibFunc& FInfo) {
427  return FInfo.getLeads()[0].VectorSize;
428}
429
430static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
431  return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
432}
433
434FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
435  // If we are doing PreLinkOpt, the function is external. So it is safe to
436  // use getOrInsertFunction() at this stage.
437
438  return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
439                       : AMDGPULibFunc::getFunction(M, fInfo);
440}
441
442bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
443                                       FuncInfo &FInfo) {
444  return AMDGPULibFunc::parse(FMangledName, FInfo);
445}
446
447bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
448  if (auto Op = dyn_cast<FPMathOperator>(CI))
449    if (Op->isFast())
450      return true;
451  const Function *F = CI->getParent()->getParent();
452  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
453  return Attr.getValueAsBool();
454}
455
456bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
457  return AllNative || llvm::is_contained(UseNative, F);
458}
459
460void AMDGPULibCalls::initNativeFuncs() {
461  AllNative = useNativeFunc("all") ||
462              (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
463               UseNative.begin()->empty());
464}
465
466bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
467  bool native_sin = useNativeFunc("sin");
468  bool native_cos = useNativeFunc("cos");
469
470  if (native_sin && native_cos) {
471    Module *M = aCI->getModule();
472    Value *opr0 = aCI->getArgOperand(0);
473
474    AMDGPULibFunc nf;
475    nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
476    nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
477
478    nf.setPrefix(AMDGPULibFunc::NATIVE);
479    nf.setId(AMDGPULibFunc::EI_SIN);
480    FunctionCallee sinExpr = getFunction(M, nf);
481
482    nf.setPrefix(AMDGPULibFunc::NATIVE);
483    nf.setId(AMDGPULibFunc::EI_COS);
484    FunctionCallee cosExpr = getFunction(M, nf);
485    if (sinExpr && cosExpr) {
486      Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
487      Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
488      new StoreInst(cosval, aCI->getArgOperand(1), aCI);
489
490      DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
491                                          << " with native version of sin/cos");
492
493      replaceCall(sinval);
494      return true;
495    }
496  }
497  return false;
498}
499
500bool AMDGPULibCalls::useNative(CallInst *aCI) {
501  CI = aCI;
502  Function *Callee = aCI->getCalledFunction();
503
504  FuncInfo FInfo;
505  if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
506      FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
507      getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
508      !(AllNative || useNativeFunc(FInfo.getName()))) {
509    return false;
510  }
511
512  if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
513    return sincosUseNative(aCI, FInfo);
514
515  FInfo.setPrefix(AMDGPULibFunc::NATIVE);
516  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
517  if (!F)
518    return false;
519
520  aCI->setCalledFunction(F);
521  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
522                                      << " with native version");
523  return true;
524}
525
526// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
527// builtin, with appended type size and alignment arguments, where 2 or 4
528// indicates the original number of arguments. The library has optimized version
529// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
530// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
531// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
532// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
533bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
534                                          const FuncInfo &FInfo) {
535  auto *Callee = CI->getCalledFunction();
536  if (!Callee->isDeclaration())
537    return false;
538
539  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
540  auto *M = Callee->getParent();
541  auto &Ctx = M->getContext();
542  std::string Name = std::string(Callee->getName());
543  auto NumArg = CI->arg_size();
544  if (NumArg != 4 && NumArg != 6)
545    return false;
546  auto *PacketSize = CI->getArgOperand(NumArg - 2);
547  auto *PacketAlign = CI->getArgOperand(NumArg - 1);
548  if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
549    return false;
550  unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
551  Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
552  if (Alignment != Size)
553    return false;
554
555  Type *PtrElemTy;
556  if (Size <= 8)
557    PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
558  else
559    PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
560  unsigned PtrArgLoc = CI->arg_size() - 3;
561  auto PtrArg = CI->getArgOperand(PtrArgLoc);
562  unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
563  auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
564
565  SmallVector<llvm::Type *, 6> ArgTys;
566  for (unsigned I = 0; I != PtrArgLoc; ++I)
567    ArgTys.push_back(CI->getArgOperand(I)->getType());
568  ArgTys.push_back(PtrTy);
569
570  Name = Name + "_" + std::to_string(Size);
571  auto *FTy = FunctionType::get(Callee->getReturnType(),
572                                ArrayRef<Type *>(ArgTys), false);
573  AMDGPULibFunc NewLibFunc(Name, FTy);
574  FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
575  if (!F)
576    return false;
577
578  auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
579  SmallVector<Value *, 6> Args;
580  for (unsigned I = 0; I != PtrArgLoc; ++I)
581    Args.push_back(CI->getArgOperand(I));
582  Args.push_back(BCast);
583
584  auto *NCI = B.CreateCall(F, Args);
585  NCI->setAttributes(CI->getAttributes());
586  CI->replaceAllUsesWith(NCI);
587  CI->dropAllReferences();
588  CI->eraseFromParent();
589
590  return true;
591}
592
593// This function returns false if no change; return true otherwise.
594bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
595  this->CI = CI;
596  Function *Callee = CI->getCalledFunction();
597
598  // Ignore indirect calls.
599  if (Callee == nullptr)
600    return false;
601
602  BasicBlock *BB = CI->getParent();
603  LLVMContext &Context = CI->getParent()->getContext();
604  IRBuilder<> B(Context);
605
606  // Set the builder to the instruction after the call.
607  B.SetInsertPoint(BB, CI->getIterator());
608
609  // Copy fast flags from the original call.
610  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
611    B.setFastMathFlags(FPOp->getFastMathFlags());
612
613  switch (Callee->getIntrinsicID()) {
614  default:
615    break;
616  case Intrinsic::amdgcn_wavefrontsize:
617    return !EnablePreLink && fold_wavefrontsize(CI, B);
618  }
619
620  FuncInfo FInfo;
621  if (!parseFunctionName(Callee->getName(), FInfo))
622    return false;
623
624  // Further check the number of arguments to see if they match.
625  if (CI->arg_size() != FInfo.getNumArgs())
626    return false;
627
628  if (TDOFold(CI, FInfo))
629    return true;
630
631  // Under unsafe-math, evaluate calls if possible.
632  // According to Brian Sumner, we can do this for all f32 function calls
633  // using host's double function calls.
634  if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
635    return true;
636
637  // Specialized optimizations for each function call
638  switch (FInfo.getId()) {
639  case AMDGPULibFunc::EI_RECIP:
640    // skip vector function
641    assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
642             FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
643            "recip must be an either native or half function");
644    return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
645
646  case AMDGPULibFunc::EI_DIVIDE:
647    // skip vector function
648    assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
649             FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
650            "divide must be an either native or half function");
651    return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
652
653  case AMDGPULibFunc::EI_POW:
654  case AMDGPULibFunc::EI_POWR:
655  case AMDGPULibFunc::EI_POWN:
656    return fold_pow(CI, B, FInfo);
657
658  case AMDGPULibFunc::EI_ROOTN:
659    // skip vector function
660    return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
661
662  case AMDGPULibFunc::EI_FMA:
663  case AMDGPULibFunc::EI_MAD:
664  case AMDGPULibFunc::EI_NFMA:
665    // skip vector function
666    return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
667
668  case AMDGPULibFunc::EI_SQRT:
669    return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
670  case AMDGPULibFunc::EI_COS:
671  case AMDGPULibFunc::EI_SIN:
672    if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
673         getArgType(FInfo) == AMDGPULibFunc::F64)
674        && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
675      return fold_sincos(CI, B, AA);
676
677    break;
678  case AMDGPULibFunc::EI_READ_PIPE_2:
679  case AMDGPULibFunc::EI_READ_PIPE_4:
680  case AMDGPULibFunc::EI_WRITE_PIPE_2:
681  case AMDGPULibFunc::EI_WRITE_PIPE_4:
682    return fold_read_write_pipe(CI, B, FInfo);
683
684  default:
685    break;
686  }
687
688  return false;
689}
690
691bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
692  // Table-Driven optimization
693  const TableRef tr = getOptTable(FInfo.getId());
694  if (tr.empty())
695    return false;
696
697  int const sz = (int)tr.size();
698  Value *opr0 = CI->getArgOperand(0);
699
700  if (getVecSize(FInfo) > 1) {
701    if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
702      SmallVector<double, 0> DVal;
703      for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
704        ConstantFP *eltval = dyn_cast<ConstantFP>(
705                               CV->getElementAsConstant((unsigned)eltNo));
706        assert(eltval && "Non-FP arguments in math function!");
707        bool found = false;
708        for (int i=0; i < sz; ++i) {
709          if (eltval->isExactlyValue(tr[i].input)) {
710            DVal.push_back(tr[i].result);
711            found = true;
712            break;
713          }
714        }
715        if (!found) {
716          // This vector constants not handled yet.
717          return false;
718        }
719      }
720      LLVMContext &context = CI->getParent()->getParent()->getContext();
721      Constant *nval;
722      if (getArgType(FInfo) == AMDGPULibFunc::F32) {
723        SmallVector<float, 0> FVal;
724        for (unsigned i = 0; i < DVal.size(); ++i) {
725          FVal.push_back((float)DVal[i]);
726        }
727        ArrayRef<float> tmp(FVal);
728        nval = ConstantDataVector::get(context, tmp);
729      } else { // F64
730        ArrayRef<double> tmp(DVal);
731        nval = ConstantDataVector::get(context, tmp);
732      }
733      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
734      replaceCall(nval);
735      return true;
736    }
737  } else {
738    // Scalar version
739    if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
740      for (int i = 0; i < sz; ++i) {
741        if (CF->isExactlyValue(tr[i].input)) {
742          Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
743          LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
744          replaceCall(nval);
745          return true;
746        }
747      }
748    }
749  }
750
751  return false;
752}
753
754//  [native_]half_recip(c) ==> 1.0/c
755bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
756                                const FuncInfo &FInfo) {
757  Value *opr0 = CI->getArgOperand(0);
758  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
759    // Just create a normal div. Later, InstCombine will be able
760    // to compute the divide into a constant (avoid check float infinity
761    // or subnormal at this point).
762    Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
763                               opr0,
764                               "recip2div");
765    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
766    replaceCall(nval);
767    return true;
768  }
769  return false;
770}
771
772//  [native_]half_divide(x, c) ==> x/c
773bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
774                                 const FuncInfo &FInfo) {
775  Value *opr0 = CI->getArgOperand(0);
776  Value *opr1 = CI->getArgOperand(1);
777  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
778  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
779
780  if ((CF0 && CF1) ||  // both are constants
781      (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
782      // CF1 is constant && f32 divide
783  {
784    Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
785                                opr1, "__div2recip");
786    Value *nval  = B.CreateFMul(opr0, nval1, "__div2mul");
787    replaceCall(nval);
788    return true;
789  }
790  return false;
791}
792
namespace llvm {
/// Base-2 logarithm of \p V, evaluated on the host.
///
/// <cmath> has provided std::log2 since C++11, so no feature-test macros or
/// log(V)/ln(2) fallback are needed.
static double log2(double V) { return std::log2(V); }
} // end namespace llvm
802
803bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
804                              const FuncInfo &FInfo) {
805  assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
806          FInfo.getId() == AMDGPULibFunc::EI_POWR ||
807          FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
808         "fold_pow: encounter a wrong function call");
809
810  Value *opr0, *opr1;
811  ConstantFP *CF;
812  ConstantInt *CINT;
813  ConstantAggregateZero *CZero;
814  Type *eltType;
815
816  opr0 = CI->getArgOperand(0);
817  opr1 = CI->getArgOperand(1);
818  CZero = dyn_cast<ConstantAggregateZero>(opr1);
819  if (getVecSize(FInfo) == 1) {
820    eltType = opr0->getType();
821    CF = dyn_cast<ConstantFP>(opr1);
822    CINT = dyn_cast<ConstantInt>(opr1);
823  } else {
824    VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
825    assert(VTy && "Oprand of vector function should be of vectortype");
826    eltType = VTy->getElementType();
827    ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
828
829    // Now, only Handle vector const whose elements have the same value.
830    CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
831    CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
832  }
833
834  // No unsafe math , no constant argument, do nothing
835  if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
836    return false;
837
838  // 0x1111111 means that we don't do anything for this call.
839  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
840
841  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
842    //  pow/powr/pown(x, 0) == 1
843    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
844    Constant *cnval = ConstantFP::get(eltType, 1.0);
845    if (getVecSize(FInfo) > 1) {
846      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
847    }
848    replaceCall(cnval);
849    return true;
850  }
851  if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
852    // pow/powr/pown(x, 1.0) = x
853    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
854    replaceCall(opr0);
855    return true;
856  }
857  if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
858    // pow/powr/pown(x, 2.0) = x*x
859    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
860                      << "\n");
861    Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
862    replaceCall(nval);
863    return true;
864  }
865  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
866    // pow/powr/pown(x, -1.0) = 1.0/x
867    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
868    Constant *cnval = ConstantFP::get(eltType, 1.0);
869    if (getVecSize(FInfo) > 1) {
870      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
871    }
872    Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
873    replaceCall(nval);
874    return true;
875  }
876
877  Module *M = CI->getModule();
878  if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
879    // pow[r](x, [-]0.5) = sqrt(x)
880    bool issqrt = CF->isExactlyValue(0.5);
881    if (FunctionCallee FPExpr =
882            getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
883                                                : AMDGPULibFunc::EI_RSQRT,
884                                         FInfo))) {
885      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
886                        << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
887      Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
888                                                        : "__pow2rsqrt");
889      replaceCall(nval);
890      return true;
891    }
892  }
893
894  if (!isUnsafeMath(CI))
895    return false;
896
897  // Unsafe Math optimization
898
899  // Remember that ci_opr1 is set if opr1 is integral
900  if (CF) {
901    double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
902                    ? (double)CF->getValueAPF().convertToFloat()
903                    : CF->getValueAPF().convertToDouble();
904    int ival = (int)dval;
905    if ((double)ival == dval) {
906      ci_opr1 = ival;
907    } else
908      ci_opr1 = 0x11111111;
909  }
910
911  // pow/powr/pown(x, c) = [1/](x*x*..x); where
912  //   trunc(c) == c && the number of x == c && |c| <= 12
913  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
914  if (abs_opr1 <= 12) {
915    Constant *cnval;
916    Value *nval;
917    if (abs_opr1 == 0) {
918      cnval = ConstantFP::get(eltType, 1.0);
919      if (getVecSize(FInfo) > 1) {
920        cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
921      }
922      nval = cnval;
923    } else {
924      Value *valx2 = nullptr;
925      nval = nullptr;
926      while (abs_opr1 > 0) {
927        valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
928        if (abs_opr1 & 1) {
929          nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
930        }
931        abs_opr1 >>= 1;
932      }
933    }
934
935    if (ci_opr1 < 0) {
936      cnval = ConstantFP::get(eltType, 1.0);
937      if (getVecSize(FInfo) > 1) {
938        cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
939      }
940      nval = B.CreateFDiv(cnval, nval, "__1powprod");
941    }
942    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
943                      << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
944                      << ")\n");
945    replaceCall(nval);
946    return true;
947  }
948
949  // powr ---> exp2(y * log2(x))
950  // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
951  FunctionCallee ExpExpr =
952      getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
953  if (!ExpExpr)
954    return false;
955
956  bool needlog = false;
957  bool needabs = false;
958  bool needcopysign = false;
959  Constant *cnval = nullptr;
960  if (getVecSize(FInfo) == 1) {
961    CF = dyn_cast<ConstantFP>(opr0);
962
963    if (CF) {
964      double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
965                   ? (double)CF->getValueAPF().convertToFloat()
966                   : CF->getValueAPF().convertToDouble();
967
968      V = log2(std::abs(V));
969      cnval = ConstantFP::get(eltType, V);
970      needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
971                     CF->isNegative();
972    } else {
973      needlog = true;
974      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
975                               (!CF || CF->isNegative());
976    }
977  } else {
978    ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
979
980    if (!CDV) {
981      needlog = true;
982      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
983    } else {
984      assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
985              "Wrong vector size detected");
986
987      SmallVector<double, 0> DVal;
988      for (int i=0; i < getVecSize(FInfo); ++i) {
989        double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
990                     ? (double)CDV->getElementAsFloat(i)
991                     : CDV->getElementAsDouble(i);
992        if (V < 0.0) needcopysign = true;
993        V = log2(std::abs(V));
994        DVal.push_back(V);
995      }
996      if (getArgType(FInfo) == AMDGPULibFunc::F32) {
997        SmallVector<float, 0> FVal;
998        for (unsigned i=0; i < DVal.size(); ++i) {
999          FVal.push_back((float)DVal[i]);
1000        }
1001        ArrayRef<float> tmp(FVal);
1002        cnval = ConstantDataVector::get(M->getContext(), tmp);
1003      } else {
1004        ArrayRef<double> tmp(DVal);
1005        cnval = ConstantDataVector::get(M->getContext(), tmp);
1006      }
1007    }
1008  }
1009
1010  if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
1011    // We cannot handle corner cases for a general pow() function, give up
1012    // unless y is a constant integral value. Then proceed as if it were pown.
1013    if (getVecSize(FInfo) == 1) {
1014      if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
1015        double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1016                   ? (double)CF->getValueAPF().convertToFloat()
1017                   : CF->getValueAPF().convertToDouble();
1018        if (y != (double)(int64_t)y)
1019          return false;
1020      } else
1021        return false;
1022    } else {
1023      if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
1024        for (int i=0; i < getVecSize(FInfo); ++i) {
1025          double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1026                     ? (double)CDV->getElementAsFloat(i)
1027                     : CDV->getElementAsDouble(i);
1028          if (y != (double)(int64_t)y)
1029            return false;
1030        }
1031      } else
1032        return false;
1033    }
1034  }
1035
1036  Value *nval;
1037  if (needabs) {
1038    FunctionCallee AbsExpr =
1039        getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
1040    if (!AbsExpr)
1041      return false;
1042    nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
1043  } else {
1044    nval = cnval ? cnval : opr0;
1045  }
1046  if (needlog) {
1047    FunctionCallee LogExpr =
1048        getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1049    if (!LogExpr)
1050      return false;
1051    nval = CreateCallEx(B,LogExpr, nval, "__log2");
1052  }
1053
1054  if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
1055    // convert int(32) to fp(f32 or f64)
1056    opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1057  }
1058  nval = B.CreateFMul(opr1, nval, "__ylogx");
1059  nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
1060
1061  if (needcopysign) {
1062    Value *opr_n;
1063    Type* rTy = opr0->getType();
1064    Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
1065    Type *nTy = nTyS;
1066    if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
1067      nTy = FixedVectorType::get(nTyS, vTy);
1068    unsigned size = nTy->getScalarSizeInBits();
1069    opr_n = CI->getArgOperand(1);
1070    if (opr_n->getType()->isIntegerTy())
1071      opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
1072    else
1073      opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1074
1075    Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1076    sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1077    nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1078    nval = B.CreateBitCast(nval, opr0->getType());
1079  }
1080
1081  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1082                    << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1083  replaceCall(nval);
1084
1085  return true;
1086}
1087
1088bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
1089                                const FuncInfo &FInfo) {
1090  Value *opr0 = CI->getArgOperand(0);
1091  Value *opr1 = CI->getArgOperand(1);
1092
1093  ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
1094  if (!CINT) {
1095    return false;
1096  }
1097  int ci_opr1 = (int)CINT->getSExtValue();
1098  if (ci_opr1 == 1) {  // rootn(x, 1) = x
1099    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
1100    replaceCall(opr0);
1101    return true;
1102  }
1103  if (ci_opr1 == 2) {  // rootn(x, 2) = sqrt(x)
1104    Module *M = CI->getModule();
1105    if (FunctionCallee FPExpr =
1106            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1107      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
1108      Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
1109      replaceCall(nval);
1110      return true;
1111    }
1112  } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1113    Module *M = CI->getModule();
1114    if (FunctionCallee FPExpr =
1115            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1116      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
1117      Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1118      replaceCall(nval);
1119      return true;
1120    }
1121  } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1122    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
1123    Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1124                               opr0,
1125                               "__rootn2div");
1126    replaceCall(nval);
1127    return true;
1128  } else if (ci_opr1 == -2) {  // rootn(x, -2) = rsqrt(x)
1129    Module *M = CI->getModule();
1130    if (FunctionCallee FPExpr =
1131            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
1132      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
1133                        << ")\n");
1134      Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
1135      replaceCall(nval);
1136      return true;
1137    }
1138  }
1139  return false;
1140}
1141
1142bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
1143                                  const FuncInfo &FInfo) {
1144  Value *opr0 = CI->getArgOperand(0);
1145  Value *opr1 = CI->getArgOperand(1);
1146  Value *opr2 = CI->getArgOperand(2);
1147
1148  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
1149  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
1150  if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
1151    // fma/mad(a, b, c) = c if a=0 || b=0
1152    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
1153    replaceCall(opr2);
1154    return true;
1155  }
1156  if (CF0 && CF0->isExactlyValue(1.0f)) {
1157    // fma/mad(a, b, c) = b+c if a=1
1158    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
1159                      << "\n");
1160    Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
1161    replaceCall(nval);
1162    return true;
1163  }
1164  if (CF1 && CF1->isExactlyValue(1.0f)) {
1165    // fma/mad(a, b, c) = a+c if b=1
1166    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
1167                      << "\n");
1168    Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
1169    replaceCall(nval);
1170    return true;
1171  }
1172  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
1173    if (CF->isZero()) {
1174      // fma/mad(a, b, c) = a*b if c=0
1175      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
1176                        << *opr1 << "\n");
1177      Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
1178      replaceCall(nval);
1179      return true;
1180    }
1181  }
1182
1183  return false;
1184}
1185
1186// Get a scalar native builtin single argument FP function
1187FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1188                                                 const FuncInfo &FInfo) {
1189  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1190    return nullptr;
1191  FuncInfo nf = FInfo;
1192  nf.setPrefix(AMDGPULibFunc::NATIVE);
1193  return getFunction(M, nf);
1194}
1195
1196// fold sqrt -> native_sqrt (x)
1197bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
1198                               const FuncInfo &FInfo) {
1199  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
1200      (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
1201    if (FunctionCallee FPExpr = getNativeFunction(
1202            CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1203      Value *opr0 = CI->getArgOperand(0);
1204      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1205                        << "sqrt(" << *opr0 << ")\n");
1206      Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
1207      replaceCall(nval);
1208      return true;
1209    }
1210  }
1211  return false;
1212}
1213
1214// fold sin, cos -> sincos.
1215bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
1216                                 AliasAnalysis *AA) {
1217  AMDGPULibFunc fInfo;
1218  if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
1219    return false;
1220
1221  assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1222         fInfo.getId() == AMDGPULibFunc::EI_COS);
1223  bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1224
1225  Value *CArgVal = CI->getArgOperand(0);
1226  BasicBlock * const CBB = CI->getParent();
1227
1228  int const MaxScan = 30;
1229  bool Changed = false;
1230
1231  { // fold in load value.
1232    LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
1233    if (LI && LI->getParent() == CBB) {
1234      BasicBlock::iterator BBI = LI->getIterator();
1235      Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
1236      if (AvailableVal) {
1237        Changed = true;
1238        CArgVal->replaceAllUsesWith(AvailableVal);
1239        if (CArgVal->getNumUses() == 0)
1240          LI->eraseFromParent();
1241        CArgVal = CI->getArgOperand(0);
1242      }
1243    }
1244  }
1245
1246  Module *M = CI->getModule();
1247  fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
1248  std::string const PairName = fInfo.mangle();
1249
1250  CallInst *UI = nullptr;
1251  for (User* U : CArgVal->users()) {
1252    CallInst *XI = dyn_cast_or_null<CallInst>(U);
1253    if (!XI || XI == CI || XI->getParent() != CBB)
1254      continue;
1255
1256    Function *UCallee = XI->getCalledFunction();
1257    if (!UCallee || !UCallee->getName().equals(PairName))
1258      continue;
1259
1260    BasicBlock::iterator BBI = CI->getIterator();
1261    if (BBI == CI->getParent()->begin())
1262      break;
1263    --BBI;
1264    for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
1265      if (cast<Instruction>(BBI) == XI) {
1266        UI = XI;
1267        break;
1268      }
1269    }
1270    if (UI) break;
1271  }
1272
1273  if (!UI)
1274    return Changed;
1275
1276  // Merge the sin and cos.
1277
1278  // for OpenCL 2.0 we have only generic implementation of sincos
1279  // function.
1280  AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
1281  nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
1282  FunctionCallee Fsincos = getFunction(M, nf);
1283  if (!Fsincos)
1284    return Changed;
1285
1286  BasicBlock::iterator ItOld = B.GetInsertPoint();
1287  AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
1288  B.SetInsertPoint(UI);
1289
1290  Value *P = Alloc;
1291  Type *PTy = Fsincos.getFunctionType()->getParamType(1);
1292  // The allocaInst allocates the memory in private address space. This need
1293  // to be bitcasted to point to the address space of cos pointer type.
1294  // In OpenCL 2.0 this is generic, while in 1.2 that is private.
1295  if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
1296    P = B.CreateAddrSpaceCast(Alloc, PTy);
1297  CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
1298
1299  LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
1300                    << *Call << "\n");
1301
1302  if (!isSin) { // CI->cos, UI->sin
1303    B.SetInsertPoint(&*ItOld);
1304    UI->replaceAllUsesWith(&*Call);
1305    Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1306    CI->replaceAllUsesWith(Reload);
1307    UI->eraseFromParent();
1308    CI->eraseFromParent();
1309  } else { // CI->sin, UI->cos
1310    Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1311    UI->replaceAllUsesWith(Reload);
1312    CI->replaceAllUsesWith(Call);
1313    UI->eraseFromParent();
1314    CI->eraseFromParent();
1315  }
1316  return true;
1317}
1318
1319bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
1320  if (!TM)
1321    return false;
1322
1323  StringRef CPU = TM->getTargetCPU();
1324  StringRef Features = TM->getTargetFeatureString();
1325  if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
1326      (Features.empty() || !Features.contains_insensitive("wavefrontsize")))
1327    return false;
1328
1329  Function *F = CI->getParent()->getParent();
1330  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
1331  unsigned N = ST.getWavefrontSize();
1332
1333  LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
1334               << N << "\n");
1335
1336  CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
1337  CI->eraseFromParent();
1338  return true;
1339}
1340
1341// Get insertion point at entry.
1342BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
1343  Function * Func = UI->getParent()->getParent();
1344  BasicBlock * BB = &Func->getEntryBlock();
1345  assert(BB && "Entry block not found!");
1346  BasicBlock::iterator ItNew = BB->begin();
1347  return ItNew;
1348}
1349
1350// Insert a AllocsInst at the beginning of function entry block.
1351AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
1352                                         const char *prefix) {
1353  BasicBlock::iterator ItNew = getEntryIns(UI);
1354  Function *UCallee = UI->getCalledFunction();
1355  Type *RetType = UCallee->getReturnType();
1356  B.SetInsertPoint(&*ItNew);
1357  AllocaInst *Alloc =
1358      B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName());
1359  Alloc->setAlignment(
1360      Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
1361  return Alloc;
1362}
1363
1364bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
1365                                            double& Res0, double& Res1,
1366                                            Constant *copr0, Constant *copr1,
1367                                            Constant *copr2) {
1368  // By default, opr0/opr1/opr3 holds values of float/double type.
1369  // If they are not float/double, each function has to its
1370  // operand separately.
1371  double opr0=0.0, opr1=0.0, opr2=0.0;
1372  ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
1373  ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
1374  ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
1375  if (fpopr0) {
1376    opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1377             ? fpopr0->getValueAPF().convertToDouble()
1378             : (double)fpopr0->getValueAPF().convertToFloat();
1379  }
1380
1381  if (fpopr1) {
1382    opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1383             ? fpopr1->getValueAPF().convertToDouble()
1384             : (double)fpopr1->getValueAPF().convertToFloat();
1385  }
1386
1387  if (fpopr2) {
1388    opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1389             ? fpopr2->getValueAPF().convertToDouble()
1390             : (double)fpopr2->getValueAPF().convertToFloat();
1391  }
1392
1393  switch (FInfo.getId()) {
1394  default : return false;
1395
1396  case AMDGPULibFunc::EI_ACOS:
1397    Res0 = acos(opr0);
1398    return true;
1399
1400  case AMDGPULibFunc::EI_ACOSH:
1401    // acosh(x) == log(x + sqrt(x*x - 1))
1402    Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1403    return true;
1404
1405  case AMDGPULibFunc::EI_ACOSPI:
1406    Res0 = acos(opr0) / MATH_PI;
1407    return true;
1408
1409  case AMDGPULibFunc::EI_ASIN:
1410    Res0 = asin(opr0);
1411    return true;
1412
1413  case AMDGPULibFunc::EI_ASINH:
1414    // asinh(x) == log(x + sqrt(x*x + 1))
1415    Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1416    return true;
1417
1418  case AMDGPULibFunc::EI_ASINPI:
1419    Res0 = asin(opr0) / MATH_PI;
1420    return true;
1421
1422  case AMDGPULibFunc::EI_ATAN:
1423    Res0 = atan(opr0);
1424    return true;
1425
1426  case AMDGPULibFunc::EI_ATANH:
1427    // atanh(x) == (log(x+1) - log(x-1))/2;
1428    Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
1429    return true;
1430
1431  case AMDGPULibFunc::EI_ATANPI:
1432    Res0 = atan(opr0) / MATH_PI;
1433    return true;
1434
1435  case AMDGPULibFunc::EI_CBRT:
1436    Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1437    return true;
1438
1439  case AMDGPULibFunc::EI_COS:
1440    Res0 = cos(opr0);
1441    return true;
1442
1443  case AMDGPULibFunc::EI_COSH:
1444    Res0 = cosh(opr0);
1445    return true;
1446
1447  case AMDGPULibFunc::EI_COSPI:
1448    Res0 = cos(MATH_PI * opr0);
1449    return true;
1450
1451  case AMDGPULibFunc::EI_EXP:
1452    Res0 = exp(opr0);
1453    return true;
1454
1455  case AMDGPULibFunc::EI_EXP2:
1456    Res0 = pow(2.0, opr0);
1457    return true;
1458
1459  case AMDGPULibFunc::EI_EXP10:
1460    Res0 = pow(10.0, opr0);
1461    return true;
1462
1463  case AMDGPULibFunc::EI_EXPM1:
1464    Res0 = exp(opr0) - 1.0;
1465    return true;
1466
1467  case AMDGPULibFunc::EI_LOG:
1468    Res0 = log(opr0);
1469    return true;
1470
1471  case AMDGPULibFunc::EI_LOG2:
1472    Res0 = log(opr0) / log(2.0);
1473    return true;
1474
1475  case AMDGPULibFunc::EI_LOG10:
1476    Res0 = log(opr0) / log(10.0);
1477    return true;
1478
1479  case AMDGPULibFunc::EI_RSQRT:
1480    Res0 = 1.0 / sqrt(opr0);
1481    return true;
1482
1483  case AMDGPULibFunc::EI_SIN:
1484    Res0 = sin(opr0);
1485    return true;
1486
1487  case AMDGPULibFunc::EI_SINH:
1488    Res0 = sinh(opr0);
1489    return true;
1490
1491  case AMDGPULibFunc::EI_SINPI:
1492    Res0 = sin(MATH_PI * opr0);
1493    return true;
1494
1495  case AMDGPULibFunc::EI_SQRT:
1496    Res0 = sqrt(opr0);
1497    return true;
1498
1499  case AMDGPULibFunc::EI_TAN:
1500    Res0 = tan(opr0);
1501    return true;
1502
1503  case AMDGPULibFunc::EI_TANH:
1504    Res0 = tanh(opr0);
1505    return true;
1506
1507  case AMDGPULibFunc::EI_TANPI:
1508    Res0 = tan(MATH_PI * opr0);
1509    return true;
1510
1511  case AMDGPULibFunc::EI_RECIP:
1512    Res0 = 1.0 / opr0;
1513    return true;
1514
1515  // two-arg functions
1516  case AMDGPULibFunc::EI_DIVIDE:
1517    Res0 = opr0 / opr1;
1518    return true;
1519
1520  case AMDGPULibFunc::EI_POW:
1521  case AMDGPULibFunc::EI_POWR:
1522    Res0 = pow(opr0, opr1);
1523    return true;
1524
1525  case AMDGPULibFunc::EI_POWN: {
1526    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1527      double val = (double)iopr1->getSExtValue();
1528      Res0 = pow(opr0, val);
1529      return true;
1530    }
1531    return false;
1532  }
1533
1534  case AMDGPULibFunc::EI_ROOTN: {
1535    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1536      double val = (double)iopr1->getSExtValue();
1537      Res0 = pow(opr0, 1.0 / val);
1538      return true;
1539    }
1540    return false;
1541  }
1542
1543  // with ptr arg
1544  case AMDGPULibFunc::EI_SINCOS:
1545    Res0 = sin(opr0);
1546    Res1 = cos(opr0);
1547    return true;
1548
1549  // three-arg functions
1550  case AMDGPULibFunc::EI_FMA:
1551  case AMDGPULibFunc::EI_MAD:
1552    Res0 = opr0 * opr1 + opr2;
1553    return true;
1554  }
1555
1556  return false;
1557}
1558
1559bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1560  int numArgs = (int)aCI->arg_size();
1561  if (numArgs > 3)
1562    return false;
1563
1564  Constant *copr0 = nullptr;
1565  Constant *copr1 = nullptr;
1566  Constant *copr2 = nullptr;
1567  if (numArgs > 0) {
1568    if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1569      return false;
1570  }
1571
1572  if (numArgs > 1) {
1573    if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1574      if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1575        return false;
1576    }
1577  }
1578
1579  if (numArgs > 2) {
1580    if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
1581      return false;
1582  }
1583
1584  // At this point, all arguments to aCI are constants.
1585
1586  // max vector size is 16, and sincos will generate two results.
1587  double DVal0[16], DVal1[16];
1588  int FuncVecSize = getVecSize(FInfo);
1589  bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1590  if (FuncVecSize == 1) {
1591    if (!evaluateScalarMathFunc(FInfo, DVal0[0],
1592                                DVal1[0], copr0, copr1, copr2)) {
1593      return false;
1594    }
1595  } else {
1596    ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1597    ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1598    ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
1599    for (int i = 0; i < FuncVecSize; ++i) {
1600      Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1601      Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1602      Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
1603      if (!evaluateScalarMathFunc(FInfo, DVal0[i],
1604                                  DVal1[i], celt0, celt1, celt2)) {
1605        return false;
1606      }
1607    }
1608  }
1609
1610  LLVMContext &context = CI->getParent()->getParent()->getContext();
1611  Constant *nval0, *nval1;
1612  if (FuncVecSize == 1) {
1613    nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
1614    if (hasTwoResults)
1615      nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
1616  } else {
1617    if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1618      SmallVector <float, 0> FVal0, FVal1;
1619      for (int i = 0; i < FuncVecSize; ++i)
1620        FVal0.push_back((float)DVal0[i]);
1621      ArrayRef<float> tmp0(FVal0);
1622      nval0 = ConstantDataVector::get(context, tmp0);
1623      if (hasTwoResults) {
1624        for (int i = 0; i < FuncVecSize; ++i)
1625          FVal1.push_back((float)DVal1[i]);
1626        ArrayRef<float> tmp1(FVal1);
1627        nval1 = ConstantDataVector::get(context, tmp1);
1628      }
1629    } else {
1630      ArrayRef<double> tmp0(DVal0);
1631      nval0 = ConstantDataVector::get(context, tmp0);
1632      if (hasTwoResults) {
1633        ArrayRef<double> tmp1(DVal1);
1634        nval1 = ConstantDataVector::get(context, tmp1);
1635      }
1636    }
1637  }
1638
1639  if (hasTwoResults) {
1640    // sincos
1641    assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
1642           "math function with ptr arg not supported yet");
1643    new StoreInst(nval1, aCI->getArgOperand(1), aCI);
1644  }
1645
1646  replaceCall(nval0);
1647  return true;
1648}
1649
1650// Public interface to the Simplify LibCalls pass.
1651FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
1652  return new AMDGPUSimplifyLibCalls(TM);
1653}
1654
1655FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
1656  return new AMDGPUUseNativeCalls();
1657}
1658
1659bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
1660  if (skipFunction(F))
1661    return false;
1662
1663  bool Changed = false;
1664  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1665
1666  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1667             F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1668
1669  for (auto &BB : F) {
1670    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1671      // Ignore non-calls.
1672      CallInst *CI = dyn_cast<CallInst>(I);
1673      ++I;
1674      // Ignore intrinsics that do not become real instructions.
1675      if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1676        continue;
1677
1678      // Ignore indirect calls.
1679      Function *Callee = CI->getCalledFunction();
1680      if (Callee == nullptr)
1681        continue;
1682
1683      LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1684                 dbgs().flush());
1685      if(Simplifier.fold(CI, AA))
1686        Changed = true;
1687    }
1688  }
1689  return Changed;
1690}
1691
1692PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
1693                                                  FunctionAnalysisManager &AM) {
1694  AMDGPULibCalls Simplifier(&TM);
1695  Simplifier.initNativeFuncs();
1696
1697  bool Changed = false;
1698  auto AA = &AM.getResult<AAManager>(F);
1699
1700  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1701             F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1702
1703  for (auto &BB : F) {
1704    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1705      // Ignore non-calls.
1706      CallInst *CI = dyn_cast<CallInst>(I);
1707      ++I;
1708      // Ignore intrinsics that do not become real instructions.
1709      if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1710        continue;
1711
1712      // Ignore indirect calls.
1713      Function *Callee = CI->getCalledFunction();
1714      if (Callee == nullptr)
1715        continue;
1716
1717      LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1718                 dbgs().flush());
1719      if (Simplifier.fold(CI, AA))
1720        Changed = true;
1721    }
1722  }
1723  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1724}
1725
1726bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
1727  if (skipFunction(F) || UseNative.empty())
1728    return false;
1729
1730  bool Changed = false;
1731  for (auto &BB : F) {
1732    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1733      // Ignore non-calls.
1734      CallInst *CI = dyn_cast<CallInst>(I);
1735      ++I;
1736      if (!CI) continue;
1737
1738      // Ignore indirect calls.
1739      Function *Callee = CI->getCalledFunction();
1740      if (Callee == nullptr)
1741        continue;
1742
1743      if (Simplifier.useNative(CI))
1744        Changed = true;
1745    }
1746  }
1747  return Changed;
1748}
1749
1750PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
1751                                                FunctionAnalysisManager &AM) {
1752  if (UseNative.empty())
1753    return PreservedAnalyses::all();
1754
1755  AMDGPULibCalls Simplifier;
1756  Simplifier.initNativeFuncs();
1757
1758  bool Changed = false;
1759  for (auto &BB : F) {
1760    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1761      // Ignore non-calls.
1762      CallInst *CI = dyn_cast<CallInst>(I);
1763      ++I;
1764      if (!CI)
1765        continue;
1766
1767      // Ignore indirect calls.
1768      Function *Callee = CI->getCalledFunction();
1769      if (Callee == nullptr)
1770        continue;
1771
1772      if (Simplifier.useNative(CI))
1773        Changed = true;
1774    }
1775  }
1776  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1777}
1778