1//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
17#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
18
19#include "AArch64.h"
20#include "AArch64Subtarget.h"
21#include "AArch64TargetMachine.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/Analysis/TargetTransformInfo.h"
24#include "llvm/CodeGen/BasicTTIImpl.h"
25#include "llvm/IR/Function.h"
26#include "llvm/IR/Intrinsics.h"
27#include <cstdint>
28
29namespace llvm {
30
31class APInt;
32class Instruction;
33class IntrinsicInst;
34class Loop;
35class SCEV;
36class ScalarEvolution;
37class Type;
38class Value;
39class VectorType;
40
41class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
42  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
43  using TTI = TargetTransformInfo;
44
45  friend BaseT;
46
47  const AArch64Subtarget *ST;
48  const AArch64TargetLowering *TLI;
49
50  const AArch64Subtarget *getST() const { return ST; }
51  const AArch64TargetLowering *getTLI() const { return TLI; }
52
53  enum MemIntrinsicType {
54    VECTOR_LDST_TWO_ELEMENTS,
55    VECTOR_LDST_THREE_ELEMENTS,
56    VECTOR_LDST_FOUR_ELEMENTS
57  };
58
59  bool isWideningInstruction(Type *Ty, unsigned Opcode,
60                             ArrayRef<const Value *> Args);
61
62public:
63  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
64      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
65        TLI(ST->getTargetLowering()) {}
66
67  bool areInlineCompatible(const Function *Caller,
68                           const Function *Callee) const;
69
70  /// \name Scalar TTI Implementations
71  /// @{
72
73  using BaseT::getIntImmCost;
74  InstructionCost getIntImmCost(int64_t Val);
75  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
76                                TTI::TargetCostKind CostKind);
77  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
78                                    const APInt &Imm, Type *Ty,
79                                    TTI::TargetCostKind CostKind,
80                                    Instruction *Inst = nullptr);
81  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
82                                      const APInt &Imm, Type *Ty,
83                                      TTI::TargetCostKind CostKind);
84  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
85
86  /// @}
87
88  /// \name Vector TTI Implementations
89  /// @{
90
91  bool enableInterleavedAccessVectorization() { return true; }
92
93  unsigned getNumberOfRegisters(unsigned ClassID) const {
94    bool Vector = (ClassID == 1);
95    if (Vector) {
96      if (ST->hasNEON())
97        return 32;
98      return 0;
99    }
100    return 31;
101  }
102
103  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
104                                        TTI::TargetCostKind CostKind);
105
106  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
107                                               IntrinsicInst &II) const;
108
109  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
110    switch (K) {
111    case TargetTransformInfo::RGK_Scalar:
112      return TypeSize::getFixed(64);
113    case TargetTransformInfo::RGK_FixedWidthVector:
114      if (ST->hasSVE())
115        return TypeSize::getFixed(
116            std::max(ST->getMinSVEVectorSizeInBits(), 128u));
117      return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
118    case TargetTransformInfo::RGK_ScalableVector:
119      return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
120    }
121    llvm_unreachable("Unsupported register kind");
122  }
123
124  unsigned getMinVectorRegisterBitWidth() {
125    return ST->getMinVectorRegisterBitWidth();
126  }
127
128  Optional<unsigned> getMaxVScale() const {
129    if (ST->hasSVE())
130      return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
131    return BaseT::getMaxVScale();
132  }
133
134  unsigned getMaxInterleaveFactor(unsigned VF);
135
136  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
137                                        Align Alignment, unsigned AddressSpace,
138                                        TTI::TargetCostKind CostKind);
139
140  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
141                                         const Value *Ptr, bool VariableMask,
142                                         Align Alignment,
143                                         TTI::TargetCostKind CostKind,
144                                         const Instruction *I = nullptr);
145
146  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
147                                   TTI::CastContextHint CCH,
148                                   TTI::TargetCostKind CostKind,
149                                   const Instruction *I = nullptr);
150
151  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
152                                           VectorType *VecTy, unsigned Index);
153
154  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
155                                 const Instruction *I = nullptr);
156
157  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
158                                     unsigned Index);
159
160  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
161                                         bool IsPairwise, bool IsUnsigned,
162                                         TTI::TargetCostKind CostKind);
163
164  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
165                                                VectorType *ValTy,
166                                                bool IsPairwiseForm,
167                                                TTI::TargetCostKind CostKind);
168
169  InstructionCost getArithmeticInstrCost(
170      unsigned Opcode, Type *Ty,
171      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
172      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
173      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
174      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
175      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
176      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
177      const Instruction *CxtI = nullptr);
178
179  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
180                                            const SCEV *Ptr);
181
182  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
183                                     CmpInst::Predicate VecPred,
184                                     TTI::TargetCostKind CostKind,
185                                     const Instruction *I = nullptr);
186
187  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
188                                                    bool IsZeroCmp) const;
189  bool useNeonVector(const Type *Ty) const;
190
191  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
192                                  MaybeAlign Alignment, unsigned AddressSpace,
193                                  TTI::TargetCostKind CostKind,
194                                  const Instruction *I = nullptr);
195
196  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
197
198  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
199                               TTI::UnrollingPreferences &UP);
200
201  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
202                             TTI::PeelingPreferences &PP);
203
204  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
205                                           Type *ExpectedType);
206
207  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
208
209  bool isLegalElementTypeForSVE(Type *Ty) const {
210    if (Ty->isPointerTy())
211      return true;
212
213    if (Ty->isBFloatTy() && ST->hasBF16())
214      return true;
215
216    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
217      return true;
218
219    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
220        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
221      return true;
222
223    return false;
224  }
225
226  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
227    if (!ST->hasSVE())
228      return false;
229
230    // For fixed vectors, avoid scalarization if using SVE for them.
231    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
232      return false; // Fall back to scalarization of masked operations.
233
234    return isLegalElementTypeForSVE(DataType->getScalarType());
235  }
236
237  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
238    return isLegalMaskedLoadStore(DataType, Alignment);
239  }
240
241  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
242    return isLegalMaskedLoadStore(DataType, Alignment);
243  }
244
245  bool isLegalMaskedGatherScatter(Type *DataType) const {
246    if (isa<FixedVectorType>(DataType) || !ST->hasSVE())
247      return false;
248
249    return isLegalElementTypeForSVE(DataType->getScalarType());
250  }
251
252  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
253    return isLegalMaskedGatherScatter(DataType);
254  }
255  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
256    return isLegalMaskedGatherScatter(DataType);
257  }
258
259  bool isLegalNTStore(Type *DataType, Align Alignment) {
260    // NOTE: The logic below is mostly geared towards LV, which calls it with
261    //       vectors with 2 elements. We might want to improve that, if other
262    //       users show up.
263    // Nontemporal vector stores can be directly lowered to STNP, if the vector
264    // can be halved so that each half fits into a register. That's the case if
265    // the element type fits into a register and the number of elements is a
266    // power of 2 > 1.
267    if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
268      unsigned NumElements =
269          cast<FixedVectorType>(DataTypeVTy)->getNumElements();
270      unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
271      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
272             EltSize <= 128 && isPowerOf2_64(EltSize);
273    }
274    return BaseT::isLegalNTStore(DataType, Alignment);
275  }
276
277  InstructionCost getInterleavedMemoryOpCost(
278      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
279      Align Alignment, unsigned AddressSpace,
280      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
281      bool UseMaskForCond = false, bool UseMaskForGaps = false);
282
283  bool
284  shouldConsiderAddressTypePromotion(const Instruction &I,
285                                     bool &AllowPromotionWithoutCommonHeader);
286
287  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
288
289  unsigned getGISelRematGlobalCost() const {
290    return 2;
291  }
292
293  bool supportsScalableVectors() const { return ST->hasSVE(); }
294
295  bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc,
296                                   ElementCount VF) const;
297
298  InstructionCost getArithmeticReductionCost(
299      unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
300      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
301
302  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
303                                 ArrayRef<int> Mask, int Index,
304                                 VectorType *SubTp);
305  /// @}
306};
307
308} // end namespace llvm
309
310#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
311