ARMTargetTransformInfo.h revision 360784
1//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific to the
11/// ARM target machine. It uses the target's detailed information to
12/// provide more precise answers to certain TTI queries, while letting the
13/// target independent and default TTI implementations handle the rest.
14//
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
18#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
19
20#include "ARM.h"
21#include "ARMSubtarget.h"
22#include "ARMTargetMachine.h"
23#include "llvm/ADT/ArrayRef.h"
24#include "llvm/Analysis/TargetTransformInfo.h"
25#include "llvm/CodeGen/BasicTTIImpl.h"
26#include "llvm/IR/Constant.h"
27#include "llvm/IR/Function.h"
28#include "llvm/MC/SubtargetFeature.h"
29
30namespace llvm {
31
32class APInt;
33class ARMTargetLowering;
34class Instruction;
35class Loop;
36class SCEV;
37class ScalarEvolution;
38class Type;
39class Value;
40
41class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
42  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
43  using TTI = TargetTransformInfo;
44
45  friend BaseT;
46
47  const ARMSubtarget *ST;
48  const ARMTargetLowering *TLI;
49
50  // Currently the following features are excluded from InlineFeatureWhitelist.
51  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
52  // Depending on whether they are set or unset, different
53  // instructions/registers are available. For example, inlining a callee with
54  // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
55  // fail if the callee uses ARM only instructions, e.g. in inline asm.
56  const FeatureBitset InlineFeatureWhitelist = {
57      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
58      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
59      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
60      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
61      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
62      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
63      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
64      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
65      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
66      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
67      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
68      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
69      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
70      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
71      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
72      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
73      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
74      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
75      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
76      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
77      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
78      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
79      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
80      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
81  };
82
83  const ARMSubtarget *getST() const { return ST; }
84  const ARMTargetLowering *getTLI() const { return TLI; }
85
86public:
87  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
88      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
89        TLI(ST->getTargetLowering()) {}
90
91  bool areInlineCompatible(const Function *Caller,
92                           const Function *Callee) const;
93
94  bool enableInterleavedAccessVectorization() { return true; }
95
96  bool shouldFavorBackedgeIndex(const Loop *L) const {
97    if (L->getHeader()->getParent()->hasOptSize())
98      return false;
99    return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1;
100  }
101
102  /// Floating-point computation using ARMv8 AArch32 Advanced
103  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
104  /// and Arm MVE are IEEE-754 compliant.
105  bool isFPVectorizationPotentiallyUnsafe() {
106    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
107  }
108
109  /// \name Scalar TTI Implementations
110  /// @{
111
112  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
113                            Type *Ty);
114
115  using BaseT::getIntImmCost;
116  int getIntImmCost(const APInt &Imm, Type *Ty);
117
118  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
119
120  /// @}
121
122  /// \name Vector TTI Implementations
123  /// @{
124
125  unsigned getNumberOfRegisters(unsigned ClassID) const {
126    bool Vector = (ClassID == 1);
127    if (Vector) {
128      if (ST->hasNEON())
129        return 16;
130      if (ST->hasMVEIntegerOps())
131        return 8;
132      return 0;
133    }
134
135    if (ST->isThumb1Only())
136      return 8;
137    return 13;
138  }
139
140  unsigned getRegisterBitWidth(bool Vector) const {
141    if (Vector) {
142      if (ST->hasNEON())
143        return 128;
144      if (ST->hasMVEIntegerOps())
145        return 128;
146      return 0;
147    }
148
149    return 32;
150  }
151
152  unsigned getMaxInterleaveFactor(unsigned VF) {
153    return ST->getMaxInterleaveFactor();
154  }
155
156  bool isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment);
157
158  bool isLegalMaskedStore(Type *DataTy, MaybeAlign Alignment) {
159    return isLegalMaskedLoad(DataTy, Alignment);
160  }
161
162  bool isLegalMaskedGather(Type *Ty, MaybeAlign Alignment);
163
164  bool isLegalMaskedScatter(Type *Ty, MaybeAlign Alignment) { return false; }
165
166  int getMemcpyCost(const Instruction *I);
167
168  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
169
170  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
171                             TTI::ReductionFlags Flags) const;
172
173  bool shouldExpandReduction(const IntrinsicInst *II) const {
174    switch (II->getIntrinsicID()) {
175    case Intrinsic::experimental_vector_reduce_v2_fadd:
176    case Intrinsic::experimental_vector_reduce_v2_fmul:
177      // We don't have legalization support for ordered FP reductions.
178      if (!II->getFastMathFlags().allowReassoc())
179        return true;
180      // Can't legalize reductions with soft floats.
181      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();
182
183    case Intrinsic::experimental_vector_reduce_fmin:
184    case Intrinsic::experimental_vector_reduce_fmax:
185      // Can't legalize reductions with soft floats, and NoNan will create
186      // fminimum which we do not know how to lower.
187      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() ||
188             !II->getFastMathFlags().noNaNs();
189
190    default:
191      // Don't expand anything else, let legalization deal with it.
192      return false;
193    }
194  }
195
196  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
197                       const Instruction *I = nullptr);
198
199  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
200                         const Instruction *I = nullptr);
201
202  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
203
204  int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
205                                const SCEV *Ptr);
206
207  int getArithmeticInstrCost(
208      unsigned Opcode, Type *Ty,
209      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
210      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
211      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
212      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
213      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
214      const Instruction *CxtI = nullptr);
215
216  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
217                      unsigned AddressSpace, const Instruction *I = nullptr);
218
219  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
220                                 ArrayRef<unsigned> Indices, unsigned Alignment,
221                                 unsigned AddressSpace,
222                                 bool UseMaskForCond = false,
223                                 bool UseMaskForGaps = false);
224
225  bool isLoweredToCall(const Function *F);
226  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
227                                AssumptionCache &AC,
228                                TargetLibraryInfo *LibInfo,
229                                HardwareLoopInfo &HWLoopInfo);
230  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
231                                   ScalarEvolution &SE,
232                                   AssumptionCache &AC,
233                                   TargetLibraryInfo *TLI,
234                                   DominatorTree *DT,
235                                   const LoopAccessInfo *LAI);
236  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
237                               TTI::UnrollingPreferences &UP);
238
239  bool shouldBuildLookupTablesForConstant(Constant *C) const {
240    // In the ROPI and RWPI relocation models we can't have pointers to global
241    // variables or functions in constant data, so don't convert switches to
242    // lookup tables if any of the values would need relocation.
243    if (ST->isROPI() || ST->isRWPI())
244      return !C->needsRelocation();
245
246    return true;
247  }
248  /// @}
249};
250
251} // end namespace llvm
252
253#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
254