1249259Sdim//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===// 2249259Sdim// 3249259Sdim// The LLVM Compiler Infrastructure 4249259Sdim// 5249259Sdim// This file is distributed under the University of Illinois Open Source 6249259Sdim// License. See LICENSE.TXT for details. 7249259Sdim// 8249259Sdim//===----------------------------------------------------------------------===// 9249259Sdim/// \file 10249259Sdim/// This file implements a TargetTransformInfo analysis pass specific to the 11249259Sdim/// ARM target machine. It uses the target's detailed information to provide 12249259Sdim/// more precise answers to certain TTI queries, while letting the target 13249259Sdim/// independent and default TTI implementations handle the rest. 14249259Sdim/// 15249259Sdim//===----------------------------------------------------------------------===// 16249259Sdim 17249259Sdim#define DEBUG_TYPE "armtti" 18249259Sdim#include "ARM.h" 19249259Sdim#include "ARMTargetMachine.h" 20249259Sdim#include "llvm/Analysis/TargetTransformInfo.h" 21249259Sdim#include "llvm/Support/Debug.h" 22249259Sdim#include "llvm/Target/TargetLowering.h" 23249259Sdim#include "llvm/Target/CostTable.h" 24249259Sdimusing namespace llvm; 25249259Sdim 26249259Sdim// Declare the pass initialization routine locally as target-specific passes 27249259Sdim// don't havve a target-wide initialization entry point, and so we rely on the 28249259Sdim// pass constructor initialization. 29249259Sdimnamespace llvm { 30249259Sdimvoid initializeARMTTIPass(PassRegistry &); 31249259Sdim} 32249259Sdim 33249259Sdimnamespace { 34249259Sdim 35249259Sdimclass ARMTTI : public ImmutablePass, public TargetTransformInfo { 36249259Sdim const ARMBaseTargetMachine *TM; 37249259Sdim const ARMSubtarget *ST; 38249259Sdim const ARMTargetLowering *TLI; 39249259Sdim 40249259Sdim /// Estimate the overhead of scalarizing an instruction. Insert and Extract 41249259Sdim /// are set if the result needs to be inserted and/or extracted from vectors. 42249259Sdim unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 43249259Sdim 44249259Sdimpublic: 45249259Sdim ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { 46249259Sdim llvm_unreachable("This pass cannot be directly constructed"); 47249259Sdim } 48249259Sdim 49249259Sdim ARMTTI(const ARMBaseTargetMachine *TM) 50249259Sdim : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 51249259Sdim TLI(TM->getTargetLowering()) { 52249259Sdim initializeARMTTIPass(*PassRegistry::getPassRegistry()); 53249259Sdim } 54249259Sdim 55249259Sdim virtual void initializePass() { 56249259Sdim pushTTIStack(this); 57249259Sdim } 58249259Sdim 59249259Sdim virtual void finalizePass() { 60249259Sdim popTTIStack(); 61249259Sdim } 62249259Sdim 63249259Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const { 64249259Sdim TargetTransformInfo::getAnalysisUsage(AU); 65249259Sdim } 66249259Sdim 67249259Sdim /// Pass identification. 68249259Sdim static char ID; 69249259Sdim 70249259Sdim /// Provide necessary pointer adjustments for the two base classes. 71249259Sdim virtual void *getAdjustedAnalysisPointer(const void *ID) { 72249259Sdim if (ID == &TargetTransformInfo::ID) 73249259Sdim return (TargetTransformInfo*)this; 74249259Sdim return this; 75249259Sdim } 76249259Sdim 77249259Sdim /// \name Scalar TTI Implementations 78249259Sdim /// @{ 79249259Sdim 80249259Sdim virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; 81249259Sdim 82249259Sdim /// @} 83249259Sdim 84249259Sdim 85249259Sdim /// \name Vector TTI Implementations 86249259Sdim /// @{ 87249259Sdim 88249259Sdim unsigned getNumberOfRegisters(bool Vector) const { 89249259Sdim if (Vector) { 90249259Sdim if (ST->hasNEON()) 91249259Sdim return 16; 92249259Sdim return 0; 93249259Sdim } 94249259Sdim 95249259Sdim if (ST->isThumb1Only()) 96249259Sdim return 8; 97249259Sdim return 16; 98249259Sdim } 99249259Sdim 100249259Sdim unsigned getRegisterBitWidth(bool Vector) const { 101249259Sdim if (Vector) { 102249259Sdim if (ST->hasNEON()) 103249259Sdim return 128; 104249259Sdim return 0; 105249259Sdim } 106249259Sdim 107249259Sdim return 32; 108249259Sdim } 109249259Sdim 110249259Sdim unsigned getMaximumUnrollFactor() const { 111249259Sdim // These are out of order CPUs: 112249259Sdim if (ST->isCortexA15() || ST->isSwift()) 113249259Sdim return 2; 114249259Sdim return 1; 115249259Sdim } 116249259Sdim 117249259Sdim unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, 118249259Sdim int Index, Type *SubTp) const; 119249259Sdim 120249259Sdim unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 121249259Sdim Type *Src) const; 122249259Sdim 123249259Sdim unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const; 124249259Sdim 125249259Sdim unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; 126249259Sdim 127263509Sdim unsigned getAddressComputationCost(Type *Val, bool IsComplex) const; 128252723Sdim 129252723Sdim unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, 130252723Sdim OperandValueKind Op1Info = OK_AnyValue, 131252723Sdim OperandValueKind Op2Info = OK_AnyValue) const; 132263509Sdim 133263509Sdim unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 134263509Sdim unsigned AddressSpace) const; 135249259Sdim /// @} 136249259Sdim}; 137249259Sdim 138249259Sdim} // end anonymous namespace 139249259Sdim 140249259SdimINITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti", 141249259Sdim "ARM Target Transform Info", true, true, false) 142249259Sdimchar ARMTTI::ID = 0; 143249259Sdim 144249259SdimImmutablePass * 145249259Sdimllvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) { 146249259Sdim return new ARMTTI(TM); 147249259Sdim} 148249259Sdim 149249259Sdim 150249259Sdimunsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { 151249259Sdim assert(Ty->isIntegerTy()); 152249259Sdim 153249259Sdim unsigned Bits = Ty->getPrimitiveSizeInBits(); 154249259Sdim if (Bits == 0 || Bits > 32) 155249259Sdim return 4; 156249259Sdim 157249259Sdim int32_t SImmVal = Imm.getSExtValue(); 158249259Sdim uint32_t ZImmVal = Imm.getZExtValue(); 159249259Sdim if (!ST->isThumb()) { 160249259Sdim if ((SImmVal >= 0 && SImmVal < 65536) || 161249259Sdim (ARM_AM::getSOImmVal(ZImmVal) != -1) || 162249259Sdim (ARM_AM::getSOImmVal(~ZImmVal) != -1)) 163249259Sdim return 1; 164249259Sdim return ST->hasV6T2Ops() ? 2 : 3; 165249259Sdim } else if (ST->isThumb2()) { 166249259Sdim if ((SImmVal >= 0 && SImmVal < 65536) || 167249259Sdim (ARM_AM::getT2SOImmVal(ZImmVal) != -1) || 168249259Sdim (ARM_AM::getT2SOImmVal(~ZImmVal) != -1)) 169249259Sdim return 1; 170249259Sdim return ST->hasV6T2Ops() ? 2 : 3; 171249259Sdim } else /*Thumb1*/ { 172249259Sdim if (SImmVal >= 0 && SImmVal < 256) 173249259Sdim return 1; 174249259Sdim if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) 175249259Sdim return 2; 176249259Sdim // Load from constantpool. 177249259Sdim return 3; 178249259Sdim } 179249259Sdim return 2; 180249259Sdim} 181249259Sdim 182249259Sdimunsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, 183249259Sdim Type *Src) const { 184249259Sdim int ISD = TLI->InstructionOpcodeToISD(Opcode); 185249259Sdim assert(ISD && "Invalid opcode"); 186249259Sdim 187249259Sdim // Single to/from double precision conversions. 188263509Sdim static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = { 189249259Sdim // Vector fptrunc/fpext conversions. 190249259Sdim { ISD::FP_ROUND, MVT::v2f64, 2 }, 191249259Sdim { ISD::FP_EXTEND, MVT::v2f32, 2 }, 192249259Sdim { ISD::FP_EXTEND, MVT::v4f32, 4 } 193249259Sdim }; 194249259Sdim 195249259Sdim if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || 196249259Sdim ISD == ISD::FP_EXTEND)) { 197249259Sdim std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); 198263509Sdim int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second); 199249259Sdim if (Idx != -1) 200249259Sdim return LT.first * NEONFltDblTbl[Idx].Cost; 201249259Sdim } 202249259Sdim 203249259Sdim EVT SrcTy = TLI->getValueType(Src); 204249259Sdim EVT DstTy = TLI->getValueType(Dst); 205249259Sdim 206249259Sdim if (!SrcTy.isSimple() || !DstTy.isSimple()) 207249259Sdim return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); 208249259Sdim 209249259Sdim // Some arithmetic, load and store operations have specific instructions 210249259Sdim // to cast up/down their types automatically at no extra cost. 211249259Sdim // TODO: Get these tables to know at least what the related operations are. 212263509Sdim static const TypeConversionCostTblEntry<MVT::SimpleValueType> 213263509Sdim NEONVectorConversionTbl[] = { 214249259Sdim { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, 215249259Sdim { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, 216249259Sdim { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, 217249259Sdim { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, 218249259Sdim { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, 219249259Sdim { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, 220249259Sdim 221249259Sdim // The number of vmovl instructions for the extension. 222249259Sdim { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, 223249259Sdim { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, 224249259Sdim { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, 225249259Sdim { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, 226249259Sdim { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, 227249259Sdim { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, 228249259Sdim { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, 229249259Sdim { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, 230249259Sdim { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, 231249259Sdim { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, 232249259Sdim 233252723Sdim // Operations that we legalize using splitting. 234252723Sdim { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, 235252723Sdim { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, 236249259Sdim 237249259Sdim // Vector float <-> i32 conversions. 238249259Sdim { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, 239249259Sdim { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, 240249259Sdim 241249259Sdim { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, 242249259Sdim { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, 243249259Sdim { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, 244249259Sdim { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, 245249259Sdim { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, 246249259Sdim { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, 247249259Sdim { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, 248249259Sdim { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, 249249259Sdim { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, 250249259Sdim { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, 251249259Sdim { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, 252249259Sdim { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, 253249259Sdim { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, 254249259Sdim { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, 255249259Sdim { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, 256249259Sdim { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, 257249259Sdim { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, 258249259Sdim { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, 259249259Sdim { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, 260249259Sdim { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, 261249259Sdim 262249259Sdim { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, 263249259Sdim { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, 264249259Sdim { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 }, 265249259Sdim { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, 266249259Sdim { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, 267249259Sdim { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, 268249259Sdim 269249259Sdim // Vector double <-> i32 conversions. 270249259Sdim { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, 271249259Sdim { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, 272249259Sdim 273249259Sdim { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, 274249259Sdim { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, 275249259Sdim { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, 276249259Sdim { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, 277249259Sdim { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, 278249259Sdim { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, 279249259Sdim 280249259Sdim { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, 281249259Sdim { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, 282249259Sdim { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 }, 283249259Sdim { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 }, 284249259Sdim { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 }, 285249259Sdim { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 } 286249259Sdim }; 287249259Sdim 288249259Sdim if (SrcTy.isVector() && ST->hasNEON()) { 289263509Sdim int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD, 290263509Sdim DstTy.getSimpleVT(), SrcTy.getSimpleVT()); 291249259Sdim if (Idx != -1) 292249259Sdim return NEONVectorConversionTbl[Idx].Cost; 293249259Sdim } 294249259Sdim 295249259Sdim // Scalar float to integer conversions. 296263509Sdim static const TypeConversionCostTblEntry<MVT::SimpleValueType> 297263509Sdim NEONFloatConversionTbl[] = { 298249259Sdim { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 }, 299249259Sdim { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 }, 300249259Sdim { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 }, 301249259Sdim { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 }, 302249259Sdim { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 }, 303249259Sdim { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 }, 304249259Sdim { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 }, 305249259Sdim { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 }, 306249259Sdim { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 }, 307249259Sdim { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 }, 308249259Sdim { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 }, 309249259Sdim { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 }, 310249259Sdim { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 }, 311249259Sdim { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 }, 312249259Sdim { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 }, 313249259Sdim { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 }, 314249259Sdim { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 }, 315249259Sdim { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 }, 316249259Sdim { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 }, 317249259Sdim { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 } 318249259Sdim }; 319249259Sdim if (SrcTy.isFloatingPoint() && ST->hasNEON()) { 320263509Sdim int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD, 321263509Sdim DstTy.getSimpleVT(), SrcTy.getSimpleVT()); 322249259Sdim if (Idx != -1) 323249259Sdim return NEONFloatConversionTbl[Idx].Cost; 324249259Sdim } 325249259Sdim 326249259Sdim // Scalar integer to float conversions. 327263509Sdim static const TypeConversionCostTblEntry<MVT::SimpleValueType> 328263509Sdim NEONIntegerConversionTbl[] = { 329249259Sdim { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, 330249259Sdim { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 }, 331249259Sdim { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 }, 332249259Sdim { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 }, 333249259Sdim { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 }, 334249259Sdim { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 }, 335249259Sdim { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 }, 336249259Sdim { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 }, 337249259Sdim { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 }, 338249259Sdim { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 }, 339249259Sdim { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 }, 340249259Sdim { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 }, 341249259Sdim { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 }, 342249259Sdim { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 }, 343249259Sdim { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 }, 344249259Sdim { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 }, 345249259Sdim { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 }, 346249259Sdim { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 }, 347249259Sdim { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 }, 348249259Sdim { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 } 349249259Sdim }; 350249259Sdim 351249259Sdim if (SrcTy.isInteger() && ST->hasNEON()) { 352263509Sdim int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD, 353263509Sdim DstTy.getSimpleVT(), SrcTy.getSimpleVT()); 354249259Sdim if (Idx != -1) 355249259Sdim return NEONIntegerConversionTbl[Idx].Cost; 356249259Sdim } 357249259Sdim 358249259Sdim // Scalar integer conversion costs. 359263509Sdim static const TypeConversionCostTblEntry<MVT::SimpleValueType> 360263509Sdim ARMIntegerConversionTbl[] = { 361249259Sdim // i16 -> i64 requires two dependent operations. 362249259Sdim { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 }, 363249259Sdim 364249259Sdim // Truncates on i64 are assumed to be free. 365249259Sdim { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 }, 366249259Sdim { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 }, 367249259Sdim { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 }, 368249259Sdim { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 } 369249259Sdim }; 370249259Sdim 371249259Sdim if (SrcTy.isInteger()) { 372263509Sdim int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD, 373263509Sdim DstTy.getSimpleVT(), SrcTy.getSimpleVT()); 374249259Sdim if (Idx != -1) 375249259Sdim return ARMIntegerConversionTbl[Idx].Cost; 376249259Sdim } 377249259Sdim 378249259Sdim return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); 379249259Sdim} 380249259Sdim 381249259Sdimunsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, 382249259Sdim unsigned Index) const { 383249259Sdim // Penalize inserting into an D-subregister. We end up with a three times 384249259Sdim // lower estimated throughput on swift. 385249259Sdim if (ST->isSwift() && 386249259Sdim Opcode == Instruction::InsertElement && 387249259Sdim ValTy->isVectorTy() && 388249259Sdim ValTy->getScalarSizeInBits() <= 32) 389249259Sdim return 3; 390249259Sdim 391249259Sdim return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); 392249259Sdim} 393249259Sdim 394249259Sdimunsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 395249259Sdim Type *CondTy) const { 396249259Sdim 397249259Sdim int ISD = TLI->InstructionOpcodeToISD(Opcode); 398249259Sdim // On NEON a a vector select gets lowered to vbsl. 399249259Sdim if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { 400249259Sdim // Lowering of some vector selects is currently far from perfect. 401263509Sdim static const TypeConversionCostTblEntry<MVT::SimpleValueType> 402263509Sdim NEONVectorSelectTbl[] = { 403249259Sdim { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, 404249259Sdim { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, 405249259Sdim { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, 406249259Sdim { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, 407249259Sdim { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, 408249259Sdim { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } 409249259Sdim }; 410249259Sdim 411249259Sdim EVT SelCondTy = TLI->getValueType(CondTy); 412249259Sdim EVT SelValTy = TLI->getValueType(ValTy); 413263509Sdim if (SelCondTy.isSimple() && SelValTy.isSimple()) { 414263509Sdim int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, 415263509Sdim SelCondTy.getSimpleVT(), 416263509Sdim SelValTy.getSimpleVT()); 417263509Sdim if (Idx != -1) 418263509Sdim return NEONVectorSelectTbl[Idx].Cost; 419263509Sdim } 420249259Sdim 421249259Sdim std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); 422249259Sdim return LT.first; 423249259Sdim } 424249259Sdim 425249259Sdim return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); 426249259Sdim} 427249259Sdim 428263509Sdimunsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { 429263509Sdim // Address computations in vectorized code with non-consecutive addresses will 430263509Sdim // likely result in more instructions compared to scalar code where the 431263509Sdim // computation can more often be merged into the index mode. The resulting 432263509Sdim // extra micro-ops can significantly decrease throughput. 433263509Sdim unsigned NumVectorInstToHideOverhead = 10; 434263509Sdim 435263509Sdim if (Ty->isVectorTy() && IsComplex) 436263509Sdim return NumVectorInstToHideOverhead; 437263509Sdim 438249259Sdim // In many cases the address computation is not merged into the instruction 439249259Sdim // addressing mode. 440249259Sdim return 1; 441249259Sdim} 442249259Sdim 443249259Sdimunsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 444249259Sdim Type *SubTp) const { 445249259Sdim // We only handle costs of reverse shuffles for now. 446249259Sdim if (Kind != SK_Reverse) 447249259Sdim return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); 448249259Sdim 449263509Sdim static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = { 450249259Sdim // Reverse shuffle cost one instruction if we are shuffling within a double 451249259Sdim // word (vrev) or two if we shuffle a quad word (vrev, vext). 452249259Sdim { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 }, 453249259Sdim { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 }, 454249259Sdim { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, 455249259Sdim { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, 456249259Sdim 457249259Sdim { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 }, 458249259Sdim { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 }, 459249259Sdim { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 }, 460249259Sdim { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 } 461249259Sdim }; 462249259Sdim 463249259Sdim std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); 464249259Sdim 465263509Sdim int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); 466249259Sdim if (Idx == -1) 467249259Sdim return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); 468249259Sdim 469249259Sdim return LT.first * NEONShuffleTbl[Idx].Cost; 470249259Sdim} 471252723Sdim 472252723Sdimunsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info, 473252723Sdim OperandValueKind Op2Info) const { 474252723Sdim 475252723Sdim int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); 476252723Sdim std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); 477252723Sdim 478252723Sdim const unsigned FunctionCallDivCost = 20; 479252723Sdim const unsigned ReciprocalDivCost = 10; 480263509Sdim static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = { 481252723Sdim // Division. 482252723Sdim // These costs are somewhat random. Choose a cost of 20 to indicate that 483252723Sdim // vectorizing devision (added function call) is going to be very expensive. 484252723Sdim // Double registers types. 485252723Sdim { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost}, 486252723Sdim { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost}, 487252723Sdim { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost}, 488252723Sdim { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost}, 489252723Sdim { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost}, 490252723Sdim { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost}, 491252723Sdim { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost}, 492252723Sdim { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost}, 493252723Sdim { ISD::SDIV, MVT::v4i16, ReciprocalDivCost}, 494252723Sdim { ISD::UDIV, MVT::v4i16, ReciprocalDivCost}, 495252723Sdim { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost}, 496252723Sdim { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost}, 497252723Sdim { ISD::SDIV, MVT::v8i8, ReciprocalDivCost}, 498252723Sdim { ISD::UDIV, MVT::v8i8, ReciprocalDivCost}, 499252723Sdim { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost}, 500252723Sdim { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost}, 501252723Sdim // Quad register types. 502252723Sdim { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost}, 503252723Sdim { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost}, 504252723Sdim { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost}, 505252723Sdim { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost}, 506252723Sdim { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost}, 507252723Sdim { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost}, 508252723Sdim { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost}, 509252723Sdim { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost}, 510252723Sdim { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost}, 511252723Sdim { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost}, 512252723Sdim { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost}, 513252723Sdim { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost}, 514252723Sdim { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost}, 515252723Sdim { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost}, 516252723Sdim { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost}, 517252723Sdim { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost}, 518252723Sdim // Multiplication. 519252723Sdim }; 520252723Sdim 521252723Sdim int Idx = -1; 522252723Sdim 523252723Sdim if (ST->hasNEON()) 524263509Sdim Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second); 525252723Sdim 526252723Sdim if (Idx != -1) 527252723Sdim return LT.first * CostTbl[Idx].Cost; 528252723Sdim 529263509Sdim unsigned Cost = 530263509Sdim TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); 531252723Sdim 532263509Sdim // This is somewhat of a hack. The problem that we are facing is that SROA 533263509Sdim // creates a sequence of shift, and, or instructions to construct values. 534263509Sdim // These sequences are recognized by the ISel and have zero-cost. Not so for 535263509Sdim // the vectorized code. Because we have support for v2i64 but not i64 those 536263509Sdim // sequences look particularily beneficial to vectorize. 537263509Sdim // To work around this we increase the cost of v2i64 operations to make them 538263509Sdim // seem less beneficial. 539263509Sdim if (LT.second == MVT::v2i64 && 540263509Sdim Op2Info == TargetTransformInfo::OK_UniformConstantValue) 541263509Sdim Cost += 4; 542263509Sdim 543263509Sdim return Cost; 544252723Sdim} 545252723Sdim 546263509Sdimunsigned ARMTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 547263509Sdim unsigned AddressSpace) const { 548263509Sdim std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); 549263509Sdim 550263509Sdim if (Src->isVectorTy() && Alignment != 16 && 551263509Sdim Src->getVectorElementType()->isDoubleTy()) { 552263509Sdim // Unaligned loads/stores are extremely inefficient. 553263509Sdim // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr. 554263509Sdim return LT.first * 4; 555263509Sdim } 556263509Sdim return LT.first; 557263509Sdim} 558