1249259Sdim//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===// 2249259Sdim// 3249259Sdim// The LLVM Compiler Infrastructure 4249259Sdim// 5249259Sdim// This file is distributed under the University of Illinois Open Source 6249259Sdim// License. See LICENSE.TXT for details. 7249259Sdim// 8249259Sdim//===----------------------------------------------------------------------===// 9249259Sdim/// \file 10249259Sdim/// This file implements a TargetTransformInfo analysis pass specific to the 11249259Sdim/// PPC target machine. It uses the target's detailed information to provide 12249259Sdim/// more precise answers to certain TTI queries, while letting the target 13249259Sdim/// independent and default TTI implementations handle the rest. 14249259Sdim/// 15249259Sdim//===----------------------------------------------------------------------===// 16249259Sdim 17249259Sdim#define DEBUG_TYPE "ppctti" 18249259Sdim#include "PPC.h" 19249259Sdim#include "PPCTargetMachine.h" 20249259Sdim#include "llvm/Analysis/TargetTransformInfo.h" 21249259Sdim#include "llvm/Support/Debug.h" 22249259Sdim#include "llvm/Target/TargetLowering.h" 23249259Sdim#include "llvm/Target/CostTable.h" 24249259Sdimusing namespace llvm; 25249259Sdim 26249259Sdim// Declare the pass initialization routine locally as target-specific passes 27249259Sdim// don't havve a target-wide initialization entry point, and so we rely on the 28249259Sdim// pass constructor initialization. 29249259Sdimnamespace llvm { 30249259Sdimvoid initializePPCTTIPass(PassRegistry &); 31249259Sdim} 32249259Sdim 33249259Sdimnamespace { 34249259Sdim 35249259Sdimclass PPCTTI : public ImmutablePass, public TargetTransformInfo { 36249259Sdim const PPCTargetMachine *TM; 37249259Sdim const PPCSubtarget *ST; 38249259Sdim const PPCTargetLowering *TLI; 39249259Sdim 40249259Sdim /// Estimate the overhead of scalarizing an instruction. Insert and Extract 41249259Sdim /// are set if the result needs to be inserted and/or extracted from vectors. 42249259Sdim unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 43249259Sdim 44249259Sdimpublic: 45249259Sdim PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { 46249259Sdim llvm_unreachable("This pass cannot be directly constructed"); 47249259Sdim } 48249259Sdim 49249259Sdim PPCTTI(const PPCTargetMachine *TM) 50249259Sdim : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 51249259Sdim TLI(TM->getTargetLowering()) { 52249259Sdim initializePPCTTIPass(*PassRegistry::getPassRegistry()); 53249259Sdim } 54249259Sdim 55249259Sdim virtual void initializePass() { 56249259Sdim pushTTIStack(this); 57249259Sdim } 58249259Sdim 59249259Sdim virtual void finalizePass() { 60249259Sdim popTTIStack(); 61249259Sdim } 62249259Sdim 63249259Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const { 64249259Sdim TargetTransformInfo::getAnalysisUsage(AU); 65249259Sdim } 66249259Sdim 67249259Sdim /// Pass identification. 68249259Sdim static char ID; 69249259Sdim 70249259Sdim /// Provide necessary pointer adjustments for the two base classes. 71249259Sdim virtual void *getAdjustedAnalysisPointer(const void *ID) { 72249259Sdim if (ID == &TargetTransformInfo::ID) 73249259Sdim return (TargetTransformInfo*)this; 74249259Sdim return this; 75249259Sdim } 76249259Sdim 77249259Sdim /// \name Scalar TTI Implementations 78249259Sdim /// @{ 79249259Sdim virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; 80263509Sdim virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; 81249259Sdim 82249259Sdim /// @} 83249259Sdim 84249259Sdim /// \name Vector TTI Implementations 85249259Sdim /// @{ 86249259Sdim 87249259Sdim virtual unsigned getNumberOfRegisters(bool Vector) const; 88249259Sdim virtual unsigned getRegisterBitWidth(bool Vector) const; 89249259Sdim virtual unsigned getMaximumUnrollFactor() const; 90249259Sdim virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, 91249259Sdim OperandValueKind, 92249259Sdim OperandValueKind) const; 93249259Sdim virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, 94249259Sdim int Index, Type *SubTp) const; 95249259Sdim virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 96249259Sdim Type *Src) const; 97249259Sdim virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 98249259Sdim Type *CondTy) const; 99249259Sdim virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 100249259Sdim unsigned Index) const; 101249259Sdim virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, 102249259Sdim unsigned Alignment, 103249259Sdim unsigned AddressSpace) const; 104249259Sdim 105249259Sdim /// @} 106249259Sdim}; 107249259Sdim 108249259Sdim} // end anonymous namespace 109249259Sdim 110249259SdimINITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti", 111249259Sdim "PPC Target Transform Info", true, true, false) 112249259Sdimchar PPCTTI::ID = 0; 113249259Sdim 114249259SdimImmutablePass * 115249259Sdimllvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) { 116249259Sdim return new PPCTTI(TM); 117249259Sdim} 118249259Sdim 119249259Sdim 120249259Sdim//===----------------------------------------------------------------------===// 121249259Sdim// 122249259Sdim// PPC cost model. 123249259Sdim// 124249259Sdim//===----------------------------------------------------------------------===// 125249259Sdim 126249259SdimPPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { 127249259Sdim assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); 128249259Sdim if (ST->hasPOPCNTD() && TyWidth <= 64) 129249259Sdim return PSK_FastHardware; 130249259Sdim return PSK_Software; 131249259Sdim} 132249259Sdim 133263509Sdimvoid PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { 134263509Sdim if (ST->getDarwinDirective() == PPC::DIR_A2) { 135263509Sdim // The A2 is in-order with a deep pipeline, and concatenation unrolling 136263509Sdim // helps expose latency-hiding opportunities to the instruction scheduler. 137263509Sdim UP.Partial = UP.Runtime = true; 138263509Sdim } 139263509Sdim} 140263509Sdim 141249259Sdimunsigned PPCTTI::getNumberOfRegisters(bool Vector) const { 142249259Sdim if (Vector && !ST->hasAltivec()) 143249259Sdim return 0; 144249259Sdim return 32; 145249259Sdim} 146249259Sdim 147249259Sdimunsigned PPCTTI::getRegisterBitWidth(bool Vector) const { 148249259Sdim if (Vector) { 149249259Sdim if (ST->hasAltivec()) return 128; 150249259Sdim return 0; 151249259Sdim } 152249259Sdim 153249259Sdim if (ST->isPPC64()) 154249259Sdim return 64; 155249259Sdim return 32; 156249259Sdim 157249259Sdim} 158249259Sdim 159249259Sdimunsigned PPCTTI::getMaximumUnrollFactor() const { 160249259Sdim unsigned Directive = ST->getDarwinDirective(); 161249259Sdim // The 440 has no SIMD support, but floating-point instructions 162249259Sdim // have a 5-cycle latency, so unroll by 5x for latency hiding. 163249259Sdim if (Directive == PPC::DIR_440) 164249259Sdim return 5; 165249259Sdim 166249259Sdim // The A2 has no SIMD support, but floating-point instructions 167249259Sdim // have a 6-cycle latency, so unroll by 6x for latency hiding. 168249259Sdim if (Directive == PPC::DIR_A2) 169249259Sdim return 6; 170249259Sdim 171249259Sdim // FIXME: For lack of any better information, do no harm... 172249259Sdim if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) 173249259Sdim return 1; 174249259Sdim 175249259Sdim // For most things, modern systems have two execution units (and 176249259Sdim // out-of-order execution). 177249259Sdim return 2; 178249259Sdim} 179249259Sdim 180249259Sdimunsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, 181249259Sdim OperandValueKind Op1Info, 182249259Sdim OperandValueKind Op2Info) const { 183249259Sdim assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); 184249259Sdim 185249259Sdim // Fallback to the default implementation. 186249259Sdim return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, 187249259Sdim Op2Info); 188249259Sdim} 189249259Sdim 190249259Sdimunsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 191249259Sdim Type *SubTp) const { 192249259Sdim return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); 193249259Sdim} 194249259Sdim 195249259Sdimunsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { 196249259Sdim assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); 197249259Sdim 198249259Sdim return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); 199249259Sdim} 200249259Sdim 201249259Sdimunsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 202249259Sdim Type *CondTy) const { 203249259Sdim return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); 204249259Sdim} 205249259Sdim 206249259Sdimunsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, 207249259Sdim unsigned Index) const { 208249259Sdim assert(Val->isVectorTy() && "This must be a vector type"); 209249259Sdim 210249259Sdim int ISD = TLI->InstructionOpcodeToISD(Opcode); 211249259Sdim assert(ISD && "Invalid opcode"); 212249259Sdim 213249259Sdim // Estimated cost of a load-hit-store delay. This was obtained 214249259Sdim // experimentally as a minimum needed to prevent unprofitable 215249259Sdim // vectorization for the paq8p benchmark. It may need to be 216249259Sdim // raised further if other unprofitable cases remain. 217249259Sdim unsigned LHSPenalty = 12; 218249259Sdim 219249259Sdim // Vector element insert/extract with Altivec is very expensive, 220249259Sdim // because they require store and reload with the attendant 221249259Sdim // processor stall for load-hit-store. Until VSX is available, 222249259Sdim // these need to be estimated as very costly. 223249259Sdim if (ISD == ISD::EXTRACT_VECTOR_ELT || 224249259Sdim ISD == ISD::INSERT_VECTOR_ELT) 225249259Sdim return LHSPenalty + 226249259Sdim TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); 227249259Sdim 228249259Sdim return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); 229249259Sdim} 230249259Sdim 231249259Sdimunsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 232249259Sdim unsigned AddressSpace) const { 233249259Sdim // Legalize the type. 234249259Sdim std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); 235249259Sdim assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && 236249259Sdim "Invalid Opcode"); 237249259Sdim 238249259Sdim // Each load/store unit costs 1. 239249259Sdim unsigned Cost = LT.first * 1; 240249259Sdim 241249259Sdim // PPC in general does not support unaligned loads and stores. They'll need 242249259Sdim // to be decomposed based on the alignment factor. 243249259Sdim unsigned SrcBytes = LT.second.getStoreSize(); 244249259Sdim if (SrcBytes && Alignment && Alignment < SrcBytes) 245249259Sdim Cost *= (SrcBytes/Alignment); 246249259Sdim 247249259Sdim return Cost; 248249259Sdim} 249249259Sdim 250