1239310Sdim// 2239310Sdim// The LLVM Compiler Infrastructure 3239310Sdim// 4239310Sdim// This file is distributed under the University of Illinois Open Source 5239310Sdim// License. See LICENSE.TXT for details. 6239310Sdim// 7239310Sdim//===----------------------------------------------------------------------===// 8239310Sdim// 9239310Sdim// This file defines the interfaces that NVPTX uses to lower LLVM code into a 10239310Sdim// selection DAG. 11239310Sdim// 12239310Sdim//===----------------------------------------------------------------------===// 13239310Sdim 14249423Sdim#include "NVPTXISelLowering.h" 15239310Sdim#include "NVPTX.h" 16239310Sdim#include "NVPTXTargetMachine.h" 17239310Sdim#include "NVPTXTargetObjectFile.h" 18239310Sdim#include "NVPTXUtilities.h" 19239310Sdim#include "llvm/CodeGen/Analysis.h" 20239310Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 21239310Sdim#include "llvm/CodeGen/MachineFunction.h" 22239310Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 23239310Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 24249423Sdim#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 25249423Sdim#include "llvm/IR/DerivedTypes.h" 26249423Sdim#include "llvm/IR/Function.h" 27249423Sdim#include "llvm/IR/GlobalValue.h" 28249423Sdim#include "llvm/IR/IntrinsicInst.h" 29249423Sdim#include "llvm/IR/Intrinsics.h" 30249423Sdim#include "llvm/IR/Module.h" 31249423Sdim#include "llvm/MC/MCSectionELF.h" 32239310Sdim#include "llvm/Support/CallSite.h" 33249423Sdim#include "llvm/Support/CommandLine.h" 34249423Sdim#include "llvm/Support/Debug.h" 35239310Sdim#include "llvm/Support/ErrorHandling.h" 36239310Sdim#include "llvm/Support/raw_ostream.h" 37239310Sdim#include <sstream> 38239310Sdim 39239310Sdim#undef DEBUG_TYPE 40239310Sdim#define DEBUG_TYPE "nvptx-lower" 41239310Sdim 42239310Sdimusing namespace llvm; 43239310Sdim 44239310Sdimstatic unsigned int uniqueCallSite = 0; 45239310Sdim 46249423Sdimstatic cl::opt<bool> sched4reg( 47249423Sdim "nvptx-sched4reg", 48249423Sdim cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); 49239310Sdim 50249423Sdimstatic bool IsPTXVectorType(MVT VT) { 51249423Sdim switch (VT.SimpleTy) { 52249423Sdim default: 53249423Sdim return false; 54263508Sdim case MVT::v2i1: 55263508Sdim case MVT::v4i1: 56249423Sdim case MVT::v2i8: 57249423Sdim case MVT::v4i8: 58249423Sdim case MVT::v2i16: 59249423Sdim case MVT::v4i16: 60249423Sdim case MVT::v2i32: 61249423Sdim case MVT::v4i32: 62249423Sdim case MVT::v2i64: 63249423Sdim case MVT::v2f32: 64249423Sdim case MVT::v4f32: 65249423Sdim case MVT::v2f64: 66249423Sdim return true; 67249423Sdim } 68249423Sdim} 69239310Sdim 70263508Sdim/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive 71263508Sdim/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors 72263508Sdim/// into their primitive components. 73263508Sdim/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the 74263508Sdim/// same number of types as the Ins/Outs arrays in LowerFormalArguments, 75263508Sdim/// LowerCall, and LowerReturn. 76263508Sdimstatic void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, 77263508Sdim SmallVectorImpl<EVT> &ValueVTs, 78263508Sdim SmallVectorImpl<uint64_t> *Offsets = 0, 79263508Sdim uint64_t StartingOffset = 0) { 80263508Sdim SmallVector<EVT, 16> TempVTs; 81263508Sdim SmallVector<uint64_t, 16> TempOffsets; 82263508Sdim 83263508Sdim ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); 84263508Sdim for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { 85263508Sdim EVT VT = TempVTs[i]; 86263508Sdim uint64_t Off = TempOffsets[i]; 87263508Sdim if (VT.isVector()) 88263508Sdim for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) { 89263508Sdim ValueVTs.push_back(VT.getVectorElementType()); 90263508Sdim if (Offsets) 91263508Sdim Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize()); 92263508Sdim } 93263508Sdim else { 94263508Sdim ValueVTs.push_back(VT); 95263508Sdim if (Offsets) 96263508Sdim Offsets->push_back(Off); 97263508Sdim } 98263508Sdim } 99263508Sdim} 100263508Sdim 101239310Sdim// NVPTXTargetLowering Constructor. 102239310SdimNVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) 103249423Sdim : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), 104249423Sdim nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { 105239310Sdim 106239310Sdim // always lower memset, memcpy, and memmove intrinsics to load/store 107239310Sdim // instructions, rather 108239310Sdim // then generating calls to memset, mempcy or memmove. 109249423Sdim MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; 110249423Sdim MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; 111249423Sdim MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; 112239310Sdim 113239310Sdim setBooleanContents(ZeroOrNegativeOneBooleanContent); 114239310Sdim 115239310Sdim // Jump is Expensive. Don't create extra control flow for 'and', 'or' 116239310Sdim // condition branches. 117239310Sdim setJumpIsExpensive(true); 118239310Sdim 119239310Sdim // By default, use the Source scheduling 120239310Sdim if (sched4reg) 121239310Sdim setSchedulingPreference(Sched::RegPressure); 122239310Sdim else 123239310Sdim setSchedulingPreference(Sched::Source); 124239310Sdim 125239310Sdim addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); 126239310Sdim addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); 127239310Sdim addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); 128239310Sdim addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); 129239310Sdim addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); 130239310Sdim addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); 131239310Sdim 132239310Sdim // Operations not directly supported by NVPTX. 133249423Sdim setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); 134249423Sdim setOperationAction(ISD::BR_CC, MVT::f32, Expand); 135249423Sdim setOperationAction(ISD::BR_CC, MVT::f64, Expand); 136249423Sdim setOperationAction(ISD::BR_CC, MVT::i1, Expand); 137249423Sdim setOperationAction(ISD::BR_CC, MVT::i8, Expand); 138249423Sdim setOperationAction(ISD::BR_CC, MVT::i16, Expand); 139249423Sdim setOperationAction(ISD::BR_CC, MVT::i32, Expand); 140249423Sdim setOperationAction(ISD::BR_CC, MVT::i64, Expand); 141263508Sdim // Some SIGN_EXTEND_INREG can be done using cvt instruction. 142263508Sdim // For others we will expand to a SHL/SRA pair. 143263508Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); 144263508Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); 145263508Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); 146263508Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); 147249423Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 148239310Sdim 149239310Sdim if (nvptxSubtarget.hasROT64()) { 150249423Sdim setOperationAction(ISD::ROTL, MVT::i64, Legal); 151249423Sdim setOperationAction(ISD::ROTR, MVT::i64, Legal); 152249423Sdim } else { 153249423Sdim setOperationAction(ISD::ROTL, MVT::i64, Expand); 154249423Sdim setOperationAction(ISD::ROTR, MVT::i64, Expand); 155239310Sdim } 156239310Sdim if (nvptxSubtarget.hasROT32()) { 157249423Sdim setOperationAction(ISD::ROTL, MVT::i32, Legal); 158249423Sdim setOperationAction(ISD::ROTR, MVT::i32, Legal); 159249423Sdim } else { 160249423Sdim setOperationAction(ISD::ROTL, MVT::i32, Expand); 161249423Sdim setOperationAction(ISD::ROTR, MVT::i32, Expand); 162239310Sdim } 163239310Sdim 164249423Sdim setOperationAction(ISD::ROTL, MVT::i16, Expand); 165249423Sdim setOperationAction(ISD::ROTR, MVT::i16, Expand); 166249423Sdim setOperationAction(ISD::ROTL, MVT::i8, Expand); 167249423Sdim setOperationAction(ISD::ROTR, MVT::i8, Expand); 168249423Sdim setOperationAction(ISD::BSWAP, MVT::i16, Expand); 169249423Sdim setOperationAction(ISD::BSWAP, MVT::i32, Expand); 170249423Sdim setOperationAction(ISD::BSWAP, MVT::i64, Expand); 171239310Sdim 172239310Sdim // Indirect branch is not supported. 173239310Sdim // This also disables Jump Table creation. 174249423Sdim setOperationAction(ISD::BR_JT, MVT::Other, Expand); 175249423Sdim setOperationAction(ISD::BRIND, MVT::Other, Expand); 176239310Sdim 177249423Sdim setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 178249423Sdim setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 179239310Sdim 180239310Sdim // We want to legalize constant related memmove and memcopy 181239310Sdim // intrinsics. 182239310Sdim setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 183239310Sdim 184239310Sdim // Turn FP extload into load/fextend 185239310Sdim setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 186239310Sdim // Turn FP truncstore into trunc + store. 187239310Sdim setTruncStoreAction(MVT::f64, MVT::f32, Expand); 188239310Sdim 189239310Sdim // PTX does not support load / store predicate registers 190243830Sdim setOperationAction(ISD::LOAD, MVT::i1, Custom); 191243830Sdim setOperationAction(ISD::STORE, MVT::i1, Custom); 192243830Sdim 193239310Sdim setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 194239310Sdim setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); 195239310Sdim setTruncStoreAction(MVT::i64, MVT::i1, Expand); 196239310Sdim setTruncStoreAction(MVT::i32, MVT::i1, Expand); 197239310Sdim setTruncStoreAction(MVT::i16, MVT::i1, Expand); 198239310Sdim setTruncStoreAction(MVT::i8, MVT::i1, Expand); 199239310Sdim 200239310Sdim // This is legal in NVPTX 201249423Sdim setOperationAction(ISD::ConstantFP, MVT::f64, Legal); 202249423Sdim setOperationAction(ISD::ConstantFP, MVT::f32, Legal); 203239310Sdim 204239310Sdim // TRAP can be lowered to PTX trap 205249423Sdim setOperationAction(ISD::TRAP, MVT::Other, Legal); 206239310Sdim 207263508Sdim setOperationAction(ISD::ADDC, MVT::i64, Expand); 208263508Sdim setOperationAction(ISD::ADDE, MVT::i64, Expand); 209263508Sdim 210249423Sdim // Register custom handling for vector loads/stores 211249423Sdim for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; 212249423Sdim ++i) { 213249423Sdim MVT VT = (MVT::SimpleValueType) i; 214249423Sdim if (IsPTXVectorType(VT)) { 215249423Sdim setOperationAction(ISD::LOAD, VT, Custom); 216249423Sdim setOperationAction(ISD::STORE, VT, Custom); 217249423Sdim setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); 218249423Sdim } 219249423Sdim } 220239310Sdim 221263508Sdim // Custom handling for i8 intrinsics 222263508Sdim setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); 223263508Sdim 224263508Sdim setOperationAction(ISD::CTLZ, MVT::i16, Legal); 225263508Sdim setOperationAction(ISD::CTLZ, MVT::i32, Legal); 226263508Sdim setOperationAction(ISD::CTLZ, MVT::i64, Legal); 227263508Sdim setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal); 228263508Sdim setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal); 229263508Sdim setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal); 230263508Sdim setOperationAction(ISD::CTTZ, MVT::i16, Expand); 231263508Sdim setOperationAction(ISD::CTTZ, MVT::i32, Expand); 232263508Sdim setOperationAction(ISD::CTTZ, MVT::i64, Expand); 233263508Sdim setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); 234263508Sdim setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); 235263508Sdim setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); 236263508Sdim setOperationAction(ISD::CTPOP, MVT::i16, Legal); 237263508Sdim setOperationAction(ISD::CTPOP, MVT::i32, Legal); 238263508Sdim setOperationAction(ISD::CTPOP, MVT::i64, Legal); 239263508Sdim 240239310Sdim // Now deduce the information based on the above mentioned 241239310Sdim // actions 242239310Sdim computeRegisterProperties(); 243239310Sdim} 244239310Sdim 245239310Sdimconst char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { 246239310Sdim switch (Opcode) { 247249423Sdim default: 248249423Sdim return 0; 249249423Sdim case NVPTXISD::CALL: 250249423Sdim return "NVPTXISD::CALL"; 251249423Sdim case NVPTXISD::RET_FLAG: 252249423Sdim return "NVPTXISD::RET_FLAG"; 253249423Sdim case NVPTXISD::Wrapper: 254249423Sdim return "NVPTXISD::Wrapper"; 255249423Sdim case NVPTXISD::DeclareParam: 256249423Sdim return "NVPTXISD::DeclareParam"; 257239310Sdim case NVPTXISD::DeclareScalarParam: 258239310Sdim return "NVPTXISD::DeclareScalarParam"; 259249423Sdim case NVPTXISD::DeclareRet: 260249423Sdim return "NVPTXISD::DeclareRet"; 261249423Sdim case NVPTXISD::DeclareRetParam: 262249423Sdim return "NVPTXISD::DeclareRetParam"; 263249423Sdim case NVPTXISD::PrintCall: 264249423Sdim return "NVPTXISD::PrintCall"; 265249423Sdim case NVPTXISD::LoadParam: 266249423Sdim return "NVPTXISD::LoadParam"; 267263508Sdim case NVPTXISD::LoadParamV2: 268263508Sdim return "NVPTXISD::LoadParamV2"; 269263508Sdim case NVPTXISD::LoadParamV4: 270263508Sdim return "NVPTXISD::LoadParamV4"; 271249423Sdim case NVPTXISD::StoreParam: 272249423Sdim return "NVPTXISD::StoreParam"; 273263508Sdim case NVPTXISD::StoreParamV2: 274263508Sdim return "NVPTXISD::StoreParamV2"; 275263508Sdim case NVPTXISD::StoreParamV4: 276263508Sdim return "NVPTXISD::StoreParamV4"; 277249423Sdim case NVPTXISD::StoreParamS32: 278249423Sdim return "NVPTXISD::StoreParamS32"; 279249423Sdim case NVPTXISD::StoreParamU32: 280249423Sdim return "NVPTXISD::StoreParamU32"; 281249423Sdim case NVPTXISD::CallArgBegin: 282249423Sdim return "NVPTXISD::CallArgBegin"; 283249423Sdim case NVPTXISD::CallArg: 284249423Sdim return "NVPTXISD::CallArg"; 285249423Sdim case NVPTXISD::LastCallArg: 286249423Sdim return "NVPTXISD::LastCallArg"; 287249423Sdim case NVPTXISD::CallArgEnd: 288249423Sdim return "NVPTXISD::CallArgEnd"; 289249423Sdim case NVPTXISD::CallVoid: 290249423Sdim return "NVPTXISD::CallVoid"; 291249423Sdim case NVPTXISD::CallVal: 292249423Sdim return "NVPTXISD::CallVal"; 293249423Sdim case NVPTXISD::CallSymbol: 294249423Sdim return "NVPTXISD::CallSymbol"; 295249423Sdim case NVPTXISD::Prototype: 296249423Sdim return "NVPTXISD::Prototype"; 297249423Sdim case NVPTXISD::MoveParam: 298249423Sdim return "NVPTXISD::MoveParam"; 299249423Sdim case NVPTXISD::StoreRetval: 300249423Sdim return "NVPTXISD::StoreRetval"; 301263508Sdim case NVPTXISD::StoreRetvalV2: 302263508Sdim return "NVPTXISD::StoreRetvalV2"; 303263508Sdim case NVPTXISD::StoreRetvalV4: 304263508Sdim return "NVPTXISD::StoreRetvalV4"; 305249423Sdim case NVPTXISD::PseudoUseParam: 306249423Sdim return "NVPTXISD::PseudoUseParam"; 307249423Sdim case NVPTXISD::RETURN: 308249423Sdim return "NVPTXISD::RETURN"; 309249423Sdim case NVPTXISD::CallSeqBegin: 310249423Sdim return "NVPTXISD::CallSeqBegin"; 311249423Sdim case NVPTXISD::CallSeqEnd: 312249423Sdim return "NVPTXISD::CallSeqEnd"; 313263508Sdim case NVPTXISD::CallPrototype: 314263508Sdim return "NVPTXISD::CallPrototype"; 315249423Sdim case NVPTXISD::LoadV2: 316249423Sdim return "NVPTXISD::LoadV2"; 317249423Sdim case NVPTXISD::LoadV4: 318249423Sdim return "NVPTXISD::LoadV4"; 319249423Sdim case NVPTXISD::LDGV2: 320249423Sdim return "NVPTXISD::LDGV2"; 321249423Sdim case NVPTXISD::LDGV4: 322249423Sdim return "NVPTXISD::LDGV4"; 323249423Sdim case NVPTXISD::LDUV2: 324249423Sdim return "NVPTXISD::LDUV2"; 325249423Sdim case NVPTXISD::LDUV4: 326249423Sdim return "NVPTXISD::LDUV4"; 327249423Sdim case NVPTXISD::StoreV2: 328249423Sdim return "NVPTXISD::StoreV2"; 329249423Sdim case NVPTXISD::StoreV4: 330249423Sdim return "NVPTXISD::StoreV4"; 331239310Sdim } 332239310Sdim} 333239310Sdim 334249423Sdimbool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const { 335249423Sdim return VT == MVT::i1; 336249423Sdim} 337239310Sdim 338239310SdimSDValue 339239310SdimNVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { 340263508Sdim SDLoc dl(Op); 341239310Sdim const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 342239310Sdim Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); 343239310Sdim return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); 344239310Sdim} 345239310Sdim 346263508Sdimstd::string 347263508SdimNVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, 348263508Sdim const SmallVectorImpl<ISD::OutputArg> &Outs, 349263508Sdim unsigned retAlignment, 350263508Sdim const ImmutableCallSite *CS) const { 351239310Sdim 352239310Sdim bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 353263508Sdim assert(isABI && "Non-ABI compilation is not supported"); 354263508Sdim if (!isABI) 355263508Sdim return ""; 356239310Sdim 357239310Sdim std::stringstream O; 358239310Sdim O << "prototype_" << uniqueCallSite << " : .callprototype "; 359239310Sdim 360263508Sdim if (retTy->getTypeID() == Type::VoidTyID) { 361239310Sdim O << "()"; 362263508Sdim } else { 363239310Sdim O << "("; 364263508Sdim if (retTy->isPrimitiveType() || retTy->isIntegerTy()) { 365263508Sdim unsigned size = 0; 366263508Sdim if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { 367263508Sdim size = ITy->getBitWidth(); 368263508Sdim if (size < 32) 369263508Sdim size = 32; 370263508Sdim } else { 371263508Sdim assert(retTy->isFloatingPointTy() && 372263508Sdim "Floating point type expected here"); 373263508Sdim size = retTy->getPrimitiveSizeInBits(); 374263508Sdim } 375239310Sdim 376263508Sdim O << ".param .b" << size << " _"; 377263508Sdim } else if (isa<PointerType>(retTy)) { 378263508Sdim O << ".param .b" << getPointerTy().getSizeInBits() << " _"; 379263508Sdim } else { 380263508Sdim if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { 381263508Sdim SmallVector<EVT, 16> vtparts; 382263508Sdim ComputeValueVTs(*this, retTy, vtparts); 383263508Sdim unsigned totalsz = 0; 384263508Sdim for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 385263508Sdim unsigned elems = 1; 386263508Sdim EVT elemtype = vtparts[i]; 387263508Sdim if (vtparts[i].isVector()) { 388263508Sdim elems = vtparts[i].getVectorNumElements(); 389263508Sdim elemtype = vtparts[i].getVectorElementType(); 390239310Sdim } 391263508Sdim // TODO: no need to loop 392263508Sdim for (unsigned j = 0, je = elems; j != je; ++j) { 393263508Sdim unsigned sz = elemtype.getSizeInBits(); 394263508Sdim if (elemtype.isInteger() && (sz < 8)) 395263508Sdim sz = 8; 396263508Sdim totalsz += sz / 8; 397263508Sdim } 398239310Sdim } 399263508Sdim O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; 400263508Sdim } else { 401263508Sdim assert(false && "Unknown return type"); 402239310Sdim } 403239310Sdim } 404239310Sdim O << ") "; 405239310Sdim } 406239310Sdim O << "_ ("; 407239310Sdim 408239310Sdim bool first = true; 409239310Sdim MVT thePointerTy = getPointerTy(); 410239310Sdim 411263508Sdim unsigned OIdx = 0; 412263508Sdim for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { 413263508Sdim Type *Ty = Args[i].Ty; 414239310Sdim if (!first) { 415239310Sdim O << ", "; 416239310Sdim } 417239310Sdim first = false; 418239310Sdim 419263508Sdim if (Outs[OIdx].Flags.isByVal() == false) { 420263508Sdim if (Ty->isAggregateType() || Ty->isVectorTy()) { 421263508Sdim unsigned align = 0; 422263508Sdim const CallInst *CallI = cast<CallInst>(CS->getInstruction()); 423263508Sdim const DataLayout *TD = getDataLayout(); 424263508Sdim // +1 because index 0 is reserved for return type alignment 425263508Sdim if (!llvm::getAlign(*CallI, i + 1, align)) 426263508Sdim align = TD->getABITypeAlignment(Ty); 427263508Sdim unsigned sz = TD->getTypeAllocSize(Ty); 428263508Sdim O << ".param .align " << align << " .b8 "; 429263508Sdim O << "_"; 430263508Sdim O << "[" << sz << "]"; 431263508Sdim // update the index for Outs 432263508Sdim SmallVector<EVT, 16> vtparts; 433263508Sdim ComputeValueVTs(*this, Ty, vtparts); 434263508Sdim if (unsigned len = vtparts.size()) 435263508Sdim OIdx += len - 1; 436263508Sdim continue; 437263508Sdim } 438263508Sdim // i8 types in IR will be i16 types in SDAG 439263508Sdim assert((getValueType(Ty) == Outs[OIdx].VT || 440263508Sdim (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && 441263508Sdim "type mismatch between callee prototype and arguments"); 442263508Sdim // scalar type 443239310Sdim unsigned sz = 0; 444239310Sdim if (isa<IntegerType>(Ty)) { 445239310Sdim sz = cast<IntegerType>(Ty)->getBitWidth(); 446249423Sdim if (sz < 32) 447249423Sdim sz = 32; 448249423Sdim } else if (isa<PointerType>(Ty)) 449239310Sdim sz = thePointerTy.getSizeInBits(); 450239310Sdim else 451239310Sdim sz = Ty->getPrimitiveSizeInBits(); 452263508Sdim O << ".param .b" << sz << " "; 453239310Sdim O << "_"; 454239310Sdim continue; 455239310Sdim } 456239310Sdim const PointerType *PTy = dyn_cast<PointerType>(Ty); 457249423Sdim assert(PTy && "Param with byval attribute should be a pointer type"); 458239310Sdim Type *ETy = PTy->getElementType(); 459239310Sdim 460263508Sdim unsigned align = Outs[OIdx].Flags.getByValAlign(); 461263508Sdim unsigned sz = getDataLayout()->getTypeAllocSize(ETy); 462263508Sdim O << ".param .align " << align << " .b8 "; 463263508Sdim O << "_"; 464263508Sdim O << "[" << sz << "]"; 465263508Sdim } 466263508Sdim O << ");"; 467263508Sdim return O.str(); 468263508Sdim} 469239310Sdim 470263508Sdimunsigned 471263508SdimNVPTXTargetLowering::getArgumentAlignment(SDValue Callee, 472263508Sdim const ImmutableCallSite *CS, 473263508Sdim Type *Ty, 474263508Sdim unsigned Idx) const { 475263508Sdim const DataLayout *TD = getDataLayout(); 476263508Sdim unsigned Align = 0; 477263508Sdim const Value *DirectCallee = CS->getCalledFunction(); 478263508Sdim 479263508Sdim if (!DirectCallee) { 480263508Sdim // We don't have a direct function symbol, but that may be because of 481263508Sdim // constant cast instructions in the call. 482263508Sdim const Instruction *CalleeI = CS->getInstruction(); 483263508Sdim assert(CalleeI && "Call target is not a function or derived value?"); 484263508Sdim 485263508Sdim // With bitcast'd call targets, the instruction will be the call 486263508Sdim if (isa<CallInst>(CalleeI)) { 487263508Sdim // Check if we have call alignment metadata 488263508Sdim if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align)) 489263508Sdim return Align; 490263508Sdim 491263508Sdim const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue(); 492263508Sdim // Ignore any bitcast instructions 493263508Sdim while(isa<ConstantExpr>(CalleeV)) { 494263508Sdim const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); 495263508Sdim if (!CE->isCast()) 496263508Sdim break; 497263508Sdim // Look through the bitcast 498263508Sdim CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0); 499239310Sdim } 500263508Sdim 501263508Sdim // We have now looked past all of the bitcasts. Do we finally have a 502263508Sdim // Function? 503263508Sdim if (isa<Function>(CalleeV)) 504263508Sdim DirectCallee = CalleeV; 505239310Sdim } 506239310Sdim } 507263508Sdim 508263508Sdim // Check for function alignment information if we found that the 509263508Sdim // ultimate target is a Function 510263508Sdim if (DirectCallee) 511263508Sdim if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align)) 512263508Sdim return Align; 513263508Sdim 514263508Sdim // Call is indirect or alignment information is not available, fall back to 515263508Sdim // the ABI type alignment 516263508Sdim return TD->getABITypeAlignment(Ty); 517239310Sdim} 518239310Sdim 519249423SdimSDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 520249423Sdim SmallVectorImpl<SDValue> &InVals) const { 521249423Sdim SelectionDAG &DAG = CLI.DAG; 522263508Sdim SDLoc dl = CLI.DL; 523263508Sdim SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 524263508Sdim SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 525263508Sdim SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 526249423Sdim SDValue Chain = CLI.Chain; 527249423Sdim SDValue Callee = CLI.Callee; 528249423Sdim bool &isTailCall = CLI.IsTailCall; 529249423Sdim ArgListTy &Args = CLI.Args; 530249423Sdim Type *retTy = CLI.RetTy; 531249423Sdim ImmutableCallSite *CS = CLI.CS; 532239310Sdim 533239310Sdim bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 534263508Sdim assert(isABI && "Non-ABI compilation is not supported"); 535263508Sdim if (!isABI) 536263508Sdim return Chain; 537263508Sdim const DataLayout *TD = getDataLayout(); 538263508Sdim MachineFunction &MF = DAG.getMachineFunction(); 539263508Sdim const Function *F = MF.getFunction(); 540239310Sdim 541239310Sdim SDValue tempChain = Chain; 542249423Sdim Chain = 543263508Sdim DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), 544263508Sdim dl); 545239310Sdim SDValue InFlag = Chain.getValue(1); 546239310Sdim 547239310Sdim unsigned paramCount = 0; 548263508Sdim // Args.size() and Outs.size() need not match. 549263508Sdim // Outs.size() will be larger 550263508Sdim // * if there is an aggregate argument with multiple fields (each field 551263508Sdim // showing up separately in Outs) 552263508Sdim // * if there is a vector argument with more than typical vector-length 553263508Sdim // elements (generally if more than 4) where each vector element is 554263508Sdim // individually present in Outs. 555263508Sdim // So a different index should be used for indexing into Outs/OutVals. 556263508Sdim // See similar issue in LowerFormalArguments. 557263508Sdim unsigned OIdx = 0; 558239310Sdim // Declare the .params or .reg need to pass values 559239310Sdim // to the function 560263508Sdim for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { 561263508Sdim EVT VT = Outs[OIdx].VT; 562263508Sdim Type *Ty = Args[i].Ty; 563239310Sdim 564263508Sdim if (Outs[OIdx].Flags.isByVal() == false) { 565263508Sdim if (Ty->isAggregateType()) { 566263508Sdim // aggregate 567263508Sdim SmallVector<EVT, 16> vtparts; 568263508Sdim ComputeValueVTs(*this, Ty, vtparts); 569263508Sdim 570263508Sdim unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); 571263508Sdim // declare .param .align <align> .b8 .param<n>[<size>]; 572263508Sdim unsigned sz = TD->getTypeAllocSize(Ty); 573263508Sdim SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 574263508Sdim SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), 575263508Sdim DAG.getConstant(paramCount, MVT::i32), 576263508Sdim DAG.getConstant(sz, MVT::i32), InFlag }; 577263508Sdim Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, 578263508Sdim DeclareParamOps, 5); 579263508Sdim InFlag = Chain.getValue(1); 580263508Sdim unsigned curOffset = 0; 581263508Sdim for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { 582263508Sdim unsigned elems = 1; 583263508Sdim EVT elemtype = vtparts[j]; 584263508Sdim if (vtparts[j].isVector()) { 585263508Sdim elems = vtparts[j].getVectorNumElements(); 586263508Sdim elemtype = vtparts[j].getVectorElementType(); 587263508Sdim } 588263508Sdim for (unsigned k = 0, ke = elems; k != ke; ++k) { 589263508Sdim unsigned sz = elemtype.getSizeInBits(); 590263508Sdim if (elemtype.isInteger() && (sz < 8)) 591263508Sdim sz = 8; 592263508Sdim SDValue StVal = OutVals[OIdx]; 593263508Sdim if (elemtype.getSizeInBits() < 16) { 594263508Sdim StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); 595263508Sdim } 596263508Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 597263508Sdim SDValue CopyParamOps[] = { Chain, 598263508Sdim DAG.getConstant(paramCount, MVT::i32), 599263508Sdim DAG.getConstant(curOffset, MVT::i32), 600263508Sdim StVal, InFlag }; 601263508Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, 602263508Sdim CopyParamVTs, &CopyParamOps[0], 5, 603263508Sdim elemtype, MachinePointerInfo()); 604263508Sdim InFlag = Chain.getValue(1); 605263508Sdim curOffset += sz / 8; 606263508Sdim ++OIdx; 607263508Sdim } 608263508Sdim } 609263508Sdim if (vtparts.size() > 0) 610263508Sdim --OIdx; 611263508Sdim ++paramCount; 612263508Sdim continue; 613263508Sdim } 614263508Sdim if (Ty->isVectorTy()) { 615263508Sdim EVT ObjectVT = getValueType(Ty); 616263508Sdim unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); 617263508Sdim // declare .param .align <align> .b8 .param<n>[<size>]; 618263508Sdim unsigned sz = TD->getTypeAllocSize(Ty); 619263508Sdim SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 620263508Sdim SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), 621263508Sdim DAG.getConstant(paramCount, MVT::i32), 622263508Sdim DAG.getConstant(sz, MVT::i32), InFlag }; 623263508Sdim Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, 624263508Sdim DeclareParamOps, 5); 625263508Sdim InFlag = Chain.getValue(1); 626263508Sdim unsigned NumElts = ObjectVT.getVectorNumElements(); 627263508Sdim EVT EltVT = ObjectVT.getVectorElementType(); 628263508Sdim EVT MemVT = EltVT; 629263508Sdim bool NeedExtend = false; 630263508Sdim if (EltVT.getSizeInBits() < 16) { 631263508Sdim NeedExtend = true; 632263508Sdim EltVT = MVT::i16; 633263508Sdim } 634263508Sdim 635263508Sdim // V1 store 636263508Sdim if (NumElts == 1) { 637263508Sdim SDValue Elt = OutVals[OIdx++]; 638263508Sdim if (NeedExtend) 639263508Sdim Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt); 640263508Sdim 641263508Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 642263508Sdim SDValue CopyParamOps[] = { Chain, 643263508Sdim DAG.getConstant(paramCount, MVT::i32), 644263508Sdim DAG.getConstant(0, MVT::i32), Elt, 645263508Sdim InFlag }; 646263508Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, 647263508Sdim CopyParamVTs, &CopyParamOps[0], 5, 648263508Sdim MemVT, MachinePointerInfo()); 649263508Sdim InFlag = Chain.getValue(1); 650263508Sdim } else if (NumElts == 2) { 651263508Sdim SDValue Elt0 = OutVals[OIdx++]; 652263508Sdim SDValue Elt1 = OutVals[OIdx++]; 653263508Sdim if (NeedExtend) { 654263508Sdim Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0); 655263508Sdim Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1); 656263508Sdim } 657263508Sdim 658263508Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 659263508Sdim SDValue CopyParamOps[] = { Chain, 660263508Sdim DAG.getConstant(paramCount, MVT::i32), 661263508Sdim DAG.getConstant(0, MVT::i32), Elt0, Elt1, 662263508Sdim InFlag }; 663263508Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, 664263508Sdim CopyParamVTs, &CopyParamOps[0], 6, 665263508Sdim MemVT, MachinePointerInfo()); 666263508Sdim InFlag = Chain.getValue(1); 667263508Sdim } else { 668263508Sdim unsigned curOffset = 0; 669263508Sdim // V4 stores 670263508Sdim // We have at least 4 elements (<3 x Ty> expands to 4 elements) and 671263508Sdim // the 672263508Sdim // vector will be expanded to a power of 2 elements, so we know we can 673263508Sdim // always round up to the next multiple of 4 when creating the vector 674263508Sdim // stores. 675263508Sdim // e.g. 4 elem => 1 st.v4 676263508Sdim // 6 elem => 2 st.v4 677263508Sdim // 8 elem => 2 st.v4 678263508Sdim // 11 elem => 3 st.v4 679263508Sdim unsigned VecSize = 4; 680263508Sdim if (EltVT.getSizeInBits() == 64) 681263508Sdim VecSize = 2; 682263508Sdim 683263508Sdim // This is potentially only part of a vector, so assume all elements 684263508Sdim // are packed together. 685263508Sdim unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize; 686263508Sdim 687263508Sdim for (unsigned i = 0; i < NumElts; i += VecSize) { 688263508Sdim // Get values 689263508Sdim SDValue StoreVal; 690263508Sdim SmallVector<SDValue, 8> Ops; 691263508Sdim Ops.push_back(Chain); 692263508Sdim Ops.push_back(DAG.getConstant(paramCount, MVT::i32)); 693263508Sdim Ops.push_back(DAG.getConstant(curOffset, MVT::i32)); 694263508Sdim 695263508Sdim unsigned Opc = NVPTXISD::StoreParamV2; 696263508Sdim 697263508Sdim StoreVal = OutVals[OIdx++]; 698263508Sdim if (NeedExtend) 699263508Sdim StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 700263508Sdim Ops.push_back(StoreVal); 701263508Sdim 702263508Sdim if (i + 1 < NumElts) { 703263508Sdim StoreVal = OutVals[OIdx++]; 704263508Sdim if (NeedExtend) 705263508Sdim StoreVal = 706263508Sdim DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 707263508Sdim } else { 708263508Sdim StoreVal = DAG.getUNDEF(EltVT); 709263508Sdim } 710263508Sdim Ops.push_back(StoreVal); 711263508Sdim 712263508Sdim if (VecSize == 4) { 713263508Sdim Opc = NVPTXISD::StoreParamV4; 714263508Sdim if (i + 2 < NumElts) { 715263508Sdim StoreVal = OutVals[OIdx++]; 716263508Sdim if (NeedExtend) 717263508Sdim StoreVal = 718263508Sdim DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 719263508Sdim } else { 720263508Sdim StoreVal = DAG.getUNDEF(EltVT); 721263508Sdim } 722263508Sdim Ops.push_back(StoreVal); 723263508Sdim 724263508Sdim if (i + 3 < NumElts) { 725263508Sdim StoreVal = OutVals[OIdx++]; 726263508Sdim if (NeedExtend) 727263508Sdim StoreVal = 728263508Sdim DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 729263508Sdim } else { 730263508Sdim StoreVal = DAG.getUNDEF(EltVT); 731263508Sdim } 732263508Sdim Ops.push_back(StoreVal); 733263508Sdim } 734263508Sdim 735263508Sdim Ops.push_back(InFlag); 736263508Sdim 737263508Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 738263508Sdim Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0], 739263508Sdim Ops.size(), MemVT, 740263508Sdim MachinePointerInfo()); 741263508Sdim InFlag = Chain.getValue(1); 742263508Sdim curOffset += PerStoreOffset; 743263508Sdim } 744263508Sdim } 745263508Sdim ++paramCount; 746263508Sdim --OIdx; 747263508Sdim continue; 748263508Sdim } 749239310Sdim // Plain scalar 750239310Sdim // for ABI, declare .param .b<size> .param<n>; 751239310Sdim unsigned sz = VT.getSizeInBits(); 752263508Sdim bool needExtend = false; 753263508Sdim if (VT.isInteger()) { 754263508Sdim if (sz < 16) 755263508Sdim needExtend = true; 756263508Sdim if (sz < 32) 757263508Sdim sz = 32; 758263508Sdim } 759239310Sdim SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 760239310Sdim SDValue DeclareParamOps[] = { Chain, 761239310Sdim DAG.getConstant(paramCount, MVT::i32), 762239310Sdim DAG.getConstant(sz, MVT::i32), 763263508Sdim DAG.getConstant(0, MVT::i32), InFlag }; 764239310Sdim Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, 765239310Sdim DeclareParamOps, 5); 766239310Sdim InFlag = Chain.getValue(1); 767263508Sdim SDValue OutV = OutVals[OIdx]; 768263508Sdim if (needExtend) { 769263508Sdim // zext/sext i1 to i16 770263508Sdim unsigned opc = ISD::ZERO_EXTEND; 771263508Sdim if (Outs[OIdx].Flags.isSExt()) 772263508Sdim opc = ISD::SIGN_EXTEND; 773263508Sdim OutV = DAG.getNode(opc, dl, MVT::i16, OutV); 774263508Sdim } 775239310Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 776239310Sdim SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), 777263508Sdim DAG.getConstant(0, MVT::i32), OutV, InFlag }; 778239310Sdim 779239310Sdim unsigned opcode = NVPTXISD::StoreParam; 780263508Sdim if (Outs[OIdx].Flags.isZExt()) 781263508Sdim opcode = NVPTXISD::StoreParamU32; 782263508Sdim else if (Outs[OIdx].Flags.isSExt()) 783263508Sdim opcode = NVPTXISD::StoreParamS32; 784263508Sdim Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, 5, 785263508Sdim VT, MachinePointerInfo()); 786239310Sdim 787239310Sdim InFlag = Chain.getValue(1); 788239310Sdim ++paramCount; 789239310Sdim continue; 790239310Sdim } 791239310Sdim // struct or vector 792239310Sdim SmallVector<EVT, 16> vtparts; 793239310Sdim const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); 794249423Sdim assert(PTy && "Type of a byval parameter should be pointer"); 795239310Sdim ComputeValueVTs(*this, PTy->getElementType(), vtparts); 796239310Sdim 797263508Sdim // declare .param .align <align> .b8 .param<n>[<size>]; 798263508Sdim unsigned sz = Outs[OIdx].Flags.getByValSize(); 799263508Sdim SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 800263508Sdim // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, 801263508Sdim // so we don't need to worry about natural alignment or not. 802263508Sdim // See TargetLowering::LowerCallTo(). 803263508Sdim SDValue DeclareParamOps[] = { 804263508Sdim Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32), 805263508Sdim DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), 806263508Sdim InFlag 807263508Sdim }; 808263508Sdim Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, 809263508Sdim DeclareParamOps, 5); 810263508Sdim InFlag = Chain.getValue(1); 811239310Sdim unsigned curOffset = 0; 812249423Sdim for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { 813239310Sdim unsigned elems = 1; 814239310Sdim EVT elemtype = vtparts[j]; 815239310Sdim if (vtparts[j].isVector()) { 816239310Sdim elems = vtparts[j].getVectorNumElements(); 817239310Sdim elemtype = vtparts[j].getVectorElementType(); 818239310Sdim } 819249423Sdim for (unsigned k = 0, ke = elems; k != ke; ++k) { 820239310Sdim unsigned sz = elemtype.getSizeInBits(); 821263508Sdim if (elemtype.isInteger() && (sz < 8)) 822263508Sdim sz = 8; 823249423Sdim SDValue srcAddr = 824263508Sdim DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], 825249423Sdim DAG.getConstant(curOffset, getPointerTy())); 826263508Sdim SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, 827263508Sdim MachinePointerInfo(), false, false, false, 828263508Sdim 0); 829263508Sdim if (elemtype.getSizeInBits() < 16) { 830263508Sdim theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); 831263508Sdim } 832239310Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 833239310Sdim SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), 834263508Sdim DAG.getConstant(curOffset, MVT::i32), theVal, 835239310Sdim InFlag }; 836263508Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, 837263508Sdim CopyParamOps, 5, elemtype, 838263508Sdim MachinePointerInfo()); 839263508Sdim 840239310Sdim InFlag = Chain.getValue(1); 841263508Sdim curOffset += sz / 8; 842239310Sdim } 843239310Sdim } 844263508Sdim ++paramCount; 845239310Sdim } 846239310Sdim 847239310Sdim GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); 848239310Sdim unsigned retAlignment = 0; 849239310Sdim 850239310Sdim // Handle Result 851239310Sdim if (Ins.size() > 0) { 852239310Sdim SmallVector<EVT, 16> resvtparts; 853239310Sdim ComputeValueVTs(*this, retTy, resvtparts); 854239310Sdim 855263508Sdim // Declare 856263508Sdim // .param .align 16 .b8 retval0[<size-in-bytes>], or 857263508Sdim // .param .b<size-in-bits> retval0 858263508Sdim unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); 859263508Sdim if (retTy->isPrimitiveType() || retTy->isIntegerTy() || 860263508Sdim retTy->isPointerTy()) { 861263508Sdim // Scalar needs to be at least 32bit wide 862263508Sdim if (resultsz < 32) 863263508Sdim resultsz = 32; 864263508Sdim SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 865263508Sdim SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), 866263508Sdim DAG.getConstant(resultsz, MVT::i32), 867263508Sdim DAG.getConstant(0, MVT::i32), InFlag }; 868263508Sdim Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, 869263508Sdim DeclareRetOps, 5); 870263508Sdim InFlag = Chain.getValue(1); 871263508Sdim } else { 872263508Sdim retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); 873263508Sdim SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 874263508Sdim SDValue DeclareRetOps[] = { Chain, 875263508Sdim DAG.getConstant(retAlignment, MVT::i32), 876263508Sdim DAG.getConstant(resultsz / 8, MVT::i32), 877263508Sdim DAG.getConstant(0, MVT::i32), InFlag }; 878263508Sdim Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, 879263508Sdim DeclareRetOps, 5); 880263508Sdim InFlag = Chain.getValue(1); 881239310Sdim } 882239310Sdim } 883239310Sdim 884239310Sdim if (!Func) { 885239310Sdim // This is indirect function call case : PTX requires a prototype of the 886239310Sdim // form 887239310Sdim // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); 888239310Sdim // to be emitted, and the label has to used as the last arg of call 889239310Sdim // instruction. 890263508Sdim // The prototype is embedded in a string and put as the operand for a 891263508Sdim // CallPrototype SDNode which will print out to the value of the string. 892263508Sdim SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); 893263508Sdim std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS); 894263508Sdim const char *ProtoStr = 895263508Sdim nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); 896263508Sdim SDValue ProtoOps[] = { 897263508Sdim Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, 898249423Sdim }; 899263508Sdim Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, &ProtoOps[0], 3); 900239310Sdim InFlag = Chain.getValue(1); 901239310Sdim } 902239310Sdim // Op to just print "call" 903239310Sdim SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); 904249423Sdim SDValue PrintCallOps[] = { 905263508Sdim Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag 906249423Sdim }; 907249423Sdim Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), 908249423Sdim dl, PrintCallVTs, PrintCallOps, 3); 909239310Sdim InFlag = Chain.getValue(1); 910239310Sdim 911239310Sdim // Ops to print out the function name 912239310Sdim SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); 913239310Sdim SDValue CallVoidOps[] = { Chain, Callee, InFlag }; 914239310Sdim Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3); 915239310Sdim InFlag = Chain.getValue(1); 916239310Sdim 917239310Sdim // Ops to print out the param list 918239310Sdim SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); 919239310Sdim SDValue CallArgBeginOps[] = { Chain, InFlag }; 920239310Sdim Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, 921239310Sdim CallArgBeginOps, 2); 922239310Sdim InFlag = Chain.getValue(1); 923239310Sdim 924249423Sdim for (unsigned i = 0, e = paramCount; i != e; ++i) { 925239310Sdim unsigned opcode; 926249423Sdim if (i == (e - 1)) 927239310Sdim opcode = NVPTXISD::LastCallArg; 928239310Sdim else 929239310Sdim opcode = NVPTXISD::CallArg; 930239310Sdim SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); 931239310Sdim SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), 932249423Sdim DAG.getConstant(i, MVT::i32), InFlag }; 933239310Sdim Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); 934239310Sdim InFlag = Chain.getValue(1); 935239310Sdim } 936239310Sdim SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); 937249423Sdim SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32), 938239310Sdim InFlag }; 939249423Sdim Chain = 940249423Sdim DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3); 941239310Sdim InFlag = Chain.getValue(1); 942239310Sdim 943239310Sdim if (!Func) { 944239310Sdim SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); 945249423Sdim SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), 946239310Sdim InFlag }; 947239310Sdim Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); 948239310Sdim InFlag = Chain.getValue(1); 949239310Sdim } 950239310Sdim 951239310Sdim // Generate loads from param memory/moves from registers for result 952239310Sdim if (Ins.size() > 0) { 953263508Sdim unsigned resoffset = 0; 954263508Sdim if (retTy && retTy->isVectorTy()) { 955263508Sdim EVT ObjectVT = getValueType(retTy); 956263508Sdim unsigned NumElts = ObjectVT.getVectorNumElements(); 957263508Sdim EVT EltVT = ObjectVT.getVectorElementType(); 958263508Sdim assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(), 959263508Sdim ObjectVT) == NumElts && 960263508Sdim "Vector was not scalarized"); 961263508Sdim unsigned sz = EltVT.getSizeInBits(); 962263508Sdim bool needTruncate = sz < 16 ? true : false; 963263508Sdim 964263508Sdim if (NumElts == 1) { 965263508Sdim // Just a simple load 966263508Sdim std::vector<EVT> LoadRetVTs; 967263508Sdim if (needTruncate) { 968263508Sdim // If loading i1 result, generate 969263508Sdim // load i16 970263508Sdim // trunc i16 to i1 971263508Sdim LoadRetVTs.push_back(MVT::i16); 972263508Sdim } else 973263508Sdim LoadRetVTs.push_back(EltVT); 974263508Sdim LoadRetVTs.push_back(MVT::Other); 975263508Sdim LoadRetVTs.push_back(MVT::Glue); 976263508Sdim std::vector<SDValue> LoadRetOps; 977263508Sdim LoadRetOps.push_back(Chain); 978263508Sdim LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 979263508Sdim LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); 980263508Sdim LoadRetOps.push_back(InFlag); 981263508Sdim SDValue retval = DAG.getMemIntrinsicNode( 982263508Sdim NVPTXISD::LoadParam, dl, 983263508Sdim DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], 984263508Sdim LoadRetOps.size(), EltVT, MachinePointerInfo()); 985263508Sdim Chain = retval.getValue(1); 986263508Sdim InFlag = retval.getValue(2); 987263508Sdim SDValue Ret0 = retval; 988263508Sdim if (needTruncate) 989263508Sdim Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0); 990263508Sdim InVals.push_back(Ret0); 991263508Sdim } else if (NumElts == 2) { 992263508Sdim // LoadV2 993263508Sdim std::vector<EVT> LoadRetVTs; 994263508Sdim if (needTruncate) { 995263508Sdim // If loading i1 result, generate 996263508Sdim // load i16 997263508Sdim // trunc i16 to i1 998263508Sdim LoadRetVTs.push_back(MVT::i16); 999263508Sdim LoadRetVTs.push_back(MVT::i16); 1000263508Sdim } else { 1001263508Sdim LoadRetVTs.push_back(EltVT); 1002263508Sdim LoadRetVTs.push_back(EltVT); 1003263508Sdim } 1004263508Sdim LoadRetVTs.push_back(MVT::Other); 1005263508Sdim LoadRetVTs.push_back(MVT::Glue); 1006263508Sdim std::vector<SDValue> LoadRetOps; 1007263508Sdim LoadRetOps.push_back(Chain); 1008263508Sdim LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 1009263508Sdim LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); 1010263508Sdim LoadRetOps.push_back(InFlag); 1011263508Sdim SDValue retval = DAG.getMemIntrinsicNode( 1012263508Sdim NVPTXISD::LoadParamV2, dl, 1013263508Sdim DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], 1014263508Sdim LoadRetOps.size(), EltVT, MachinePointerInfo()); 1015263508Sdim Chain = retval.getValue(2); 1016263508Sdim InFlag = retval.getValue(3); 1017263508Sdim SDValue Ret0 = retval.getValue(0); 1018263508Sdim SDValue Ret1 = retval.getValue(1); 1019263508Sdim if (needTruncate) { 1020263508Sdim Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0); 1021263508Sdim InVals.push_back(Ret0); 1022263508Sdim Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1); 1023263508Sdim InVals.push_back(Ret1); 1024263508Sdim } else { 1025263508Sdim InVals.push_back(Ret0); 1026263508Sdim InVals.push_back(Ret1); 1027263508Sdim } 1028263508Sdim } else { 1029263508Sdim // Split into N LoadV4 1030263508Sdim unsigned Ofst = 0; 1031263508Sdim unsigned VecSize = 4; 1032263508Sdim unsigned Opc = NVPTXISD::LoadParamV4; 1033263508Sdim if (EltVT.getSizeInBits() == 64) { 1034263508Sdim VecSize = 2; 1035263508Sdim Opc = NVPTXISD::LoadParamV2; 1036263508Sdim } 1037263508Sdim EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); 1038263508Sdim for (unsigned i = 0; i < NumElts; i += VecSize) { 1039263508Sdim SmallVector<EVT, 8> LoadRetVTs; 1040263508Sdim if (needTruncate) { 1041263508Sdim // If loading i1 result, generate 1042263508Sdim // load i16 1043263508Sdim // trunc i16 to i1 1044263508Sdim for (unsigned j = 0; j < VecSize; ++j) 1045263508Sdim LoadRetVTs.push_back(MVT::i16); 1046263508Sdim } else { 1047263508Sdim for (unsigned j = 0; j < VecSize; ++j) 1048263508Sdim LoadRetVTs.push_back(EltVT); 1049263508Sdim } 1050263508Sdim LoadRetVTs.push_back(MVT::Other); 1051263508Sdim LoadRetVTs.push_back(MVT::Glue); 1052263508Sdim SmallVector<SDValue, 4> LoadRetOps; 1053263508Sdim LoadRetOps.push_back(Chain); 1054263508Sdim LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 1055263508Sdim LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32)); 1056263508Sdim LoadRetOps.push_back(InFlag); 1057263508Sdim SDValue retval = DAG.getMemIntrinsicNode( 1058263508Sdim Opc, dl, DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), 1059263508Sdim &LoadRetOps[0], LoadRetOps.size(), EltVT, MachinePointerInfo()); 1060263508Sdim if (VecSize == 2) { 1061263508Sdim Chain = retval.getValue(2); 1062263508Sdim InFlag = retval.getValue(3); 1063263508Sdim } else { 1064263508Sdim Chain = retval.getValue(4); 1065263508Sdim InFlag = retval.getValue(5); 1066263508Sdim } 1067263508Sdim 1068263508Sdim for (unsigned j = 0; j < VecSize; ++j) { 1069263508Sdim if (i + j >= NumElts) 1070263508Sdim break; 1071263508Sdim SDValue Elt = retval.getValue(j); 1072263508Sdim if (needTruncate) 1073263508Sdim Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); 1074263508Sdim InVals.push_back(Elt); 1075263508Sdim } 1076263508Sdim Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 1077263508Sdim } 1078263508Sdim } 1079263508Sdim } else { 1080263508Sdim SmallVector<EVT, 16> VTs; 1081263508Sdim ComputePTXValueVTs(*this, retTy, VTs); 1082263508Sdim assert(VTs.size() == Ins.size() && "Bad value decomposition"); 1083249423Sdim for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 1084263508Sdim unsigned sz = VTs[i].getSizeInBits(); 1085263508Sdim bool needTruncate = sz < 8 ? true : false; 1086263508Sdim if (VTs[i].isInteger() && (sz < 8)) 1087249423Sdim sz = 8; 1088263508Sdim 1089263508Sdim SmallVector<EVT, 4> LoadRetVTs; 1090263508Sdim EVT TheLoadType = VTs[i]; 1091263508Sdim if (retTy->isIntegerTy() && 1092263508Sdim TD->getTypeAllocSizeInBits(retTy) < 32) { 1093263508Sdim // This is for integer types only, and specifically not for 1094263508Sdim // aggregates. 1095263508Sdim LoadRetVTs.push_back(MVT::i32); 1096263508Sdim TheLoadType = MVT::i32; 1097263508Sdim } else if (sz < 16) { 1098263508Sdim // If loading i1/i8 result, generate 1099263508Sdim // load i8 (-> i16) 1100263508Sdim // trunc i16 to i1/i8 1101263508Sdim LoadRetVTs.push_back(MVT::i16); 1102263508Sdim } else 1103263508Sdim LoadRetVTs.push_back(Ins[i].VT); 1104263508Sdim LoadRetVTs.push_back(MVT::Other); 1105263508Sdim LoadRetVTs.push_back(MVT::Glue); 1106263508Sdim 1107263508Sdim SmallVector<SDValue, 4> LoadRetOps; 1108263508Sdim LoadRetOps.push_back(Chain); 1109263508Sdim LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 1110263508Sdim LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32)); 1111263508Sdim LoadRetOps.push_back(InFlag); 1112263508Sdim SDValue retval = DAG.getMemIntrinsicNode( 1113263508Sdim NVPTXISD::LoadParam, dl, 1114263508Sdim DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], 1115263508Sdim LoadRetOps.size(), TheLoadType, MachinePointerInfo()); 1116239310Sdim Chain = retval.getValue(1); 1117239310Sdim InFlag = retval.getValue(2); 1118263508Sdim SDValue Ret0 = retval.getValue(0); 1119263508Sdim if (needTruncate) 1120263508Sdim Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); 1121263508Sdim InVals.push_back(Ret0); 1122249423Sdim resoffset += sz / 8; 1123239310Sdim } 1124239310Sdim } 1125239310Sdim } 1126263508Sdim 1127249423Sdim Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), 1128249423Sdim DAG.getIntPtrConstant(uniqueCallSite + 1, true), 1129263508Sdim InFlag, dl); 1130239310Sdim uniqueCallSite++; 1131239310Sdim 1132239310Sdim // set isTailCall to false for now, until we figure out how to express 1133239310Sdim // tail call optimization in PTX 1134239310Sdim isTailCall = false; 1135239310Sdim return Chain; 1136239310Sdim} 1137239310Sdim 1138239310Sdim// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() 1139239310Sdim// (see LegalizeDAG.cpp). This is slow and uses local memory. 1140239310Sdim// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 1141249423SdimSDValue 1142249423SdimNVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { 1143239310Sdim SDNode *Node = Op.getNode(); 1144263508Sdim SDLoc dl(Node); 1145239310Sdim SmallVector<SDValue, 8> Ops; 1146239310Sdim unsigned NumOperands = Node->getNumOperands(); 1147249423Sdim for (unsigned i = 0; i < NumOperands; ++i) { 1148239310Sdim SDValue SubOp = Node->getOperand(i); 1149239310Sdim EVT VVT = SubOp.getNode()->getValueType(0); 1150239310Sdim EVT EltVT = VVT.getVectorElementType(); 1151239310Sdim unsigned NumSubElem = VVT.getVectorNumElements(); 1152249423Sdim for (unsigned j = 0; j < NumSubElem; ++j) { 1153239310Sdim Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, 1154239310Sdim DAG.getIntPtrConstant(j))); 1155239310Sdim } 1156239310Sdim } 1157249423Sdim return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0], 1158249423Sdim Ops.size()); 1159239310Sdim} 1160239310Sdim 1161249423SdimSDValue 1162249423SdimNVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 1163239310Sdim switch (Op.getOpcode()) { 1164249423Sdim case ISD::RETURNADDR: 1165249423Sdim return SDValue(); 1166249423Sdim case ISD::FRAMEADDR: 1167249423Sdim return SDValue(); 1168249423Sdim case ISD::GlobalAddress: 1169249423Sdim return LowerGlobalAddress(Op, DAG); 1170249423Sdim case ISD::INTRINSIC_W_CHAIN: 1171249423Sdim return Op; 1172239310Sdim case ISD::BUILD_VECTOR: 1173239310Sdim case ISD::EXTRACT_SUBVECTOR: 1174239310Sdim return Op; 1175249423Sdim case ISD::CONCAT_VECTORS: 1176249423Sdim return LowerCONCAT_VECTORS(Op, DAG); 1177249423Sdim case ISD::STORE: 1178249423Sdim return LowerSTORE(Op, DAG); 1179249423Sdim case ISD::LOAD: 1180249423Sdim return LowerLOAD(Op, DAG); 1181239310Sdim default: 1182239310Sdim llvm_unreachable("Custom lowering not defined for operation"); 1183239310Sdim } 1184239310Sdim} 1185239310Sdim 1186249423SdimSDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1187249423Sdim if (Op.getValueType() == MVT::i1) 1188249423Sdim return LowerLOADi1(Op, DAG); 1189249423Sdim else 1190249423Sdim return SDValue(); 1191249423Sdim} 1192243830Sdim 1193243830Sdim// v = ld i1* addr 1194243830Sdim// => 1195263508Sdim// v1 = ld i8* addr (-> i16) 1196263508Sdim// v = trunc i16 to i1 1197249423SdimSDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { 1198243830Sdim SDNode *Node = Op.getNode(); 1199243830Sdim LoadSDNode *LD = cast<LoadSDNode>(Node); 1200263508Sdim SDLoc dl(Node); 1201249423Sdim assert(LD->getExtensionType() == ISD::NON_EXTLOAD); 1202243830Sdim assert(Node->getValueType(0) == MVT::i1 && 1203243830Sdim "Custom lowering for i1 load only"); 1204249423Sdim SDValue newLD = 1205263508Sdim DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), 1206249423Sdim LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), 1207249423Sdim LD->isInvariant(), LD->getAlignment()); 1208243830Sdim SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); 1209243830Sdim // The legalizer (the caller) is expecting two values from the legalized 1210243830Sdim // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() 1211243830Sdim // in LegalizeDAG.cpp which also uses MergeValues. 1212249423Sdim SDValue Ops[] = { result, LD->getChain() }; 1213243830Sdim return DAG.getMergeValues(Ops, 2, dl); 1214243830Sdim} 1215243830Sdim 1216249423SdimSDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1217249423Sdim EVT ValVT = Op.getOperand(1).getValueType(); 1218249423Sdim if (ValVT == MVT::i1) 1219249423Sdim return LowerSTOREi1(Op, DAG); 1220249423Sdim else if (ValVT.isVector()) 1221249423Sdim return LowerSTOREVector(Op, DAG); 1222249423Sdim else 1223249423Sdim return SDValue(); 1224249423Sdim} 1225249423Sdim 1226249423SdimSDValue 1227249423SdimNVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { 1228249423Sdim SDNode *N = Op.getNode(); 1229249423Sdim SDValue Val = N->getOperand(1); 1230263508Sdim SDLoc DL(N); 1231249423Sdim EVT ValVT = Val.getValueType(); 1232249423Sdim 1233249423Sdim if (ValVT.isVector()) { 1234249423Sdim // We only handle "native" vector sizes for now, e.g. <4 x double> is not 1235249423Sdim // legal. We can (and should) split that into 2 stores of <2 x double> here 1236249423Sdim // but I'm leaving that as a TODO for now. 1237249423Sdim if (!ValVT.isSimple()) 1238249423Sdim return SDValue(); 1239249423Sdim switch (ValVT.getSimpleVT().SimpleTy) { 1240249423Sdim default: 1241249423Sdim return SDValue(); 1242249423Sdim case MVT::v2i8: 1243249423Sdim case MVT::v2i16: 1244249423Sdim case MVT::v2i32: 1245249423Sdim case MVT::v2i64: 1246249423Sdim case MVT::v2f32: 1247249423Sdim case MVT::v2f64: 1248249423Sdim case MVT::v4i8: 1249249423Sdim case MVT::v4i16: 1250249423Sdim case MVT::v4i32: 1251249423Sdim case MVT::v4f32: 1252249423Sdim // This is a "native" vector type 1253249423Sdim break; 1254249423Sdim } 1255249423Sdim 1256249423Sdim unsigned Opcode = 0; 1257249423Sdim EVT EltVT = ValVT.getVectorElementType(); 1258249423Sdim unsigned NumElts = ValVT.getVectorNumElements(); 1259249423Sdim 1260249423Sdim // Since StoreV2 is a target node, we cannot rely on DAG type legalization. 1261249423Sdim // Therefore, we must ensure the type is legal. For i1 and i8, we set the 1262249423Sdim // stored type to i16 and propogate the "real" type as the memory type. 1263249423Sdim bool NeedExt = false; 1264249423Sdim if (EltVT.getSizeInBits() < 16) 1265249423Sdim NeedExt = true; 1266249423Sdim 1267249423Sdim switch (NumElts) { 1268249423Sdim default: 1269249423Sdim return SDValue(); 1270249423Sdim case 2: 1271249423Sdim Opcode = NVPTXISD::StoreV2; 1272249423Sdim break; 1273249423Sdim case 4: { 1274249423Sdim Opcode = NVPTXISD::StoreV4; 1275249423Sdim break; 1276249423Sdim } 1277249423Sdim } 1278249423Sdim 1279249423Sdim SmallVector<SDValue, 8> Ops; 1280249423Sdim 1281249423Sdim // First is the chain 1282249423Sdim Ops.push_back(N->getOperand(0)); 1283249423Sdim 1284249423Sdim // Then the split values 1285249423Sdim for (unsigned i = 0; i < NumElts; ++i) { 1286249423Sdim SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, 1287249423Sdim DAG.getIntPtrConstant(i)); 1288249423Sdim if (NeedExt) 1289249423Sdim ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); 1290249423Sdim Ops.push_back(ExtVal); 1291249423Sdim } 1292249423Sdim 1293249423Sdim // Then any remaining arguments 1294249423Sdim for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { 1295249423Sdim Ops.push_back(N->getOperand(i)); 1296249423Sdim } 1297249423Sdim 1298249423Sdim MemSDNode *MemSD = cast<MemSDNode>(N); 1299249423Sdim 1300249423Sdim SDValue NewSt = DAG.getMemIntrinsicNode( 1301249423Sdim Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(), 1302249423Sdim MemSD->getMemoryVT(), MemSD->getMemOperand()); 1303249423Sdim 1304249423Sdim //return DCI.CombineTo(N, NewSt, true); 1305249423Sdim return NewSt; 1306249423Sdim } 1307249423Sdim 1308249423Sdim return SDValue(); 1309249423Sdim} 1310249423Sdim 1311243830Sdim// st i1 v, addr 1312243830Sdim// => 1313263508Sdim// v1 = zxt v to i16 1314263508Sdim// st.u8 i16, addr 1315249423SdimSDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { 1316243830Sdim SDNode *Node = Op.getNode(); 1317263508Sdim SDLoc dl(Node); 1318243830Sdim StoreSDNode *ST = cast<StoreSDNode>(Node); 1319243830Sdim SDValue Tmp1 = ST->getChain(); 1320243830Sdim SDValue Tmp2 = ST->getBasePtr(); 1321243830Sdim SDValue Tmp3 = ST->getValue(); 1322243830Sdim assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); 1323243830Sdim unsigned Alignment = ST->getAlignment(); 1324243830Sdim bool isVolatile = ST->isVolatile(); 1325243830Sdim bool isNonTemporal = ST->isNonTemporal(); 1326263508Sdim Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); 1327263508Sdim SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, 1328263508Sdim ST->getPointerInfo(), MVT::i8, isNonTemporal, 1329263508Sdim isVolatile, Alignment); 1330243830Sdim return Result; 1331243830Sdim} 1332243830Sdim 1333249423SdimSDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, 1334249423Sdim int idx, EVT v) const { 1335239310Sdim std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); 1336239310Sdim std::stringstream suffix; 1337239310Sdim suffix << idx; 1338239310Sdim *name += suffix.str(); 1339239310Sdim return DAG.getTargetExternalSymbol(name->c_str(), v); 1340239310Sdim} 1341239310Sdim 1342239310SdimSDValue 1343239310SdimNVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { 1344263508Sdim std::string ParamSym; 1345263508Sdim raw_string_ostream ParamStr(ParamSym); 1346263508Sdim 1347263508Sdim ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; 1348263508Sdim ParamStr.flush(); 1349263508Sdim 1350263508Sdim std::string *SavedStr = 1351263508Sdim nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); 1352263508Sdim return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); 1353239310Sdim} 1354239310Sdim 1355249423SdimSDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { 1356239310Sdim return getExtSymb(DAG, ".HLPPARAM", idx); 1357239310Sdim} 1358239310Sdim 1359239310Sdim// Check to see if the kernel argument is image*_t or sampler_t 1360239310Sdim 1361239310Sdimbool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { 1362249423Sdim static const char *const specialTypes[] = { "struct._image2d_t", 1363249423Sdim "struct._image3d_t", 1364249423Sdim "struct._sampler_t" }; 1365239310Sdim 1366239310Sdim const Type *Ty = arg->getType(); 1367239310Sdim const PointerType *PTy = dyn_cast<PointerType>(Ty); 1368239310Sdim 1369239310Sdim if (!PTy) 1370239310Sdim return false; 1371239310Sdim 1372239310Sdim if (!context) 1373239310Sdim return false; 1374239310Sdim 1375239310Sdim const StructType *STy = dyn_cast<StructType>(PTy->getElementType()); 1376249423Sdim const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : ""; 1377239310Sdim 1378239310Sdim for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i) 1379239310Sdim if (TypeName == specialTypes[i]) 1380239310Sdim return true; 1381239310Sdim 1382239310Sdim return false; 1383239310Sdim} 1384239310Sdim 1385249423SdimSDValue NVPTXTargetLowering::LowerFormalArguments( 1386249423Sdim SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 1387263508Sdim const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, 1388249423Sdim SmallVectorImpl<SDValue> &InVals) const { 1389239310Sdim MachineFunction &MF = DAG.getMachineFunction(); 1390243830Sdim const DataLayout *TD = getDataLayout(); 1391239310Sdim 1392239310Sdim const Function *F = MF.getFunction(); 1393249423Sdim const AttributeSet &PAL = F->getAttributes(); 1394263508Sdim const TargetLowering *TLI = nvTM->getTargetLowering(); 1395239310Sdim 1396239310Sdim SDValue Root = DAG.getRoot(); 1397239310Sdim std::vector<SDValue> OutChains; 1398239310Sdim 1399239310Sdim bool isKernel = llvm::isKernelFunction(*F); 1400239310Sdim bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 1401263508Sdim assert(isABI && "Non-ABI compilation is not supported"); 1402263508Sdim if (!isABI) 1403263508Sdim return Chain; 1404239310Sdim 1405239310Sdim std::vector<Type *> argTypes; 1406239310Sdim std::vector<const Argument *> theArgs; 1407239310Sdim for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); 1408249423Sdim I != E; ++I) { 1409239310Sdim theArgs.push_back(I); 1410239310Sdim argTypes.push_back(I->getType()); 1411239310Sdim } 1412263508Sdim // argTypes.size() (or theArgs.size()) and Ins.size() need not match. 1413263508Sdim // Ins.size() will be larger 1414263508Sdim // * if there is an aggregate argument with multiple fields (each field 1415263508Sdim // showing up separately in Ins) 1416263508Sdim // * if there is a vector argument with more than typical vector-length 1417263508Sdim // elements (generally if more than 4) where each vector element is 1418263508Sdim // individually present in Ins. 1419263508Sdim // So a different index should be used for indexing into Ins. 1420263508Sdim // See similar issue in LowerCall. 1421263508Sdim unsigned InsIdx = 0; 1422239310Sdim 1423239310Sdim int idx = 0; 1424263508Sdim for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { 1425239310Sdim Type *Ty = argTypes[i]; 1426239310Sdim 1427239310Sdim // If the kernel argument is image*_t or sampler_t, convert it to 1428239310Sdim // a i32 constant holding the parameter position. This can later 1429239310Sdim // matched in the AsmPrinter to output the correct mangled name. 1430249423Sdim if (isImageOrSamplerVal( 1431249423Sdim theArgs[i], 1432249423Sdim (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() 1433249423Sdim : 0))) { 1434239310Sdim assert(isKernel && "Only kernels can have image/sampler params"); 1435249423Sdim InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); 1436239310Sdim continue; 1437239310Sdim } 1438239310Sdim 1439239310Sdim if (theArgs[i]->use_empty()) { 1440239310Sdim // argument is dead 1441263508Sdim if (Ty->isAggregateType()) { 1442263508Sdim SmallVector<EVT, 16> vtparts; 1443263508Sdim 1444263508Sdim ComputePTXValueVTs(*this, Ty, vtparts); 1445263508Sdim assert(vtparts.size() > 0 && "empty aggregate type not expected"); 1446263508Sdim for (unsigned parti = 0, parte = vtparts.size(); parti != parte; 1447263508Sdim ++parti) { 1448263508Sdim EVT partVT = vtparts[parti]; 1449263508Sdim InVals.push_back(DAG.getNode(ISD::UNDEF, dl, partVT)); 1450263508Sdim ++InsIdx; 1451249423Sdim } 1452263508Sdim if (vtparts.size() > 0) 1453263508Sdim --InsIdx; 1454263508Sdim continue; 1455249423Sdim } 1456263508Sdim if (Ty->isVectorTy()) { 1457263508Sdim EVT ObjectVT = getValueType(Ty); 1458263508Sdim unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); 1459263508Sdim for (unsigned parti = 0; parti < NumRegs; ++parti) { 1460263508Sdim InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 1461263508Sdim ++InsIdx; 1462263508Sdim } 1463263508Sdim if (NumRegs > 0) 1464263508Sdim --InsIdx; 1465263508Sdim continue; 1466263508Sdim } 1467263508Sdim InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 1468239310Sdim continue; 1469239310Sdim } 1470239310Sdim 1471239310Sdim // In the following cases, assign a node order of "idx+1" 1472263508Sdim // to newly created nodes. The SDNodes for params have to 1473239310Sdim // appear in the same order as their order of appearance 1474239310Sdim // in the original function. "idx+1" holds that order. 1475249423Sdim if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { 1476263508Sdim if (Ty->isAggregateType()) { 1477263508Sdim SmallVector<EVT, 16> vtparts; 1478263508Sdim SmallVector<uint64_t, 16> offsets; 1479263508Sdim 1480263508Sdim // NOTE: Here, we lose the ability to issue vector loads for vectors 1481263508Sdim // that are a part of a struct. This should be investigated in the 1482263508Sdim // future. 1483263508Sdim ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0); 1484263508Sdim assert(vtparts.size() > 0 && "empty aggregate type not expected"); 1485263508Sdim bool aggregateIsPacked = false; 1486263508Sdim if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) 1487263508Sdim aggregateIsPacked = STy->isPacked(); 1488263508Sdim 1489263508Sdim SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); 1490263508Sdim for (unsigned parti = 0, parte = vtparts.size(); parti != parte; 1491263508Sdim ++parti) { 1492263508Sdim EVT partVT = vtparts[parti]; 1493263508Sdim Value *srcValue = Constant::getNullValue( 1494263508Sdim PointerType::get(partVT.getTypeForEVT(F->getContext()), 1495263508Sdim llvm::ADDRESS_SPACE_PARAM)); 1496263508Sdim SDValue srcAddr = 1497263508Sdim DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, 1498263508Sdim DAG.getConstant(offsets[parti], getPointerTy())); 1499263508Sdim unsigned partAlign = 1500263508Sdim aggregateIsPacked ? 1 1501263508Sdim : TD->getABITypeAlignment( 1502263508Sdim partVT.getTypeForEVT(F->getContext())); 1503263508Sdim SDValue p; 1504263508Sdim if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { 1505263508Sdim ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 1506263508Sdim ISD::SEXTLOAD : ISD::ZEXTLOAD; 1507263508Sdim p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, 1508263508Sdim MachinePointerInfo(srcValue), partVT, false, 1509263508Sdim false, partAlign); 1510263508Sdim } else { 1511263508Sdim p = DAG.getLoad(partVT, dl, Root, srcAddr, 1512263508Sdim MachinePointerInfo(srcValue), false, false, false, 1513263508Sdim partAlign); 1514263508Sdim } 1515263508Sdim if (p.getNode()) 1516263508Sdim p.getNode()->setIROrder(idx + 1); 1517263508Sdim InVals.push_back(p); 1518263508Sdim ++InsIdx; 1519263508Sdim } 1520263508Sdim if (vtparts.size() > 0) 1521263508Sdim --InsIdx; 1522263508Sdim continue; 1523263508Sdim } 1524263508Sdim if (Ty->isVectorTy()) { 1525263508Sdim EVT ObjectVT = getValueType(Ty); 1526263508Sdim SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); 1527249423Sdim unsigned NumElts = ObjectVT.getVectorNumElements(); 1528263508Sdim assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && 1529263508Sdim "Vector was not scalarized"); 1530263508Sdim unsigned Ofst = 0; 1531249423Sdim EVT EltVT = ObjectVT.getVectorElementType(); 1532263508Sdim 1533263508Sdim // V1 load 1534263508Sdim // f32 = load ... 1535263508Sdim if (NumElts == 1) { 1536263508Sdim // We only have one element, so just directly load it 1537249423Sdim Value *SrcValue = Constant::getNullValue(PointerType::get( 1538249423Sdim EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 1539263508Sdim SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, 1540263508Sdim DAG.getConstant(Ofst, getPointerTy())); 1541263508Sdim SDValue P = DAG.getLoad( 1542263508Sdim EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, 1543263508Sdim false, true, 1544249423Sdim TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); 1545263508Sdim if (P.getNode()) 1546263508Sdim P.getNode()->setIROrder(idx + 1); 1547263508Sdim 1548263508Sdim if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) 1549263508Sdim P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); 1550263508Sdim InVals.push_back(P); 1551263508Sdim Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext())); 1552263508Sdim ++InsIdx; 1553263508Sdim } else if (NumElts == 2) { 1554263508Sdim // V2 load 1555263508Sdim // f32,f32 = load ... 1556263508Sdim EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); 1557263508Sdim Value *SrcValue = Constant::getNullValue(PointerType::get( 1558263508Sdim VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 1559263508Sdim SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, 1560263508Sdim DAG.getConstant(Ofst, getPointerTy())); 1561263508Sdim SDValue P = DAG.getLoad( 1562263508Sdim VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, 1563263508Sdim false, true, 1564263508Sdim TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); 1565263508Sdim if (P.getNode()) 1566263508Sdim P.getNode()->setIROrder(idx + 1); 1567263508Sdim 1568263508Sdim SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 1569263508Sdim DAG.getIntPtrConstant(0)); 1570263508Sdim SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 1571263508Sdim DAG.getIntPtrConstant(1)); 1572263508Sdim 1573263508Sdim if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { 1574263508Sdim Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0); 1575263508Sdim Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1); 1576263508Sdim } 1577263508Sdim 1578263508Sdim InVals.push_back(Elt0); 1579263508Sdim InVals.push_back(Elt1); 1580263508Sdim Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 1581263508Sdim InsIdx += 2; 1582263508Sdim } else { 1583263508Sdim // V4 loads 1584263508Sdim // We have at least 4 elements (<3 x Ty> expands to 4 elements) and 1585263508Sdim // the 1586263508Sdim // vector will be expanded to a power of 2 elements, so we know we can 1587263508Sdim // always round up to the next multiple of 4 when creating the vector 1588263508Sdim // loads. 1589263508Sdim // e.g. 4 elem => 1 ld.v4 1590263508Sdim // 6 elem => 2 ld.v4 1591263508Sdim // 8 elem => 2 ld.v4 1592263508Sdim // 11 elem => 3 ld.v4 1593263508Sdim unsigned VecSize = 4; 1594263508Sdim if (EltVT.getSizeInBits() == 64) { 1595263508Sdim VecSize = 2; 1596263508Sdim } 1597263508Sdim EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); 1598263508Sdim for (unsigned i = 0; i < NumElts; i += VecSize) { 1599263508Sdim Value *SrcValue = Constant::getNullValue( 1600263508Sdim PointerType::get(VecVT.getTypeForEVT(F->getContext()), 1601263508Sdim llvm::ADDRESS_SPACE_PARAM)); 1602263508Sdim SDValue SrcAddr = 1603263508Sdim DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, 1604263508Sdim DAG.getConstant(Ofst, getPointerTy())); 1605263508Sdim SDValue P = DAG.getLoad( 1606263508Sdim VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, 1607263508Sdim false, true, 1608263508Sdim TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); 1609263508Sdim if (P.getNode()) 1610263508Sdim P.getNode()->setIROrder(idx + 1); 1611263508Sdim 1612263508Sdim for (unsigned j = 0; j < VecSize; ++j) { 1613263508Sdim if (i + j >= NumElts) 1614263508Sdim break; 1615263508Sdim SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 1616263508Sdim DAG.getIntPtrConstant(j)); 1617263508Sdim if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) 1618263508Sdim Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); 1619263508Sdim InVals.push_back(Elt); 1620263508Sdim } 1621263508Sdim Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 1622263508Sdim } 1623263508Sdim InsIdx += NumElts; 1624249423Sdim } 1625263508Sdim 1626263508Sdim if (NumElts > 0) 1627263508Sdim --InsIdx; 1628249423Sdim continue; 1629249423Sdim } 1630239310Sdim // A plain scalar. 1631263508Sdim EVT ObjectVT = getValueType(Ty); 1632263508Sdim // If ABI, load from the param symbol 1633263508Sdim SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); 1634263508Sdim Value *srcValue = Constant::getNullValue(PointerType::get( 1635263508Sdim ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 1636263508Sdim SDValue p; 1637263508Sdim if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { 1638263508Sdim ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 1639263508Sdim ISD::SEXTLOAD : ISD::ZEXTLOAD; 1640263508Sdim p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, 1641263508Sdim MachinePointerInfo(srcValue), ObjectVT, false, false, 1642263508Sdim TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); 1643249423Sdim } else { 1644263508Sdim p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, 1645263508Sdim MachinePointerInfo(srcValue), false, false, false, 1646263508Sdim TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); 1647239310Sdim } 1648263508Sdim if (p.getNode()) 1649263508Sdim p.getNode()->setIROrder(idx + 1); 1650263508Sdim InVals.push_back(p); 1651239310Sdim continue; 1652239310Sdim } 1653239310Sdim 1654239310Sdim // Param has ByVal attribute 1655263508Sdim // Return MoveParam(param symbol). 1656263508Sdim // Ideally, the param symbol can be returned directly, 1657263508Sdim // but when SDNode builder decides to use it in a CopyToReg(), 1658263508Sdim // machine instruction fails because TargetExternalSymbol 1659263508Sdim // (not lowered) is target dependent, and CopyToReg assumes 1660263508Sdim // the source is lowered. 1661263508Sdim EVT ObjectVT = getValueType(Ty); 1662263508Sdim assert(ObjectVT == Ins[InsIdx].VT && 1663263508Sdim "Ins type did not match function type"); 1664263508Sdim SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); 1665263508Sdim SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); 1666263508Sdim if (p.getNode()) 1667263508Sdim p.getNode()->setIROrder(idx + 1); 1668263508Sdim if (isKernel) 1669263508Sdim InVals.push_back(p); 1670263508Sdim else { 1671263508Sdim SDValue p2 = DAG.getNode( 1672263508Sdim ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, 1673263508Sdim DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); 1674263508Sdim InVals.push_back(p2); 1675239310Sdim } 1676239310Sdim } 1677239310Sdim 1678239310Sdim // Clang will check explicit VarArg and issue error if any. However, Clang 1679239310Sdim // will let code with 1680263508Sdim // implicit var arg like f() pass. See bug 617733. 1681239310Sdim // We treat this case as if the arg list is empty. 1682263508Sdim // if (F.isVarArg()) { 1683239310Sdim // assert(0 && "VarArg not supported yet!"); 1684239310Sdim //} 1685239310Sdim 1686239310Sdim if (!OutChains.empty()) 1687249423Sdim DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], 1688249423Sdim OutChains.size())); 1689239310Sdim 1690239310Sdim return Chain; 1691239310Sdim} 1692239310Sdim 1693239310Sdim 1694263508SdimSDValue 1695263508SdimNVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 1696263508Sdim bool isVarArg, 1697263508Sdim const SmallVectorImpl<ISD::OutputArg> &Outs, 1698263508Sdim const SmallVectorImpl<SDValue> &OutVals, 1699263508Sdim SDLoc dl, SelectionDAG &DAG) const { 1700263508Sdim MachineFunction &MF = DAG.getMachineFunction(); 1701263508Sdim const Function *F = MF.getFunction(); 1702263508Sdim Type *RetTy = F->getReturnType(); 1703263508Sdim const DataLayout *TD = getDataLayout(); 1704263508Sdim 1705239310Sdim bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 1706263508Sdim assert(isABI && "Non-ABI compilation is not supported"); 1707263508Sdim if (!isABI) 1708263508Sdim return Chain; 1709239310Sdim 1710263508Sdim if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) { 1711263508Sdim // If we have a vector type, the OutVals array will be the scalarized 1712263508Sdim // components and we have combine them into 1 or more vector stores. 1713263508Sdim unsigned NumElts = VTy->getNumElements(); 1714263508Sdim assert(NumElts == Outs.size() && "Bad scalarization of return value"); 1715263508Sdim 1716263508Sdim // const_cast can be removed in later LLVM versions 1717263508Sdim EVT EltVT = getValueType(RetTy).getVectorElementType(); 1718263508Sdim bool NeedExtend = false; 1719263508Sdim if (EltVT.getSizeInBits() < 16) 1720263508Sdim NeedExtend = true; 1721263508Sdim 1722263508Sdim // V1 store 1723263508Sdim if (NumElts == 1) { 1724263508Sdim SDValue StoreVal = OutVals[0]; 1725263508Sdim // We only have one element, so just directly store it 1726263508Sdim if (NeedExtend) 1727263508Sdim StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 1728263508Sdim SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal }; 1729263508Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, 1730263508Sdim DAG.getVTList(MVT::Other), &Ops[0], 3, 1731263508Sdim EltVT, MachinePointerInfo()); 1732263508Sdim 1733263508Sdim } else if (NumElts == 2) { 1734263508Sdim // V2 store 1735263508Sdim SDValue StoreVal0 = OutVals[0]; 1736263508Sdim SDValue StoreVal1 = OutVals[1]; 1737263508Sdim 1738263508Sdim if (NeedExtend) { 1739263508Sdim StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0); 1740263508Sdim StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1); 1741263508Sdim } 1742263508Sdim 1743263508Sdim SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0, 1744263508Sdim StoreVal1 }; 1745263508Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, 1746263508Sdim DAG.getVTList(MVT::Other), &Ops[0], 4, 1747263508Sdim EltVT, MachinePointerInfo()); 1748263508Sdim } else { 1749263508Sdim // V4 stores 1750263508Sdim // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the 1751263508Sdim // vector will be expanded to a power of 2 elements, so we know we can 1752263508Sdim // always round up to the next multiple of 4 when creating the vector 1753263508Sdim // stores. 1754263508Sdim // e.g. 4 elem => 1 st.v4 1755263508Sdim // 6 elem => 2 st.v4 1756263508Sdim // 8 elem => 2 st.v4 1757263508Sdim // 11 elem => 3 st.v4 1758263508Sdim 1759263508Sdim unsigned VecSize = 4; 1760263508Sdim if (OutVals[0].getValueType().getSizeInBits() == 64) 1761263508Sdim VecSize = 2; 1762263508Sdim 1763263508Sdim unsigned Offset = 0; 1764263508Sdim 1765263508Sdim EVT VecVT = 1766263508Sdim EVT::getVectorVT(F->getContext(), OutVals[0].getValueType(), VecSize); 1767263508Sdim unsigned PerStoreOffset = 1768263508Sdim TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 1769263508Sdim 1770263508Sdim for (unsigned i = 0; i < NumElts; i += VecSize) { 1771263508Sdim // Get values 1772263508Sdim SDValue StoreVal; 1773263508Sdim SmallVector<SDValue, 8> Ops; 1774263508Sdim Ops.push_back(Chain); 1775263508Sdim Ops.push_back(DAG.getConstant(Offset, MVT::i32)); 1776263508Sdim unsigned Opc = NVPTXISD::StoreRetvalV2; 1777263508Sdim EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType(); 1778263508Sdim 1779263508Sdim StoreVal = OutVals[i]; 1780263508Sdim if (NeedExtend) 1781263508Sdim StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 1782263508Sdim Ops.push_back(StoreVal); 1783263508Sdim 1784263508Sdim if (i + 1 < NumElts) { 1785263508Sdim StoreVal = OutVals[i + 1]; 1786263508Sdim if (NeedExtend) 1787263508Sdim StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 1788263508Sdim } else { 1789263508Sdim StoreVal = DAG.getUNDEF(ExtendedVT); 1790263508Sdim } 1791263508Sdim Ops.push_back(StoreVal); 1792263508Sdim 1793263508Sdim if (VecSize == 4) { 1794263508Sdim Opc = NVPTXISD::StoreRetvalV4; 1795263508Sdim if (i + 2 < NumElts) { 1796263508Sdim StoreVal = OutVals[i + 2]; 1797263508Sdim if (NeedExtend) 1798263508Sdim StoreVal = 1799263508Sdim DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 1800263508Sdim } else { 1801263508Sdim StoreVal = DAG.getUNDEF(ExtendedVT); 1802263508Sdim } 1803263508Sdim Ops.push_back(StoreVal); 1804263508Sdim 1805263508Sdim if (i + 3 < NumElts) { 1806263508Sdim StoreVal = OutVals[i + 3]; 1807263508Sdim if (NeedExtend) 1808263508Sdim StoreVal = 1809263508Sdim DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 1810263508Sdim } else { 1811263508Sdim StoreVal = DAG.getUNDEF(ExtendedVT); 1812263508Sdim } 1813263508Sdim Ops.push_back(StoreVal); 1814263508Sdim } 1815263508Sdim 1816263508Sdim // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); 1817263508Sdim Chain = 1818263508Sdim DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), &Ops[0], 1819263508Sdim Ops.size(), EltVT, MachinePointerInfo()); 1820263508Sdim Offset += PerStoreOffset; 1821263508Sdim } 1822239310Sdim } 1823263508Sdim } else { 1824263508Sdim SmallVector<EVT, 16> ValVTs; 1825263508Sdim // const_cast is necessary since we are still using an LLVM version from 1826263508Sdim // before the type system re-write. 1827263508Sdim ComputePTXValueVTs(*this, RetTy, ValVTs); 1828263508Sdim assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); 1829263508Sdim 1830263508Sdim unsigned SizeSoFar = 0; 1831263508Sdim for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 1832263508Sdim SDValue theVal = OutVals[i]; 1833263508Sdim EVT TheValType = theVal.getValueType(); 1834263508Sdim unsigned numElems = 1; 1835263508Sdim if (TheValType.isVector()) 1836263508Sdim numElems = TheValType.getVectorNumElements(); 1837263508Sdim for (unsigned j = 0, je = numElems; j != je; ++j) { 1838263508Sdim SDValue TmpVal = theVal; 1839263508Sdim if (TheValType.isVector()) 1840263508Sdim TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 1841263508Sdim TheValType.getVectorElementType(), TmpVal, 1842263508Sdim DAG.getIntPtrConstant(j)); 1843263508Sdim EVT TheStoreType = ValVTs[i]; 1844263508Sdim if (RetTy->isIntegerTy() && 1845263508Sdim TD->getTypeAllocSizeInBits(RetTy) < 32) { 1846263508Sdim // The following zero-extension is for integer types only, and 1847263508Sdim // specifically not for aggregates. 1848263508Sdim TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); 1849263508Sdim TheStoreType = MVT::i32; 1850263508Sdim } 1851263508Sdim else if (TmpVal.getValueType().getSizeInBits() < 16) 1852263508Sdim TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); 1853263508Sdim 1854263508Sdim SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal }; 1855263508Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, 1856263508Sdim DAG.getVTList(MVT::Other), &Ops[0], 1857263508Sdim 3, TheStoreType, 1858263508Sdim MachinePointerInfo()); 1859263508Sdim if(TheValType.isVector()) 1860263508Sdim SizeSoFar += 1861263508Sdim TheStoreType.getVectorElementType().getStoreSizeInBits() / 8; 1862263508Sdim else 1863263508Sdim SizeSoFar += TheStoreType.getStoreSizeInBits()/8; 1864263508Sdim } 1865263508Sdim } 1866239310Sdim } 1867239310Sdim 1868239310Sdim return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); 1869239310Sdim} 1870239310Sdim 1871263508Sdim 1872249423Sdimvoid NVPTXTargetLowering::LowerAsmOperandForConstraint( 1873249423Sdim SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 1874249423Sdim SelectionDAG &DAG) const { 1875239310Sdim if (Constraint.length() > 1) 1876239310Sdim return; 1877239310Sdim else 1878239310Sdim TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 1879239310Sdim} 1880239310Sdim 1881239310Sdim// NVPTX suuport vector of legal types of any length in Intrinsics because the 1882239310Sdim// NVPTX specific type legalizer 1883239310Sdim// will legalize them to the PTX supported length. 1884249423Sdimbool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { 1885239310Sdim if (isTypeLegal(VT)) 1886239310Sdim return true; 1887239310Sdim if (VT.isVector()) { 1888239310Sdim MVT eVT = VT.getVectorElementType(); 1889239310Sdim if (isTypeLegal(eVT)) 1890239310Sdim return true; 1891239310Sdim } 1892239310Sdim return false; 1893239310Sdim} 1894239310Sdim 1895239310Sdim// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as 1896239310Sdim// TgtMemIntrinsic 1897239310Sdim// because we need the information that is only available in the "Value" type 1898239310Sdim// of destination 1899239310Sdim// pointer. In particular, the address space information. 1900249423Sdimbool NVPTXTargetLowering::getTgtMemIntrinsic( 1901249423Sdim IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { 1902239310Sdim switch (Intrinsic) { 1903239310Sdim default: 1904239310Sdim return false; 1905239310Sdim 1906239310Sdim case Intrinsic::nvvm_atomic_load_add_f32: 1907239310Sdim Info.opc = ISD::INTRINSIC_W_CHAIN; 1908239310Sdim Info.memVT = MVT::f32; 1909239310Sdim Info.ptrVal = I.getArgOperand(0); 1910239310Sdim Info.offset = 0; 1911239310Sdim Info.vol = 0; 1912239310Sdim Info.readMem = true; 1913239310Sdim Info.writeMem = true; 1914239310Sdim Info.align = 0; 1915239310Sdim return true; 1916239310Sdim 1917239310Sdim case Intrinsic::nvvm_atomic_load_inc_32: 1918239310Sdim case Intrinsic::nvvm_atomic_load_dec_32: 1919239310Sdim Info.opc = ISD::INTRINSIC_W_CHAIN; 1920239310Sdim Info.memVT = MVT::i32; 1921239310Sdim Info.ptrVal = I.getArgOperand(0); 1922239310Sdim Info.offset = 0; 1923239310Sdim Info.vol = 0; 1924239310Sdim Info.readMem = true; 1925239310Sdim Info.writeMem = true; 1926239310Sdim Info.align = 0; 1927239310Sdim return true; 1928239310Sdim 1929239310Sdim case Intrinsic::nvvm_ldu_global_i: 1930239310Sdim case Intrinsic::nvvm_ldu_global_f: 1931239310Sdim case Intrinsic::nvvm_ldu_global_p: 1932239310Sdim 1933239310Sdim Info.opc = ISD::INTRINSIC_W_CHAIN; 1934239310Sdim if (Intrinsic == Intrinsic::nvvm_ldu_global_i) 1935263508Sdim Info.memVT = getValueType(I.getType()); 1936239310Sdim else if (Intrinsic == Intrinsic::nvvm_ldu_global_p) 1937263508Sdim Info.memVT = getValueType(I.getType()); 1938239310Sdim else 1939239310Sdim Info.memVT = MVT::f32; 1940239310Sdim Info.ptrVal = I.getArgOperand(0); 1941239310Sdim Info.offset = 0; 1942239310Sdim Info.vol = 0; 1943239310Sdim Info.readMem = true; 1944239310Sdim Info.writeMem = false; 1945239310Sdim Info.align = 0; 1946239310Sdim return true; 1947239310Sdim 1948239310Sdim } 1949239310Sdim return false; 1950239310Sdim} 1951239310Sdim 1952239310Sdim/// isLegalAddressingMode - Return true if the addressing mode represented 1953239310Sdim/// by AM is legal for this target, for a load/store of the specified type. 1954239310Sdim/// Used to guide target specific optimizations, like loop strength reduction 1955239310Sdim/// (LoopStrengthReduce.cpp) and memory optimization for address mode 1956239310Sdim/// (CodeGenPrepare.cpp) 1957249423Sdimbool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, 1958249423Sdim Type *Ty) const { 1959239310Sdim 1960239310Sdim // AddrMode - This represents an addressing mode of: 1961239310Sdim // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg 1962239310Sdim // 1963239310Sdim // The legal address modes are 1964239310Sdim // - [avar] 1965239310Sdim // - [areg] 1966239310Sdim // - [areg+immoff] 1967239310Sdim // - [immAddr] 1968239310Sdim 1969239310Sdim if (AM.BaseGV) { 1970239310Sdim if (AM.BaseOffs || AM.HasBaseReg || AM.Scale) 1971239310Sdim return false; 1972239310Sdim return true; 1973239310Sdim } 1974239310Sdim 1975239310Sdim switch (AM.Scale) { 1976249423Sdim case 0: // "r", "r+i" or "i" is allowed 1977239310Sdim break; 1978239310Sdim case 1: 1979249423Sdim if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. 1980239310Sdim return false; 1981239310Sdim // Otherwise we have r+i. 1982239310Sdim break; 1983239310Sdim default: 1984239310Sdim // No scale > 1 is allowed 1985239310Sdim return false; 1986239310Sdim } 1987239310Sdim return true; 1988239310Sdim} 1989239310Sdim 1990239310Sdim//===----------------------------------------------------------------------===// 1991239310Sdim// NVPTX Inline Assembly Support 1992239310Sdim//===----------------------------------------------------------------------===// 1993239310Sdim 1994239310Sdim/// getConstraintType - Given a constraint letter, return the type of 1995239310Sdim/// constraint it is for this target. 1996239310SdimNVPTXTargetLowering::ConstraintType 1997239310SdimNVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { 1998239310Sdim if (Constraint.size() == 1) { 1999239310Sdim switch (Constraint[0]) { 2000239310Sdim default: 2001239310Sdim break; 2002239310Sdim case 'r': 2003239310Sdim case 'h': 2004239310Sdim case 'c': 2005239310Sdim case 'l': 2006239310Sdim case 'f': 2007239310Sdim case 'd': 2008239310Sdim case '0': 2009239310Sdim case 'N': 2010239310Sdim return C_RegisterClass; 2011239310Sdim } 2012239310Sdim } 2013239310Sdim return TargetLowering::getConstraintType(Constraint); 2014239310Sdim} 2015239310Sdim 2016249423Sdimstd::pair<unsigned, const TargetRegisterClass *> 2017239310SdimNVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 2018263508Sdim MVT VT) const { 2019239310Sdim if (Constraint.size() == 1) { 2020239310Sdim switch (Constraint[0]) { 2021239310Sdim case 'c': 2022263508Sdim return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 2023239310Sdim case 'h': 2024239310Sdim return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 2025239310Sdim case 'r': 2026239310Sdim return std::make_pair(0U, &NVPTX::Int32RegsRegClass); 2027239310Sdim case 'l': 2028239310Sdim case 'N': 2029239310Sdim return std::make_pair(0U, &NVPTX::Int64RegsRegClass); 2030239310Sdim case 'f': 2031239310Sdim return std::make_pair(0U, &NVPTX::Float32RegsRegClass); 2032239310Sdim case 'd': 2033239310Sdim return std::make_pair(0U, &NVPTX::Float64RegsRegClass); 2034239310Sdim } 2035239310Sdim } 2036239310Sdim return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 2037239310Sdim} 2038239310Sdim 2039239310Sdim/// getFunctionAlignment - Return the Log2 alignment of this function. 2040239310Sdimunsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { 2041239310Sdim return 4; 2042239310Sdim} 2043249423Sdim 2044249423Sdim/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. 2045249423Sdimstatic void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, 2046249423Sdim SmallVectorImpl<SDValue> &Results) { 2047249423Sdim EVT ResVT = N->getValueType(0); 2048263508Sdim SDLoc DL(N); 2049249423Sdim 2050249423Sdim assert(ResVT.isVector() && "Vector load must have vector type"); 2051249423Sdim 2052249423Sdim // We only handle "native" vector sizes for now, e.g. <4 x double> is not 2053249423Sdim // legal. We can (and should) split that into 2 loads of <2 x double> here 2054249423Sdim // but I'm leaving that as a TODO for now. 2055249423Sdim assert(ResVT.isSimple() && "Can only handle simple types"); 2056249423Sdim switch (ResVT.getSimpleVT().SimpleTy) { 2057249423Sdim default: 2058249423Sdim return; 2059249423Sdim case MVT::v2i8: 2060249423Sdim case MVT::v2i16: 2061249423Sdim case MVT::v2i32: 2062249423Sdim case MVT::v2i64: 2063249423Sdim case MVT::v2f32: 2064249423Sdim case MVT::v2f64: 2065249423Sdim case MVT::v4i8: 2066249423Sdim case MVT::v4i16: 2067249423Sdim case MVT::v4i32: 2068249423Sdim case MVT::v4f32: 2069249423Sdim // This is a "native" vector type 2070249423Sdim break; 2071249423Sdim } 2072249423Sdim 2073249423Sdim EVT EltVT = ResVT.getVectorElementType(); 2074249423Sdim unsigned NumElts = ResVT.getVectorNumElements(); 2075249423Sdim 2076249423Sdim // Since LoadV2 is a target node, we cannot rely on DAG type legalization. 2077249423Sdim // Therefore, we must ensure the type is legal. For i1 and i8, we set the 2078249423Sdim // loaded type to i16 and propogate the "real" type as the memory type. 2079249423Sdim bool NeedTrunc = false; 2080249423Sdim if (EltVT.getSizeInBits() < 16) { 2081249423Sdim EltVT = MVT::i16; 2082249423Sdim NeedTrunc = true; 2083249423Sdim } 2084249423Sdim 2085249423Sdim unsigned Opcode = 0; 2086249423Sdim SDVTList LdResVTs; 2087249423Sdim 2088249423Sdim switch (NumElts) { 2089249423Sdim default: 2090249423Sdim return; 2091249423Sdim case 2: 2092249423Sdim Opcode = NVPTXISD::LoadV2; 2093249423Sdim LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); 2094249423Sdim break; 2095249423Sdim case 4: { 2096249423Sdim Opcode = NVPTXISD::LoadV4; 2097249423Sdim EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; 2098249423Sdim LdResVTs = DAG.getVTList(ListVTs, 5); 2099249423Sdim break; 2100249423Sdim } 2101249423Sdim } 2102249423Sdim 2103249423Sdim SmallVector<SDValue, 8> OtherOps; 2104249423Sdim 2105249423Sdim // Copy regular operands 2106249423Sdim for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 2107249423Sdim OtherOps.push_back(N->getOperand(i)); 2108249423Sdim 2109249423Sdim LoadSDNode *LD = cast<LoadSDNode>(N); 2110249423Sdim 2111249423Sdim // The select routine does not have access to the LoadSDNode instance, so 2112249423Sdim // pass along the extension information 2113249423Sdim OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); 2114249423Sdim 2115249423Sdim SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], 2116249423Sdim OtherOps.size(), LD->getMemoryVT(), 2117249423Sdim LD->getMemOperand()); 2118249423Sdim 2119249423Sdim SmallVector<SDValue, 4> ScalarRes; 2120249423Sdim 2121249423Sdim for (unsigned i = 0; i < NumElts; ++i) { 2122249423Sdim SDValue Res = NewLD.getValue(i); 2123249423Sdim if (NeedTrunc) 2124249423Sdim Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); 2125249423Sdim ScalarRes.push_back(Res); 2126249423Sdim } 2127249423Sdim 2128249423Sdim SDValue LoadChain = NewLD.getValue(NumElts); 2129249423Sdim 2130249423Sdim SDValue BuildVec = 2131249423Sdim DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); 2132249423Sdim 2133249423Sdim Results.push_back(BuildVec); 2134249423Sdim Results.push_back(LoadChain); 2135249423Sdim} 2136249423Sdim 2137249423Sdimstatic void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, 2138249423Sdim SmallVectorImpl<SDValue> &Results) { 2139249423Sdim SDValue Chain = N->getOperand(0); 2140249423Sdim SDValue Intrin = N->getOperand(1); 2141263508Sdim SDLoc DL(N); 2142249423Sdim 2143249423Sdim // Get the intrinsic ID 2144249423Sdim unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); 2145249423Sdim switch (IntrinNo) { 2146249423Sdim default: 2147249423Sdim return; 2148249423Sdim case Intrinsic::nvvm_ldg_global_i: 2149249423Sdim case Intrinsic::nvvm_ldg_global_f: 2150249423Sdim case Intrinsic::nvvm_ldg_global_p: 2151249423Sdim case Intrinsic::nvvm_ldu_global_i: 2152249423Sdim case Intrinsic::nvvm_ldu_global_f: 2153249423Sdim case Intrinsic::nvvm_ldu_global_p: { 2154249423Sdim EVT ResVT = N->getValueType(0); 2155249423Sdim 2156249423Sdim if (ResVT.isVector()) { 2157249423Sdim // Vector LDG/LDU 2158249423Sdim 2159249423Sdim unsigned NumElts = ResVT.getVectorNumElements(); 2160249423Sdim EVT EltVT = ResVT.getVectorElementType(); 2161249423Sdim 2162263508Sdim // Since LDU/LDG are target nodes, we cannot rely on DAG type 2163263508Sdim // legalization. 2164249423Sdim // Therefore, we must ensure the type is legal. For i1 and i8, we set the 2165249423Sdim // loaded type to i16 and propogate the "real" type as the memory type. 2166249423Sdim bool NeedTrunc = false; 2167249423Sdim if (EltVT.getSizeInBits() < 16) { 2168249423Sdim EltVT = MVT::i16; 2169249423Sdim NeedTrunc = true; 2170249423Sdim } 2171249423Sdim 2172249423Sdim unsigned Opcode = 0; 2173249423Sdim SDVTList LdResVTs; 2174249423Sdim 2175249423Sdim switch (NumElts) { 2176249423Sdim default: 2177249423Sdim return; 2178249423Sdim case 2: 2179249423Sdim switch (IntrinNo) { 2180249423Sdim default: 2181249423Sdim return; 2182249423Sdim case Intrinsic::nvvm_ldg_global_i: 2183249423Sdim case Intrinsic::nvvm_ldg_global_f: 2184249423Sdim case Intrinsic::nvvm_ldg_global_p: 2185249423Sdim Opcode = NVPTXISD::LDGV2; 2186249423Sdim break; 2187249423Sdim case Intrinsic::nvvm_ldu_global_i: 2188249423Sdim case Intrinsic::nvvm_ldu_global_f: 2189249423Sdim case Intrinsic::nvvm_ldu_global_p: 2190249423Sdim Opcode = NVPTXISD::LDUV2; 2191249423Sdim break; 2192249423Sdim } 2193249423Sdim LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); 2194249423Sdim break; 2195249423Sdim case 4: { 2196249423Sdim switch (IntrinNo) { 2197249423Sdim default: 2198249423Sdim return; 2199249423Sdim case Intrinsic::nvvm_ldg_global_i: 2200249423Sdim case Intrinsic::nvvm_ldg_global_f: 2201249423Sdim case Intrinsic::nvvm_ldg_global_p: 2202249423Sdim Opcode = NVPTXISD::LDGV4; 2203249423Sdim break; 2204249423Sdim case Intrinsic::nvvm_ldu_global_i: 2205249423Sdim case Intrinsic::nvvm_ldu_global_f: 2206249423Sdim case Intrinsic::nvvm_ldu_global_p: 2207249423Sdim Opcode = NVPTXISD::LDUV4; 2208249423Sdim break; 2209249423Sdim } 2210249423Sdim EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; 2211249423Sdim LdResVTs = DAG.getVTList(ListVTs, 5); 2212249423Sdim break; 2213249423Sdim } 2214249423Sdim } 2215249423Sdim 2216249423Sdim SmallVector<SDValue, 8> OtherOps; 2217249423Sdim 2218249423Sdim // Copy regular operands 2219249423Sdim 2220249423Sdim OtherOps.push_back(Chain); // Chain 2221249423Sdim // Skip operand 1 (intrinsic ID) 2222263508Sdim // Others 2223249423Sdim for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) 2224249423Sdim OtherOps.push_back(N->getOperand(i)); 2225249423Sdim 2226249423Sdim MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); 2227249423Sdim 2228249423Sdim SDValue NewLD = DAG.getMemIntrinsicNode( 2229249423Sdim Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(), 2230249423Sdim MemSD->getMemoryVT(), MemSD->getMemOperand()); 2231249423Sdim 2232249423Sdim SmallVector<SDValue, 4> ScalarRes; 2233249423Sdim 2234249423Sdim for (unsigned i = 0; i < NumElts; ++i) { 2235249423Sdim SDValue Res = NewLD.getValue(i); 2236249423Sdim if (NeedTrunc) 2237249423Sdim Res = 2238249423Sdim DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); 2239249423Sdim ScalarRes.push_back(Res); 2240249423Sdim } 2241249423Sdim 2242249423Sdim SDValue LoadChain = NewLD.getValue(NumElts); 2243249423Sdim 2244249423Sdim SDValue BuildVec = 2245249423Sdim DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); 2246249423Sdim 2247249423Sdim Results.push_back(BuildVec); 2248249423Sdim Results.push_back(LoadChain); 2249249423Sdim } else { 2250249423Sdim // i8 LDG/LDU 2251249423Sdim assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && 2252249423Sdim "Custom handling of non-i8 ldu/ldg?"); 2253249423Sdim 2254249423Sdim // Just copy all operands as-is 2255249423Sdim SmallVector<SDValue, 4> Ops; 2256249423Sdim for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 2257249423Sdim Ops.push_back(N->getOperand(i)); 2258249423Sdim 2259249423Sdim // Force output to i16 2260249423Sdim SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); 2261249423Sdim 2262249423Sdim MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); 2263249423Sdim 2264249423Sdim // We make sure the memory type is i8, which will be used during isel 2265249423Sdim // to select the proper instruction. 2266249423Sdim SDValue NewLD = 2267249423Sdim DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0], 2268249423Sdim Ops.size(), MVT::i8, MemSD->getMemOperand()); 2269249423Sdim 2270263508Sdim Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, 2271263508Sdim NewLD.getValue(0))); 2272249423Sdim Results.push_back(NewLD.getValue(1)); 2273249423Sdim } 2274249423Sdim } 2275249423Sdim } 2276249423Sdim} 2277249423Sdim 2278249423Sdimvoid NVPTXTargetLowering::ReplaceNodeResults( 2279249423Sdim SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 2280249423Sdim switch (N->getOpcode()) { 2281249423Sdim default: 2282249423Sdim report_fatal_error("Unhandled custom legalization"); 2283249423Sdim case ISD::LOAD: 2284249423Sdim ReplaceLoadVector(N, DAG, Results); 2285249423Sdim return; 2286249423Sdim case ISD::INTRINSIC_W_CHAIN: 2287249423Sdim ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); 2288249423Sdim return; 2289249423Sdim } 2290249423Sdim} 2291263508Sdim 2292263508Sdim// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file. 2293263508Sdimvoid NVPTXSection::anchor() {} 2294263508Sdim 2295263508SdimNVPTXTargetObjectFile::~NVPTXTargetObjectFile() { 2296263508Sdim delete TextSection; 2297263508Sdim delete DataSection; 2298263508Sdim delete BSSSection; 2299263508Sdim delete ReadOnlySection; 2300263508Sdim 2301263508Sdim delete StaticCtorSection; 2302263508Sdim delete StaticDtorSection; 2303263508Sdim delete LSDASection; 2304263508Sdim delete EHFrameSection; 2305263508Sdim delete DwarfAbbrevSection; 2306263508Sdim delete DwarfInfoSection; 2307263508Sdim delete DwarfLineSection; 2308263508Sdim delete DwarfFrameSection; 2309263508Sdim delete DwarfPubTypesSection; 2310263508Sdim delete DwarfDebugInlineSection; 2311263508Sdim delete DwarfStrSection; 2312263508Sdim delete DwarfLocSection; 2313263508Sdim delete DwarfARangesSection; 2314263508Sdim delete DwarfRangesSection; 2315263508Sdim delete DwarfMacroInfoSection; 2316263508Sdim} 2317