1239310Sdim// 2239310Sdim// The LLVM Compiler Infrastructure 3239310Sdim// 4239310Sdim// This file is distributed under the University of Illinois Open Source 5239310Sdim// License. See LICENSE.TXT for details. 6239310Sdim// 7239310Sdim//===----------------------------------------------------------------------===// 8239310Sdim// 9239310Sdim// This file defines the interfaces that NVPTX uses to lower LLVM code into a 10239310Sdim// selection DAG. 11239310Sdim// 12239310Sdim//===----------------------------------------------------------------------===// 13239310Sdim 14252723Sdim#include "NVPTXISelLowering.h" 15239310Sdim#include "NVPTX.h" 16239310Sdim#include "NVPTXTargetMachine.h" 17239310Sdim#include "NVPTXTargetObjectFile.h" 18239310Sdim#include "NVPTXUtilities.h" 19239310Sdim#include "llvm/CodeGen/Analysis.h" 20239310Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 21239310Sdim#include "llvm/CodeGen/MachineFunction.h" 22239310Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 23239310Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 24252723Sdim#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 25252723Sdim#include "llvm/IR/DerivedTypes.h" 26252723Sdim#include "llvm/IR/Function.h" 27252723Sdim#include "llvm/IR/GlobalValue.h" 28252723Sdim#include "llvm/IR/IntrinsicInst.h" 29252723Sdim#include "llvm/IR/Intrinsics.h" 30252723Sdim#include "llvm/IR/Module.h" 31252723Sdim#include "llvm/MC/MCSectionELF.h" 32239310Sdim#include "llvm/Support/CallSite.h" 33252723Sdim#include "llvm/Support/CommandLine.h" 34252723Sdim#include "llvm/Support/Debug.h" 35239310Sdim#include "llvm/Support/ErrorHandling.h" 36239310Sdim#include "llvm/Support/raw_ostream.h" 37239310Sdim#include <sstream> 38239310Sdim 39239310Sdim#undef DEBUG_TYPE 40239310Sdim#define DEBUG_TYPE "nvptx-lower" 41239310Sdim 42239310Sdimusing namespace llvm; 43239310Sdim 44239310Sdimstatic unsigned int uniqueCallSite = 0; 45239310Sdim 46252723Sdimstatic cl::opt<bool> sched4reg( 47252723Sdim "nvptx-sched4reg", 
48252723Sdim cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); 49239310Sdim 50252723Sdimstatic bool IsPTXVectorType(MVT VT) { 51252723Sdim switch (VT.SimpleTy) { 52252723Sdim default: 53252723Sdim return false; 54263509Sdim case MVT::v2i1: 55263509Sdim case MVT::v4i1: 56252723Sdim case MVT::v2i8: 57252723Sdim case MVT::v4i8: 58252723Sdim case MVT::v2i16: 59252723Sdim case MVT::v4i16: 60252723Sdim case MVT::v2i32: 61252723Sdim case MVT::v4i32: 62252723Sdim case MVT::v2i64: 63252723Sdim case MVT::v2f32: 64252723Sdim case MVT::v4f32: 65252723Sdim case MVT::v2f64: 66252723Sdim return true; 67252723Sdim } 68252723Sdim} 69239310Sdim 70263509Sdim/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive 71263509Sdim/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors 72263509Sdim/// into their primitive components. 73263509Sdim/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the 74263509Sdim/// same number of types as the Ins/Outs arrays in LowerFormalArguments, 75263509Sdim/// LowerCall, and LowerReturn. 
76263509Sdimstatic void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, 77263509Sdim SmallVectorImpl<EVT> &ValueVTs, 78263509Sdim SmallVectorImpl<uint64_t> *Offsets = 0, 79263509Sdim uint64_t StartingOffset = 0) { 80263509Sdim SmallVector<EVT, 16> TempVTs; 81263509Sdim SmallVector<uint64_t, 16> TempOffsets; 82263509Sdim 83263509Sdim ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); 84263509Sdim for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { 85263509Sdim EVT VT = TempVTs[i]; 86263509Sdim uint64_t Off = TempOffsets[i]; 87263509Sdim if (VT.isVector()) 88263509Sdim for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) { 89263509Sdim ValueVTs.push_back(VT.getVectorElementType()); 90263509Sdim if (Offsets) 91263509Sdim Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize()); 92263509Sdim } 93263509Sdim else { 94263509Sdim ValueVTs.push_back(VT); 95263509Sdim if (Offsets) 96263509Sdim Offsets->push_back(Off); 97263509Sdim } 98263509Sdim } 99263509Sdim} 100263509Sdim 101239310Sdim// NVPTXTargetLowering Constructor. 102239310SdimNVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) 103252723Sdim : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), 104252723Sdim nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { 105239310Sdim 106239310Sdim // always lower memset, memcpy, and memmove intrinsics to load/store 107239310Sdim // instructions, rather 108239310Sdim // then generating calls to memset, mempcy or memmove. 109252723Sdim MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; 110252723Sdim MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; 111252723Sdim MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; 112239310Sdim 113239310Sdim setBooleanContents(ZeroOrNegativeOneBooleanContent); 114239310Sdim 115239310Sdim // Jump is Expensive. Don't create extra control flow for 'and', 'or' 116239310Sdim // condition branches. 
117239310Sdim setJumpIsExpensive(true); 118239310Sdim 119239310Sdim // By default, use the Source scheduling 120239310Sdim if (sched4reg) 121239310Sdim setSchedulingPreference(Sched::RegPressure); 122239310Sdim else 123239310Sdim setSchedulingPreference(Sched::Source); 124239310Sdim 125239310Sdim addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); 126239310Sdim addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); 127239310Sdim addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); 128239310Sdim addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); 129239310Sdim addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); 130239310Sdim addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); 131239310Sdim 132239310Sdim // Operations not directly supported by NVPTX. 133252723Sdim setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); 134252723Sdim setOperationAction(ISD::BR_CC, MVT::f32, Expand); 135252723Sdim setOperationAction(ISD::BR_CC, MVT::f64, Expand); 136252723Sdim setOperationAction(ISD::BR_CC, MVT::i1, Expand); 137252723Sdim setOperationAction(ISD::BR_CC, MVT::i8, Expand); 138252723Sdim setOperationAction(ISD::BR_CC, MVT::i16, Expand); 139252723Sdim setOperationAction(ISD::BR_CC, MVT::i32, Expand); 140252723Sdim setOperationAction(ISD::BR_CC, MVT::i64, Expand); 141263509Sdim // Some SIGN_EXTEND_INREG can be done using cvt instruction. 142263509Sdim // For others we will expand to a SHL/SRA pair. 
143263509Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); 144263509Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); 145263509Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); 146263509Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); 147252723Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 148239310Sdim 149239310Sdim if (nvptxSubtarget.hasROT64()) { 150252723Sdim setOperationAction(ISD::ROTL, MVT::i64, Legal); 151252723Sdim setOperationAction(ISD::ROTR, MVT::i64, Legal); 152252723Sdim } else { 153252723Sdim setOperationAction(ISD::ROTL, MVT::i64, Expand); 154252723Sdim setOperationAction(ISD::ROTR, MVT::i64, Expand); 155239310Sdim } 156239310Sdim if (nvptxSubtarget.hasROT32()) { 157252723Sdim setOperationAction(ISD::ROTL, MVT::i32, Legal); 158252723Sdim setOperationAction(ISD::ROTR, MVT::i32, Legal); 159252723Sdim } else { 160252723Sdim setOperationAction(ISD::ROTL, MVT::i32, Expand); 161252723Sdim setOperationAction(ISD::ROTR, MVT::i32, Expand); 162239310Sdim } 163239310Sdim 164252723Sdim setOperationAction(ISD::ROTL, MVT::i16, Expand); 165252723Sdim setOperationAction(ISD::ROTR, MVT::i16, Expand); 166252723Sdim setOperationAction(ISD::ROTL, MVT::i8, Expand); 167252723Sdim setOperationAction(ISD::ROTR, MVT::i8, Expand); 168252723Sdim setOperationAction(ISD::BSWAP, MVT::i16, Expand); 169252723Sdim setOperationAction(ISD::BSWAP, MVT::i32, Expand); 170252723Sdim setOperationAction(ISD::BSWAP, MVT::i64, Expand); 171239310Sdim 172239310Sdim // Indirect branch is not supported. 173239310Sdim // This also disables Jump Table creation. 
174252723Sdim setOperationAction(ISD::BR_JT, MVT::Other, Expand); 175252723Sdim setOperationAction(ISD::BRIND, MVT::Other, Expand); 176239310Sdim 177252723Sdim setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 178252723Sdim setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 179239310Sdim 180239310Sdim // We want to legalize constant related memmove and memcopy 181239310Sdim // intrinsics. 182239310Sdim setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 183239310Sdim 184239310Sdim // Turn FP extload into load/fextend 185239310Sdim setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 186239310Sdim // Turn FP truncstore into trunc + store. 187239310Sdim setTruncStoreAction(MVT::f64, MVT::f32, Expand); 188239310Sdim 189239310Sdim // PTX does not support load / store predicate registers 190245431Sdim setOperationAction(ISD::LOAD, MVT::i1, Custom); 191245431Sdim setOperationAction(ISD::STORE, MVT::i1, Custom); 192245431Sdim 193239310Sdim setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 194239310Sdim setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); 195239310Sdim setTruncStoreAction(MVT::i64, MVT::i1, Expand); 196239310Sdim setTruncStoreAction(MVT::i32, MVT::i1, Expand); 197239310Sdim setTruncStoreAction(MVT::i16, MVT::i1, Expand); 198239310Sdim setTruncStoreAction(MVT::i8, MVT::i1, Expand); 199239310Sdim 200239310Sdim // This is legal in NVPTX 201252723Sdim setOperationAction(ISD::ConstantFP, MVT::f64, Legal); 202252723Sdim setOperationAction(ISD::ConstantFP, MVT::f32, Legal); 203239310Sdim 204239310Sdim // TRAP can be lowered to PTX trap 205252723Sdim setOperationAction(ISD::TRAP, MVT::Other, Legal); 206239310Sdim 207263509Sdim setOperationAction(ISD::ADDC, MVT::i64, Expand); 208263509Sdim setOperationAction(ISD::ADDE, MVT::i64, Expand); 209263509Sdim 210252723Sdim // Register custom handling for vector loads/stores 211252723Sdim for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; 212252723Sdim ++i) { 213252723Sdim MVT 
VT = (MVT::SimpleValueType) i; 214252723Sdim if (IsPTXVectorType(VT)) { 215252723Sdim setOperationAction(ISD::LOAD, VT, Custom); 216252723Sdim setOperationAction(ISD::STORE, VT, Custom); 217252723Sdim setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); 218252723Sdim } 219252723Sdim } 220239310Sdim 221263509Sdim // Custom handling for i8 intrinsics 222263509Sdim setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); 223263509Sdim 224263509Sdim setOperationAction(ISD::CTLZ, MVT::i16, Legal); 225263509Sdim setOperationAction(ISD::CTLZ, MVT::i32, Legal); 226263509Sdim setOperationAction(ISD::CTLZ, MVT::i64, Legal); 227263509Sdim setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal); 228263509Sdim setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal); 229263509Sdim setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal); 230263509Sdim setOperationAction(ISD::CTTZ, MVT::i16, Expand); 231263509Sdim setOperationAction(ISD::CTTZ, MVT::i32, Expand); 232263509Sdim setOperationAction(ISD::CTTZ, MVT::i64, Expand); 233263509Sdim setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); 234263509Sdim setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); 235263509Sdim setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); 236263509Sdim setOperationAction(ISD::CTPOP, MVT::i16, Legal); 237263509Sdim setOperationAction(ISD::CTPOP, MVT::i32, Legal); 238263509Sdim setOperationAction(ISD::CTPOP, MVT::i64, Legal); 239263509Sdim 240239310Sdim // Now deduce the information based on the above mentioned 241239310Sdim // actions 242239310Sdim computeRegisterProperties(); 243239310Sdim} 244239310Sdim 245239310Sdimconst char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { 246239310Sdim switch (Opcode) { 247252723Sdim default: 248252723Sdim return 0; 249252723Sdim case NVPTXISD::CALL: 250252723Sdim return "NVPTXISD::CALL"; 251252723Sdim case NVPTXISD::RET_FLAG: 252252723Sdim return "NVPTXISD::RET_FLAG"; 253252723Sdim case NVPTXISD::Wrapper: 
254252723Sdim return "NVPTXISD::Wrapper"; 255252723Sdim case NVPTXISD::DeclareParam: 256252723Sdim return "NVPTXISD::DeclareParam"; 257239310Sdim case NVPTXISD::DeclareScalarParam: 258239310Sdim return "NVPTXISD::DeclareScalarParam"; 259252723Sdim case NVPTXISD::DeclareRet: 260252723Sdim return "NVPTXISD::DeclareRet"; 261252723Sdim case NVPTXISD::DeclareRetParam: 262252723Sdim return "NVPTXISD::DeclareRetParam"; 263252723Sdim case NVPTXISD::PrintCall: 264252723Sdim return "NVPTXISD::PrintCall"; 265252723Sdim case NVPTXISD::LoadParam: 266252723Sdim return "NVPTXISD::LoadParam"; 267263509Sdim case NVPTXISD::LoadParamV2: 268263509Sdim return "NVPTXISD::LoadParamV2"; 269263509Sdim case NVPTXISD::LoadParamV4: 270263509Sdim return "NVPTXISD::LoadParamV4"; 271252723Sdim case NVPTXISD::StoreParam: 272252723Sdim return "NVPTXISD::StoreParam"; 273263509Sdim case NVPTXISD::StoreParamV2: 274263509Sdim return "NVPTXISD::StoreParamV2"; 275263509Sdim case NVPTXISD::StoreParamV4: 276263509Sdim return "NVPTXISD::StoreParamV4"; 277252723Sdim case NVPTXISD::StoreParamS32: 278252723Sdim return "NVPTXISD::StoreParamS32"; 279252723Sdim case NVPTXISD::StoreParamU32: 280252723Sdim return "NVPTXISD::StoreParamU32"; 281252723Sdim case NVPTXISD::CallArgBegin: 282252723Sdim return "NVPTXISD::CallArgBegin"; 283252723Sdim case NVPTXISD::CallArg: 284252723Sdim return "NVPTXISD::CallArg"; 285252723Sdim case NVPTXISD::LastCallArg: 286252723Sdim return "NVPTXISD::LastCallArg"; 287252723Sdim case NVPTXISD::CallArgEnd: 288252723Sdim return "NVPTXISD::CallArgEnd"; 289252723Sdim case NVPTXISD::CallVoid: 290252723Sdim return "NVPTXISD::CallVoid"; 291252723Sdim case NVPTXISD::CallVal: 292252723Sdim return "NVPTXISD::CallVal"; 293252723Sdim case NVPTXISD::CallSymbol: 294252723Sdim return "NVPTXISD::CallSymbol"; 295252723Sdim case NVPTXISD::Prototype: 296252723Sdim return "NVPTXISD::Prototype"; 297252723Sdim case NVPTXISD::MoveParam: 298252723Sdim return "NVPTXISD::MoveParam"; 299252723Sdim case 
NVPTXISD::StoreRetval: 300252723Sdim return "NVPTXISD::StoreRetval"; 301263509Sdim case NVPTXISD::StoreRetvalV2: 302263509Sdim return "NVPTXISD::StoreRetvalV2"; 303263509Sdim case NVPTXISD::StoreRetvalV4: 304263509Sdim return "NVPTXISD::StoreRetvalV4"; 305252723Sdim case NVPTXISD::PseudoUseParam: 306252723Sdim return "NVPTXISD::PseudoUseParam"; 307252723Sdim case NVPTXISD::RETURN: 308252723Sdim return "NVPTXISD::RETURN"; 309252723Sdim case NVPTXISD::CallSeqBegin: 310252723Sdim return "NVPTXISD::CallSeqBegin"; 311252723Sdim case NVPTXISD::CallSeqEnd: 312252723Sdim return "NVPTXISD::CallSeqEnd"; 313263509Sdim case NVPTXISD::CallPrototype: 314263509Sdim return "NVPTXISD::CallPrototype"; 315252723Sdim case NVPTXISD::LoadV2: 316252723Sdim return "NVPTXISD::LoadV2"; 317252723Sdim case NVPTXISD::LoadV4: 318252723Sdim return "NVPTXISD::LoadV4"; 319252723Sdim case NVPTXISD::LDGV2: 320252723Sdim return "NVPTXISD::LDGV2"; 321252723Sdim case NVPTXISD::LDGV4: 322252723Sdim return "NVPTXISD::LDGV4"; 323252723Sdim case NVPTXISD::LDUV2: 324252723Sdim return "NVPTXISD::LDUV2"; 325252723Sdim case NVPTXISD::LDUV4: 326252723Sdim return "NVPTXISD::LDUV4"; 327252723Sdim case NVPTXISD::StoreV2: 328252723Sdim return "NVPTXISD::StoreV2"; 329252723Sdim case NVPTXISD::StoreV4: 330252723Sdim return "NVPTXISD::StoreV4"; 331239310Sdim } 332239310Sdim} 333239310Sdim 334252723Sdimbool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const { 335252723Sdim return VT == MVT::i1; 336252723Sdim} 337239310Sdim 338239310SdimSDValue 339239310SdimNVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { 340263509Sdim SDLoc dl(Op); 341239310Sdim const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 342239310Sdim Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); 343239310Sdim return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); 344239310Sdim} 345239310Sdim 346263509Sdimstd::string 347263509SdimNVPTXTargetLowering::getPrototype(Type *retTy, const 
ArgListTy &Args, 348263509Sdim const SmallVectorImpl<ISD::OutputArg> &Outs, 349263509Sdim unsigned retAlignment, 350263509Sdim const ImmutableCallSite *CS) const { 351239310Sdim 352239310Sdim bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 353263509Sdim assert(isABI && "Non-ABI compilation is not supported"); 354263509Sdim if (!isABI) 355263509Sdim return ""; 356239310Sdim 357239310Sdim std::stringstream O; 358239310Sdim O << "prototype_" << uniqueCallSite << " : .callprototype "; 359239310Sdim 360263509Sdim if (retTy->getTypeID() == Type::VoidTyID) { 361239310Sdim O << "()"; 362263509Sdim } else { 363239310Sdim O << "("; 364263509Sdim if (retTy->isPrimitiveType() || retTy->isIntegerTy()) { 365263509Sdim unsigned size = 0; 366263509Sdim if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { 367263509Sdim size = ITy->getBitWidth(); 368263509Sdim if (size < 32) 369263509Sdim size = 32; 370263509Sdim } else { 371263509Sdim assert(retTy->isFloatingPointTy() && 372263509Sdim "Floating point type expected here"); 373263509Sdim size = retTy->getPrimitiveSizeInBits(); 374263509Sdim } 375239310Sdim 376263509Sdim O << ".param .b" << size << " _"; 377263509Sdim } else if (isa<PointerType>(retTy)) { 378263509Sdim O << ".param .b" << getPointerTy().getSizeInBits() << " _"; 379263509Sdim } else { 380263509Sdim if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { 381263509Sdim SmallVector<EVT, 16> vtparts; 382263509Sdim ComputeValueVTs(*this, retTy, vtparts); 383263509Sdim unsigned totalsz = 0; 384263509Sdim for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 385263509Sdim unsigned elems = 1; 386263509Sdim EVT elemtype = vtparts[i]; 387263509Sdim if (vtparts[i].isVector()) { 388263509Sdim elems = vtparts[i].getVectorNumElements(); 389263509Sdim elemtype = vtparts[i].getVectorElementType(); 390239310Sdim } 391263509Sdim // TODO: no need to loop 392263509Sdim for (unsigned j = 0, je = elems; j != je; ++j) { 393263509Sdim unsigned sz = 
elemtype.getSizeInBits(); 394263509Sdim if (elemtype.isInteger() && (sz < 8)) 395263509Sdim sz = 8; 396263509Sdim totalsz += sz / 8; 397263509Sdim } 398239310Sdim } 399263509Sdim O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; 400263509Sdim } else { 401263509Sdim assert(false && "Unknown return type"); 402239310Sdim } 403239310Sdim } 404239310Sdim O << ") "; 405239310Sdim } 406239310Sdim O << "_ ("; 407239310Sdim 408239310Sdim bool first = true; 409239310Sdim MVT thePointerTy = getPointerTy(); 410239310Sdim 411263509Sdim unsigned OIdx = 0; 412263509Sdim for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { 413263509Sdim Type *Ty = Args[i].Ty; 414239310Sdim if (!first) { 415239310Sdim O << ", "; 416239310Sdim } 417239310Sdim first = false; 418239310Sdim 419263509Sdim if (Outs[OIdx].Flags.isByVal() == false) { 420263509Sdim if (Ty->isAggregateType() || Ty->isVectorTy()) { 421263509Sdim unsigned align = 0; 422263509Sdim const CallInst *CallI = cast<CallInst>(CS->getInstruction()); 423263509Sdim const DataLayout *TD = getDataLayout(); 424263509Sdim // +1 because index 0 is reserved for return type alignment 425263509Sdim if (!llvm::getAlign(*CallI, i + 1, align)) 426263509Sdim align = TD->getABITypeAlignment(Ty); 427263509Sdim unsigned sz = TD->getTypeAllocSize(Ty); 428263509Sdim O << ".param .align " << align << " .b8 "; 429263509Sdim O << "_"; 430263509Sdim O << "[" << sz << "]"; 431263509Sdim // update the index for Outs 432263509Sdim SmallVector<EVT, 16> vtparts; 433263509Sdim ComputeValueVTs(*this, Ty, vtparts); 434263509Sdim if (unsigned len = vtparts.size()) 435263509Sdim OIdx += len - 1; 436263509Sdim continue; 437263509Sdim } 438263509Sdim // i8 types in IR will be i16 types in SDAG 439263509Sdim assert((getValueType(Ty) == Outs[OIdx].VT || 440263509Sdim (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && 441263509Sdim "type mismatch between callee prototype and arguments"); 442263509Sdim // scalar type 443239310Sdim 
unsigned sz = 0; 444239310Sdim if (isa<IntegerType>(Ty)) { 445239310Sdim sz = cast<IntegerType>(Ty)->getBitWidth(); 446252723Sdim if (sz < 32) 447252723Sdim sz = 32; 448252723Sdim } else if (isa<PointerType>(Ty)) 449239310Sdim sz = thePointerTy.getSizeInBits(); 450239310Sdim else 451239310Sdim sz = Ty->getPrimitiveSizeInBits(); 452263509Sdim O << ".param .b" << sz << " "; 453239310Sdim O << "_"; 454239310Sdim continue; 455239310Sdim } 456239310Sdim const PointerType *PTy = dyn_cast<PointerType>(Ty); 457252723Sdim assert(PTy && "Param with byval attribute should be a pointer type"); 458239310Sdim Type *ETy = PTy->getElementType(); 459239310Sdim 460263509Sdim unsigned align = Outs[OIdx].Flags.getByValAlign(); 461263509Sdim unsigned sz = getDataLayout()->getTypeAllocSize(ETy); 462263509Sdim O << ".param .align " << align << " .b8 "; 463263509Sdim O << "_"; 464263509Sdim O << "[" << sz << "]"; 465263509Sdim } 466263509Sdim O << ");"; 467263509Sdim return O.str(); 468263509Sdim} 469239310Sdim 470263509Sdimunsigned 471263509SdimNVPTXTargetLowering::getArgumentAlignment(SDValue Callee, 472263509Sdim const ImmutableCallSite *CS, 473263509Sdim Type *Ty, 474263509Sdim unsigned Idx) const { 475263509Sdim const DataLayout *TD = getDataLayout(); 476263509Sdim unsigned Align = 0; 477263509Sdim const Value *DirectCallee = CS->getCalledFunction(); 478263509Sdim 479263509Sdim if (!DirectCallee) { 480263509Sdim // We don't have a direct function symbol, but that may be because of 481263509Sdim // constant cast instructions in the call. 
482263509Sdim const Instruction *CalleeI = CS->getInstruction(); 483263509Sdim assert(CalleeI && "Call target is not a function or derived value?"); 484263509Sdim 485263509Sdim // With bitcast'd call targets, the instruction will be the call 486263509Sdim if (isa<CallInst>(CalleeI)) { 487263509Sdim // Check if we have call alignment metadata 488263509Sdim if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align)) 489263509Sdim return Align; 490263509Sdim 491263509Sdim const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue(); 492263509Sdim // Ignore any bitcast instructions 493263509Sdim while(isa<ConstantExpr>(CalleeV)) { 494263509Sdim const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); 495263509Sdim if (!CE->isCast()) 496263509Sdim break; 497263509Sdim // Look through the bitcast 498263509Sdim CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0); 499239310Sdim } 500263509Sdim 501263509Sdim // We have now looked past all of the bitcasts. Do we finally have a 502263509Sdim // Function? 
503263509Sdim if (isa<Function>(CalleeV)) 504263509Sdim DirectCallee = CalleeV; 505239310Sdim } 506239310Sdim } 507263509Sdim 508263509Sdim // Check for function alignment information if we found that the 509263509Sdim // ultimate target is a Function 510263509Sdim if (DirectCallee) 511263509Sdim if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align)) 512263509Sdim return Align; 513263509Sdim 514263509Sdim // Call is indirect or alignment information is not available, fall back to 515263509Sdim // the ABI type alignment 516263509Sdim return TD->getABITypeAlignment(Ty); 517239310Sdim} 518239310Sdim 519252723SdimSDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 520252723Sdim SmallVectorImpl<SDValue> &InVals) const { 521252723Sdim SelectionDAG &DAG = CLI.DAG; 522263509Sdim SDLoc dl = CLI.DL; 523263509Sdim SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 524263509Sdim SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 525263509Sdim SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 526252723Sdim SDValue Chain = CLI.Chain; 527252723Sdim SDValue Callee = CLI.Callee; 528252723Sdim bool &isTailCall = CLI.IsTailCall; 529252723Sdim ArgListTy &Args = CLI.Args; 530252723Sdim Type *retTy = CLI.RetTy; 531252723Sdim ImmutableCallSite *CS = CLI.CS; 532239310Sdim 533239310Sdim bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 534263509Sdim assert(isABI && "Non-ABI compilation is not supported"); 535263509Sdim if (!isABI) 536263509Sdim return Chain; 537263509Sdim const DataLayout *TD = getDataLayout(); 538263509Sdim MachineFunction &MF = DAG.getMachineFunction(); 539263509Sdim const Function *F = MF.getFunction(); 540239310Sdim 541239310Sdim SDValue tempChain = Chain; 542252723Sdim Chain = 543263509Sdim DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), 544263509Sdim dl); 545239310Sdim SDValue InFlag = Chain.getValue(1); 546239310Sdim 547239310Sdim unsigned paramCount = 0; 548263509Sdim // Args.size() and Outs.size() need not 
match. 549263509Sdim // Outs.size() will be larger 550263509Sdim // * if there is an aggregate argument with multiple fields (each field 551263509Sdim // showing up separately in Outs) 552263509Sdim // * if there is a vector argument with more than typical vector-length 553263509Sdim // elements (generally if more than 4) where each vector element is 554263509Sdim // individually present in Outs. 555263509Sdim // So a different index should be used for indexing into Outs/OutVals. 556263509Sdim // See similar issue in LowerFormalArguments. 557263509Sdim unsigned OIdx = 0; 558239310Sdim // Declare the .params or .reg need to pass values 559239310Sdim // to the function 560263509Sdim for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { 561263509Sdim EVT VT = Outs[OIdx].VT; 562263509Sdim Type *Ty = Args[i].Ty; 563239310Sdim 564263509Sdim if (Outs[OIdx].Flags.isByVal() == false) { 565263509Sdim if (Ty->isAggregateType()) { 566263509Sdim // aggregate 567263509Sdim SmallVector<EVT, 16> vtparts; 568263509Sdim ComputeValueVTs(*this, Ty, vtparts); 569263509Sdim 570263509Sdim unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); 571263509Sdim // declare .param .align <align> .b8 .param<n>[<size>]; 572263509Sdim unsigned sz = TD->getTypeAllocSize(Ty); 573263509Sdim SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 574263509Sdim SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), 575263509Sdim DAG.getConstant(paramCount, MVT::i32), 576263509Sdim DAG.getConstant(sz, MVT::i32), InFlag }; 577263509Sdim Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, 578263509Sdim DeclareParamOps, 5); 579263509Sdim InFlag = Chain.getValue(1); 580263509Sdim unsigned curOffset = 0; 581263509Sdim for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { 582263509Sdim unsigned elems = 1; 583263509Sdim EVT elemtype = vtparts[j]; 584263509Sdim if (vtparts[j].isVector()) { 585263509Sdim elems = vtparts[j].getVectorNumElements(); 
586263509Sdim elemtype = vtparts[j].getVectorElementType(); 587263509Sdim } 588263509Sdim for (unsigned k = 0, ke = elems; k != ke; ++k) { 589263509Sdim unsigned sz = elemtype.getSizeInBits(); 590263509Sdim if (elemtype.isInteger() && (sz < 8)) 591263509Sdim sz = 8; 592263509Sdim SDValue StVal = OutVals[OIdx]; 593263509Sdim if (elemtype.getSizeInBits() < 16) { 594263509Sdim StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); 595263509Sdim } 596263509Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 597263509Sdim SDValue CopyParamOps[] = { Chain, 598263509Sdim DAG.getConstant(paramCount, MVT::i32), 599263509Sdim DAG.getConstant(curOffset, MVT::i32), 600263509Sdim StVal, InFlag }; 601263509Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, 602263509Sdim CopyParamVTs, &CopyParamOps[0], 5, 603263509Sdim elemtype, MachinePointerInfo()); 604263509Sdim InFlag = Chain.getValue(1); 605263509Sdim curOffset += sz / 8; 606263509Sdim ++OIdx; 607263509Sdim } 608263509Sdim } 609263509Sdim if (vtparts.size() > 0) 610263509Sdim --OIdx; 611263509Sdim ++paramCount; 612263509Sdim continue; 613263509Sdim } 614263509Sdim if (Ty->isVectorTy()) { 615263509Sdim EVT ObjectVT = getValueType(Ty); 616263509Sdim unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); 617263509Sdim // declare .param .align <align> .b8 .param<n>[<size>]; 618263509Sdim unsigned sz = TD->getTypeAllocSize(Ty); 619263509Sdim SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 620263509Sdim SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), 621263509Sdim DAG.getConstant(paramCount, MVT::i32), 622263509Sdim DAG.getConstant(sz, MVT::i32), InFlag }; 623263509Sdim Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, 624263509Sdim DeclareParamOps, 5); 625263509Sdim InFlag = Chain.getValue(1); 626263509Sdim unsigned NumElts = ObjectVT.getVectorNumElements(); 627263509Sdim EVT EltVT = ObjectVT.getVectorElementType(); 628263509Sdim 
EVT MemVT = EltVT; 629263509Sdim bool NeedExtend = false; 630263509Sdim if (EltVT.getSizeInBits() < 16) { 631263509Sdim NeedExtend = true; 632263509Sdim EltVT = MVT::i16; 633263509Sdim } 634263509Sdim 635263509Sdim // V1 store 636263509Sdim if (NumElts == 1) { 637263509Sdim SDValue Elt = OutVals[OIdx++]; 638263509Sdim if (NeedExtend) 639263509Sdim Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt); 640263509Sdim 641263509Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 642263509Sdim SDValue CopyParamOps[] = { Chain, 643263509Sdim DAG.getConstant(paramCount, MVT::i32), 644263509Sdim DAG.getConstant(0, MVT::i32), Elt, 645263509Sdim InFlag }; 646263509Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, 647263509Sdim CopyParamVTs, &CopyParamOps[0], 5, 648263509Sdim MemVT, MachinePointerInfo()); 649263509Sdim InFlag = Chain.getValue(1); 650263509Sdim } else if (NumElts == 2) { 651263509Sdim SDValue Elt0 = OutVals[OIdx++]; 652263509Sdim SDValue Elt1 = OutVals[OIdx++]; 653263509Sdim if (NeedExtend) { 654263509Sdim Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0); 655263509Sdim Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1); 656263509Sdim } 657263509Sdim 658263509Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 659263509Sdim SDValue CopyParamOps[] = { Chain, 660263509Sdim DAG.getConstant(paramCount, MVT::i32), 661263509Sdim DAG.getConstant(0, MVT::i32), Elt0, Elt1, 662263509Sdim InFlag }; 663263509Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, 664263509Sdim CopyParamVTs, &CopyParamOps[0], 6, 665263509Sdim MemVT, MachinePointerInfo()); 666263509Sdim InFlag = Chain.getValue(1); 667263509Sdim } else { 668263509Sdim unsigned curOffset = 0; 669263509Sdim // V4 stores 670263509Sdim // We have at least 4 elements (<3 x Ty> expands to 4 elements) and 671263509Sdim // the 672263509Sdim // vector will be expanded to a power of 2 elements, so we know we can 673263509Sdim // always round up to the 
next multiple of 4 when creating the vector 674263509Sdim // stores. 675263509Sdim // e.g. 4 elem => 1 st.v4 676263509Sdim // 6 elem => 2 st.v4 677263509Sdim // 8 elem => 2 st.v4 678263509Sdim // 11 elem => 3 st.v4 679263509Sdim unsigned VecSize = 4; 680263509Sdim if (EltVT.getSizeInBits() == 64) 681263509Sdim VecSize = 2; 682263509Sdim 683263509Sdim // This is potentially only part of a vector, so assume all elements 684263509Sdim // are packed together. 685263509Sdim unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize; 686263509Sdim 687263509Sdim for (unsigned i = 0; i < NumElts; i += VecSize) { 688263509Sdim // Get values 689263509Sdim SDValue StoreVal; 690263509Sdim SmallVector<SDValue, 8> Ops; 691263509Sdim Ops.push_back(Chain); 692263509Sdim Ops.push_back(DAG.getConstant(paramCount, MVT::i32)); 693263509Sdim Ops.push_back(DAG.getConstant(curOffset, MVT::i32)); 694263509Sdim 695263509Sdim unsigned Opc = NVPTXISD::StoreParamV2; 696263509Sdim 697263509Sdim StoreVal = OutVals[OIdx++]; 698263509Sdim if (NeedExtend) 699263509Sdim StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 700263509Sdim Ops.push_back(StoreVal); 701263509Sdim 702263509Sdim if (i + 1 < NumElts) { 703263509Sdim StoreVal = OutVals[OIdx++]; 704263509Sdim if (NeedExtend) 705263509Sdim StoreVal = 706263509Sdim DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 707263509Sdim } else { 708263509Sdim StoreVal = DAG.getUNDEF(EltVT); 709263509Sdim } 710263509Sdim Ops.push_back(StoreVal); 711263509Sdim 712263509Sdim if (VecSize == 4) { 713263509Sdim Opc = NVPTXISD::StoreParamV4; 714263509Sdim if (i + 2 < NumElts) { 715263509Sdim StoreVal = OutVals[OIdx++]; 716263509Sdim if (NeedExtend) 717263509Sdim StoreVal = 718263509Sdim DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 719263509Sdim } else { 720263509Sdim StoreVal = DAG.getUNDEF(EltVT); 721263509Sdim } 722263509Sdim Ops.push_back(StoreVal); 723263509Sdim 724263509Sdim if (i + 3 < NumElts) { 725263509Sdim StoreVal 
= OutVals[OIdx++]; 726263509Sdim if (NeedExtend) 727263509Sdim StoreVal = 728263509Sdim DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 729263509Sdim } else { 730263509Sdim StoreVal = DAG.getUNDEF(EltVT); 731263509Sdim } 732263509Sdim Ops.push_back(StoreVal); 733263509Sdim } 734263509Sdim 735263509Sdim Ops.push_back(InFlag); 736263509Sdim 737263509Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 738263509Sdim Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0], 739263509Sdim Ops.size(), MemVT, 740263509Sdim MachinePointerInfo()); 741263509Sdim InFlag = Chain.getValue(1); 742263509Sdim curOffset += PerStoreOffset; 743263509Sdim } 744263509Sdim } 745263509Sdim ++paramCount; 746263509Sdim --OIdx; 747263509Sdim continue; 748263509Sdim } 749239310Sdim // Plain scalar 750239310Sdim // for ABI, declare .param .b<size> .param<n>; 751239310Sdim unsigned sz = VT.getSizeInBits(); 752263509Sdim bool needExtend = false; 753263509Sdim if (VT.isInteger()) { 754263509Sdim if (sz < 16) 755263509Sdim needExtend = true; 756263509Sdim if (sz < 32) 757263509Sdim sz = 32; 758263509Sdim } 759239310Sdim SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 760239310Sdim SDValue DeclareParamOps[] = { Chain, 761239310Sdim DAG.getConstant(paramCount, MVT::i32), 762239310Sdim DAG.getConstant(sz, MVT::i32), 763263509Sdim DAG.getConstant(0, MVT::i32), InFlag }; 764239310Sdim Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, 765239310Sdim DeclareParamOps, 5); 766239310Sdim InFlag = Chain.getValue(1); 767263509Sdim SDValue OutV = OutVals[OIdx]; 768263509Sdim if (needExtend) { 769263509Sdim // zext/sext i1 to i16 770263509Sdim unsigned opc = ISD::ZERO_EXTEND; 771263509Sdim if (Outs[OIdx].Flags.isSExt()) 772263509Sdim opc = ISD::SIGN_EXTEND; 773263509Sdim OutV = DAG.getNode(opc, dl, MVT::i16, OutV); 774263509Sdim } 775239310Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 776239310Sdim SDValue CopyParamOps[] = { 
Chain, DAG.getConstant(paramCount, MVT::i32), 777263509Sdim DAG.getConstant(0, MVT::i32), OutV, InFlag }; 778239310Sdim 779239310Sdim unsigned opcode = NVPTXISD::StoreParam; 780263509Sdim if (Outs[OIdx].Flags.isZExt()) 781263509Sdim opcode = NVPTXISD::StoreParamU32; 782263509Sdim else if (Outs[OIdx].Flags.isSExt()) 783263509Sdim opcode = NVPTXISD::StoreParamS32; 784263509Sdim Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, 5, 785263509Sdim VT, MachinePointerInfo()); 786239310Sdim 787239310Sdim InFlag = Chain.getValue(1); 788239310Sdim ++paramCount; 789239310Sdim continue; 790239310Sdim } 791239310Sdim // struct or vector 792239310Sdim SmallVector<EVT, 16> vtparts; 793239310Sdim const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); 794252723Sdim assert(PTy && "Type of a byval parameter should be pointer"); 795239310Sdim ComputeValueVTs(*this, PTy->getElementType(), vtparts); 796239310Sdim 797263509Sdim // declare .param .align <align> .b8 .param<n>[<size>]; 798263509Sdim unsigned sz = Outs[OIdx].Flags.getByValSize(); 799263509Sdim SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 800263509Sdim // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, 801263509Sdim // so we don't need to worry about natural alignment or not. 802263509Sdim // See TargetLowering::LowerCallTo(). 
803263509Sdim SDValue DeclareParamOps[] = { 804263509Sdim Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32), 805263509Sdim DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), 806263509Sdim InFlag 807263509Sdim }; 808263509Sdim Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, 809263509Sdim DeclareParamOps, 5); 810263509Sdim InFlag = Chain.getValue(1); 811239310Sdim unsigned curOffset = 0; 812252723Sdim for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { 813239310Sdim unsigned elems = 1; 814239310Sdim EVT elemtype = vtparts[j]; 815239310Sdim if (vtparts[j].isVector()) { 816239310Sdim elems = vtparts[j].getVectorNumElements(); 817239310Sdim elemtype = vtparts[j].getVectorElementType(); 818239310Sdim } 819252723Sdim for (unsigned k = 0, ke = elems; k != ke; ++k) { 820239310Sdim unsigned sz = elemtype.getSizeInBits(); 821263509Sdim if (elemtype.isInteger() && (sz < 8)) 822263509Sdim sz = 8; 823252723Sdim SDValue srcAddr = 824263509Sdim DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], 825252723Sdim DAG.getConstant(curOffset, getPointerTy())); 826263509Sdim SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, 827263509Sdim MachinePointerInfo(), false, false, false, 828263509Sdim 0); 829263509Sdim if (elemtype.getSizeInBits() < 16) { 830263509Sdim theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); 831263509Sdim } 832239310Sdim SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 833239310Sdim SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), 834263509Sdim DAG.getConstant(curOffset, MVT::i32), theVal, 835239310Sdim InFlag }; 836263509Sdim Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, 837263509Sdim CopyParamOps, 5, elemtype, 838263509Sdim MachinePointerInfo()); 839263509Sdim 840239310Sdim InFlag = Chain.getValue(1); 841263509Sdim curOffset += sz / 8; 842239310Sdim } 843239310Sdim } 844263509Sdim ++paramCount; 845239310Sdim } 
846239310Sdim 847239310Sdim GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); 848239310Sdim unsigned retAlignment = 0; 849239310Sdim 850239310Sdim // Handle Result 851239310Sdim if (Ins.size() > 0) { 852239310Sdim SmallVector<EVT, 16> resvtparts; 853239310Sdim ComputeValueVTs(*this, retTy, resvtparts); 854239310Sdim 855263509Sdim // Declare 856263509Sdim // .param .align 16 .b8 retval0[<size-in-bytes>], or 857263509Sdim // .param .b<size-in-bits> retval0 858263509Sdim unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); 859263509Sdim if (retTy->isPrimitiveType() || retTy->isIntegerTy() || 860263509Sdim retTy->isPointerTy()) { 861263509Sdim // Scalar needs to be at least 32bit wide 862263509Sdim if (resultsz < 32) 863263509Sdim resultsz = 32; 864263509Sdim SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 865263509Sdim SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), 866263509Sdim DAG.getConstant(resultsz, MVT::i32), 867263509Sdim DAG.getConstant(0, MVT::i32), InFlag }; 868263509Sdim Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, 869263509Sdim DeclareRetOps, 5); 870263509Sdim InFlag = Chain.getValue(1); 871263509Sdim } else { 872263509Sdim retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); 873263509Sdim SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 874263509Sdim SDValue DeclareRetOps[] = { Chain, 875263509Sdim DAG.getConstant(retAlignment, MVT::i32), 876263509Sdim DAG.getConstant(resultsz / 8, MVT::i32), 877263509Sdim DAG.getConstant(0, MVT::i32), InFlag }; 878263509Sdim Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, 879263509Sdim DeclareRetOps, 5); 880263509Sdim InFlag = Chain.getValue(1); 881239310Sdim } 882239310Sdim } 883239310Sdim 884239310Sdim if (!Func) { 885239310Sdim // This is indirect function call case : PTX requires a prototype of the 886239310Sdim // form 887239310Sdim // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); 
888239310Sdim // to be emitted, and the label has to used as the last arg of call 889239310Sdim // instruction. 890263509Sdim // The prototype is embedded in a string and put as the operand for a 891263509Sdim // CallPrototype SDNode which will print out to the value of the string. 892263509Sdim SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); 893263509Sdim std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS); 894263509Sdim const char *ProtoStr = 895263509Sdim nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); 896263509Sdim SDValue ProtoOps[] = { 897263509Sdim Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, 898252723Sdim }; 899263509Sdim Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, &ProtoOps[0], 3); 900239310Sdim InFlag = Chain.getValue(1); 901239310Sdim } 902239310Sdim // Op to just print "call" 903239310Sdim SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); 904252723Sdim SDValue PrintCallOps[] = { 905263509Sdim Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag 906252723Sdim }; 907252723Sdim Chain = DAG.getNode(Func ? 
(NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), 908252723Sdim dl, PrintCallVTs, PrintCallOps, 3); 909239310Sdim InFlag = Chain.getValue(1); 910239310Sdim 911239310Sdim // Ops to print out the function name 912239310Sdim SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); 913239310Sdim SDValue CallVoidOps[] = { Chain, Callee, InFlag }; 914239310Sdim Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3); 915239310Sdim InFlag = Chain.getValue(1); 916239310Sdim 917239310Sdim // Ops to print out the param list 918239310Sdim SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); 919239310Sdim SDValue CallArgBeginOps[] = { Chain, InFlag }; 920239310Sdim Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, 921239310Sdim CallArgBeginOps, 2); 922239310Sdim InFlag = Chain.getValue(1); 923239310Sdim 924252723Sdim for (unsigned i = 0, e = paramCount; i != e; ++i) { 925239310Sdim unsigned opcode; 926252723Sdim if (i == (e - 1)) 927239310Sdim opcode = NVPTXISD::LastCallArg; 928239310Sdim else 929239310Sdim opcode = NVPTXISD::CallArg; 930239310Sdim SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); 931239310Sdim SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), 932252723Sdim DAG.getConstant(i, MVT::i32), InFlag }; 933239310Sdim Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); 934239310Sdim InFlag = Chain.getValue(1); 935239310Sdim } 936239310Sdim SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); 937252723Sdim SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 
1 : 0, MVT::i32), 938239310Sdim InFlag }; 939252723Sdim Chain = 940252723Sdim DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3); 941239310Sdim InFlag = Chain.getValue(1); 942239310Sdim 943239310Sdim if (!Func) { 944239310Sdim SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); 945252723Sdim SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), 946239310Sdim InFlag }; 947239310Sdim Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); 948239310Sdim InFlag = Chain.getValue(1); 949239310Sdim } 950239310Sdim 951239310Sdim // Generate loads from param memory/moves from registers for result 952239310Sdim if (Ins.size() > 0) { 953263509Sdim unsigned resoffset = 0; 954263509Sdim if (retTy && retTy->isVectorTy()) { 955263509Sdim EVT ObjectVT = getValueType(retTy); 956263509Sdim unsigned NumElts = ObjectVT.getVectorNumElements(); 957263509Sdim EVT EltVT = ObjectVT.getVectorElementType(); 958263509Sdim assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(), 959263509Sdim ObjectVT) == NumElts && 960263509Sdim "Vector was not scalarized"); 961263509Sdim unsigned sz = EltVT.getSizeInBits(); 962263509Sdim bool needTruncate = sz < 16 ? 
true : false; 963263509Sdim 964263509Sdim if (NumElts == 1) { 965263509Sdim // Just a simple load 966263509Sdim std::vector<EVT> LoadRetVTs; 967263509Sdim if (needTruncate) { 968263509Sdim // If loading i1 result, generate 969263509Sdim // load i16 970263509Sdim // trunc i16 to i1 971263509Sdim LoadRetVTs.push_back(MVT::i16); 972263509Sdim } else 973263509Sdim LoadRetVTs.push_back(EltVT); 974263509Sdim LoadRetVTs.push_back(MVT::Other); 975263509Sdim LoadRetVTs.push_back(MVT::Glue); 976263509Sdim std::vector<SDValue> LoadRetOps; 977263509Sdim LoadRetOps.push_back(Chain); 978263509Sdim LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 979263509Sdim LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); 980263509Sdim LoadRetOps.push_back(InFlag); 981263509Sdim SDValue retval = DAG.getMemIntrinsicNode( 982263509Sdim NVPTXISD::LoadParam, dl, 983263509Sdim DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], 984263509Sdim LoadRetOps.size(), EltVT, MachinePointerInfo()); 985263509Sdim Chain = retval.getValue(1); 986263509Sdim InFlag = retval.getValue(2); 987263509Sdim SDValue Ret0 = retval; 988263509Sdim if (needTruncate) 989263509Sdim Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0); 990263509Sdim InVals.push_back(Ret0); 991263509Sdim } else if (NumElts == 2) { 992263509Sdim // LoadV2 993263509Sdim std::vector<EVT> LoadRetVTs; 994263509Sdim if (needTruncate) { 995263509Sdim // If loading i1 result, generate 996263509Sdim // load i16 997263509Sdim // trunc i16 to i1 998263509Sdim LoadRetVTs.push_back(MVT::i16); 999263509Sdim LoadRetVTs.push_back(MVT::i16); 1000263509Sdim } else { 1001263509Sdim LoadRetVTs.push_back(EltVT); 1002263509Sdim LoadRetVTs.push_back(EltVT); 1003263509Sdim } 1004263509Sdim LoadRetVTs.push_back(MVT::Other); 1005263509Sdim LoadRetVTs.push_back(MVT::Glue); 1006263509Sdim std::vector<SDValue> LoadRetOps; 1007263509Sdim LoadRetOps.push_back(Chain); 1008263509Sdim LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 1009263509Sdim 
LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); 1010263509Sdim LoadRetOps.push_back(InFlag); 1011263509Sdim SDValue retval = DAG.getMemIntrinsicNode( 1012263509Sdim NVPTXISD::LoadParamV2, dl, 1013263509Sdim DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], 1014263509Sdim LoadRetOps.size(), EltVT, MachinePointerInfo()); 1015263509Sdim Chain = retval.getValue(2); 1016263509Sdim InFlag = retval.getValue(3); 1017263509Sdim SDValue Ret0 = retval.getValue(0); 1018263509Sdim SDValue Ret1 = retval.getValue(1); 1019263509Sdim if (needTruncate) { 1020263509Sdim Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0); 1021263509Sdim InVals.push_back(Ret0); 1022263509Sdim Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1); 1023263509Sdim InVals.push_back(Ret1); 1024263509Sdim } else { 1025263509Sdim InVals.push_back(Ret0); 1026263509Sdim InVals.push_back(Ret1); 1027263509Sdim } 1028263509Sdim } else { 1029263509Sdim // Split into N LoadV4 1030263509Sdim unsigned Ofst = 0; 1031263509Sdim unsigned VecSize = 4; 1032263509Sdim unsigned Opc = NVPTXISD::LoadParamV4; 1033263509Sdim if (EltVT.getSizeInBits() == 64) { 1034263509Sdim VecSize = 2; 1035263509Sdim Opc = NVPTXISD::LoadParamV2; 1036263509Sdim } 1037263509Sdim EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); 1038263509Sdim for (unsigned i = 0; i < NumElts; i += VecSize) { 1039263509Sdim SmallVector<EVT, 8> LoadRetVTs; 1040263509Sdim if (needTruncate) { 1041263509Sdim // If loading i1 result, generate 1042263509Sdim // load i16 1043263509Sdim // trunc i16 to i1 1044263509Sdim for (unsigned j = 0; j < VecSize; ++j) 1045263509Sdim LoadRetVTs.push_back(MVT::i16); 1046263509Sdim } else { 1047263509Sdim for (unsigned j = 0; j < VecSize; ++j) 1048263509Sdim LoadRetVTs.push_back(EltVT); 1049263509Sdim } 1050263509Sdim LoadRetVTs.push_back(MVT::Other); 1051263509Sdim LoadRetVTs.push_back(MVT::Glue); 1052263509Sdim SmallVector<SDValue, 4> LoadRetOps; 1053263509Sdim LoadRetOps.push_back(Chain); 
1054263509Sdim LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 1055263509Sdim LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32)); 1056263509Sdim LoadRetOps.push_back(InFlag); 1057263509Sdim SDValue retval = DAG.getMemIntrinsicNode( 1058263509Sdim Opc, dl, DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), 1059263509Sdim &LoadRetOps[0], LoadRetOps.size(), EltVT, MachinePointerInfo()); 1060263509Sdim if (VecSize == 2) { 1061263509Sdim Chain = retval.getValue(2); 1062263509Sdim InFlag = retval.getValue(3); 1063263509Sdim } else { 1064263509Sdim Chain = retval.getValue(4); 1065263509Sdim InFlag = retval.getValue(5); 1066263509Sdim } 1067263509Sdim 1068263509Sdim for (unsigned j = 0; j < VecSize; ++j) { 1069263509Sdim if (i + j >= NumElts) 1070263509Sdim break; 1071263509Sdim SDValue Elt = retval.getValue(j); 1072263509Sdim if (needTruncate) 1073263509Sdim Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); 1074263509Sdim InVals.push_back(Elt); 1075263509Sdim } 1076263509Sdim Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 1077263509Sdim } 1078263509Sdim } 1079263509Sdim } else { 1080263509Sdim SmallVector<EVT, 16> VTs; 1081263509Sdim ComputePTXValueVTs(*this, retTy, VTs); 1082263509Sdim assert(VTs.size() == Ins.size() && "Bad value decomposition"); 1083252723Sdim for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 1084263509Sdim unsigned sz = VTs[i].getSizeInBits(); 1085263509Sdim bool needTruncate = sz < 8 ? true : false; 1086263509Sdim if (VTs[i].isInteger() && (sz < 8)) 1087252723Sdim sz = 8; 1088263509Sdim 1089263509Sdim SmallVector<EVT, 4> LoadRetVTs; 1090263509Sdim EVT TheLoadType = VTs[i]; 1091263509Sdim if (retTy->isIntegerTy() && 1092263509Sdim TD->getTypeAllocSizeInBits(retTy) < 32) { 1093263509Sdim // This is for integer types only, and specifically not for 1094263509Sdim // aggregates. 
1095263509Sdim LoadRetVTs.push_back(MVT::i32); 1096263509Sdim TheLoadType = MVT::i32; 1097263509Sdim } else if (sz < 16) { 1098263509Sdim // If loading i1/i8 result, generate 1099263509Sdim // load i8 (-> i16) 1100263509Sdim // trunc i16 to i1/i8 1101263509Sdim LoadRetVTs.push_back(MVT::i16); 1102263509Sdim } else 1103263509Sdim LoadRetVTs.push_back(Ins[i].VT); 1104263509Sdim LoadRetVTs.push_back(MVT::Other); 1105263509Sdim LoadRetVTs.push_back(MVT::Glue); 1106263509Sdim 1107263509Sdim SmallVector<SDValue, 4> LoadRetOps; 1108263509Sdim LoadRetOps.push_back(Chain); 1109263509Sdim LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 1110263509Sdim LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32)); 1111263509Sdim LoadRetOps.push_back(InFlag); 1112263509Sdim SDValue retval = DAG.getMemIntrinsicNode( 1113263509Sdim NVPTXISD::LoadParam, dl, 1114263509Sdim DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], 1115263509Sdim LoadRetOps.size(), TheLoadType, MachinePointerInfo()); 1116239310Sdim Chain = retval.getValue(1); 1117239310Sdim InFlag = retval.getValue(2); 1118263509Sdim SDValue Ret0 = retval.getValue(0); 1119263509Sdim if (needTruncate) 1120263509Sdim Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); 1121263509Sdim InVals.push_back(Ret0); 1122252723Sdim resoffset += sz / 8; 1123239310Sdim } 1124239310Sdim } 1125239310Sdim } 1126263509Sdim 1127252723Sdim Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), 1128252723Sdim DAG.getIntPtrConstant(uniqueCallSite + 1, true), 1129263509Sdim InFlag, dl); 1130239310Sdim uniqueCallSite++; 1131239310Sdim 1132239310Sdim // set isTailCall to false for now, until we figure out how to express 1133239310Sdim // tail call optimization in PTX 1134239310Sdim isTailCall = false; 1135239310Sdim return Chain; 1136239310Sdim} 1137239310Sdim 1138239310Sdim// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() 1139239310Sdim// (see LegalizeDAG.cpp). 
This is slow and uses local memory. 1140239310Sdim// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 1141252723SdimSDValue 1142252723SdimNVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { 1143239310Sdim SDNode *Node = Op.getNode(); 1144263509Sdim SDLoc dl(Node); 1145239310Sdim SmallVector<SDValue, 8> Ops; 1146239310Sdim unsigned NumOperands = Node->getNumOperands(); 1147252723Sdim for (unsigned i = 0; i < NumOperands; ++i) { 1148239310Sdim SDValue SubOp = Node->getOperand(i); 1149239310Sdim EVT VVT = SubOp.getNode()->getValueType(0); 1150239310Sdim EVT EltVT = VVT.getVectorElementType(); 1151239310Sdim unsigned NumSubElem = VVT.getVectorNumElements(); 1152252723Sdim for (unsigned j = 0; j < NumSubElem; ++j) { 1153239310Sdim Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, 1154239310Sdim DAG.getIntPtrConstant(j))); 1155239310Sdim } 1156239310Sdim } 1157252723Sdim return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0], 1158252723Sdim Ops.size()); 1159239310Sdim} 1160239310Sdim 1161252723SdimSDValue 1162252723SdimNVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 1163239310Sdim switch (Op.getOpcode()) { 1164252723Sdim case ISD::RETURNADDR: 1165252723Sdim return SDValue(); 1166252723Sdim case ISD::FRAMEADDR: 1167252723Sdim return SDValue(); 1168252723Sdim case ISD::GlobalAddress: 1169252723Sdim return LowerGlobalAddress(Op, DAG); 1170252723Sdim case ISD::INTRINSIC_W_CHAIN: 1171252723Sdim return Op; 1172239310Sdim case ISD::BUILD_VECTOR: 1173239310Sdim case ISD::EXTRACT_SUBVECTOR: 1174239310Sdim return Op; 1175252723Sdim case ISD::CONCAT_VECTORS: 1176252723Sdim return LowerCONCAT_VECTORS(Op, DAG); 1177252723Sdim case ISD::STORE: 1178252723Sdim return LowerSTORE(Op, DAG); 1179252723Sdim case ISD::LOAD: 1180252723Sdim return LowerLOAD(Op, DAG); 1181239310Sdim default: 1182239310Sdim llvm_unreachable("Custom lowering not defined for operation"); 
1183239310Sdim } 1184239310Sdim} 1185239310Sdim 1186252723SdimSDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1187252723Sdim if (Op.getValueType() == MVT::i1) 1188252723Sdim return LowerLOADi1(Op, DAG); 1189252723Sdim else 1190252723Sdim return SDValue(); 1191252723Sdim} 1192245431Sdim 1193245431Sdim// v = ld i1* addr 1194245431Sdim// => 1195263509Sdim// v1 = ld i8* addr (-> i16) 1196263509Sdim// v = trunc i16 to i1 1197252723SdimSDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { 1198245431Sdim SDNode *Node = Op.getNode(); 1199245431Sdim LoadSDNode *LD = cast<LoadSDNode>(Node); 1200263509Sdim SDLoc dl(Node); 1201252723Sdim assert(LD->getExtensionType() == ISD::NON_EXTLOAD); 1202245431Sdim assert(Node->getValueType(0) == MVT::i1 && 1203245431Sdim "Custom lowering for i1 load only"); 1204252723Sdim SDValue newLD = 1205263509Sdim DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), 1206252723Sdim LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), 1207252723Sdim LD->isInvariant(), LD->getAlignment()); 1208245431Sdim SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); 1209245431Sdim // The legalizer (the caller) is expecting two values from the legalized 1210245431Sdim // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() 1211245431Sdim // in LegalizeDAG.cpp which also uses MergeValues. 
1212252723Sdim SDValue Ops[] = { result, LD->getChain() }; 1213245431Sdim return DAG.getMergeValues(Ops, 2, dl); 1214245431Sdim} 1215245431Sdim 1216252723SdimSDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1217252723Sdim EVT ValVT = Op.getOperand(1).getValueType(); 1218252723Sdim if (ValVT == MVT::i1) 1219252723Sdim return LowerSTOREi1(Op, DAG); 1220252723Sdim else if (ValVT.isVector()) 1221252723Sdim return LowerSTOREVector(Op, DAG); 1222252723Sdim else 1223252723Sdim return SDValue(); 1224252723Sdim} 1225252723Sdim 1226252723SdimSDValue 1227252723SdimNVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { 1228252723Sdim SDNode *N = Op.getNode(); 1229252723Sdim SDValue Val = N->getOperand(1); 1230263509Sdim SDLoc DL(N); 1231252723Sdim EVT ValVT = Val.getValueType(); 1232252723Sdim 1233252723Sdim if (ValVT.isVector()) { 1234252723Sdim // We only handle "native" vector sizes for now, e.g. <4 x double> is not 1235252723Sdim // legal. We can (and should) split that into 2 stores of <2 x double> here 1236252723Sdim // but I'm leaving that as a TODO for now. 1237252723Sdim if (!ValVT.isSimple()) 1238252723Sdim return SDValue(); 1239252723Sdim switch (ValVT.getSimpleVT().SimpleTy) { 1240252723Sdim default: 1241252723Sdim return SDValue(); 1242252723Sdim case MVT::v2i8: 1243252723Sdim case MVT::v2i16: 1244252723Sdim case MVT::v2i32: 1245252723Sdim case MVT::v2i64: 1246252723Sdim case MVT::v2f32: 1247252723Sdim case MVT::v2f64: 1248252723Sdim case MVT::v4i8: 1249252723Sdim case MVT::v4i16: 1250252723Sdim case MVT::v4i32: 1251252723Sdim case MVT::v4f32: 1252252723Sdim // This is a "native" vector type 1253252723Sdim break; 1254252723Sdim } 1255252723Sdim 1256252723Sdim unsigned Opcode = 0; 1257252723Sdim EVT EltVT = ValVT.getVectorElementType(); 1258252723Sdim unsigned NumElts = ValVT.getVectorNumElements(); 1259252723Sdim 1260252723Sdim // Since StoreV2 is a target node, we cannot rely on DAG type legalization. 
1261252723Sdim // Therefore, we must ensure the type is legal. For i1 and i8, we set the 1262252723Sdim // stored type to i16 and propogate the "real" type as the memory type. 1263252723Sdim bool NeedExt = false; 1264252723Sdim if (EltVT.getSizeInBits() < 16) 1265252723Sdim NeedExt = true; 1266252723Sdim 1267252723Sdim switch (NumElts) { 1268252723Sdim default: 1269252723Sdim return SDValue(); 1270252723Sdim case 2: 1271252723Sdim Opcode = NVPTXISD::StoreV2; 1272252723Sdim break; 1273252723Sdim case 4: { 1274252723Sdim Opcode = NVPTXISD::StoreV4; 1275252723Sdim break; 1276252723Sdim } 1277252723Sdim } 1278252723Sdim 1279252723Sdim SmallVector<SDValue, 8> Ops; 1280252723Sdim 1281252723Sdim // First is the chain 1282252723Sdim Ops.push_back(N->getOperand(0)); 1283252723Sdim 1284252723Sdim // Then the split values 1285252723Sdim for (unsigned i = 0; i < NumElts; ++i) { 1286252723Sdim SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, 1287252723Sdim DAG.getIntPtrConstant(i)); 1288252723Sdim if (NeedExt) 1289252723Sdim ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); 1290252723Sdim Ops.push_back(ExtVal); 1291252723Sdim } 1292252723Sdim 1293252723Sdim // Then any remaining arguments 1294252723Sdim for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { 1295252723Sdim Ops.push_back(N->getOperand(i)); 1296252723Sdim } 1297252723Sdim 1298252723Sdim MemSDNode *MemSD = cast<MemSDNode>(N); 1299252723Sdim 1300252723Sdim SDValue NewSt = DAG.getMemIntrinsicNode( 1301252723Sdim Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(), 1302252723Sdim MemSD->getMemoryVT(), MemSD->getMemOperand()); 1303252723Sdim 1304252723Sdim //return DCI.CombineTo(N, NewSt, true); 1305252723Sdim return NewSt; 1306252723Sdim } 1307252723Sdim 1308252723Sdim return SDValue(); 1309252723Sdim} 1310252723Sdim 1311245431Sdim// st i1 v, addr 1312245431Sdim// => 1313263509Sdim// v1 = zxt v to i16 1314263509Sdim// st.u8 i16, addr 1315252723SdimSDValue 
NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { 1316245431Sdim SDNode *Node = Op.getNode(); 1317263509Sdim SDLoc dl(Node); 1318245431Sdim StoreSDNode *ST = cast<StoreSDNode>(Node); 1319245431Sdim SDValue Tmp1 = ST->getChain(); 1320245431Sdim SDValue Tmp2 = ST->getBasePtr(); 1321245431Sdim SDValue Tmp3 = ST->getValue(); 1322245431Sdim assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); 1323245431Sdim unsigned Alignment = ST->getAlignment(); 1324245431Sdim bool isVolatile = ST->isVolatile(); 1325245431Sdim bool isNonTemporal = ST->isNonTemporal(); 1326263509Sdim Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); 1327263509Sdim SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, 1328263509Sdim ST->getPointerInfo(), MVT::i8, isNonTemporal, 1329263509Sdim isVolatile, Alignment); 1330245431Sdim return Result; 1331245431Sdim} 1332245431Sdim 1333252723SdimSDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, 1334252723Sdim int idx, EVT v) const { 1335239310Sdim std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); 1336239310Sdim std::stringstream suffix; 1337239310Sdim suffix << idx; 1338239310Sdim *name += suffix.str(); 1339239310Sdim return DAG.getTargetExternalSymbol(name->c_str(), v); 1340239310Sdim} 1341239310Sdim 1342239310SdimSDValue 1343239310SdimNVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { 1344263509Sdim std::string ParamSym; 1345263509Sdim raw_string_ostream ParamStr(ParamSym); 1346263509Sdim 1347263509Sdim ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; 1348263509Sdim ParamStr.flush(); 1349263509Sdim 1350263509Sdim std::string *SavedStr = 1351263509Sdim nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); 1352263509Sdim return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); 1353239310Sdim} 1354239310Sdim 1355252723SdimSDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int 
idx) {
  return getExtSymb(DAG, ".HLPPARAM", idx);
}

// Check to see if the kernel argument is image*_t or sampler_t.
//
// Returns true only when all of the following hold:
//   * the type of \p arg is a pointer type,
//   * \p context is non-null (it is only null-checked, not otherwise used),
//   * the pointee is a named (non-literal) struct whose name is one of the
//     entries of specialTypes below.

bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
  static const char *const specialTypes[] = { "struct._image2d_t",
                                              "struct._image3d_t",
                                              "struct._sampler_t" };

  const Type *Ty = arg->getType();
  const PointerType *PTy = dyn_cast<PointerType>(Ty);

  if (!PTy)
    return false;

  if (!context)
    return false;

  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
  // Literal (unnamed) structs and non-struct pointees compare as "".
  const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";

  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
    if (TypeName == specialTypes[i])
      return true;

  return false;
}

/// LowerFormalArguments - Produce one SDValue in \p InVals for every entry of
/// \p Ins by walking the IR arguments of the current function.
///
/// Per IR argument, in order:
///   * image/sampler argument: an i32 constant holding the 1-based position;
///   * unused argument: UNDEF nodes (one per part for aggregates/vectors);
///   * non-byval aggregate: one load per primitive part from the param
///     symbol plus the part's offset;
///   * non-byval vector: a scalar, v2 or v4 load from the param symbol,
///     followed by per-element extraction;
///   * non-byval scalar: a load (or extending load) from the param symbol;
///   * byval argument: a MoveParam of the param symbol, additionally wrapped
///     in the nvvm_ptr_local_to_gen intrinsic for non-kernel functions.
///
/// Returns \p Chain unchanged.
SDValue NVPTXTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout *TD = getDataLayout();

  const Function *F = MF.getFunction();
  const AttributeSet &PAL = F->getAttributes();
  const TargetLowering *TLI = nvTM->getTargetLowering();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  // Same condition as the assert above; still effective when asserts are
  // compiled out.
  if (!isABI)
    return Chain;

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
  // Ins.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Ins)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Ins.
  // So a different index should be used for indexing into Ins.
  // See similar issue in LowerCall.
  unsigned InsIdx = 0;

  // The loop header advances InsIdx by one per IR argument. Branches that
  // consume several Ins entries advance InsIdx themselves and then step it
  // back by one so that the header increment lands on the next unread entry.
  int idx = 0;
  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
    Type *Ty = argTypes[i];

    // If the kernel argument is image*_t or sampler_t, convert it to
    // a i32 constant holding the parameter position. This can later be
    // matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(
            theArgs[i],
            (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
                                     : 0))) {
      assert(isKernel && "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
      continue;
    }

    if (theArgs[i]->use_empty()) {
      // The argument is dead: emit UNDEF placeholders so InVals still gets
      // one value per Ins entry.
      if (Ty->isAggregateType()) {
        SmallVector<EVT, 16> vtparts;

        ComputePTXValueVTs(*this, Ty, vtparts);
        assert(vtparts.size() > 0 && "empty aggregate type not expected");
        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
             ++parti) {
          EVT partVT = vtparts[parti];
          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, partVT));
          ++InsIdx;
        }
        if (vtparts.size() > 0)
          --InsIdx;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(Ty);
        unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
        for (unsigned parti = 0; parti < NumRegs; ++parti) {
          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
          ++InsIdx;
        }
        if (NumRegs > 0)
          --InsIdx;
        continue;
      }
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
      continue;
    }

    // In the following cases, assign a node order of "idx+1"
    // to newly created nodes. The SDNodes for params have to
    // appear in the same order as their order of appearance
    // in the original function. "idx+1" holds that order.
    if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
      if (Ty->isAggregateType()) {
        SmallVector<EVT, 16> vtparts;
        SmallVector<uint64_t, 16> offsets;

        // NOTE: Here, we lose the ability to issue vector loads for vectors
        // that are a part of a struct.  This should be investigated in the
        // future.
        ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0);
        assert(vtparts.size() > 0 && "empty aggregate type not expected");
        bool aggregateIsPacked = false;
        if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
          aggregateIsPacked = STy->isPacked();

        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
             ++parti) {
          EVT partVT = vtparts[parti];
          Value *srcValue = Constant::getNullValue(
              PointerType::get(partVT.getTypeForEVT(F->getContext()),
                               llvm::ADDRESS_SPACE_PARAM));
          SDValue srcAddr =
              DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
                          DAG.getConstant(offsets[parti], getPointerTy()));
          // Parts of a packed struct are loaded with alignment 1; otherwise
          // the ABI alignment of the part type is used.
          unsigned partAlign =
              aggregateIsPacked ? 1
                                : TD->getABITypeAlignment(
                                      partVT.getTypeForEVT(F->getContext()));
          SDValue p;
          // Use an extending load when the Ins entry is wider than the part;
          // the extension kind follows the SExt flag of the Ins entry.
          if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
            ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
                                     ISD::SEXTLOAD : ISD::ZEXTLOAD;
            p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
                               MachinePointerInfo(srcValue), partVT, false,
                               false, partAlign);
          } else {
            p = DAG.getLoad(partVT, dl, Root, srcAddr,
                            MachinePointerInfo(srcValue), false, false, false,
                            partAlign);
          }
          if (p.getNode())
            p.getNode()->setIROrder(idx + 1);
          InVals.push_back(p);
          ++InsIdx;
        }
        if (vtparts.size() > 0)
          --InsIdx;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(Ty);
        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
        unsigned NumElts = ObjectVT.getVectorNumElements();
        assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
               "Vector was not scalarized");
        unsigned Ofst = 0;
        EVT EltVT = ObjectVT.getVectorElementType();

        // V1 load
        // f32 = load ...
        if (NumElts == 1) {
          // We only have one element, so just directly load it
          Value *SrcValue = Constant::getNullValue(PointerType::get(
              EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
          SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
                                        DAG.getConstant(Ofst, getPointerTy()));
          SDValue P = DAG.getLoad(
              EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
              false, true,
              TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
          if (P.getNode())
            P.getNode()->setIROrder(idx + 1);

          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
            P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
          InVals.push_back(P);
          Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext()));
          ++InsIdx;
        } else if (NumElts == 2) {
          // V2 load
          // f32,f32 = load ...
          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
          Value *SrcValue = Constant::getNullValue(PointerType::get(
              VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
          SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
                                        DAG.getConstant(Ofst, getPointerTy()));
          SDValue P = DAG.getLoad(
              VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
              false, true,
              TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
          if (P.getNode())
            P.getNode()->setIROrder(idx + 1);

          SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                     DAG.getIntPtrConstant(0));
          SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                     DAG.getIntPtrConstant(1));

          // Both elements are widened against the same Ins entry.
          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
            Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
            Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
          }

          InVals.push_back(Elt0);
          InVals.push_back(Elt1);
          Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
          InsIdx += 2;
        } else {
          // V4 loads
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the vector will be expanded to a power of 2 elements, so we know
          // we can always round up to the next multiple of 4 when creating
          // the vector loads.
          // e.g.  4 elem => 1 ld.v4
          //       6 elem => 2 ld.v4
          //       8 elem => 2 ld.v4
          //      11 elem => 3 ld.v4
          // 64-bit elements are loaded two at a time instead of four.
          unsigned VecSize = 4;
          if (EltVT.getSizeInBits() == 64) {
            VecSize = 2;
          }
          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
          // NOTE: this 'i' shadows the outer argument index 'i'.
          for (unsigned i = 0; i < NumElts; i += VecSize) {
            Value *SrcValue = Constant::getNullValue(
                PointerType::get(VecVT.getTypeForEVT(F->getContext()),
                                 llvm::ADDRESS_SPACE_PARAM));
            SDValue SrcAddr =
                DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
                            DAG.getConstant(Ofst, getPointerTy()));
            SDValue P = DAG.getLoad(
                VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
                false, true,
                TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
            if (P.getNode())
              P.getNode()->setIROrder(idx + 1);

            // Extract only the elements that exist; the last load may cover
            // fewer than VecSize real elements. InsIdx is not advanced here,
            // so every element is compared against the same Ins entry.
            for (unsigned j = 0; j < VecSize; ++j) {
              if (i + j >= NumElts)
                break;
              SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                        DAG.getIntPtrConstant(j));
              if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
                Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
              InVals.push_back(Elt);
            }
            Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
          }
          InsIdx += NumElts;
        }

        if (NumElts > 0)
          --InsIdx;
        continue;
      }
      // A plain scalar.
      EVT ObjectVT = getValueType(Ty);
      // If ABI, load from the param symbol
      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
      Value *srcValue = Constant::getNullValue(PointerType::get(
          ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
      SDValue p;
      if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
        ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
                                 ISD::SEXTLOAD : ISD::ZEXTLOAD;
        p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
                           MachinePointerInfo(srcValue), ObjectVT, false, false,
            TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
      } else {
        p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
                        MachinePointerInfo(srcValue), false, false, false,
            TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
      }
      if (p.getNode())
        p.getNode()->setIROrder(idx + 1);
      InVals.push_back(p);
      continue;
    }

    // Param has ByVal attribute
    // Return MoveParam(param symbol).
    // Ideally, the param symbol can be returned directly,
    // but when SDNode builder decides to use it in a CopyToReg(),
    // machine instruction fails because TargetExternalSymbol
    // (not lowered) is target dependent, and CopyToReg assumes
    // the source is lowered.
    EVT ObjectVT = getValueType(Ty);
    assert(ObjectVT == Ins[InsIdx].VT &&
           "Ins type did not match function type");
    SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
    SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
    if (p.getNode())
      p.getNode()->setIROrder(idx + 1);
    if (isKernel)
      InVals.push_back(p);
    else {
      // Non-kernel functions get the MoveParam result passed through the
      // nvvm_ptr_local_to_gen intrinsic.
      SDValue p2 = DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
          DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
      InVals.push_back(p2);
    }
  }

  // Clang will check explicit VarArg and issue error if any. However, Clang
  // will let code with
  // implicit var arg like f() pass. See bug 617733.
  // We treat this case as if the arg list is empty.
  // if (F.isVarArg()) {
  // assert(0 && "VarArg not supported yet!");
  //}

  // NOTE(review): nothing in this function appends to OutChains, so this
  // branch is not taken as the code stands.
  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
                            OutChains.size()));

  return Chain;
}


/// LowerReturn - Emit the stores of the return value followed by the
/// NVPTXISD::RET_FLAG node.
///
/// A vector return type is written with StoreRetval / StoreRetvalV2 /
/// StoreRetvalV4 nodes built from the already scalarized \p OutVals. Any other
/// return type is written one value at a time with StoreRetval nodes at
/// increasing byte offsets. Returns the RET_FLAG node chained after the
/// stores.
SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 SDLoc dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();
  Type *RetTy = F->getReturnType();
  const DataLayout *TD = getDataLayout();

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
    // If we have a vector type, the OutVals array will be the scalarized
    // components and we have to combine them into 1 or more vector stores.
    unsigned NumElts = VTy->getNumElements();
    assert(NumElts == Outs.size() && "Bad scalarization of return value");

    // Element type of the returned vector; used as the memory type of the
    // store nodes below.
    EVT EltVT = getValueType(RetTy).getVectorElementType();
    // Elements narrower than 16 bits are zero-extended to i16 before being
    // stored.
    bool NeedExtend = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExtend = true;

    // V1 store
    if (NumElts == 1) {
      SDValue StoreVal = OutVals[0];
      // We only have one element, so just directly store it
      if (NeedExtend)
        StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
      SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
                                      DAG.getVTList(MVT::Other), &Ops[0], 3,
                                      EltVT, MachinePointerInfo());

    } else if (NumElts == 2) {
      // V2 store
      SDValue StoreVal0 = OutVals[0];
      SDValue StoreVal1 = OutVals[1];

      if (NeedExtend) {
        StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
        StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
      }

      SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,
                        StoreVal1 };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
                                      DAG.getVTList(MVT::Other), &Ops[0], 4,
                                      EltVT, MachinePointerInfo());
    } else {
      // V4 stores
      // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
      // vector will be expanded to a power of 2 elements, so we know we can
      // always round up to the next multiple of 4 when creating the vector
      // stores.
      // e.g.  4 elem => 1 st.v4
      //       6 elem => 2 st.v4
      //       8 elem => 2 st.v4
      //      11 elem => 3 st.v4

      // 64-bit values are stored two at a time instead of four.
      unsigned VecSize = 4;
      if (OutVals[0].getValueType().getSizeInBits() == 64)
        VecSize = 2;

      unsigned Offset = 0;

      EVT VecVT =
          EVT::getVectorVT(F->getContext(), OutVals[0].getValueType(), VecSize);
      unsigned PerStoreOffset =
          TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));

      for (unsigned i = 0; i < NumElts; i += VecSize) {
        // Get values
        SDValue StoreVal;
        SmallVector<SDValue, 8> Ops;
        Ops.push_back(Chain);
        Ops.push_back(DAG.getConstant(Offset, MVT::i32));
        unsigned Opc = NVPTXISD::StoreRetvalV2;
        EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();

        StoreVal = OutVals[i];
        if (NeedExtend)
          StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
        Ops.push_back(StoreVal);

        // Slots past the last real element are filled with UNDEF.
        if (i + 1 < NumElts) {
          StoreVal = OutVals[i + 1];
          if (NeedExtend)
            StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
        } else {
          StoreVal = DAG.getUNDEF(ExtendedVT);
        }
        Ops.push_back(StoreVal);

        if (VecSize == 4) {
          Opc = NVPTXISD::StoreRetvalV4;
          if (i + 2 < NumElts) {
            StoreVal = OutVals[i + 2];
            if (NeedExtend)
              StoreVal =
                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
          } else {
            StoreVal = DAG.getUNDEF(ExtendedVT);
          }
          Ops.push_back(StoreVal);

          if (i + 3 < NumElts) {
            StoreVal = OutVals[i + 3];
            if (NeedExtend)
              StoreVal =
                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
          } else {
            StoreVal = DAG.getUNDEF(ExtendedVT);
          }
          Ops.push_back(StoreVal);
        }

        // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
        Chain =
            DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), &Ops[0],
                                    Ops.size(), EltVT, MachinePointerInfo());
        Offset += PerStoreOffset;
      }
    }
  } else {
    SmallVector<EVT, 16> ValVTs;
    // Decompose the return type into the value types it is made of; one
    // entry is expected per OutVals element.
    ComputePTXValueVTs(*this, RetTy, ValVTs);
    assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");

    // Running byte offset of the next store.
    unsigned SizeSoFar = 0;
    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
      SDValue theVal = OutVals[i];
      EVT TheValType = theVal.getValueType();
      unsigned numElems = 1;
      if (TheValType.isVector())
        numElems = TheValType.getVectorNumElements();
      for (unsigned j = 0, je = numElems; j != je; ++j) {
        SDValue TmpVal = theVal;
        if (TheValType.isVector())
          TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                               TheValType.getVectorElementType(), TmpVal,
                               DAG.getIntPtrConstant(j));
        EVT TheStoreType = ValVTs[i];
        if (RetTy->isIntegerTy() &&
            TD->getTypeAllocSizeInBits(RetTy) < 32) {
          // The following zero-extension is for integer types only, and
          // specifically not for aggregates.
          TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
          TheStoreType = MVT::i32;
        }
        else if (TmpVal.getValueType().getSizeInBits() < 16)
          TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);

        SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal };
        Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
                                        DAG.getVTList(MVT::Other), &Ops[0],
                                        3, TheStoreType,
                                        MachinePointerInfo());
        // NOTE(review): the vector branch asks TheStoreType (taken from
        // ValVTs[i]) for its element type; confirm ValVTs can hold a vector
        // type whenever theVal is a vector.
        if(TheValType.isVector())
          SizeSoFar +=
            TheStoreType.getVectorElementType().getStoreSizeInBits() / 8;
        else
          SizeSoFar += TheStoreType.getStoreSizeInBits()/8;
      }
    }
  }

  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}


/// LowerAsmOperandForConstraint - Constraints longer than one character are
/// ignored here (nothing is added to \p Ops); single-character constraints
/// are forwarded to the TargetLowering implementation.
void NVPTXTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  if (Constraint.length() > 1)
    return;
  else
    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// NVPTX supports vectors of legal types of any length in intrinsics because
// the NVPTX-specific type legalizer will legalize them to the PTX-supported
// length.
1884252723Sdimbool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { 1885239310Sdim if (isTypeLegal(VT)) 1886239310Sdim return true; 1887239310Sdim if (VT.isVector()) { 1888239310Sdim MVT eVT = VT.getVectorElementType(); 1889239310Sdim if (isTypeLegal(eVT)) 1890239310Sdim return true; 1891239310Sdim } 1892239310Sdim return false; 1893239310Sdim} 1894239310Sdim 1895239310Sdim// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as 1896239310Sdim// TgtMemIntrinsic 1897239310Sdim// because we need the information that is only available in the "Value" type 1898239310Sdim// of destination 1899239310Sdim// pointer. In particular, the address space information. 1900252723Sdimbool NVPTXTargetLowering::getTgtMemIntrinsic( 1901252723Sdim IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { 1902239310Sdim switch (Intrinsic) { 1903239310Sdim default: 1904239310Sdim return false; 1905239310Sdim 1906239310Sdim case Intrinsic::nvvm_atomic_load_add_f32: 1907239310Sdim Info.opc = ISD::INTRINSIC_W_CHAIN; 1908239310Sdim Info.memVT = MVT::f32; 1909239310Sdim Info.ptrVal = I.getArgOperand(0); 1910239310Sdim Info.offset = 0; 1911239310Sdim Info.vol = 0; 1912239310Sdim Info.readMem = true; 1913239310Sdim Info.writeMem = true; 1914239310Sdim Info.align = 0; 1915239310Sdim return true; 1916239310Sdim 1917239310Sdim case Intrinsic::nvvm_atomic_load_inc_32: 1918239310Sdim case Intrinsic::nvvm_atomic_load_dec_32: 1919239310Sdim Info.opc = ISD::INTRINSIC_W_CHAIN; 1920239310Sdim Info.memVT = MVT::i32; 1921239310Sdim Info.ptrVal = I.getArgOperand(0); 1922239310Sdim Info.offset = 0; 1923239310Sdim Info.vol = 0; 1924239310Sdim Info.readMem = true; 1925239310Sdim Info.writeMem = true; 1926239310Sdim Info.align = 0; 1927239310Sdim return true; 1928239310Sdim 1929239310Sdim case Intrinsic::nvvm_ldu_global_i: 1930239310Sdim case Intrinsic::nvvm_ldu_global_f: 1931239310Sdim case Intrinsic::nvvm_ldu_global_p: 1932239310Sdim 1933239310Sdim Info.opc = 
ISD::INTRINSIC_W_CHAIN; 1934239310Sdim if (Intrinsic == Intrinsic::nvvm_ldu_global_i) 1935263509Sdim Info.memVT = getValueType(I.getType()); 1936239310Sdim else if (Intrinsic == Intrinsic::nvvm_ldu_global_p) 1937263509Sdim Info.memVT = getValueType(I.getType()); 1938239310Sdim else 1939239310Sdim Info.memVT = MVT::f32; 1940239310Sdim Info.ptrVal = I.getArgOperand(0); 1941239310Sdim Info.offset = 0; 1942239310Sdim Info.vol = 0; 1943239310Sdim Info.readMem = true; 1944239310Sdim Info.writeMem = false; 1945239310Sdim Info.align = 0; 1946239310Sdim return true; 1947239310Sdim 1948239310Sdim } 1949239310Sdim return false; 1950239310Sdim} 1951239310Sdim 1952239310Sdim/// isLegalAddressingMode - Return true if the addressing mode represented 1953239310Sdim/// by AM is legal for this target, for a load/store of the specified type. 1954239310Sdim/// Used to guide target specific optimizations, like loop strength reduction 1955239310Sdim/// (LoopStrengthReduce.cpp) and memory optimization for address mode 1956239310Sdim/// (CodeGenPrepare.cpp) 1957252723Sdimbool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, 1958252723Sdim Type *Ty) const { 1959239310Sdim 1960239310Sdim // AddrMode - This represents an addressing mode of: 1961239310Sdim // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg 1962239310Sdim // 1963239310Sdim // The legal address modes are 1964239310Sdim // - [avar] 1965239310Sdim // - [areg] 1966239310Sdim // - [areg+immoff] 1967239310Sdim // - [immAddr] 1968239310Sdim 1969239310Sdim if (AM.BaseGV) { 1970239310Sdim if (AM.BaseOffs || AM.HasBaseReg || AM.Scale) 1971239310Sdim return false; 1972239310Sdim return true; 1973239310Sdim } 1974239310Sdim 1975239310Sdim switch (AM.Scale) { 1976252723Sdim case 0: // "r", "r+i" or "i" is allowed 1977239310Sdim break; 1978239310Sdim case 1: 1979252723Sdim if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. 1980239310Sdim return false; 1981239310Sdim // Otherwise we have r+i. 
1982239310Sdim break; 1983239310Sdim default: 1984239310Sdim // No scale > 1 is allowed 1985239310Sdim return false; 1986239310Sdim } 1987239310Sdim return true; 1988239310Sdim} 1989239310Sdim 1990239310Sdim//===----------------------------------------------------------------------===// 1991239310Sdim// NVPTX Inline Assembly Support 1992239310Sdim//===----------------------------------------------------------------------===// 1993239310Sdim 1994239310Sdim/// getConstraintType - Given a constraint letter, return the type of 1995239310Sdim/// constraint it is for this target. 1996239310SdimNVPTXTargetLowering::ConstraintType 1997239310SdimNVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { 1998239310Sdim if (Constraint.size() == 1) { 1999239310Sdim switch (Constraint[0]) { 2000239310Sdim default: 2001239310Sdim break; 2002239310Sdim case 'r': 2003239310Sdim case 'h': 2004239310Sdim case 'c': 2005239310Sdim case 'l': 2006239310Sdim case 'f': 2007239310Sdim case 'd': 2008239310Sdim case '0': 2009239310Sdim case 'N': 2010239310Sdim return C_RegisterClass; 2011239310Sdim } 2012239310Sdim } 2013239310Sdim return TargetLowering::getConstraintType(Constraint); 2014239310Sdim} 2015239310Sdim 2016252723Sdimstd::pair<unsigned, const TargetRegisterClass *> 2017239310SdimNVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 2018263509Sdim MVT VT) const { 2019239310Sdim if (Constraint.size() == 1) { 2020239310Sdim switch (Constraint[0]) { 2021239310Sdim case 'c': 2022263509Sdim return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 2023239310Sdim case 'h': 2024239310Sdim return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 2025239310Sdim case 'r': 2026239310Sdim return std::make_pair(0U, &NVPTX::Int32RegsRegClass); 2027239310Sdim case 'l': 2028239310Sdim case 'N': 2029239310Sdim return std::make_pair(0U, &NVPTX::Int64RegsRegClass); 2030239310Sdim case 'f': 2031239310Sdim return std::make_pair(0U, &NVPTX::Float32RegsRegClass); 
2032239310Sdim case 'd': 2033239310Sdim return std::make_pair(0U, &NVPTX::Float64RegsRegClass); 2034239310Sdim } 2035239310Sdim } 2036239310Sdim return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 2037239310Sdim} 2038239310Sdim 2039239310Sdim/// getFunctionAlignment - Return the Log2 alignment of this function. 2040239310Sdimunsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { 2041239310Sdim return 4; 2042239310Sdim} 2043252723Sdim 2044252723Sdim/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. 2045252723Sdimstatic void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, 2046252723Sdim SmallVectorImpl<SDValue> &Results) { 2047252723Sdim EVT ResVT = N->getValueType(0); 2048263509Sdim SDLoc DL(N); 2049252723Sdim 2050252723Sdim assert(ResVT.isVector() && "Vector load must have vector type"); 2051252723Sdim 2052252723Sdim // We only handle "native" vector sizes for now, e.g. <4 x double> is not 2053252723Sdim // legal. We can (and should) split that into 2 loads of <2 x double> here 2054252723Sdim // but I'm leaving that as a TODO for now. 2055252723Sdim assert(ResVT.isSimple() && "Can only handle simple types"); 2056252723Sdim switch (ResVT.getSimpleVT().SimpleTy) { 2057252723Sdim default: 2058252723Sdim return; 2059252723Sdim case MVT::v2i8: 2060252723Sdim case MVT::v2i16: 2061252723Sdim case MVT::v2i32: 2062252723Sdim case MVT::v2i64: 2063252723Sdim case MVT::v2f32: 2064252723Sdim case MVT::v2f64: 2065252723Sdim case MVT::v4i8: 2066252723Sdim case MVT::v4i16: 2067252723Sdim case MVT::v4i32: 2068252723Sdim case MVT::v4f32: 2069252723Sdim // This is a "native" vector type 2070252723Sdim break; 2071252723Sdim } 2072252723Sdim 2073252723Sdim EVT EltVT = ResVT.getVectorElementType(); 2074252723Sdim unsigned NumElts = ResVT.getVectorNumElements(); 2075252723Sdim 2076252723Sdim // Since LoadV2 is a target node, we cannot rely on DAG type legalization. 
2077252723Sdim // Therefore, we must ensure the type is legal. For i1 and i8, we set the 2078252723Sdim // loaded type to i16 and propogate the "real" type as the memory type. 2079252723Sdim bool NeedTrunc = false; 2080252723Sdim if (EltVT.getSizeInBits() < 16) { 2081252723Sdim EltVT = MVT::i16; 2082252723Sdim NeedTrunc = true; 2083252723Sdim } 2084252723Sdim 2085252723Sdim unsigned Opcode = 0; 2086252723Sdim SDVTList LdResVTs; 2087252723Sdim 2088252723Sdim switch (NumElts) { 2089252723Sdim default: 2090252723Sdim return; 2091252723Sdim case 2: 2092252723Sdim Opcode = NVPTXISD::LoadV2; 2093252723Sdim LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); 2094252723Sdim break; 2095252723Sdim case 4: { 2096252723Sdim Opcode = NVPTXISD::LoadV4; 2097252723Sdim EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; 2098252723Sdim LdResVTs = DAG.getVTList(ListVTs, 5); 2099252723Sdim break; 2100252723Sdim } 2101252723Sdim } 2102252723Sdim 2103252723Sdim SmallVector<SDValue, 8> OtherOps; 2104252723Sdim 2105252723Sdim // Copy regular operands 2106252723Sdim for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 2107252723Sdim OtherOps.push_back(N->getOperand(i)); 2108252723Sdim 2109252723Sdim LoadSDNode *LD = cast<LoadSDNode>(N); 2110252723Sdim 2111252723Sdim // The select routine does not have access to the LoadSDNode instance, so 2112252723Sdim // pass along the extension information 2113252723Sdim OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); 2114252723Sdim 2115252723Sdim SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], 2116252723Sdim OtherOps.size(), LD->getMemoryVT(), 2117252723Sdim LD->getMemOperand()); 2118252723Sdim 2119252723Sdim SmallVector<SDValue, 4> ScalarRes; 2120252723Sdim 2121252723Sdim for (unsigned i = 0; i < NumElts; ++i) { 2122252723Sdim SDValue Res = NewLD.getValue(i); 2123252723Sdim if (NeedTrunc) 2124252723Sdim Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); 2125252723Sdim 
ScalarRes.push_back(Res);
  }

  // Remainder of the preceding vector-load replacement routine (its beginning
  // lies above this point): the value at index NumElts of the new node is the
  // chain, which follows the NumElts per-element results.
  SDValue LoadChain = NewLD.getValue(NumElts);

  // Reassemble the scalar results into a vector of the original result type.
  SDValue BuildVec =
      DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);

  Results.push_back(BuildVec);
  Results.push_back(LoadChain);
}

/// ReplaceINTRINSIC_W_CHAIN - Custom result replacement for the NVVM
/// ldg/ldu "global" intrinsics (the _i, _f and _p variants).
///
/// \param N        The INTRINSIC_W_CHAIN node; operand 0 is read as the chain
///                 and operand 1 as the constant intrinsic ID.
/// \param DAG      The DAG used to create the replacement nodes.
/// \param Results  Receives the replacement value followed by the replacement
///                 chain.
///
/// For a vector result with 2 or 4 elements, an LDGV2/LDGV4/LDUV2/LDUV4
/// memory-intrinsic node producing one scalar per element is created and the
/// scalars are recombined with BUILD_VECTOR.  For a scalar result (asserted to
/// be i8), the intrinsic is re-emitted with an i16 result and truncated back.
/// Any other intrinsic ID, or any other vector element count, returns without
/// appending anything to \p Results.
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Chain = N->getOperand(0);
  SDValue Intrin = N->getOperand(1);
  SDLoc DL(N);

  // Get the intrinsic ID
  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
  switch (IntrinNo) {
  default:
    // Not one of the ldg/ldu intrinsics handled here.
    return;
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    EVT ResVT = N->getValueType(0);

    if (ResVT.isVector()) {
      // Vector LDG/LDU

      unsigned NumElts = ResVT.getVectorNumElements();
      EVT EltVT = ResVT.getVectorElementType();

      // Since LDU/LDG are target nodes, we cannot rely on DAG type
      // legalization.
      // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
      // loaded type to i16 and propagate the "real" type as the memory type.
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      // Choose the target opcode from the element count (2 or 4) and the
      // intrinsic family (ldg vs. ldu), and build the matching result list:
      // one EltVT per element followed by the chain (MVT::Other).
      switch (NumElts) {
      default:
        // Only 2- and 4-element vectors are handled.
        return;
      case 2:
        switch (IntrinNo) {
        default:
          // The enclosing switch already limits IntrinNo to the six cases
          // below, so this default is only a safeguard.
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch (IntrinNo) {
        default:
          // Safeguard only; see the 2-element case.
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs, 5);
        break;
      }
      }

      SmallVector<SDValue, 8> OtherOps;

      // Copy regular operands

      OtherOps.push_back(Chain); // Chain
      // Skip operand 1 (intrinsic ID)
      // Others
      for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
        OtherOps.push_back(N->getOperand(i));

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // The new node keeps the original node's memory VT and memory operand,
      // even when the result element type was widened to i16 above.
      SDValue NewLD = DAG.getMemIntrinsicNode(
          Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
          MemSD->getMemoryVT(), MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      // Collect the per-element results, truncating each one back to the
      // original element type if it was widened.
      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res =
              DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
        ScalarRes.push_back(Res);
      }

      // The chain is the result that follows the NumElts element results.
      SDValue LoadChain = NewLD.getValue(NumElts);

      SDValue BuildVec =
          DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        Ops.push_back(N->getOperand(i));

      // Force output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
                                  Ops.size(), MVT::i8, MemSD->getMemOperand());

      // Hand back an i8 value (truncated from the i16 result) plus the chain.
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                                    NewLD.getValue(0)));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}

/// ReplaceNodeResults - Dispatch custom result replacement by node opcode.
///
/// ISD::LOAD nodes are forwarded to ReplaceLoadVector and
/// ISD::INTRINSIC_W_CHAIN nodes to ReplaceINTRINSIC_W_CHAIN; each helper
/// receives \p N, \p DAG and \p Results.  Any other opcode is reported through
/// report_fatal_error.
void NVPTXTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    report_fatal_error("Unhandled custom legalization");
  case ISD::LOAD:
    ReplaceLoadVector(N, DAG, Results);
    return;
  case ISD::INTRINSIC_W_CHAIN:
    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
    return;
  }
}

// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
// Out-of-line definition of NVPTXSection::anchor(); the body is intentionally
// empty.
void NVPTXSection::anchor() {}

// Destructor: deletes every section pointer listed below.
// NOTE(review): the allocation site of these sections is not visible in this
// part of the file -- presumably this class creates them during
// initialization; confirm ownership before changing this list.
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
  // Text, data, BSS and read-only sections.
  delete TextSection;
  delete DataSection;
  delete BSSSection;
  delete ReadOnlySection;

  // Static constructor/destructor, LSDA, EH-frame and DWARF sections.
  delete StaticCtorSection;
  delete StaticDtorSection;
  delete LSDASection;
  delete EHFrameSection;
  delete DwarfAbbrevSection;
  delete DwarfInfoSection;
  delete DwarfLineSection;
  delete DwarfFrameSection;
  delete DwarfPubTypesSection;
  delete DwarfDebugInlineSection;
  delete DwarfStrSection;
  delete DwarfLocSection;
  delete DwarfARangesSection;
  delete DwarfRangesSection;
  delete DwarfMacroInfoSection;
}