NVPTXISelLowering.cpp revision 243830
//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//


#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalValue.h"
#include "llvm/Module.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCSectionELF.h"
#include <sstream>

#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

static unsigned int uniqueCallSite = 0;

static cl::opt<bool>
RetainVectorOperands("nvptx-codegen-vectors",
    cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
    cl::init(true));

static cl::opt<bool>
sched4reg("nvptx-sched4reg",
          cl::desc("NVPTX Specific: schedule for register pressure"),
          cl::init(false));

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
: TargetLowering(TM, new NVPTXTargetObjectFile()),
  nvTM(&TM),
  nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy or memmove.
  maxStoresPerMemset = (unsigned)0xFFFFFFFF;
  maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
  maxStoresPerMemmove = (unsigned)0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  // Jump is expensive. Don't create extra control flow for 'and', 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // By default, use the Source scheduling
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  if (RetainVectorOperands) {
    addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
    addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
    addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
    addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
    addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
    addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
    addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
    addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
    addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
    addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8, Custom);

    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8, Custom);
  }

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (nvptxSubtarget.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (nvptxSubtarget.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fextend
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store predicate registers
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
  setTruncStoreAction(MVT::i32, MVT::i1, Expand);
  setTruncStoreAction(MVT::i16, MVT::i1, Expand);
  setTruncStoreAction(MVT::i8, MVT::i1, Expand);

  // This is legal in NVPTX
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // By default, CONCAT_VECTORS is implemented via store/load
  // through the stack. It is slow and uses local memory. We need
  // to custom-lower them.
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8, Custom);

  // Expand vector int to float and float to int conversions
  // - For SINT_TO_FP and UINT_TO_FP, the src type
  //   (Node->getOperand(0).getValueType())
  //   is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
  //   the dest type (Node->getValueType(0)) is used.
  //
  //   See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
  //   case, and
  //   SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
  //
  //   That is why v4i32 or v2i32 are used here.
  //
  //   The expansion for vectors happens in VectorLegalizer::LegalizeOp()
  //   (LegalizeVectorOps.cpp).
  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);

  // Now deduce the information based on the above-mentioned actions.
  computeRegisterProperties();
}


const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case NVPTXISD::CALL: return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG";
  case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper";
  case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin";
  case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam:
    return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall";
  case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam";
  case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32";
  case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam";
  case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg: return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal: return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype: return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam";
  case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval";
  case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval";
  case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval";
  case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
  }
}


SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
  return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}

std::string
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  unsigned retAlignment) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID)
    O << "()";
  else {
    O << "(";
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
        unsigned size = 0;
        if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
          size = ITy->getBitWidth();
          if (size < 32) size = 32;
        } else {
          assert(retTy->isFloatingPointTy() &&
                 "Floating point type expected here");
          size = retTy->getPrimitiveSizeInBits();
        }

        O << ".param .b" << size << " _";
      } else if (isa<PointerType>(retTy))
        O << ".param .b" << getPointerTy().getSizeInBits() << " _";
      else {
        if ((retTy->getTypeID() == Type::StructTyID) ||
            isa<VectorType>(retTy)) {
          SmallVector<EVT, 16> vtparts;
          ComputeValueVTs(*this, retTy, vtparts);
          unsigned totalsz = 0;
          for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
            unsigned elems = 1;
            EVT elemtype = vtparts[i];
            if (vtparts[i].isVector()) {
              elems = vtparts[i].getVectorNumElements();
              elemtype = vtparts[i].getVectorElementType();
            }
            for (unsigned j = 0, je = elems; j != je; ++j) {
              unsigned sz = elemtype.getSizeInBits();
              if (elemtype.isInteger() && (sz < 8)) sz = 8;
              totalsz += sz / 8;
            }
          }
          O << ".param .align " << retAlignment
            << " .b8 _[" << totalsz << "]";
        } else {
          assert(false && "Unknown return type");
        }
      }
    } else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, retTy, vtparts);
      unsigned idx = 0;
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " _";
          if (j < je - 1) O << ", ";
          ++idx;
        }
        if (i < e - 1)
          O << ", ";
      }
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;
  MVT thePointerTy = getPointerTy();

  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    const Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (Outs[i].Flags.isByVal() == false) {
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32) sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      if (isABI)
        O << ".param .b" << sz << " ";
      else
        O << ".reg .b" << sz << " ";
      O << "_";
      continue;
    }
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    if (isABI) {
      unsigned align = Outs[i].Flags.getByValAlign();
      unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
      O << ".param .align " << align << " .b8 ";
      O << "_";
      O << "[" << sz << "]";
      continue;
    } else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, ETy, vtparts);
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " ";
          O << "_";
          if (j < je - 1) O << ", ";
        }
        if (i < e - 1)
          O << ", ";
      }
      continue;
    }
  }
  O << ");";
  return O.str();
}


SDValue
NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                               SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  DebugLoc &dl = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.Args;
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  SDValue tempChain = Chain;
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(uniqueCallSite, true));
  SDValue InFlag = Chain.getValue(1);

  assert((Outs.size() == Args.size()) &&
         "Unexpected number of arguments to function call");
  unsigned paramCount = 0;
  // Declare the .param or .reg variables needed to pass values
  // to the function.
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    EVT VT = Outs[i].VT;

    if (Outs[i].Flags.isByVal() == false) {
      // Plain scalar
      // for ABI,    declare .param .b<size> .param<n>;
      // for nonABI, declare .reg .b<size> .param<n>;
      unsigned isReg = 1;
      if (isABI)
        isReg = 0;
      unsigned sz = VT.getSizeInBits();
      if (VT.isInteger() && (sz < 32)) sz = 32;
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    DAG.getConstant(isReg, MVT::i32),
                                    InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareParamOps, 5);
      InFlag = Chain.getValue(1);
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                 DAG.getConstant(0, MVT::i32), OutVals[i],
                                 InFlag };

      unsigned opcode = NVPTXISD::StoreParam;
      if (isReg)
        opcode = NVPTXISD::MoveToParam;
      else {
        if (Outs[i].Flags.isZExt())
          opcode = NVPTXISD::StoreParamU32;
        else if (Outs[i].Flags.isSExt())
          opcode = NVPTXISD::StoreParamS32;
      }
      Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // struct or vector
    SmallVector<EVT, 16> vtparts;
    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be pointer");
    ComputeValueVTs(*this, PTy->getElementType(), vtparts);

    if (isABI) {
      // declare .param .align 16 .b8 .param<n>[<size>];
      unsigned sz = Outs[i].Flags.getByValSize();
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      // The ByValAlign in the Outs[i].Flags is always set at this point, so we
      // don't need to worry about natural alignment or not.
      // See TargetLowering::LowerCallTo()
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(Outs[i].Flags.getByValAlign(),
                                                    MVT::i32),
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                          DeclareParamOps, 5);
      InFlag = Chain.getValue(1);
      unsigned curOffset = 0;
      for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
        unsigned elems = 1;
        EVT elemtype = vtparts[j];
        if (vtparts[j].isVector()) {
          elems = vtparts[j].getVectorNumElements();
          elemtype = vtparts[j].getVectorElementType();
        }
        for (unsigned k = 0, ke = elems; k != ke; ++k) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 8)) sz = 8;
          SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                        OutVals[i],
                                        DAG.getConstant(curOffset,
                                                        getPointerTy()));
          SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                       MachinePointerInfo(), false, false,
                                       false, 0);
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(curOffset, MVT::i32),
                                     theVal, InFlag };
          Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                              CopyParamOps, 5);
          InFlag = Chain.getValue(1);
          curOffset += sz / 8;
        }
      }
      ++paramCount;
      continue;
    }
    // Non-ABI, struct or vector
    // Declare a bunch of .reg .b<size> .param<n>
    unsigned curOffset = 0;
    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
      unsigned elems = 1;
      EVT elemtype = vtparts[j];
      if (vtparts[j].isVector()) {
        elems = vtparts[j].getVectorNumElements();
        elemtype = vtparts[j].getVectorElementType();
      }
      for (unsigned k = 0, ke = elems; k != ke; ++k) {
        unsigned sz = elemtype.getSizeInBits();
        if (elemtype.isInteger() && (sz < 32)) sz = 32;
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain,
                                      DAG.getConstant(paramCount, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32),
                                      DAG.getConstant(1, MVT::i32),
                                      InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                            DeclareParamOps, 5);
        InFlag = Chain.getValue(1);
        SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
                                      DAG.getConstant(curOffset,
                                                      getPointerTy()));
        SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                     MachinePointerInfo(), false, false,
                                     false, 0);
        SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                   DAG.getConstant(0, MVT::i32), theVal,
                                   InFlag };
        Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
                            CopyParamOps, 5);
        InFlag = Chain.getValue(1);
        ++paramCount;
      }
    }
  }

  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle Result
  unsigned retCount = 0;
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, retTy, resvtparts);

    // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
    // individual .reg .b<size> func_retval<0..> for non-ABI
    unsigned resultsz = 0;
    for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
      unsigned elems = 1;
      EVT elemtype = resvtparts[i];
      if (resvtparts[i].isVector()) {
        elems = resvtparts[i].getVectorNumElements();
        elemtype = resvtparts[i].getVectorElementType();
      }
      for (unsigned j = 0, je = elems; j != je; ++j) {
        unsigned sz = elemtype.getSizeInBits();
        if (isABI == false) {
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
        } else {
          if (elemtype.isInteger() && (sz < 8)) sz = 8;
        }
        if (isABI == false) {
          SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32),
                                      DAG.getConstant(retCount, MVT::i32),
                                      InFlag };
          Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                              DeclareRetOps, 5);
          InFlag = Chain.getValue(1);
          ++retCount;
        }
        resultsz += sz;
      }
    }
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
          retTy->isPointerTy()) {
        // Scalar needs to be at least 32bit wide
        if (resultsz < 32)
          resultsz = 32;
        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                                    DAG.getConstant(resultsz, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                            DeclareRetOps, 5);
        InFlag = Chain.getValue(1);
      } else {
        if (Func) { // direct call
          if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
            retAlignment = getDataLayout()->getABITypeAlignment(retTy);
        } else { // indirect call
          const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
          if (!llvm::getAlign(*CallI, 0, retAlignment))
            retAlignment = getDataLayout()->getABITypeAlignment(retTy);
        }
        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareRetOps[] = { Chain,
                                    DAG.getConstant(retAlignment, MVT::i32),
                                    DAG.getConstant(resultsz / 8, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                            DeclareRetOps, 5);
        InFlag = Chain.getValue(1);
      }
    }
  }

  if (!Func) {
    // This is the indirect function call case: PTX requires a prototype of the
    // form
    //   proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
    // to be emitted, and the label has to be used as the last arg of the call
    // instruction.
    // The prototype is embedded in a string and put as the operand for an
    // INLINEASM SDNode.
    SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
    const char *asmstr = nvTM->getManagedStrPool()->
        getManagedString(proto_string.c_str())->c_str();
    SDValue InlineAsmOps[] = { Chain,
                               DAG.getTargetExternalSymbol(asmstr,
                                                           getPointerTy()),
                               DAG.getMDNode(0),
                               DAG.getTargetConstant(0, MVT::i32), InFlag };
    Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
    InFlag = Chain.getValue(1);
  }
  // Op to just print "call"
  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue PrintCallOps[] = { Chain,
                             DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1)
                                                   : retCount, MVT::i32),
                             InFlag };
  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
                      dl, PrintCallVTs, PrintCallOps, 3);
  InFlag = Chain.getValue(1);

  // Ops to print out the function name
  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
  InFlag = Chain.getValue(1);

  // Ops to print out the param list
  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgBeginOps[] = { Chain, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
                      CallArgBeginOps, 2);
  InFlag = Chain.getValue(1);

  for (unsigned i = 0, e = paramCount; i != e; ++i) {
    unsigned opcode;
    if (i == (e - 1))
      opcode = NVPTXISD::LastCallArg;
    else
      opcode = NVPTXISD::CallArg;
    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                             DAG.getConstant(i, MVT::i32),
                             InFlag };
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
    InFlag = Chain.getValue(1);
  }
  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgEndOps[] = { Chain,
                              DAG.getConstant(Func ? 1 : 0, MVT::i32),
                              InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
                      3);
  InFlag = Chain.getValue(1);

  if (!Func) {
    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue PrototypeOps[] = { Chain,
                               DAG.getConstant(uniqueCallSite, MVT::i32),
                               InFlag };
    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
    InFlag = Chain.getValue(1);
  }

  // Generate loads from param memory/moves from registers for result
  if (Ins.size() > 0) {
    if (isABI) {
      unsigned resoffset = 0;
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
        unsigned sz = Ins[i].VT.getSizeInBits();
        if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
        std::vector<EVT> LoadRetVTs;
        LoadRetVTs.push_back(Ins[i].VT);
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);
        std::vector<SDValue> LoadRetOps;
        LoadRetOps.push_back(Chain);
        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
        LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
        LoadRetOps.push_back(InFlag);
        SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
                                     &LoadRetOps[0], LoadRetOps.size());
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        InVals.push_back(retval);
        resoffset += sz / 8;
      }
    } else {
      SmallVector<EVT, 16> resvtparts;
      ComputeValueVTs(*this, retTy, resvtparts);

      assert(Ins.size() == resvtparts.size() &&
             "Unexpected number of return values in non-ABI case");
      unsigned paramNum = 0;
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
        assert(EVT(Ins[i].VT) == resvtparts[i] &&
               "Unexpected EVT type in non-ABI case");
        unsigned numelems = 1;
        EVT elemtype = Ins[i].VT;
        if (Ins[i].VT.isVector()) {
          numelems = Ins[i].VT.getVectorNumElements();
          elemtype = Ins[i].VT.getVectorElementType();
        }
        std::vector<SDValue> tempRetVals;
        for (unsigned j = 0; j < numelems; ++j) {
          std::vector<EVT> MoveRetVTs;
          MoveRetVTs.push_back(elemtype);
          MoveRetVTs.push_back(MVT::Other);
          MoveRetVTs.push_back(MVT::Glue);
          std::vector<SDValue> MoveRetOps;
          MoveRetOps.push_back(Chain);
          MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
          MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
          MoveRetOps.push_back(InFlag);
          SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
                                       &MoveRetOps[0], MoveRetOps.size());
          Chain = retval.getValue(1);
          InFlag = retval.getValue(2);
          tempRetVals.push_back(retval);
          ++paramNum;
        }
        if (Ins[i].VT.isVector())
          InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
                                       &tempRetVals[0], tempRetVals.size()));
        else
          InVals.push_back(tempRetVals[0]);
      }
    }
  }
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(uniqueCallSite, true),
                             DAG.getIntPtrConstant(uniqueCallSite + 1, true),
                             InFlag);
  uniqueCallSite++;

  // Set isTailCall to false for now, until we figure out how to express
  // tail call optimization in PTX.
  isTailCall = false;
  return Chain;
}

// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/insert/build vector just as LegalizeOp() does in llvm 2.5.
SDValue NVPTXTargetLowering::
LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  SmallVector<SDValue, 8> Ops;
  unsigned NumOperands = Node->getNumOperands();
  for (unsigned i = 0; i < NumOperands; ++i) {
    SDValue SubOp = Node->getOperand(i);
    EVT VVT = SubOp.getNode()->getValueType(0);
    EVT EltVT = VVT.getVectorElementType();
    unsigned NumSubElem = VVT.getVectorNumElements();
    for (unsigned j = 0; j < NumSubElem; ++j) {
      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
                                DAG.getIntPtrConstant(j)));
    }
  }
  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
                     &Ops[0], Ops.size());
}

SDValue NVPTXTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::RETURNADDR: return SDValue();
  case ISD::FRAMEADDR: return SDValue();
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN: return Op;
  case ISD::BUILD_VECTOR:
  case ISD::EXTRACT_SUBVECTOR:
    return Op;
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  default:
    llvm_unreachable("Custom lowering not defined for operation");
  }
}


// v = ld i1* addr
//   =>
// v1 = ld i8* addr
// v = trunc v1 to i1
SDValue NVPTXTargetLowering::
LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  LoadSDNode *LD = cast<LoadSDNode>(Node);
  DebugLoc dl = Node->getDebugLoc();
  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
  assert(Node->getValueType(0) == MVT::i1 &&
         "Custom lowering for i1 load only");
  SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
                              LD->getPointerInfo(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(),
                              LD->getAlignment());
  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
  // The legalizer (the caller) is expecting two values from the legalized
  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
  // in LegalizeDAG.cpp which also uses MergeValues.
  SDValue Ops[] = { result, LD->getChain() };
  return DAG.getMergeValues(Ops, 2, dl);
}

// st i1 v, addr
//   =>
// v1 = zxt v to i8
// st i8, addr
SDValue NVPTXTargetLowering::
LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  StoreSDNode *ST = cast<StoreSDNode>(Node);
  SDValue Tmp1 = ST->getChain();
  SDValue Tmp2 = ST->getBasePtr();
  SDValue Tmp3 = ST->getValue();
  assert(Tmp3.getValueType() == MVT::i1 &&
         "Custom lowering for i1 store only");
  unsigned Alignment = ST->getAlignment();
  bool isVolatile = ST->isVolatile();
  bool isNonTemporal = ST->isNonTemporal();
  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
  SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
                                ST->getPointerInfo(), isVolatile,
                                isNonTemporal, Alignment);
  return Result;
}


SDValue
NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
                                EVT v) const {
  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
  std::stringstream suffix;
  suffix << idx;
  *name += suffix.str();
  return DAG.getTargetExternalSymbol(name->c_str(), v);
}

SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
  return getExtSymb(DAG, ".PARAM", idx, v);
}

SDValue
NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
  return getExtSymb(DAG, ".HLPPARAM", idx);
}

// Check to see if the kernel argument is image*_t or sampler_t

bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
  static const char *const specialTypes[] = {
    "struct._image2d_t",
    "struct._image3d_t",
    "struct._sampler_t"
  };

  const Type *Ty = arg->getType();
  const PointerType *PTy = dyn_cast<PointerType>(Ty);

  if (!PTy)
    return false;

  if (!context)
    return false;

  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
  const std::string TypeName = STy ? STy->getName() : "";

  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
    if (TypeName == specialTypes[i])
      return true;

  return false;
}

SDValue
NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
                                     CallingConv::ID CallConv, bool isVarArg,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                     DebugLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout *TD = getDataLayout();

  const Function *F = MF.getFunction();
  const AttrListPtr &PAL = F->getAttributes();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  assert(argTypes.size() == Ins.size() &&
         "Ins types and function types did not match");

  int idx = 0;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i, ++idx) {
    Type *Ty = argTypes[i];
    EVT ObjectVT = getValueType(Ty);
    assert(ObjectVT == Ins[i].VT &&
           "Ins type did not match function type");

    // If the kernel argument is image*_t or sampler_t, convert it to
    // an i32 constant holding the parameter position. This can later
    // be matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(theArgs[i],
                            (theArgs[i]->getParent() ?
                             theArgs[i]->getParent()->getParent() : 0))) {
      assert(isKernel && "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
      continue;
    }

    if (theArgs[i]->use_empty()) {
      // argument is dead
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
      continue;
    }

    // In the following cases, assign a node order of "idx+1"
    // to newly created nodes. The SDNodes for params have to
    // appear in the same order as their order of appearance
    // in the original function. "idx+1" holds that order.
    if (PAL.getParamAttributes(i + 1).hasAttribute(Attributes::ByVal) == false) {
      // A plain scalar.
      if (isABI || isKernel) {
        // If ABI, load from the param symbol
        SDValue Arg = getParamSymbol(DAG, idx);
        Value *srcValue = new Argument(
            PointerType::get(ObjectVT.getTypeForEVT(F->getContext()),
                             llvm::ADDRESS_SPACE_PARAM));
        SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
                                MachinePointerInfo(srcValue), false, false,
                                false,
                                TD->getABITypeAlignment(
                                    ObjectVT.getTypeForEVT(F->getContext())));
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx + 1);
        InVals.push_back(p);
      } else {
        // If no ABI, just move the param symbol
        SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
        SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx + 1);
        InVals.push_back(p);
      }
      continue;
    }

    // Param has ByVal attribute
    if (isABI || isKernel) {
      // Return MoveParam(param symbol).
      // Ideally, the param symbol could be returned directly,
      // but when the SDNode builder decides to use it in a CopyToReg(),
      // the machine instruction fails because TargetExternalSymbol
      // (not lowered) is target dependent, and CopyToReg assumes
      // the source is lowered.
      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
      SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
      if (p.getNode())
        DAG.AssignOrdering(p.getNode(), idx + 1);
      if (isKernel)
        InVals.push_back(p);
      else {
        SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
                                 DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen,
                                                 MVT::i32),
                                 p);
        InVals.push_back(p2);
      }
    } else {
      // Have to move a set of param symbols to registers,
      // store them locally, and return the local pointer in InVals
      const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
      assert(elemPtrType && "Byval parameter should be a pointer type");
      Type *elemType = elemPtrType->getElementType();
      // Compute the constituent parts
      SmallVector<EVT, 16> vtparts;
      SmallVector<uint64_t, 16> offsets;
      ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
      unsigned totalsize = 0;
      for (unsigned j = 0, je = vtparts.size(); j != je; ++j)
        totalsize += vtparts[j].getStoreSizeInBits();
      SDValue localcopy = DAG.getFrameIndex(
          MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false),
          getPointerTy());
      unsigned sizesofar = 0;
      std::vector<SDValue> theChains;
      for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
        unsigned numElems = 1;
        if (vtparts[j].isVector())
          numElems = vtparts[j].getVectorNumElements();
        for (unsigned k = 0, ke = numElems; k != ke; ++k) {
          EVT tmpvt = vtparts[j];
          if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
          SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
                                    getParamSymbol(DAG, idx, tmpvt));
          SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
                                     DAG.getConstant(sizesofar, getPointerTy()));
          theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
                                           MachinePointerInfo(), false, false,
                                           0));
          sizesofar += tmpvt.getStoreSizeInBits() / 8;
          ++idx;
        }
      }
      --idx;
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
                          theChains.size());
      InVals.push_back(localcopy);
    }
  }

  // Clang will check explicit VarArg and issue an error if any. However, Clang
  // will let code with an implicit var-arg declaration like f() pass.
  // We treat this case as if the arg list is empty.
  //if (F.isVarArg()) {
  //  assert(0 && "VarArg not supported yet!");
  //}

  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                            &OutChains[0], OutChains.size()));

  return Chain;
}

SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 DebugLoc dl, SelectionDAG &DAG) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  unsigned sizesofar = 0;
  unsigned idx = 0;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    SDValue theVal = OutVals[i];
    EVT theValType = theVal.getValueType();
    unsigned numElems = 1;
    if (theValType.isVector())
      numElems = theValType.getVectorNumElements();
    for (unsigned j = 0, je = numElems; j != je; ++j) {
      SDValue tmpval = theVal;
      if (theValType.isVector())
        tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                             theValType.getVectorElementType(),
                             tmpval, DAG.getIntPtrConstant(j));
      Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval,
                          dl, MVT::Other,
                          Chain,
                          DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
                          tmpval);
      if (theValType.isVector())
        sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8;
      else
        sizesofar += theValType.getStoreSizeInBits() / 8;
      ++idx;
    }
  }

  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}

void
NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const
{
  if (Constraint.length() > 1)
    return;
  else
    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// NVPTX supports vectors of legal types of any length in intrinsics, because
// the NVPTX-specific type legalizer will legalize them to the PTX-supported
// length.
bool
NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
  if (isTypeLegal(VT))
    return true;
  if (VT.isVector()) {
    MVT eVT = VT.getVectorElementType();
    if (isTypeLegal(eVT))
      return true;
  }
  return false;
}


// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need information that is only available in
// the "Value" type of the destination pointer, in particular the address
// space information.
bool
NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                                        unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;

  case Intrinsic::nvvm_atomic_load_add_f32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p:

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
      Info.memVT = MVT::i32;
    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
      Info.memVT = getPointerTy();
    else
      Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 0;
    return true;

  }
  return false;
}

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
bool
NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                           Type *Ty) const {

  // AddrMode - This represents an addressing mode of:
  //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  //
  // The legal address modes are
  // - [avar]
  // - [areg]
  // - [areg+immoff]
  // - [immAddr]

  if (AM.BaseGV) {
    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
      return false;
    return true;
  }

  switch (AM.Scale) {
  case 0: // "r", "r+i" or "i" is allowed
    break;
  case 1:
    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
      return false;
    // Otherwise we have r+i.
    break;
  default:
    // No scale > 1 is allowed
    return false;
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                         NVPTX Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'r':
    case 'h':
    case 'c':
    case 'l':
    case 'f':
    case 'd':
    case '0':
    case 'N':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}


std::pair<unsigned, const TargetRegisterClass*>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                  EVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'c':
      return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
    case 'h':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'r':
      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
    case 'l':
    case 'N':
      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
    case 'f':
      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
    case 'd':
      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}



/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
  return 4;
}