NVPTXISelLowering.cpp revision 280031
1// 2// The LLVM Compiler Infrastructure 3// 4// This file is distributed under the University of Illinois Open Source 5// License. See LICENSE.TXT for details. 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the interfaces that NVPTX uses to lower LLVM code into a 10// selection DAG. 11// 12//===----------------------------------------------------------------------===// 13 14#include "NVPTXISelLowering.h" 15#include "NVPTX.h" 16#include "NVPTXTargetMachine.h" 17#include "NVPTXTargetObjectFile.h" 18#include "NVPTXUtilities.h" 19#include "llvm/CodeGen/Analysis.h" 20#include "llvm/CodeGen/MachineFrameInfo.h" 21#include "llvm/CodeGen/MachineFunction.h" 22#include "llvm/CodeGen/MachineInstrBuilder.h" 23#include "llvm/CodeGen/MachineRegisterInfo.h" 24#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 25#include "llvm/IR/CallSite.h" 26#include "llvm/IR/DerivedTypes.h" 27#include "llvm/IR/Function.h" 28#include "llvm/IR/GlobalValue.h" 29#include "llvm/IR/IntrinsicInst.h" 30#include "llvm/IR/Intrinsics.h" 31#include "llvm/IR/Module.h" 32#include "llvm/MC/MCSectionELF.h" 33#include "llvm/Support/CommandLine.h" 34#include "llvm/Support/Debug.h" 35#include "llvm/Support/ErrorHandling.h" 36#include "llvm/Support/MathExtras.h" 37#include "llvm/Support/raw_ostream.h" 38#include <sstream> 39 40#undef DEBUG_TYPE 41#define DEBUG_TYPE "nvptx-lower" 42 43using namespace llvm; 44 45static unsigned int uniqueCallSite = 0; 46 47static cl::opt<bool> sched4reg( 48 "nvptx-sched4reg", 49 cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); 50 51static cl::opt<unsigned> 52FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, 53 cl::desc("NVPTX Specific: FMA contraction (0: don't do it" 54 " 1: do it 2: do it aggressively"), 55 cl::init(2)); 56 57static bool IsPTXVectorType(MVT VT) { 58 switch (VT.SimpleTy) { 59 default: 60 return false; 61 case MVT::v2i1: 62 case MVT::v4i1: 63 case 
MVT::v2i8: 64 case MVT::v4i8: 65 case MVT::v2i16: 66 case MVT::v4i16: 67 case MVT::v2i32: 68 case MVT::v4i32: 69 case MVT::v2i64: 70 case MVT::v2f32: 71 case MVT::v4f32: 72 case MVT::v2f64: 73 return true; 74 } 75} 76 77/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive 78/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors 79/// into their primitive components. 80/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the 81/// same number of types as the Ins/Outs arrays in LowerFormalArguments, 82/// LowerCall, and LowerReturn. 83static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, 84 SmallVectorImpl<EVT> &ValueVTs, 85 SmallVectorImpl<uint64_t> *Offsets = nullptr, 86 uint64_t StartingOffset = 0) { 87 SmallVector<EVT, 16> TempVTs; 88 SmallVector<uint64_t, 16> TempOffsets; 89 90 ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); 91 for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { 92 EVT VT = TempVTs[i]; 93 uint64_t Off = TempOffsets[i]; 94 if (VT.isVector()) 95 for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) { 96 ValueVTs.push_back(VT.getVectorElementType()); 97 if (Offsets) 98 Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize()); 99 } 100 else { 101 ValueVTs.push_back(VT); 102 if (Offsets) 103 Offsets->push_back(Off); 104 } 105 } 106} 107 108// NVPTXTargetLowering Constructor. 109NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) 110 : TargetLowering(TM), nvTM(&TM), 111 nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { 112 113 // always lower memset, memcpy, and memmove intrinsics to load/store 114 // instructions, rather 115 // then generating calls to memset, mempcy or memmove. 
116 MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; 117 MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; 118 MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; 119 120 setBooleanContents(ZeroOrNegativeOneBooleanContent); 121 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 122 123 // Jump is Expensive. Don't create extra control flow for 'and', 'or' 124 // condition branches. 125 setJumpIsExpensive(true); 126 127 // By default, use the Source scheduling 128 if (sched4reg) 129 setSchedulingPreference(Sched::RegPressure); 130 else 131 setSchedulingPreference(Sched::Source); 132 133 addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); 134 addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); 135 addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); 136 addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); 137 addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); 138 addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); 139 140 // Operations not directly supported by NVPTX. 141 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 142 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 143 setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); 144 setOperationAction(ISD::SELECT_CC, MVT::i8, Expand); 145 setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); 146 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 147 setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); 148 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 149 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 150 setOperationAction(ISD::BR_CC, MVT::i1, Expand); 151 setOperationAction(ISD::BR_CC, MVT::i8, Expand); 152 setOperationAction(ISD::BR_CC, MVT::i16, Expand); 153 setOperationAction(ISD::BR_CC, MVT::i32, Expand); 154 setOperationAction(ISD::BR_CC, MVT::i64, Expand); 155 // Some SIGN_EXTEND_INREG can be done using cvt instruction. 156 // For others we will expand to a SHL/SRA pair. 
157 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); 158 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); 159 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); 160 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); 161 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 162 163 setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); 164 setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); 165 setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); 166 setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); 167 setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); 168 setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); 169 170 if (nvptxSubtarget.hasROT64()) { 171 setOperationAction(ISD::ROTL, MVT::i64, Legal); 172 setOperationAction(ISD::ROTR, MVT::i64, Legal); 173 } else { 174 setOperationAction(ISD::ROTL, MVT::i64, Expand); 175 setOperationAction(ISD::ROTR, MVT::i64, Expand); 176 } 177 if (nvptxSubtarget.hasROT32()) { 178 setOperationAction(ISD::ROTL, MVT::i32, Legal); 179 setOperationAction(ISD::ROTR, MVT::i32, Legal); 180 } else { 181 setOperationAction(ISD::ROTL, MVT::i32, Expand); 182 setOperationAction(ISD::ROTR, MVT::i32, Expand); 183 } 184 185 setOperationAction(ISD::ROTL, MVT::i16, Expand); 186 setOperationAction(ISD::ROTR, MVT::i16, Expand); 187 setOperationAction(ISD::ROTL, MVT::i8, Expand); 188 setOperationAction(ISD::ROTR, MVT::i8, Expand); 189 setOperationAction(ISD::BSWAP, MVT::i16, Expand); 190 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 191 setOperationAction(ISD::BSWAP, MVT::i64, Expand); 192 193 // Indirect branch is not supported. 194 // This also disables Jump Table creation. 
195 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 196 setOperationAction(ISD::BRIND, MVT::Other, Expand); 197 198 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 199 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 200 201 // We want to legalize constant related memmove and memcopy 202 // intrinsics. 203 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 204 205 // Turn FP extload into load/fextend 206 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 207 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 208 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 209 // Turn FP truncstore into trunc + store. 210 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 211 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 212 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 213 214 // PTX does not support load / store predicate registers 215 setOperationAction(ISD::LOAD, MVT::i1, Custom); 216 setOperationAction(ISD::STORE, MVT::i1, Custom); 217 218 for (MVT VT : MVT::integer_valuetypes()) { 219 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 220 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 221 setTruncStoreAction(VT, MVT::i1, Expand); 222 } 223 224 // This is legal in NVPTX 225 setOperationAction(ISD::ConstantFP, MVT::f64, Legal); 226 setOperationAction(ISD::ConstantFP, MVT::f32, Legal); 227 228 // TRAP can be lowered to PTX trap 229 setOperationAction(ISD::TRAP, MVT::Other, Legal); 230 231 setOperationAction(ISD::ADDC, MVT::i64, Expand); 232 setOperationAction(ISD::ADDE, MVT::i64, Expand); 233 234 // Register custom handling for vector loads/stores 235 for (MVT VT : MVT::vector_valuetypes()) { 236 if (IsPTXVectorType(VT)) { 237 setOperationAction(ISD::LOAD, VT, Custom); 238 setOperationAction(ISD::STORE, VT, Custom); 239 setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); 240 } 241 } 242 243 // Custom handling for i8 intrinsics 244 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); 
245 246 setOperationAction(ISD::CTLZ, MVT::i16, Legal); 247 setOperationAction(ISD::CTLZ, MVT::i32, Legal); 248 setOperationAction(ISD::CTLZ, MVT::i64, Legal); 249 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal); 250 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal); 251 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal); 252 setOperationAction(ISD::CTTZ, MVT::i16, Expand); 253 setOperationAction(ISD::CTTZ, MVT::i32, Expand); 254 setOperationAction(ISD::CTTZ, MVT::i64, Expand); 255 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); 256 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); 257 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); 258 setOperationAction(ISD::CTPOP, MVT::i16, Legal); 259 setOperationAction(ISD::CTPOP, MVT::i32, Legal); 260 setOperationAction(ISD::CTPOP, MVT::i64, Legal); 261 262 // We have some custom DAG combine patterns for these nodes 263 setTargetDAGCombine(ISD::ADD); 264 setTargetDAGCombine(ISD::AND); 265 setTargetDAGCombine(ISD::FADD); 266 setTargetDAGCombine(ISD::MUL); 267 setTargetDAGCombine(ISD::SHL); 268 269 // Now deduce the information based on the above mentioned 270 // actions 271 computeRegisterProperties(); 272} 273 274const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { 275 switch (Opcode) { 276 default: 277 return nullptr; 278 case NVPTXISD::CALL: 279 return "NVPTXISD::CALL"; 280 case NVPTXISD::RET_FLAG: 281 return "NVPTXISD::RET_FLAG"; 282 case NVPTXISD::Wrapper: 283 return "NVPTXISD::Wrapper"; 284 case NVPTXISD::DeclareParam: 285 return "NVPTXISD::DeclareParam"; 286 case NVPTXISD::DeclareScalarParam: 287 return "NVPTXISD::DeclareScalarParam"; 288 case NVPTXISD::DeclareRet: 289 return "NVPTXISD::DeclareRet"; 290 case NVPTXISD::DeclareRetParam: 291 return "NVPTXISD::DeclareRetParam"; 292 case NVPTXISD::PrintCall: 293 return "NVPTXISD::PrintCall"; 294 case NVPTXISD::LoadParam: 295 return "NVPTXISD::LoadParam"; 296 case NVPTXISD::LoadParamV2: 297 
return "NVPTXISD::LoadParamV2"; 298 case NVPTXISD::LoadParamV4: 299 return "NVPTXISD::LoadParamV4"; 300 case NVPTXISD::StoreParam: 301 return "NVPTXISD::StoreParam"; 302 case NVPTXISD::StoreParamV2: 303 return "NVPTXISD::StoreParamV2"; 304 case NVPTXISD::StoreParamV4: 305 return "NVPTXISD::StoreParamV4"; 306 case NVPTXISD::StoreParamS32: 307 return "NVPTXISD::StoreParamS32"; 308 case NVPTXISD::StoreParamU32: 309 return "NVPTXISD::StoreParamU32"; 310 case NVPTXISD::CallArgBegin: 311 return "NVPTXISD::CallArgBegin"; 312 case NVPTXISD::CallArg: 313 return "NVPTXISD::CallArg"; 314 case NVPTXISD::LastCallArg: 315 return "NVPTXISD::LastCallArg"; 316 case NVPTXISD::CallArgEnd: 317 return "NVPTXISD::CallArgEnd"; 318 case NVPTXISD::CallVoid: 319 return "NVPTXISD::CallVoid"; 320 case NVPTXISD::CallVal: 321 return "NVPTXISD::CallVal"; 322 case NVPTXISD::CallSymbol: 323 return "NVPTXISD::CallSymbol"; 324 case NVPTXISD::Prototype: 325 return "NVPTXISD::Prototype"; 326 case NVPTXISD::MoveParam: 327 return "NVPTXISD::MoveParam"; 328 case NVPTXISD::StoreRetval: 329 return "NVPTXISD::StoreRetval"; 330 case NVPTXISD::StoreRetvalV2: 331 return "NVPTXISD::StoreRetvalV2"; 332 case NVPTXISD::StoreRetvalV4: 333 return "NVPTXISD::StoreRetvalV4"; 334 case NVPTXISD::PseudoUseParam: 335 return "NVPTXISD::PseudoUseParam"; 336 case NVPTXISD::RETURN: 337 return "NVPTXISD::RETURN"; 338 case NVPTXISD::CallSeqBegin: 339 return "NVPTXISD::CallSeqBegin"; 340 case NVPTXISD::CallSeqEnd: 341 return "NVPTXISD::CallSeqEnd"; 342 case NVPTXISD::CallPrototype: 343 return "NVPTXISD::CallPrototype"; 344 case NVPTXISD::LoadV2: 345 return "NVPTXISD::LoadV2"; 346 case NVPTXISD::LoadV4: 347 return "NVPTXISD::LoadV4"; 348 case NVPTXISD::LDGV2: 349 return "NVPTXISD::LDGV2"; 350 case NVPTXISD::LDGV4: 351 return "NVPTXISD::LDGV4"; 352 case NVPTXISD::LDUV2: 353 return "NVPTXISD::LDUV2"; 354 case NVPTXISD::LDUV4: 355 return "NVPTXISD::LDUV4"; 356 case NVPTXISD::StoreV2: 357 return "NVPTXISD::StoreV2"; 358 case 
NVPTXISD::StoreV4: 359 return "NVPTXISD::StoreV4"; 360 case NVPTXISD::FUN_SHFL_CLAMP: 361 return "NVPTXISD::FUN_SHFL_CLAMP"; 362 case NVPTXISD::FUN_SHFR_CLAMP: 363 return "NVPTXISD::FUN_SHFR_CLAMP"; 364 case NVPTXISD::IMAD: 365 return "NVPTXISD::IMAD"; 366 case NVPTXISD::MUL_WIDE_SIGNED: 367 return "NVPTXISD::MUL_WIDE_SIGNED"; 368 case NVPTXISD::MUL_WIDE_UNSIGNED: 369 return "NVPTXISD::MUL_WIDE_UNSIGNED"; 370 case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; 371 case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; 372 case NVPTXISD::Tex1DFloatFloatLevel: 373 return "NVPTXISD::Tex1DFloatFloatLevel"; 374 case NVPTXISD::Tex1DFloatFloatGrad: 375 return "NVPTXISD::Tex1DFloatFloatGrad"; 376 case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; 377 case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; 378 case NVPTXISD::Tex1DS32FloatLevel: 379 return "NVPTXISD::Tex1DS32FloatLevel"; 380 case NVPTXISD::Tex1DS32FloatGrad: 381 return "NVPTXISD::Tex1DS32FloatGrad"; 382 case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; 383 case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; 384 case NVPTXISD::Tex1DU32FloatLevel: 385 return "NVPTXISD::Tex1DU32FloatLevel"; 386 case NVPTXISD::Tex1DU32FloatGrad: 387 return "NVPTXISD::Tex1DU32FloatGrad"; 388 case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; 389 case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; 390 case NVPTXISD::Tex1DArrayFloatFloatLevel: 391 return "NVPTXISD::Tex1DArrayFloatFloatLevel"; 392 case NVPTXISD::Tex1DArrayFloatFloatGrad: 393 return "NVPTXISD::Tex1DArrayFloatFloatGrad"; 394 case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; 395 case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; 396 case NVPTXISD::Tex1DArrayS32FloatLevel: 397 return "NVPTXISD::Tex1DArrayS32FloatLevel"; 398 case NVPTXISD::Tex1DArrayS32FloatGrad: 399 return "NVPTXISD::Tex1DArrayS32FloatGrad"; 400 case 
NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; 401 case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; 402 case NVPTXISD::Tex1DArrayU32FloatLevel: 403 return "NVPTXISD::Tex1DArrayU32FloatLevel"; 404 case NVPTXISD::Tex1DArrayU32FloatGrad: 405 return "NVPTXISD::Tex1DArrayU32FloatGrad"; 406 case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; 407 case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; 408 case NVPTXISD::Tex2DFloatFloatLevel: 409 return "NVPTXISD::Tex2DFloatFloatLevel"; 410 case NVPTXISD::Tex2DFloatFloatGrad: 411 return "NVPTXISD::Tex2DFloatFloatGrad"; 412 case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; 413 case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; 414 case NVPTXISD::Tex2DS32FloatLevel: 415 return "NVPTXISD::Tex2DS32FloatLevel"; 416 case NVPTXISD::Tex2DS32FloatGrad: 417 return "NVPTXISD::Tex2DS32FloatGrad"; 418 case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; 419 case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; 420 case NVPTXISD::Tex2DU32FloatLevel: 421 return "NVPTXISD::Tex2DU32FloatLevel"; 422 case NVPTXISD::Tex2DU32FloatGrad: 423 return "NVPTXISD::Tex2DU32FloatGrad"; 424 case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; 425 case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; 426 case NVPTXISD::Tex2DArrayFloatFloatLevel: 427 return "NVPTXISD::Tex2DArrayFloatFloatLevel"; 428 case NVPTXISD::Tex2DArrayFloatFloatGrad: 429 return "NVPTXISD::Tex2DArrayFloatFloatGrad"; 430 case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; 431 case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; 432 case NVPTXISD::Tex2DArrayS32FloatLevel: 433 return "NVPTXISD::Tex2DArrayS32FloatLevel"; 434 case NVPTXISD::Tex2DArrayS32FloatGrad: 435 return "NVPTXISD::Tex2DArrayS32FloatGrad"; 436 case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; 437 case 
NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; 438 case NVPTXISD::Tex2DArrayU32FloatLevel: 439 return "NVPTXISD::Tex2DArrayU32FloatLevel"; 440 case NVPTXISD::Tex2DArrayU32FloatGrad: 441 return "NVPTXISD::Tex2DArrayU32FloatGrad"; 442 case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; 443 case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; 444 case NVPTXISD::Tex3DFloatFloatLevel: 445 return "NVPTXISD::Tex3DFloatFloatLevel"; 446 case NVPTXISD::Tex3DFloatFloatGrad: 447 return "NVPTXISD::Tex3DFloatFloatGrad"; 448 case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; 449 case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; 450 case NVPTXISD::Tex3DS32FloatLevel: 451 return "NVPTXISD::Tex3DS32FloatLevel"; 452 case NVPTXISD::Tex3DS32FloatGrad: 453 return "NVPTXISD::Tex3DS32FloatGrad"; 454 case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; 455 case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; 456 case NVPTXISD::Tex3DU32FloatLevel: 457 return "NVPTXISD::Tex3DU32FloatLevel"; 458 case NVPTXISD::Tex3DU32FloatGrad: 459 return "NVPTXISD::Tex3DU32FloatGrad"; 460 case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; 461 case NVPTXISD::TexCubeFloatFloatLevel: 462 return "NVPTXISD::TexCubeFloatFloatLevel"; 463 case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; 464 case NVPTXISD::TexCubeS32FloatLevel: 465 return "NVPTXISD::TexCubeS32FloatLevel"; 466 case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; 467 case NVPTXISD::TexCubeU32FloatLevel: 468 return "NVPTXISD::TexCubeU32FloatLevel"; 469 case NVPTXISD::TexCubeArrayFloatFloat: 470 return "NVPTXISD::TexCubeArrayFloatFloat"; 471 case NVPTXISD::TexCubeArrayFloatFloatLevel: 472 return "NVPTXISD::TexCubeArrayFloatFloatLevel"; 473 case NVPTXISD::TexCubeArrayS32Float: 474 return "NVPTXISD::TexCubeArrayS32Float"; 475 case NVPTXISD::TexCubeArrayS32FloatLevel: 476 return 
"NVPTXISD::TexCubeArrayS32FloatLevel"; 477 case NVPTXISD::TexCubeArrayU32Float: 478 return "NVPTXISD::TexCubeArrayU32Float"; 479 case NVPTXISD::TexCubeArrayU32FloatLevel: 480 return "NVPTXISD::TexCubeArrayU32FloatLevel"; 481 case NVPTXISD::Tld4R2DFloatFloat: 482 return "NVPTXISD::Tld4R2DFloatFloat"; 483 case NVPTXISD::Tld4G2DFloatFloat: 484 return "NVPTXISD::Tld4G2DFloatFloat"; 485 case NVPTXISD::Tld4B2DFloatFloat: 486 return "NVPTXISD::Tld4B2DFloatFloat"; 487 case NVPTXISD::Tld4A2DFloatFloat: 488 return "NVPTXISD::Tld4A2DFloatFloat"; 489 case NVPTXISD::Tld4R2DS64Float: 490 return "NVPTXISD::Tld4R2DS64Float"; 491 case NVPTXISD::Tld4G2DS64Float: 492 return "NVPTXISD::Tld4G2DS64Float"; 493 case NVPTXISD::Tld4B2DS64Float: 494 return "NVPTXISD::Tld4B2DS64Float"; 495 case NVPTXISD::Tld4A2DS64Float: 496 return "NVPTXISD::Tld4A2DS64Float"; 497 case NVPTXISD::Tld4R2DU64Float: 498 return "NVPTXISD::Tld4R2DU64Float"; 499 case NVPTXISD::Tld4G2DU64Float: 500 return "NVPTXISD::Tld4G2DU64Float"; 501 case NVPTXISD::Tld4B2DU64Float: 502 return "NVPTXISD::Tld4B2DU64Float"; 503 case NVPTXISD::Tld4A2DU64Float: 504 return "NVPTXISD::Tld4A2DU64Float"; 505 506 case NVPTXISD::TexUnified1DFloatS32: 507 return "NVPTXISD::TexUnified1DFloatS32"; 508 case NVPTXISD::TexUnified1DFloatFloat: 509 return "NVPTXISD::TexUnified1DFloatFloat"; 510 case NVPTXISD::TexUnified1DFloatFloatLevel: 511 return "NVPTXISD::TexUnified1DFloatFloatLevel"; 512 case NVPTXISD::TexUnified1DFloatFloatGrad: 513 return "NVPTXISD::TexUnified1DFloatFloatGrad"; 514 case NVPTXISD::TexUnified1DS32S32: 515 return "NVPTXISD::TexUnified1DS32S32"; 516 case NVPTXISD::TexUnified1DS32Float: 517 return "NVPTXISD::TexUnified1DS32Float"; 518 case NVPTXISD::TexUnified1DS32FloatLevel: 519 return "NVPTXISD::TexUnified1DS32FloatLevel"; 520 case NVPTXISD::TexUnified1DS32FloatGrad: 521 return "NVPTXISD::TexUnified1DS32FloatGrad"; 522 case NVPTXISD::TexUnified1DU32S32: 523 return "NVPTXISD::TexUnified1DU32S32"; 524 case 
NVPTXISD::TexUnified1DU32Float: 525 return "NVPTXISD::TexUnified1DU32Float"; 526 case NVPTXISD::TexUnified1DU32FloatLevel: 527 return "NVPTXISD::TexUnified1DU32FloatLevel"; 528 case NVPTXISD::TexUnified1DU32FloatGrad: 529 return "NVPTXISD::TexUnified1DU32FloatGrad"; 530 case NVPTXISD::TexUnified1DArrayFloatS32: 531 return "NVPTXISD::TexUnified1DArrayFloatS32"; 532 case NVPTXISD::TexUnified1DArrayFloatFloat: 533 return "NVPTXISD::TexUnified1DArrayFloatFloat"; 534 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 535 return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; 536 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 537 return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; 538 case NVPTXISD::TexUnified1DArrayS32S32: 539 return "NVPTXISD::TexUnified1DArrayS32S32"; 540 case NVPTXISD::TexUnified1DArrayS32Float: 541 return "NVPTXISD::TexUnified1DArrayS32Float"; 542 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 543 return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; 544 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 545 return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; 546 case NVPTXISD::TexUnified1DArrayU32S32: 547 return "NVPTXISD::TexUnified1DArrayU32S32"; 548 case NVPTXISD::TexUnified1DArrayU32Float: 549 return "NVPTXISD::TexUnified1DArrayU32Float"; 550 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 551 return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; 552 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 553 return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; 554 case NVPTXISD::TexUnified2DFloatS32: 555 return "NVPTXISD::TexUnified2DFloatS32"; 556 case NVPTXISD::TexUnified2DFloatFloat: 557 return "NVPTXISD::TexUnified2DFloatFloat"; 558 case NVPTXISD::TexUnified2DFloatFloatLevel: 559 return "NVPTXISD::TexUnified2DFloatFloatLevel"; 560 case NVPTXISD::TexUnified2DFloatFloatGrad: 561 return "NVPTXISD::TexUnified2DFloatFloatGrad"; 562 case NVPTXISD::TexUnified2DS32S32: 563 return "NVPTXISD::TexUnified2DS32S32"; 564 case NVPTXISD::TexUnified2DS32Float: 565 return 
"NVPTXISD::TexUnified2DS32Float"; 566 case NVPTXISD::TexUnified2DS32FloatLevel: 567 return "NVPTXISD::TexUnified2DS32FloatLevel"; 568 case NVPTXISD::TexUnified2DS32FloatGrad: 569 return "NVPTXISD::TexUnified2DS32FloatGrad"; 570 case NVPTXISD::TexUnified2DU32S32: 571 return "NVPTXISD::TexUnified2DU32S32"; 572 case NVPTXISD::TexUnified2DU32Float: 573 return "NVPTXISD::TexUnified2DU32Float"; 574 case NVPTXISD::TexUnified2DU32FloatLevel: 575 return "NVPTXISD::TexUnified2DU32FloatLevel"; 576 case NVPTXISD::TexUnified2DU32FloatGrad: 577 return "NVPTXISD::TexUnified2DU32FloatGrad"; 578 case NVPTXISD::TexUnified2DArrayFloatS32: 579 return "NVPTXISD::TexUnified2DArrayFloatS32"; 580 case NVPTXISD::TexUnified2DArrayFloatFloat: 581 return "NVPTXISD::TexUnified2DArrayFloatFloat"; 582 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 583 return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; 584 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 585 return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; 586 case NVPTXISD::TexUnified2DArrayS32S32: 587 return "NVPTXISD::TexUnified2DArrayS32S32"; 588 case NVPTXISD::TexUnified2DArrayS32Float: 589 return "NVPTXISD::TexUnified2DArrayS32Float"; 590 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 591 return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; 592 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 593 return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; 594 case NVPTXISD::TexUnified2DArrayU32S32: 595 return "NVPTXISD::TexUnified2DArrayU32S32"; 596 case NVPTXISD::TexUnified2DArrayU32Float: 597 return "NVPTXISD::TexUnified2DArrayU32Float"; 598 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 599 return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; 600 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 601 return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; 602 case NVPTXISD::TexUnified3DFloatS32: 603 return "NVPTXISD::TexUnified3DFloatS32"; 604 case NVPTXISD::TexUnified3DFloatFloat: 605 return "NVPTXISD::TexUnified3DFloatFloat"; 606 case 
NVPTXISD::TexUnified3DFloatFloatLevel: 607 return "NVPTXISD::TexUnified3DFloatFloatLevel"; 608 case NVPTXISD::TexUnified3DFloatFloatGrad: 609 return "NVPTXISD::TexUnified3DFloatFloatGrad"; 610 case NVPTXISD::TexUnified3DS32S32: 611 return "NVPTXISD::TexUnified3DS32S32"; 612 case NVPTXISD::TexUnified3DS32Float: 613 return "NVPTXISD::TexUnified3DS32Float"; 614 case NVPTXISD::TexUnified3DS32FloatLevel: 615 return "NVPTXISD::TexUnified3DS32FloatLevel"; 616 case NVPTXISD::TexUnified3DS32FloatGrad: 617 return "NVPTXISD::TexUnified3DS32FloatGrad"; 618 case NVPTXISD::TexUnified3DU32S32: 619 return "NVPTXISD::TexUnified3DU32S32"; 620 case NVPTXISD::TexUnified3DU32Float: 621 return "NVPTXISD::TexUnified3DU32Float"; 622 case NVPTXISD::TexUnified3DU32FloatLevel: 623 return "NVPTXISD::TexUnified3DU32FloatLevel"; 624 case NVPTXISD::TexUnified3DU32FloatGrad: 625 return "NVPTXISD::TexUnified3DU32FloatGrad"; 626 case NVPTXISD::TexUnifiedCubeFloatFloat: 627 return "NVPTXISD::TexUnifiedCubeFloatFloat"; 628 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 629 return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; 630 case NVPTXISD::TexUnifiedCubeS32Float: 631 return "NVPTXISD::TexUnifiedCubeS32Float"; 632 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 633 return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; 634 case NVPTXISD::TexUnifiedCubeU32Float: 635 return "NVPTXISD::TexUnifiedCubeU32Float"; 636 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 637 return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; 638 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 639 return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; 640 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 641 return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; 642 case NVPTXISD::TexUnifiedCubeArrayS32Float: 643 return "NVPTXISD::TexUnifiedCubeArrayS32Float"; 644 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 645 return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; 646 case NVPTXISD::TexUnifiedCubeArrayU32Float: 647 return 
"NVPTXISD::TexUnifiedCubeArrayU32Float"; 648 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 649 return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; 650 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 651 return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; 652 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 653 return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; 654 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 655 return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; 656 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 657 return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; 658 case NVPTXISD::Tld4UnifiedR2DS64Float: 659 return "NVPTXISD::Tld4UnifiedR2DS64Float"; 660 case NVPTXISD::Tld4UnifiedG2DS64Float: 661 return "NVPTXISD::Tld4UnifiedG2DS64Float"; 662 case NVPTXISD::Tld4UnifiedB2DS64Float: 663 return "NVPTXISD::Tld4UnifiedB2DS64Float"; 664 case NVPTXISD::Tld4UnifiedA2DS64Float: 665 return "NVPTXISD::Tld4UnifiedA2DS64Float"; 666 case NVPTXISD::Tld4UnifiedR2DU64Float: 667 return "NVPTXISD::Tld4UnifiedR2DU64Float"; 668 case NVPTXISD::Tld4UnifiedG2DU64Float: 669 return "NVPTXISD::Tld4UnifiedG2DU64Float"; 670 case NVPTXISD::Tld4UnifiedB2DU64Float: 671 return "NVPTXISD::Tld4UnifiedB2DU64Float"; 672 case NVPTXISD::Tld4UnifiedA2DU64Float: 673 return "NVPTXISD::Tld4UnifiedA2DU64Float"; 674 675 case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; 676 case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; 677 case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; 678 case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; 679 case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; 680 case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; 681 case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; 682 case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; 683 case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; 684 case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; 685 case 
NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; 686 687 case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; 688 case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; 689 case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; 690 case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; 691 case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; 692 case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; 693 case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; 694 case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; 695 case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; 696 case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; 697 case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; 698 699 case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; 700 case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; 701 case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; 702 case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; 703 case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; 704 case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; 705 case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; 706 case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; 707 case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; 708 case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; 709 case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; 710 711 case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; 712 case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; 713 case NVPTXISD::Suld2DArrayI32Clamp: return 
"NVPTXISD::Suld2DArrayI32Clamp"; 714 case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; 715 case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; 716 case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; 717 case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; 718 case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; 719 case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; 720 case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; 721 case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; 722 723 case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; 724 case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; 725 case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; 726 case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; 727 case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; 728 case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; 729 case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; 730 case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; 731 case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; 732 case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; 733 case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; 734 735 case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; 736 case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; 737 case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; 738 case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; 739 case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; 740 case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; 741 case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; 742 case 
NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; 743 case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; 744 case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; 745 case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; 746 747 case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; 748 case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; 749 case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; 750 case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; 751 case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; 752 case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; 753 case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; 754 case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; 755 case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; 756 case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; 757 case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; 758 759 case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; 760 case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; 761 case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; 762 case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; 763 case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; 764 case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; 765 case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; 766 case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; 767 case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; 768 case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; 769 case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; 770 771 case NVPTXISD::Suld2DArrayI8Trap: return 
"NVPTXISD::Suld2DArrayI8Trap"; 772 case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; 773 case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; 774 case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; 775 case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; 776 case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; 777 case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; 778 case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; 779 case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; 780 case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; 781 case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; 782 783 case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; 784 case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; 785 case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; 786 case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; 787 case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; 788 case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; 789 case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; 790 case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; 791 case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; 792 case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; 793 case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; 794 795 case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; 796 case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; 797 case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; 798 case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; 799 case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; 800 case NVPTXISD::Suld1DV2I16Zero: return 
"NVPTXISD::Suld1DV2I16Zero"; 801 case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; 802 case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; 803 case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; 804 case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; 805 case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; 806 807 case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; 808 case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; 809 case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; 810 case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; 811 case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; 812 case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; 813 case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; 814 case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; 815 case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; 816 case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; 817 case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; 818 819 case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; 820 case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; 821 case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; 822 case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; 823 case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; 824 case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; 825 case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; 826 case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; 827 case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; 828 case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; 829 case 
NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; 830 831 case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; 832 case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; 833 case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; 834 case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; 835 case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; 836 case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; 837 case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; 838 case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; 839 case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; 840 case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; 841 case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; 842 843 case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; 844 case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; 845 case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; 846 case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; 847 case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; 848 case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; 849 case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; 850 case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; 851 case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; 852 case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; 853 case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; 854 } 855} 856 857TargetLoweringBase::LegalizeTypeAction 858NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { 859 if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) 860 return TypeSplitVector; 861 862 return 
      TargetLoweringBase::getPreferredVectorAction(VT);
}

/// LowerGlobalAddress - Replace a plain global-address node with a
/// target global address wrapped in an NVPTXISD::Wrapper node, using the
/// target's pointer type for both the address and the wrapper result.
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
  return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}

/// getPrototype - Build the PTX ".callprototype" string used for indirect
/// calls, e.g. "prototype_N : .callprototype (.param .b32 _) _ (.param ...);".
/// \param retTy        IR return type of the callee.
/// \param Args         the call's argument list (IR types).
/// \param Outs         lowered output args; indexed in parallel with Args,
///                     but may contain more entries for aggregates/vectors.
/// \param retAlignment alignment used for struct/vector returns.
/// \param CS           the call site (used to query alignment metadata).
/// Returns the prototype string, or "" when non-ABI compilation is requested
/// (which is asserted against above).
std::string
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  unsigned retAlignment,
                                  const ImmutableCallSite *CS) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  // Emit the return-value part of the prototype first.
  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
      unsigned size = 0;
      if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
        // PTX scalar return params are at least 32 bits wide.
        if (size < 32)
          size = 32;
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      O << ".param .b" << getPointerTy().getSizeInBits() << " _";
    } else if ((retTy->getTypeID() == Type::StructTyID) ||
               isa<VectorType>(retTy)) {
      // Structs and vectors are returned as byte arrays with an explicit
      // alignment.
      O << ".param .align "
        << retAlignment
        << " .b8 _["
        << getDataLayout()->getTypeAllocSize(retTy) << "]";
    } else {
      llvm_unreachable("Unknown return type");
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;
  MVT thePointerTy = getPointerTy();

  // OIdx tracks the corresponding position in Outs, which can run ahead of
  // the Args index when an argument expands to several Outs entries.
  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (Outs[OIdx].Flags.isByVal() == false) {
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        // Aggregate/vector argument passed as a byte-array param.
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
        const DataLayout *TD = getDataLayout();
        // +1 because index 0 is reserved for return type alignment
        if (!llvm::getAlign(*CallI, i + 1, align))
          align = TD->getABITypeAlignment(Ty);
        unsigned sz = TD->getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // update the index for Outs
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
      // i8 types in IR will be i16 types in SDAG
      assert((getValueType(Ty) == Outs[OIdx].VT ||
              (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // scalar type
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        // Integer scalars are widened to at least 32 bits in the prototype.
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    // byval pointer argument: declared as a byte array sized/aligned from
    // the pointee type and the byval attributes.
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}

/// getArgumentAlignment - Determine the alignment to use for argument (or,
/// when Idx == 0, return-value) number Idx of the given call: prefer explicit
/// alignment metadata on the call/callee, otherwise fall back to the ABI
/// type alignment of Ty.
unsigned
NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
                                          const ImmutableCallSite *CS,
                                          Type *Ty,
                                          unsigned Idx) const {
  const DataLayout *TD = getDataLayout();
  unsigned Align = 0;
  const Value *DirectCallee = CS->getCalledFunction();

  if (!DirectCallee) {
    // We
    // don't have a direct function symbol, but that may be because of
    // constant cast instructions in the call.
    const Instruction *CalleeI = CS->getInstruction();
    assert(CalleeI && "Call target is not a function or derived value?");

    // With bitcast'd call targets, the instruction will be the call
    if (isa<CallInst>(CalleeI)) {
      // Check if we have call alignment metadata
      if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
        return Align;

      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
      // Ignore any bitcast instructions
      while(isa<ConstantExpr>(CalleeV)) {
        const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
        if (!CE->isCast())
          break;
        // Look through the bitcast
        CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
      }

      // We have now looked past all of the bitcasts.  Do we finally have a
      // Function?
      if (isa<Function>(CalleeV))
        DirectCallee = CalleeV;
    }
  }

  // Check for function alignment information if we found that the
  // ultimate target is a Function
  if (DirectCallee)
    if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
      return Align;

  // Call is indirect or alignment information is not available, fall back to
  // the ABI type alignment
  return TD->getABITypeAlignment(Ty);
}

/// LowerCall - Lower an outgoing call into the NVPTX param-passing scheme:
/// declare .param space for each argument (DeclareParam/DeclareScalarParam),
/// store the argument values into it (StoreParam/StoreParamV2/StoreParamV4),
/// declare retval space when there is a result, emit the call pseudo-ops
/// (PrintCall/CallVoid/CallArg*/CallArgEnd, plus CallPrototype/Prototype for
/// indirect calls), and finally load the results back (LoadParam*).  All
/// nodes are serialized through the Chain and the MVT::Glue value (InFlag) so
/// they are emitted in order.  Tail calls are not supported and isTailCall is
/// forced to false.
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.getArgs();
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;
  const DataLayout *TD = getDataLayout();
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();

  // Keep the pre-CALLSEQ_START chain around: byval argument loads below are
  // chained to it so they are not serialized behind the call sequence.
  SDValue tempChain = Chain;
  Chain =
      DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
                           dl);
  SDValue InFlag = Chain.getValue(1);

  unsigned paramCount = 0;
  // Args.size() and Outs.size() need not match.
  // Outs.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Outs)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Outs.
  // So a different index should be used for indexing into Outs/OutVals.
  // See similar issue in LowerFormalArguments.
  unsigned OIdx = 0;
  // Declare the .params or .reg need to pass values
  // to the function
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    EVT VT = Outs[OIdx].VT;
    Type *Ty = Args[i].Ty;

    if (Outs[OIdx].Flags.isByVal() == false) {
      if (Ty->isAggregateType()) {
        // aggregate: declare one byte-array param and store each scalar part
        // of the aggregate into it at its offset.
        SmallVector<EVT, 16> vtparts;
        SmallVector<uint64_t, 16> Offsets;
        ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0);

        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = TD->getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
                                      DAG.getConstant(paramCount, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
          EVT elemtype = vtparts[j];
          unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
          // NOTE(review): 'sz' here is the aggregate's total alloc size (set
          // above), not this element's size; the parallel loop in the result
          // path below tests the element's own size instead.  Presumably this
          // was meant to test elemtype's width — confirm before changing.
          if (elemtype.isInteger() && (sz < 8))
            sz = 8;
          SDValue StVal = OutVals[OIdx];
          if (elemtype.getSizeInBits() < 16) {
            // Sub-16-bit values are widened to i16 for the param store.
            StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
          }
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(Offsets[j], MVT::i32),
                                     StVal, InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          elemtype, MachinePointerInfo(),
                                          ArgAlign);
          InFlag = Chain.getValue(1);
          ++OIdx;
        }
        // The enclosing for-loop's ++OIdx accounts for one entry; back off
        // the extra increment from the element loop above.
        if (vtparts.size() > 0)
          --OIdx;
        ++paramCount;
        continue;
      }
      if (Ty->isVectorTy()) {
        // vector: declare one byte-array param, then store the elements with
        // v1/v2/v4 StoreParam nodes.
        EVT ObjectVT = getValueType(Ty);
        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = TD->getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
                                      DAG.getConstant(paramCount, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        unsigned NumElts = ObjectVT.getVectorNumElements();
        EVT EltVT = ObjectVT.getVectorElementType();
        // MemVT keeps the in-memory element type; EltVT may be widened to
        // i16 below for the SDAG values being stored.
        EVT MemVT = EltVT;
        bool NeedExtend = false;
        if (EltVT.getSizeInBits() < 16) {
          NeedExtend = true;
          EltVT = MVT::i16;
        }

        // V1 store
        if (NumElts == 1) {
          SDValue Elt = OutVals[OIdx++];
          if (NeedExtend)
            Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);

          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(0, MVT::i32), Elt,
                                     InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          MemVT, MachinePointerInfo());
          InFlag = Chain.getValue(1);
        } else if (NumElts == 2) {
          SDValue Elt0 = OutVals[OIdx++];
          SDValue Elt1 = OutVals[OIdx++];
          if (NeedExtend) {
            Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
            Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
          }

          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(0, MVT::i32), Elt0, Elt1,
                                     InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
                                          CopyParamVTs, CopyParamOps,
                                          MemVT, MachinePointerInfo());
          InFlag = Chain.getValue(1);
        } else {
          unsigned curOffset = 0;
          // V4 stores
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the vector will be expanded to a power of 2 elements, so we know
          // we can always round up to the next multiple of 4 when creating
          // the vector stores.
          // e.g.  4 elem => 1 st.v4
          //       6 elem => 2 st.v4
          //       8 elem => 2 st.v4
          //      11 elem => 3 st.v4
          unsigned VecSize = 4;
          if (EltVT.getSizeInBits() == 64)
            VecSize = 2;

          // This is potentially only part of a vector, so assume all elements
          // are packed together.
          unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;

          for (unsigned i = 0; i < NumElts; i += VecSize) {
            // Get values
            SDValue StoreVal;
            SmallVector<SDValue, 8> Ops;
            Ops.push_back(Chain);
            Ops.push_back(DAG.getConstant(paramCount, MVT::i32));
            Ops.push_back(DAG.getConstant(curOffset, MVT::i32));

            unsigned Opc = NVPTXISD::StoreParamV2;

            StoreVal = OutVals[OIdx++];
            if (NeedExtend)
              StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
            Ops.push_back(StoreVal);

            // Lanes past NumElts are padded with UNDEF.
            if (i + 1 < NumElts) {
              StoreVal = OutVals[OIdx++];
              if (NeedExtend)
                StoreVal =
                    DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
            } else {
              StoreVal = DAG.getUNDEF(EltVT);
            }
            Ops.push_back(StoreVal);

            if (VecSize == 4) {
              Opc = NVPTXISD::StoreParamV4;
              if (i + 2 < NumElts) {
                StoreVal = OutVals[OIdx++];
                if (NeedExtend)
                  StoreVal =
                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
              } else {
                StoreVal = DAG.getUNDEF(EltVT);
              }
              Ops.push_back(StoreVal);

              if (i + 3 < NumElts) {
                StoreVal = OutVals[OIdx++];
                if (NeedExtend)
                  StoreVal =
                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
              } else {
                StoreVal = DAG.getUNDEF(EltVT);
              }
              Ops.push_back(StoreVal);
            }

            Ops.push_back(InFlag);

            SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
            Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
                                            MemVT, MachinePointerInfo());
            InFlag = Chain.getValue(1);
            curOffset += PerStoreOffset;
          }
        }
        ++paramCount;
        // Compensate for the enclosing loop's ++OIdx; the element stores
        // above already advanced OIdx past this argument's entries.
        --OIdx;
        continue;
      }
      // Plain scalar
      // for ABI,    declare .param .b<size> .param<n>;
      unsigned sz = VT.getSizeInBits();
      bool needExtend = false;
      if (VT.isInteger()) {
        if (sz < 16)
          needExtend = true;
        // Integer params are declared at least 32 bits wide.
        if (sz < 32)
          sz = 32;
      }
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareParamOps);
      InFlag = Chain.getValue(1);
      SDValue OutV = OutVals[OIdx];
      if (needExtend) {
        // zext/sext i1 to i16
        unsigned opc = ISD::ZERO_EXTEND;
        if (Outs[OIdx].Flags.isSExt())
          opc = ISD::SIGN_EXTEND;
        OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                 DAG.getConstant(0, MVT::i32), OutV, InFlag };

      // Pick a store opcode that records the IR-level extension kind.
      unsigned opcode = NVPTXISD::StoreParam;
      if (Outs[OIdx].Flags.isZExt())
        opcode = NVPTXISD::StoreParamU32;
      else if (Outs[OIdx].Flags.isSExt())
        opcode = NVPTXISD::StoreParamS32;
      Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
                                      VT, MachinePointerInfo());

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // struct or vector passed byval: load each part from the pointee and
    // store it into the declared byte-array param.
    SmallVector<EVT, 16> vtparts;
    SmallVector<uint64_t, 16> Offsets;
    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be pointer");
    ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);

    // declare .param .align <align> .b8 .param<n>[<size>];
    unsigned sz = Outs[OIdx].Flags.getByValSize();
    SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
    // The ByValAlign in the Outs[OIdx].Flags is alway set at this point,
    // so we don't need to worry about natural alignment or not.
    // See TargetLowering::LowerCallTo().
    SDValue DeclareParamOps[] = {
      Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32),
      DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
      InFlag
    };
    Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                        DeclareParamOps);
    InFlag = Chain.getValue(1);
    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
      EVT elemtype = vtparts[j];
      int curOffset = Offsets[j];
      unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
      SDValue srcAddr =
          DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
                      DAG.getConstant(curOffset, getPointerTy()));
      // Load from the original memory via tempChain so the load is not
      // ordered after CALLSEQ_START.
      SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                   MachinePointerInfo(), false, false, false,
                                   PartAlign);
      if (elemtype.getSizeInBits() < 16) {
        theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                 DAG.getConstant(curOffset, MVT::i32), theVal,
                                 InFlag };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                                      CopyParamOps, elemtype,
                                      MachinePointerInfo());

      InFlag = Chain.getValue(1);
    }
    ++paramCount;
  }

  // Func is non-null exactly for direct calls; indirect calls additionally
  // need a .callprototype (emitted below).
  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle Result
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, retTy, resvtparts);

    // Declare
    //  .param .align 16 .b8 retval0[<size-in-bytes>], or
    //  .param .b<size-in-bits> retval0
    unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
    // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
    // these three types to match the logic in
    // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
    // Plus, this behavior is consistent with nvcc's.
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
        retTy->isPointerTy()) {
      // Scalar needs to be at least 32bit wide
      if (resultsz < 32)
        resultsz = 32;
      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                                  DAG.getConstant(resultsz, MVT::i32),
                                  DAG.getConstant(0, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                          DeclareRetOps);
      InFlag = Chain.getValue(1);
    } else {
      retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareRetOps[] = { Chain,
                                  DAG.getConstant(retAlignment, MVT::i32),
                                  DAG.getConstant(resultsz / 8, MVT::i32),
                                  DAG.getConstant(0, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                          DeclareRetOps);
      InFlag = Chain.getValue(1);
    }
  }

  if (!Func) {
    // This is indirect function call case : PTX requires a prototype of the
    // form
    // proto_0 : .callprototype(.param .b32 _)  _ (.param .b32 _);
    // to be emitted, and the label has to used as the last arg of call
    // instruction.
    // The prototype is embedded in a string and put as the operand for a
    // CallPrototype SDNode which will print out to the value of the string.
    SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
    const char *ProtoStr =
      nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
    SDValue ProtoOps[] = {
      Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
    };
    Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
    InFlag = Chain.getValue(1);
  }
  // Op to just print "call"
  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue PrintCallOps[] = {
    Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag
  };
  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
                      dl, PrintCallVTs, PrintCallOps);
  InFlag = Chain.getValue(1);

  // Ops to print out the function name
  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
  InFlag = Chain.getValue(1);

  // Ops to print out the param list
  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgBeginOps[] = { Chain, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
                      CallArgBeginOps);
  InFlag = Chain.getValue(1);

  for (unsigned i = 0, e = paramCount; i != e; ++i) {
    unsigned opcode;
    // The last argument uses a distinct opcode so the printer can close the
    // argument list.
    if (i == (e - 1))
      opcode = NVPTXISD::LastCallArg;
    else
      opcode = NVPTXISD::CallArg;
    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                             DAG.getConstant(i, MVT::i32), InFlag };
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
    InFlag = Chain.getValue(1);
  }
  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
                              InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
  InFlag = Chain.getValue(1);

  if (!Func) {
    // Indirect call: reference the prototype emitted above by its unique id.
    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
                               InFlag };
    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
    InFlag = Chain.getValue(1);
  }

  // Generate loads from param memory/moves from registers for result
  if (Ins.size() > 0) {
    if (retTy && retTy->isVectorTy()) {
      EVT ObjectVT = getValueType(retTy);
      unsigned NumElts = ObjectVT.getVectorNumElements();
      EVT EltVT = ObjectVT.getVectorElementType();
      assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(
                 F->getContext(), ObjectVT) == NumElts &&
             "Vector was not scalarized");
      unsigned sz = EltVT.getSizeInBits();
      bool needTruncate = sz < 8 ? true : false;

      if (NumElts == 1) {
        // Just a simple load
        SmallVector<EVT, 4> LoadRetVTs;
        if (EltVT == MVT::i1 || EltVT == MVT::i8) {
          // If loading i1/i8 result, generate
          //   load.b8 i16
          //   if i1
          //   trunc i16 to i1
          LoadRetVTs.push_back(MVT::i16);
        } else
          LoadRetVTs.push_back(EltVT);
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);
        SmallVector<SDValue, 4> LoadRetOps;
        LoadRetOps.push_back(Chain);
        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
        LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
        LoadRetOps.push_back(InFlag);
        SDValue retval = DAG.getMemIntrinsicNode(
            NVPTXISD::LoadParam, dl,
            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        SDValue Ret0 = retval;
        if (needTruncate)
          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
        InVals.push_back(Ret0);
      } else if (NumElts == 2) {
        // LoadV2
        SmallVector<EVT, 4> LoadRetVTs;
        if (EltVT == MVT::i1 || EltVT == MVT::i8) {
          // If loading i1/i8 result, generate
          //   load.b8 i16
          //   if i1
          //   trunc i16 to i1
          LoadRetVTs.push_back(MVT::i16);
          LoadRetVTs.push_back(MVT::i16);
        } else {
          LoadRetVTs.push_back(EltVT);
          LoadRetVTs.push_back(EltVT);
        }
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);
        SmallVector<SDValue, 4> LoadRetOps;
        LoadRetOps.push_back(Chain);
        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
        LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
        LoadRetOps.push_back(InFlag);
        SDValue retval = DAG.getMemIntrinsicNode(
            NVPTXISD::LoadParamV2, dl,
            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
        Chain = retval.getValue(2);
        InFlag = retval.getValue(3);
        SDValue Ret0 = retval.getValue(0);
        SDValue Ret1 = retval.getValue(1);
        if (needTruncate) {
          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
          InVals.push_back(Ret0);
          Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
          InVals.push_back(Ret1);
        } else {
          InVals.push_back(Ret0);
          InVals.push_back(Ret1);
        }
      } else {
        // Split into N LoadV4
        unsigned Ofst = 0;
        unsigned VecSize = 4;
        unsigned Opc = NVPTXISD::LoadParamV4;
        if (EltVT.getSizeInBits() == 64) {
          VecSize = 2;
          Opc = NVPTXISD::LoadParamV2;
        }
        EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
        for (unsigned i = 0; i < NumElts; i += VecSize) {
          SmallVector<EVT, 8> LoadRetVTs;
          if (EltVT == MVT::i1 || EltVT == MVT::i8) {
            // If loading i1/i8 result, generate
            //   load.b8 i16
            //   if i1
            //   trunc i16 to i1
            for (unsigned j = 0; j < VecSize; ++j)
              LoadRetVTs.push_back(MVT::i16);
          } else {
            for (unsigned j = 0; j < VecSize; ++j)
              LoadRetVTs.push_back(EltVT);
          }
          LoadRetVTs.push_back(MVT::Other);
          LoadRetVTs.push_back(MVT::Glue);
          SmallVector<SDValue, 4> LoadRetOps;
          LoadRetOps.push_back(Chain);
          LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
          LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32));
          LoadRetOps.push_back(InFlag);
          SDValue retval = DAG.getMemIntrinsicNode(
              Opc, dl, DAG.getVTList(LoadRetVTs),
              LoadRetOps, EltVT, MachinePointerInfo());
          // Chain/Glue results follow the VecSize value results.
          if (VecSize == 2) {
            Chain = retval.getValue(2);
            InFlag = retval.getValue(3);
          } else {
            Chain = retval.getValue(4);
            InFlag = retval.getValue(5);
          }

          for (unsigned j = 0; j < VecSize; ++j) {
            // The final load may produce more lanes than the vector has;
            // drop the padding lanes.
            if (i + j >= NumElts)
              break;
            SDValue Elt = retval.getValue(j);
            if (needTruncate)
              Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
            InVals.push_back(Elt);
          }
          Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
        }
      }
    } else {
      // Scalar/aggregate result: one LoadParam per decomposed part.
      SmallVector<EVT, 16> VTs;
      SmallVector<uint64_t, 16> Offsets;
      ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0);
      assert(VTs.size() == Ins.size() && "Bad value decomposition");
      unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
        unsigned sz = VTs[i].getSizeInBits();
        unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
        bool needTruncate = sz < 8 ? true : false;
        if (VTs[i].isInteger() && (sz < 8))
          sz = 8;

        SmallVector<EVT, 4> LoadRetVTs;
        EVT TheLoadType = VTs[i];
        if (retTy->isIntegerTy() &&
            TD->getTypeAllocSizeInBits(retTy) < 32) {
          // This is for integer types only, and specifically not for
          // aggregates.
          LoadRetVTs.push_back(MVT::i32);
          TheLoadType = MVT::i32;
        } else if (sz < 16) {
          // If loading i1/i8 result, generate
          //   load i8 (-> i16)
          //   trunc i16 to i1/i8
          LoadRetVTs.push_back(MVT::i16);
        } else
          LoadRetVTs.push_back(Ins[i].VT);
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);

        SmallVector<SDValue, 4> LoadRetOps;
        LoadRetOps.push_back(Chain);
        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
        LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32));
        LoadRetOps.push_back(InFlag);
        SDValue retval = DAG.getMemIntrinsicNode(
            NVPTXISD::LoadParam, dl,
            DAG.getVTList(LoadRetVTs), LoadRetOps,
            TheLoadType, MachinePointerInfo(), AlignI);
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        SDValue Ret0 = retval.getValue(0);
        if (needTruncate)
          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
        InVals.push_back(Ret0);
      }
    }
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
                             DAG.getIntPtrConstant(uniqueCallSite + 1, true),
                             InFlag, dl);
  uniqueCallSite++;

  // set isTailCall to false for now, until we figure out how to express
  // tail call optimization in PTX
  isTailCall = false;
  return Chain;
}

// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
1640// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 1641SDValue 1642NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { 1643 SDNode *Node = Op.getNode(); 1644 SDLoc dl(Node); 1645 SmallVector<SDValue, 8> Ops; 1646 unsigned NumOperands = Node->getNumOperands(); 1647 for (unsigned i = 0; i < NumOperands; ++i) { 1648 SDValue SubOp = Node->getOperand(i); 1649 EVT VVT = SubOp.getNode()->getValueType(0); 1650 EVT EltVT = VVT.getVectorElementType(); 1651 unsigned NumSubElem = VVT.getVectorNumElements(); 1652 for (unsigned j = 0; j < NumSubElem; ++j) { 1653 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, 1654 DAG.getIntPtrConstant(j))); 1655 } 1656 } 1657 return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops); 1658} 1659 1660/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which 1661/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift 1662/// amount, or 1663/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift 1664/// amount. 1665SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, 1666 SelectionDAG &DAG) const { 1667 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 1668 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 1669 1670 EVT VT = Op.getValueType(); 1671 unsigned VTBits = VT.getSizeInBits(); 1672 SDLoc dl(Op); 1673 SDValue ShOpLo = Op.getOperand(0); 1674 SDValue ShOpHi = Op.getOperand(1); 1675 SDValue ShAmt = Op.getOperand(2); 1676 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 1677 1678 if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { 1679 1680 // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 
1681 // {dHi, dLo} = {aHi, aLo} >> Amt 1682 // dHi = aHi >> Amt 1683 // dLo = shf.r.clamp aLo, aHi, Amt 1684 1685 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 1686 SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, 1687 ShAmt); 1688 1689 SDValue Ops[2] = { Lo, Hi }; 1690 return DAG.getMergeValues(Ops, dl); 1691 } 1692 else { 1693 1694 // {dHi, dLo} = {aHi, aLo} >> Amt 1695 // - if (Amt>=size) then 1696 // dLo = aHi >> (Amt-size) 1697 // dHi = aHi >> Amt (this is either all 0 or all 1) 1698 // else 1699 // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) 1700 // dHi = aHi >> Amt 1701 1702 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 1703 DAG.getConstant(VTBits, MVT::i32), ShAmt); 1704 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 1705 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 1706 DAG.getConstant(VTBits, MVT::i32)); 1707 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 1708 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 1709 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 1710 1711 SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, 1712 DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); 1713 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 1714 SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); 1715 1716 SDValue Ops[2] = { Lo, Hi }; 1717 return DAG.getMergeValues(Ops, dl); 1718 } 1719} 1720 1721/// LowerShiftLeftParts - Lower SHL_PARTS, which 1722/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift 1723/// amount, or 1724/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift 1725/// amount. 
SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  assert(Op.getOpcode() == ISD::SHL_PARTS);

  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);

  if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {

    // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
    // {dHi, dLo} = {aHi, aLo} << Amt
    //   dHi = shf.l.clamp aLo, aHi, Amt
    //   dLo = aLo << Amt

    SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
                             ShAmt);
    SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);

    SDValue Ops[2] = { Lo, Hi };
    return DAG.getMergeValues(Ops, dl);
  }
  else {

    // Generic expansion, selecting between the in-range and out-of-range
    // shift amount at run time:
    // {dHi, dLo} = {aHi, aLo} << Amt
    // - if (Amt>=size) then
    //      dLo = aLo << Amt (all 0)
    //      dHi = aLo << (Amt-size)
    //   else
    //      dLo = aLo << Amt
    //      dHi = (aHi << Amt) | (aLo >> (size-Amt))

    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant(VTBits, MVT::i32), ShAmt);
    SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                     DAG.getConstant(VTBits, MVT::i32));
    SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
    SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);

    SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
                               DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
    SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
    SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);

    SDValue Ops[2] = { Lo, Hi };
    return DAG.getMergeValues(Ops, dl);
  }
}

/// LowerOperation - Entry point for all operations this target lowers with
/// custom code. Nodes marked Custom in the constructor are dispatched here;
/// returning the operand unchanged (e.g. INTRINSIC_W_CHAIN, BUILD_VECTOR)
/// means "keep the node as-is".
SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::RETURNADDR:
    return SDValue();
  case ISD::FRAMEADDR:
    return SDValue();
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return Op;
  case ISD::BUILD_VECTOR:
  case ISD::EXTRACT_SUBVECTOR:
    return Op;
  case ISD::CONCAT_VECTORS:
    return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::STORE:
    return LowerSTORE(Op, DAG);
  case ISD::LOAD:
    return LowerLOAD(Op, DAG);
  case ISD::SHL_PARTS:
    return LowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:
    return LowerShiftRightParts(Op, DAG);
  default:
    llvm_unreachable("Custom lowering not defined for operation");
  }
}

/// LowerLOAD - Only i1 loads need custom lowering; everything else is left
/// to the generic legalizer (signalled by returning an empty SDValue).
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType() == MVT::i1)
    return LowerLOADi1(Op, DAG);
  else
    return SDValue();
}

// v = ld i1* addr
//   =>
// v1 = ld i8* addr (-> i16)
// v = trunc i16 to i1
SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  LoadSDNode *LD = cast<LoadSDNode>(Node);
  SDLoc dl(Node);
  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
  assert(Node->getValueType(0) == MVT::i1 &&
         "Custom lowering for i1 load only");
  // Load the byte as an i16 and truncate to the desired i1 result.
  SDValue newLD =
      DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
                  LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
                  LD->isInvariant(), LD->getAlignment());
  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
  // The legalizer (the caller) is expecting two values from the legalized
  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
  // in LegalizeDAG.cpp which also uses MergeValues.
  SDValue Ops[] = { result, LD->getChain() };
  return DAG.getMergeValues(Ops, dl);
}

/// LowerSTORE - Dispatch stores needing custom lowering: i1 stores and
/// vector stores. All other stores fall back to generic handling.
SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  EVT ValVT = Op.getOperand(1).getValueType();
  if (ValVT == MVT::i1)
    return LowerSTOREi1(Op, DAG);
  else if (ValVT.isVector())
    return LowerSTOREVector(Op, DAG);
  else
    return SDValue();
}

/// LowerSTOREVector - Turn a store of a PTX-native vector type into a
/// NVPTXISD::StoreV2/StoreV4 node whose operands are the extracted scalar
/// elements. Returns an empty SDValue to bail out to the generic legalizer
/// when the type or alignment is not suitable.
SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue Val = N->getOperand(1);
  SDLoc DL(N);
  EVT ValVT = Val.getValueType();

  if (ValVT.isVector()) {
    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
    // legal. We can (and should) split that into 2 stores of <2 x double> here
    // but I'm leaving that as a TODO for now.
    if (!ValVT.isSimple())
      return SDValue();
    switch (ValVT.getSimpleVT().SimpleTy) {
    default:
      return SDValue();
    case MVT::v2i8:
    case MVT::v2i16:
    case MVT::v2i32:
    case MVT::v2i64:
    case MVT::v2f32:
    case MVT::v2f64:
    case MVT::v4i8:
    case MVT::v4i16:
    case MVT::v4i32:
    case MVT::v4f32:
      // This is a "native" vector type
      break;
    }

    MemSDNode *MemSD = cast<MemSDNode>(N);
    const DataLayout *TD = getDataLayout();

    unsigned Align = MemSD->getAlignment();
    unsigned PrefAlign =
        TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
    if (Align < PrefAlign) {
      // This store is not sufficiently aligned, so bail out and let this vector
      // store be scalarized.  Note that we may still be able to emit smaller
      // vector stores. For example, if we are storing a <4 x float> with an
      // alignment of 8, this check will fail but the legalizer will try again
      // with 2 x <2 x float>, which will succeed with an alignment of 8.
      return SDValue();
    }

    unsigned Opcode = 0;
    EVT EltVT = ValVT.getVectorElementType();
    unsigned NumElts = ValVT.getVectorNumElements();

    // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
    // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
    // stored type to i16 and propagate the "real" type as the memory type.
    bool NeedExt = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExt = true;

    switch (NumElts) {
    default:
      return SDValue();
    case 2:
      Opcode = NVPTXISD::StoreV2;
      break;
    case 4: {
      Opcode = NVPTXISD::StoreV4;
      break;
    }
    }

    SmallVector<SDValue, 8> Ops;

    // First is the chain
    Ops.push_back(N->getOperand(0));

    // Then the split values
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
                                   DAG.getIntPtrConstant(i));
      if (NeedExt)
        ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
      Ops.push_back(ExtVal);
    }

    // Then any remaining arguments
    for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
      Ops.push_back(N->getOperand(i));
    }

    SDValue NewSt = DAG.getMemIntrinsicNode(
        Opcode, DL, DAG.getVTList(MVT::Other), Ops,
        MemSD->getMemoryVT(), MemSD->getMemOperand());

    //return DCI.CombineTo(N, NewSt, true);
    return NewSt;
  }

  return SDValue();
}

// st i1 v, addr
//    =>
// v1 = zxt v to i16
// st.u8 i16, addr
SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  SDLoc dl(Node);
  StoreSDNode *ST = cast<StoreSDNode>(Node);
  SDValue Tmp1 = ST->getChain();
  SDValue Tmp2 = ST->getBasePtr();
  SDValue Tmp3 = ST->getValue();
  assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
  unsigned Alignment = ST->getAlignment();
  bool isVolatile = ST->isVolatile();
  bool isNonTemporal = ST->isNonTemporal();
  // Widen the value to i16, then store only the low byte (truncating store
  // with memory type i8).
  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
  SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
                                     ST->getPointerInfo(), MVT::i8, isNonTemporal,
                                     isVolatile, Alignment);
  return Result;
}

/// getExtSymb - Build a TargetExternalSymbol named <inname><idx>. The string
/// is interned in the target machine's managed string pool so the c_str()
/// pointer stays valid for the lifetime of the symbol node.
SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
                                        int idx, EVT v) const {
  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
  std::stringstream suffix;
  suffix << idx;
  *name += suffix.str();
  return DAG.getTargetExternalSymbol(name->c_str(), v);
}

/// getParamSymbol - Build the symbol "<function name>_param_<idx>" used to
/// address the idx'th formal parameter in PTX param space.
SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
  std::string ParamSym;
  raw_string_ostream ParamStr(ParamSym);

  ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx;
  ParamStr.flush();

  // Intern the string so the symbol's character data outlives this call.
  std::string *SavedStr =
      nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str());
  return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
}

/// getParamHelpSymbol - Build the helper-parameter symbol ".HLPPARAM<idx>".
SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
  return getExtSymb(DAG, ".HLPPARAM", idx);
}

// Check to see if the kernel argument is image*_t or sampler_t

bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
  // Recognized by the (non-literal) struct name of the pointee type.
  static const char *const specialTypes[] = { "struct._image2d_t",
                                              "struct._image3d_t",
                                              "struct._sampler_t" };

  const Type *Ty = arg->getType();
  const PointerType *PTy = dyn_cast<PointerType>(Ty);

  if (!PTy)
    return false;

  if (!context)
    return false;

  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
  const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";

  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
    if (TypeName == specialTypes[i])
      return true;

  return false;
}

/// LowerFormalArguments - Materialize the incoming formal arguments as SDAG
/// values. Non-byval arguments are loaded from their ".param" symbols
/// (aggregates piecewise, vectors with v1/v2/v4 loads, scalars directly);
/// byval arguments become MoveParam nodes (plus a local-to-generic address
/// conversion for non-kernel functions). Image/sampler kernel arguments are
/// replaced by an i32 holding their parameter position.
SDValue NVPTXTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout *TD = getDataLayout();

  const Function *F = MF.getFunction();
  const AttributeSet &PAL = F->getAttributes();
  const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
  // Ins.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Ins)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Ins.
  // So a different index should be used for indexing into Ins.
  // See similar issue in LowerCall.
  unsigned InsIdx = 0;

  int idx = 0;
  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
    Type *Ty = argTypes[i];

    // If the kernel argument is image*_t or sampler_t, convert it to
    // a i32 constant holding the parameter position. This can later be
    // matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(
            theArgs[i],
            (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
                                     : nullptr))) {
      assert(isKernel && "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
      continue;
    }

    if (theArgs[i]->use_empty()) {
      // argument is dead; still push UNDEFs so InVals stays aligned with Ins.
      if (Ty->isAggregateType()) {
        SmallVector<EVT, 16> vtparts;

        ComputePTXValueVTs(*this, Ty, vtparts);
        assert(vtparts.size() > 0 && "empty aggregate type not expected");
        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
             ++parti) {
          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
          ++InsIdx;
        }
        // The loop above overshoots by one; the outer for also increments.
        if (vtparts.size() > 0)
          --InsIdx;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(Ty);
        unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
        for (unsigned parti = 0; parti < NumRegs; ++parti) {
          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
          ++InsIdx;
        }
        if (NumRegs > 0)
          --InsIdx;
        continue;
      }
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
      continue;
    }

    // In the following cases, assign a node order of "idx+1"
    // to newly created nodes. The SDNodes for params have to
    // appear in the same order as their order of appearance
    // in the original function. "idx+1" holds that order.
    if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
      if (Ty->isAggregateType()) {
        SmallVector<EVT, 16> vtparts;
        SmallVector<uint64_t, 16> offsets;

        // NOTE: Here, we lose the ability to issue vector loads for vectors
        // that are a part of a struct.  This should be investigated in the
        // future.
        ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0);
        assert(vtparts.size() > 0 && "empty aggregate type not expected");
        bool aggregateIsPacked = false;
        if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
          aggregateIsPacked = STy->isPacked();

        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
             ++parti) {
          EVT partVT = vtparts[parti];
          Value *srcValue = Constant::getNullValue(
              PointerType::get(partVT.getTypeForEVT(F->getContext()),
                               llvm::ADDRESS_SPACE_PARAM));
          SDValue srcAddr =
              DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
                          DAG.getConstant(offsets[parti], getPointerTy()));
          unsigned partAlign =
              aggregateIsPacked ? 1
                                : TD->getABITypeAlignment(
                                      partVT.getTypeForEVT(F->getContext()));
          SDValue p;
          // Extend the load when the in-register type is wider than the
          // in-memory part, honoring the argument's sign-extension flag.
          if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
            ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
                                     ISD::SEXTLOAD : ISD::ZEXTLOAD;
            p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
                               MachinePointerInfo(srcValue), partVT, false,
                               false, false, partAlign);
          } else {
            p = DAG.getLoad(partVT, dl, Root, srcAddr,
                            MachinePointerInfo(srcValue), false, false, false,
                            partAlign);
          }
          if (p.getNode())
            p.getNode()->setIROrder(idx + 1);
          InVals.push_back(p);
          ++InsIdx;
        }
        if (vtparts.size() > 0)
          --InsIdx;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(Ty);
        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
        unsigned NumElts = ObjectVT.getVectorNumElements();
        assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
               "Vector was not scalarized");
        EVT EltVT = ObjectVT.getVectorElementType();

        // V1 load
        // f32 = load ...
        if (NumElts == 1) {
          // We only have one element, so just directly load it
          Value *SrcValue = Constant::getNullValue(PointerType::get(
              EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
          SDValue P = DAG.getLoad(
              EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
              false, true,
              TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
          if (P.getNode())
            P.getNode()->setIROrder(idx + 1);

          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
            P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
          InVals.push_back(P);
          ++InsIdx;
        } else if (NumElts == 2) {
          // V2 load
          // f32,f32 = load ...
          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
          Value *SrcValue = Constant::getNullValue(PointerType::get(
              VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
          SDValue P = DAG.getLoad(
              VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
              false, true,
              TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
          if (P.getNode())
            P.getNode()->setIROrder(idx + 1);

          SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                     DAG.getIntPtrConstant(0));
          SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                     DAG.getIntPtrConstant(1));

          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
            Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
            Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
          }

          InVals.push_back(Elt0);
          InVals.push_back(Elt1);
          InsIdx += 2;
        } else {
          // V4 loads
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the vector will be expanded to a power of 2 elements, so we know
          // we can always round up to the next multiple of 4 when creating the
          // vector loads.
          // e.g.  4 elem => 1 ld.v4
          //       6 elem => 2 ld.v4
          //       8 elem => 2 ld.v4
          //      11 elem => 3 ld.v4
          unsigned VecSize = 4;
          if (EltVT.getSizeInBits() == 64) {
            VecSize = 2;
          }
          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
          unsigned Ofst = 0;
          for (unsigned i = 0; i < NumElts; i += VecSize) {
            Value *SrcValue = Constant::getNullValue(
                PointerType::get(VecVT.getTypeForEVT(F->getContext()),
                                 llvm::ADDRESS_SPACE_PARAM));
            SDValue SrcAddr =
                DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
                            DAG.getConstant(Ofst, getPointerTy()));
            SDValue P = DAG.getLoad(
                VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
                false, true,
                TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
            if (P.getNode())
              P.getNode()->setIROrder(idx + 1);

            for (unsigned j = 0; j < VecSize; ++j) {
              // The last chunk may be padded; skip elements past NumElts.
              if (i + j >= NumElts)
                break;
              SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                        DAG.getIntPtrConstant(j));
              if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
                Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
              InVals.push_back(Elt);
            }
            Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
          }
          InsIdx += NumElts;
        }

        if (NumElts > 0)
          --InsIdx;
        continue;
      }
      // A plain scalar.
      EVT ObjectVT = getValueType(Ty);
      // If ABI, load from the param symbol
      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
      Value *srcValue = Constant::getNullValue(PointerType::get(
          ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
      SDValue p;
      if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
        ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
                                 ISD::SEXTLOAD : ISD::ZEXTLOAD;
        p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
                           MachinePointerInfo(srcValue), ObjectVT, false, false,
                           false,
                           TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
      } else {
        p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
                        MachinePointerInfo(srcValue), false, false, false,
                        TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
      }
      if (p.getNode())
        p.getNode()->setIROrder(idx + 1);
      InVals.push_back(p);
      continue;
    }

    // Param has ByVal attribute
    // Return MoveParam(param symbol).
    // Ideally, the param symbol can be returned directly,
    // but when SDNode builder decides to use it in a CopyToReg(),
    // machine instruction fails because TargetExternalSymbol
    // (not lowered) is target dependent, and CopyToReg assumes
    // the source is lowered.
    EVT ObjectVT = getValueType(Ty);
    assert(ObjectVT == Ins[InsIdx].VT &&
           "Ins type did not match function type");
    SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
    SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
    if (p.getNode())
      p.getNode()->setIROrder(idx + 1);
    if (isKernel)
      InVals.push_back(p);
    else {
      // Device functions see byval params in local space; convert the
      // address to the generic space before handing it to the body.
      SDValue p2 = DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
          DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
      InVals.push_back(p2);
    }
  }

  // Clang will check explicit VarArg and issue error if any. However, Clang
  // will let code with implicit var arg like f() pass. See bug 617733.
  // We treat this case as if the arg list is empty.
  // if (F.isVarArg()) {
  // assert(0 && "VarArg not supported yet!");
  //}

  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));

  return Chain;
}


/// LowerReturn - Lower the return value into NVPTXISD::StoreRetval[V2|V4]
/// nodes that write the scalarized components into the return-param space,
/// then emit the RET_FLAG terminator.
SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 SDLoc dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();
  Type *RetTy = F->getReturnType();
  const DataLayout *TD = getDataLayout();

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
    // If we have a vector type, the OutVals array will be the scalarized
    // components and we have to combine them into 1 or more vector stores.
    unsigned NumElts = VTy->getNumElements();
    assert(NumElts == Outs.size() && "Bad scalarization of return value");

    // const_cast can be removed in later LLVM versions
    EVT EltVT = getValueType(RetTy).getVectorElementType();
    // Sub-16-bit elements are stored as i16 (EltVT stays the memory type).
    bool NeedExtend = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExtend = true;

    // V1 store
    if (NumElts == 1) {
      SDValue StoreVal = OutVals[0];
      // We only have one element, so just directly store it
      if (NeedExtend)
        StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
      SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
                                      DAG.getVTList(MVT::Other), Ops,
                                      EltVT, MachinePointerInfo());

    } else if (NumElts == 2) {
      // V2 store
      SDValue StoreVal0 = OutVals[0];
      SDValue StoreVal1 = OutVals[1];

      if (NeedExtend) {
        StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
        StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
      }

      SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,
                        StoreVal1 };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
                                      DAG.getVTList(MVT::Other), Ops,
                                      EltVT, MachinePointerInfo());
    } else {
      // V4 stores
      // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
      // vector will be expanded to a power of 2 elements, so we know we can
      // always round up to the next multiple of 4 when creating the vector
      // stores.
      // e.g.  4 elem => 1 st.v4
      //       6 elem => 2 st.v4
      //       8 elem => 2 st.v4
      //      11 elem => 3 st.v4

      unsigned VecSize = 4;
      if (OutVals[0].getValueType().getSizeInBits() == 64)
        VecSize = 2;

      unsigned Offset = 0;

      EVT VecVT =
          EVT::getVectorVT(F->getContext(), EltVT, VecSize);
      unsigned PerStoreOffset =
          TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));

      for (unsigned i = 0; i < NumElts; i += VecSize) {
        // Get values
        SDValue StoreVal;
        SmallVector<SDValue, 8> Ops;
        Ops.push_back(Chain);
        Ops.push_back(DAG.getConstant(Offset, MVT::i32));
        unsigned Opc = NVPTXISD::StoreRetvalV2;
        EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();

        StoreVal = OutVals[i];
        if (NeedExtend)
          StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
        Ops.push_back(StoreVal);

        // Trailing lanes beyond NumElts are padded with UNDEF.
        if (i + 1 < NumElts) {
          StoreVal = OutVals[i + 1];
          if (NeedExtend)
            StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
        } else {
          StoreVal = DAG.getUNDEF(ExtendedVT);
        }
        Ops.push_back(StoreVal);

        if (VecSize == 4) {
          Opc = NVPTXISD::StoreRetvalV4;
          if (i + 2 < NumElts) {
            StoreVal = OutVals[i + 2];
            if (NeedExtend)
              StoreVal =
                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
          } else {
            StoreVal = DAG.getUNDEF(ExtendedVT);
          }
          Ops.push_back(StoreVal);

          if (i + 3 < NumElts) {
            StoreVal = OutVals[i + 3];
            if (NeedExtend)
              StoreVal =
                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
          } else {
            StoreVal = DAG.getUNDEF(ExtendedVT);
          }
          Ops.push_back(StoreVal);
        }

        // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
        Chain =
            DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
                                    EltVT, MachinePointerInfo());
        Offset += PerStoreOffset;
      }
    }
  } else {
    SmallVector<EVT, 16> ValVTs;
    SmallVector<uint64_t, 16> Offsets;
    ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
    assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");

    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
      SDValue theVal = OutVals[i];
      EVT TheValType = theVal.getValueType();
      unsigned numElems = 1;
      if (TheValType.isVector())
        numElems = TheValType.getVectorNumElements();
      for (unsigned j = 0, je = numElems; j != je; ++j) {
        SDValue TmpVal = theVal;
        if (TheValType.isVector())
          TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                               TheValType.getVectorElementType(), TmpVal,
                               DAG.getIntPtrConstant(j));
        EVT TheStoreType = ValVTs[i];
        if (RetTy->isIntegerTy() &&
            TD->getTypeAllocSizeInBits(RetTy) < 32) {
          // The following zero-extension is for integer types only, and
          // specifically not for aggregates.
          TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
          TheStoreType = MVT::i32;
        }
        else if (TmpVal.getValueType().getSizeInBits() < 16)
          TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);

        SDValue Ops[] = {
          Chain,
          DAG.getConstant(Offsets[i], MVT::i32),
          TmpVal };
        Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
                                        DAG.getVTList(MVT::Other), Ops,
                                        TheStoreType,
                                        MachinePointerInfo());
      }
    }
  }

  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}


/// LowerAsmOperandForConstraint - Only single-character inline-asm
/// constraints are handled (delegated to the generic implementation);
/// multi-character constraints are silently ignored here.
void NVPTXTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  if (Constraint.length() > 1)
    return;
  else
    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// NVPTX supports vector of legal types of any length in Intrinsics because the
// NVPTX specific type legalizer
// will legalize them to the PTX supported
length. 2511bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { 2512 if (isTypeLegal(VT)) 2513 return true; 2514 if (VT.isVector()) { 2515 MVT eVT = VT.getVectorElementType(); 2516 if (isTypeLegal(eVT)) 2517 return true; 2518 } 2519 return false; 2520} 2521 2522static unsigned getOpcForTextureInstr(unsigned Intrinsic) { 2523 switch (Intrinsic) { 2524 default: 2525 return 0; 2526 2527 case Intrinsic::nvvm_tex_1d_v4f32_s32: 2528 return NVPTXISD::Tex1DFloatS32; 2529 case Intrinsic::nvvm_tex_1d_v4f32_f32: 2530 return NVPTXISD::Tex1DFloatFloat; 2531 case Intrinsic::nvvm_tex_1d_level_v4f32_f32: 2532 return NVPTXISD::Tex1DFloatFloatLevel; 2533 case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: 2534 return NVPTXISD::Tex1DFloatFloatGrad; 2535 case Intrinsic::nvvm_tex_1d_v4s32_s32: 2536 return NVPTXISD::Tex1DS32S32; 2537 case Intrinsic::nvvm_tex_1d_v4s32_f32: 2538 return NVPTXISD::Tex1DS32Float; 2539 case Intrinsic::nvvm_tex_1d_level_v4s32_f32: 2540 return NVPTXISD::Tex1DS32FloatLevel; 2541 case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: 2542 return NVPTXISD::Tex1DS32FloatGrad; 2543 case Intrinsic::nvvm_tex_1d_v4u32_s32: 2544 return NVPTXISD::Tex1DU32S32; 2545 case Intrinsic::nvvm_tex_1d_v4u32_f32: 2546 return NVPTXISD::Tex1DU32Float; 2547 case Intrinsic::nvvm_tex_1d_level_v4u32_f32: 2548 return NVPTXISD::Tex1DU32FloatLevel; 2549 case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: 2550 return NVPTXISD::Tex1DU32FloatGrad; 2551 2552 case Intrinsic::nvvm_tex_1d_array_v4f32_s32: 2553 return NVPTXISD::Tex1DArrayFloatS32; 2554 case Intrinsic::nvvm_tex_1d_array_v4f32_f32: 2555 return NVPTXISD::Tex1DArrayFloatFloat; 2556 case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: 2557 return NVPTXISD::Tex1DArrayFloatFloatLevel; 2558 case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: 2559 return NVPTXISD::Tex1DArrayFloatFloatGrad; 2560 case Intrinsic::nvvm_tex_1d_array_v4s32_s32: 2561 return NVPTXISD::Tex1DArrayS32S32; 2562 case Intrinsic::nvvm_tex_1d_array_v4s32_f32: 2563 return 
NVPTXISD::Tex1DArrayS32Float; 2564 case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: 2565 return NVPTXISD::Tex1DArrayS32FloatLevel; 2566 case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: 2567 return NVPTXISD::Tex1DArrayS32FloatGrad; 2568 case Intrinsic::nvvm_tex_1d_array_v4u32_s32: 2569 return NVPTXISD::Tex1DArrayU32S32; 2570 case Intrinsic::nvvm_tex_1d_array_v4u32_f32: 2571 return NVPTXISD::Tex1DArrayU32Float; 2572 case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: 2573 return NVPTXISD::Tex1DArrayU32FloatLevel; 2574 case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: 2575 return NVPTXISD::Tex1DArrayU32FloatGrad; 2576 2577 case Intrinsic::nvvm_tex_2d_v4f32_s32: 2578 return NVPTXISD::Tex2DFloatS32; 2579 case Intrinsic::nvvm_tex_2d_v4f32_f32: 2580 return NVPTXISD::Tex2DFloatFloat; 2581 case Intrinsic::nvvm_tex_2d_level_v4f32_f32: 2582 return NVPTXISD::Tex2DFloatFloatLevel; 2583 case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: 2584 return NVPTXISD::Tex2DFloatFloatGrad; 2585 case Intrinsic::nvvm_tex_2d_v4s32_s32: 2586 return NVPTXISD::Tex2DS32S32; 2587 case Intrinsic::nvvm_tex_2d_v4s32_f32: 2588 return NVPTXISD::Tex2DS32Float; 2589 case Intrinsic::nvvm_tex_2d_level_v4s32_f32: 2590 return NVPTXISD::Tex2DS32FloatLevel; 2591 case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: 2592 return NVPTXISD::Tex2DS32FloatGrad; 2593 case Intrinsic::nvvm_tex_2d_v4u32_s32: 2594 return NVPTXISD::Tex2DU32S32; 2595 case Intrinsic::nvvm_tex_2d_v4u32_f32: 2596 return NVPTXISD::Tex2DU32Float; 2597 case Intrinsic::nvvm_tex_2d_level_v4u32_f32: 2598 return NVPTXISD::Tex2DU32FloatLevel; 2599 case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: 2600 return NVPTXISD::Tex2DU32FloatGrad; 2601 2602 case Intrinsic::nvvm_tex_2d_array_v4f32_s32: 2603 return NVPTXISD::Tex2DArrayFloatS32; 2604 case Intrinsic::nvvm_tex_2d_array_v4f32_f32: 2605 return NVPTXISD::Tex2DArrayFloatFloat; 2606 case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: 2607 return NVPTXISD::Tex2DArrayFloatFloatLevel; 2608 case 
Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: 2609 return NVPTXISD::Tex2DArrayFloatFloatGrad; 2610 case Intrinsic::nvvm_tex_2d_array_v4s32_s32: 2611 return NVPTXISD::Tex2DArrayS32S32; 2612 case Intrinsic::nvvm_tex_2d_array_v4s32_f32: 2613 return NVPTXISD::Tex2DArrayS32Float; 2614 case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: 2615 return NVPTXISD::Tex2DArrayS32FloatLevel; 2616 case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: 2617 return NVPTXISD::Tex2DArrayS32FloatGrad; 2618 case Intrinsic::nvvm_tex_2d_array_v4u32_s32: 2619 return NVPTXISD::Tex2DArrayU32S32; 2620 case Intrinsic::nvvm_tex_2d_array_v4u32_f32: 2621 return NVPTXISD::Tex2DArrayU32Float; 2622 case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: 2623 return NVPTXISD::Tex2DArrayU32FloatLevel; 2624 case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: 2625 return NVPTXISD::Tex2DArrayU32FloatGrad; 2626 2627 case Intrinsic::nvvm_tex_3d_v4f32_s32: 2628 return NVPTXISD::Tex3DFloatS32; 2629 case Intrinsic::nvvm_tex_3d_v4f32_f32: 2630 return NVPTXISD::Tex3DFloatFloat; 2631 case Intrinsic::nvvm_tex_3d_level_v4f32_f32: 2632 return NVPTXISD::Tex3DFloatFloatLevel; 2633 case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: 2634 return NVPTXISD::Tex3DFloatFloatGrad; 2635 case Intrinsic::nvvm_tex_3d_v4s32_s32: 2636 return NVPTXISD::Tex3DS32S32; 2637 case Intrinsic::nvvm_tex_3d_v4s32_f32: 2638 return NVPTXISD::Tex3DS32Float; 2639 case Intrinsic::nvvm_tex_3d_level_v4s32_f32: 2640 return NVPTXISD::Tex3DS32FloatLevel; 2641 case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: 2642 return NVPTXISD::Tex3DS32FloatGrad; 2643 case Intrinsic::nvvm_tex_3d_v4u32_s32: 2644 return NVPTXISD::Tex3DU32S32; 2645 case Intrinsic::nvvm_tex_3d_v4u32_f32: 2646 return NVPTXISD::Tex3DU32Float; 2647 case Intrinsic::nvvm_tex_3d_level_v4u32_f32: 2648 return NVPTXISD::Tex3DU32FloatLevel; 2649 case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: 2650 return NVPTXISD::Tex3DU32FloatGrad; 2651 2652 case Intrinsic::nvvm_tex_cube_v4f32_f32: 2653 return NVPTXISD::TexCubeFloatFloat; 
2654 case Intrinsic::nvvm_tex_cube_level_v4f32_f32: 2655 return NVPTXISD::TexCubeFloatFloatLevel; 2656 case Intrinsic::nvvm_tex_cube_v4s32_f32: 2657 return NVPTXISD::TexCubeS32Float; 2658 case Intrinsic::nvvm_tex_cube_level_v4s32_f32: 2659 return NVPTXISD::TexCubeS32FloatLevel; 2660 case Intrinsic::nvvm_tex_cube_v4u32_f32: 2661 return NVPTXISD::TexCubeU32Float; 2662 case Intrinsic::nvvm_tex_cube_level_v4u32_f32: 2663 return NVPTXISD::TexCubeU32FloatLevel; 2664 2665 case Intrinsic::nvvm_tex_cube_array_v4f32_f32: 2666 return NVPTXISD::TexCubeArrayFloatFloat; 2667 case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: 2668 return NVPTXISD::TexCubeArrayFloatFloatLevel; 2669 case Intrinsic::nvvm_tex_cube_array_v4s32_f32: 2670 return NVPTXISD::TexCubeArrayS32Float; 2671 case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: 2672 return NVPTXISD::TexCubeArrayS32FloatLevel; 2673 case Intrinsic::nvvm_tex_cube_array_v4u32_f32: 2674 return NVPTXISD::TexCubeArrayU32Float; 2675 case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: 2676 return NVPTXISD::TexCubeArrayU32FloatLevel; 2677 2678 case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: 2679 return NVPTXISD::Tld4R2DFloatFloat; 2680 case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: 2681 return NVPTXISD::Tld4G2DFloatFloat; 2682 case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: 2683 return NVPTXISD::Tld4B2DFloatFloat; 2684 case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: 2685 return NVPTXISD::Tld4A2DFloatFloat; 2686 case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: 2687 return NVPTXISD::Tld4R2DS64Float; 2688 case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: 2689 return NVPTXISD::Tld4G2DS64Float; 2690 case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: 2691 return NVPTXISD::Tld4B2DS64Float; 2692 case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: 2693 return NVPTXISD::Tld4A2DS64Float; 2694 case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: 2695 return NVPTXISD::Tld4R2DU64Float; 2696 case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: 2697 return NVPTXISD::Tld4G2DU64Float; 2698 case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: 
2699 return NVPTXISD::Tld4B2DU64Float; 2700 case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: 2701 return NVPTXISD::Tld4A2DU64Float; 2702 2703 case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: 2704 return NVPTXISD::TexUnified1DFloatS32; 2705 case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: 2706 return NVPTXISD::TexUnified1DFloatFloat; 2707 case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: 2708 return NVPTXISD::TexUnified1DFloatFloatLevel; 2709 case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: 2710 return NVPTXISD::TexUnified1DFloatFloatGrad; 2711 case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: 2712 return NVPTXISD::TexUnified1DS32S32; 2713 case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: 2714 return NVPTXISD::TexUnified1DS32Float; 2715 case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: 2716 return NVPTXISD::TexUnified1DS32FloatLevel; 2717 case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: 2718 return NVPTXISD::TexUnified1DS32FloatGrad; 2719 case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: 2720 return NVPTXISD::TexUnified1DU32S32; 2721 case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: 2722 return NVPTXISD::TexUnified1DU32Float; 2723 case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: 2724 return NVPTXISD::TexUnified1DU32FloatLevel; 2725 case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: 2726 return NVPTXISD::TexUnified1DU32FloatGrad; 2727 2728 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: 2729 return NVPTXISD::TexUnified1DArrayFloatS32; 2730 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: 2731 return NVPTXISD::TexUnified1DArrayFloatFloat; 2732 case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: 2733 return NVPTXISD::TexUnified1DArrayFloatFloatLevel; 2734 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: 2735 return NVPTXISD::TexUnified1DArrayFloatFloatGrad; 2736 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: 2737 return NVPTXISD::TexUnified1DArrayS32S32; 2738 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: 2739 return 
NVPTXISD::TexUnified1DArrayS32Float; 2740 case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: 2741 return NVPTXISD::TexUnified1DArrayS32FloatLevel; 2742 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: 2743 return NVPTXISD::TexUnified1DArrayS32FloatGrad; 2744 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: 2745 return NVPTXISD::TexUnified1DArrayU32S32; 2746 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: 2747 return NVPTXISD::TexUnified1DArrayU32Float; 2748 case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: 2749 return NVPTXISD::TexUnified1DArrayU32FloatLevel; 2750 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: 2751 return NVPTXISD::TexUnified1DArrayU32FloatGrad; 2752 2753 case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: 2754 return NVPTXISD::TexUnified2DFloatS32; 2755 case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: 2756 return NVPTXISD::TexUnified2DFloatFloat; 2757 case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: 2758 return NVPTXISD::TexUnified2DFloatFloatLevel; 2759 case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: 2760 return NVPTXISD::TexUnified2DFloatFloatGrad; 2761 case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: 2762 return NVPTXISD::TexUnified2DS32S32; 2763 case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: 2764 return NVPTXISD::TexUnified2DS32Float; 2765 case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: 2766 return NVPTXISD::TexUnified2DS32FloatLevel; 2767 case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: 2768 return NVPTXISD::TexUnified2DS32FloatGrad; 2769 case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: 2770 return NVPTXISD::TexUnified2DU32S32; 2771 case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: 2772 return NVPTXISD::TexUnified2DU32Float; 2773 case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: 2774 return NVPTXISD::TexUnified2DU32FloatLevel; 2775 case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: 2776 return NVPTXISD::TexUnified2DU32FloatGrad; 2777 2778 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: 
2779 return NVPTXISD::TexUnified2DArrayFloatS32; 2780 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: 2781 return NVPTXISD::TexUnified2DArrayFloatFloat; 2782 case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: 2783 return NVPTXISD::TexUnified2DArrayFloatFloatLevel; 2784 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: 2785 return NVPTXISD::TexUnified2DArrayFloatFloatGrad; 2786 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: 2787 return NVPTXISD::TexUnified2DArrayS32S32; 2788 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: 2789 return NVPTXISD::TexUnified2DArrayS32Float; 2790 case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: 2791 return NVPTXISD::TexUnified2DArrayS32FloatLevel; 2792 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: 2793 return NVPTXISD::TexUnified2DArrayS32FloatGrad; 2794 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: 2795 return NVPTXISD::TexUnified2DArrayU32S32; 2796 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: 2797 return NVPTXISD::TexUnified2DArrayU32Float; 2798 case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: 2799 return NVPTXISD::TexUnified2DArrayU32FloatLevel; 2800 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: 2801 return NVPTXISD::TexUnified2DArrayU32FloatGrad; 2802 2803 case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: 2804 return NVPTXISD::TexUnified3DFloatS32; 2805 case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: 2806 return NVPTXISD::TexUnified3DFloatFloat; 2807 case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: 2808 return NVPTXISD::TexUnified3DFloatFloatLevel; 2809 case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: 2810 return NVPTXISD::TexUnified3DFloatFloatGrad; 2811 case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: 2812 return NVPTXISD::TexUnified3DS32S32; 2813 case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: 2814 return NVPTXISD::TexUnified3DS32Float; 2815 case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: 2816 return NVPTXISD::TexUnified3DS32FloatLevel; 
2817 case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: 2818 return NVPTXISD::TexUnified3DS32FloatGrad; 2819 case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: 2820 return NVPTXISD::TexUnified3DU32S32; 2821 case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: 2822 return NVPTXISD::TexUnified3DU32Float; 2823 case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: 2824 return NVPTXISD::TexUnified3DU32FloatLevel; 2825 case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: 2826 return NVPTXISD::TexUnified3DU32FloatGrad; 2827 2828 case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: 2829 return NVPTXISD::TexUnifiedCubeFloatFloat; 2830 case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: 2831 return NVPTXISD::TexUnifiedCubeFloatFloatLevel; 2832 case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: 2833 return NVPTXISD::TexUnifiedCubeS32Float; 2834 case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: 2835 return NVPTXISD::TexUnifiedCubeS32FloatLevel; 2836 case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: 2837 return NVPTXISD::TexUnifiedCubeU32Float; 2838 case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: 2839 return NVPTXISD::TexUnifiedCubeU32FloatLevel; 2840 2841 case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: 2842 return NVPTXISD::TexUnifiedCubeArrayFloatFloat; 2843 case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: 2844 return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; 2845 case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: 2846 return NVPTXISD::TexUnifiedCubeArrayS32Float; 2847 case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: 2848 return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; 2849 case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: 2850 return NVPTXISD::TexUnifiedCubeArrayU32Float; 2851 case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: 2852 return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; 2853 2854 case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: 2855 return NVPTXISD::Tld4UnifiedR2DFloatFloat; 2856 case 
Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: 2857 return NVPTXISD::Tld4UnifiedG2DFloatFloat; 2858 case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: 2859 return NVPTXISD::Tld4UnifiedB2DFloatFloat; 2860 case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: 2861 return NVPTXISD::Tld4UnifiedA2DFloatFloat; 2862 case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: 2863 return NVPTXISD::Tld4UnifiedR2DS64Float; 2864 case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: 2865 return NVPTXISD::Tld4UnifiedG2DS64Float; 2866 case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: 2867 return NVPTXISD::Tld4UnifiedB2DS64Float; 2868 case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: 2869 return NVPTXISD::Tld4UnifiedA2DS64Float; 2870 case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: 2871 return NVPTXISD::Tld4UnifiedR2DU64Float; 2872 case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: 2873 return NVPTXISD::Tld4UnifiedG2DU64Float; 2874 case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: 2875 return NVPTXISD::Tld4UnifiedB2DU64Float; 2876 case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: 2877 return NVPTXISD::Tld4UnifiedA2DU64Float; 2878 } 2879} 2880 2881static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { 2882 switch (Intrinsic) { 2883 default: 2884 return 0; 2885 case Intrinsic::nvvm_suld_1d_i8_clamp: 2886 return NVPTXISD::Suld1DI8Clamp; 2887 case Intrinsic::nvvm_suld_1d_i16_clamp: 2888 return NVPTXISD::Suld1DI16Clamp; 2889 case Intrinsic::nvvm_suld_1d_i32_clamp: 2890 return NVPTXISD::Suld1DI32Clamp; 2891 case Intrinsic::nvvm_suld_1d_i64_clamp: 2892 return NVPTXISD::Suld1DI64Clamp; 2893 case Intrinsic::nvvm_suld_1d_v2i8_clamp: 2894 return NVPTXISD::Suld1DV2I8Clamp; 2895 case Intrinsic::nvvm_suld_1d_v2i16_clamp: 2896 return NVPTXISD::Suld1DV2I16Clamp; 2897 case Intrinsic::nvvm_suld_1d_v2i32_clamp: 2898 return NVPTXISD::Suld1DV2I32Clamp; 2899 case Intrinsic::nvvm_suld_1d_v2i64_clamp: 2900 return NVPTXISD::Suld1DV2I64Clamp; 2901 case Intrinsic::nvvm_suld_1d_v4i8_clamp: 2902 return 
NVPTXISD::Suld1DV4I8Clamp; 2903 case Intrinsic::nvvm_suld_1d_v4i16_clamp: 2904 return NVPTXISD::Suld1DV4I16Clamp; 2905 case Intrinsic::nvvm_suld_1d_v4i32_clamp: 2906 return NVPTXISD::Suld1DV4I32Clamp; 2907 case Intrinsic::nvvm_suld_1d_array_i8_clamp: 2908 return NVPTXISD::Suld1DArrayI8Clamp; 2909 case Intrinsic::nvvm_suld_1d_array_i16_clamp: 2910 return NVPTXISD::Suld1DArrayI16Clamp; 2911 case Intrinsic::nvvm_suld_1d_array_i32_clamp: 2912 return NVPTXISD::Suld1DArrayI32Clamp; 2913 case Intrinsic::nvvm_suld_1d_array_i64_clamp: 2914 return NVPTXISD::Suld1DArrayI64Clamp; 2915 case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: 2916 return NVPTXISD::Suld1DArrayV2I8Clamp; 2917 case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: 2918 return NVPTXISD::Suld1DArrayV2I16Clamp; 2919 case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: 2920 return NVPTXISD::Suld1DArrayV2I32Clamp; 2921 case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: 2922 return NVPTXISD::Suld1DArrayV2I64Clamp; 2923 case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: 2924 return NVPTXISD::Suld1DArrayV4I8Clamp; 2925 case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: 2926 return NVPTXISD::Suld1DArrayV4I16Clamp; 2927 case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: 2928 return NVPTXISD::Suld1DArrayV4I32Clamp; 2929 case Intrinsic::nvvm_suld_2d_i8_clamp: 2930 return NVPTXISD::Suld2DI8Clamp; 2931 case Intrinsic::nvvm_suld_2d_i16_clamp: 2932 return NVPTXISD::Suld2DI16Clamp; 2933 case Intrinsic::nvvm_suld_2d_i32_clamp: 2934 return NVPTXISD::Suld2DI32Clamp; 2935 case Intrinsic::nvvm_suld_2d_i64_clamp: 2936 return NVPTXISD::Suld2DI64Clamp; 2937 case Intrinsic::nvvm_suld_2d_v2i8_clamp: 2938 return NVPTXISD::Suld2DV2I8Clamp; 2939 case Intrinsic::nvvm_suld_2d_v2i16_clamp: 2940 return NVPTXISD::Suld2DV2I16Clamp; 2941 case Intrinsic::nvvm_suld_2d_v2i32_clamp: 2942 return NVPTXISD::Suld2DV2I32Clamp; 2943 case Intrinsic::nvvm_suld_2d_v2i64_clamp: 2944 return NVPTXISD::Suld2DV2I64Clamp; 2945 case Intrinsic::nvvm_suld_2d_v4i8_clamp: 2946 return 
NVPTXISD::Suld2DV4I8Clamp; 2947 case Intrinsic::nvvm_suld_2d_v4i16_clamp: 2948 return NVPTXISD::Suld2DV4I16Clamp; 2949 case Intrinsic::nvvm_suld_2d_v4i32_clamp: 2950 return NVPTXISD::Suld2DV4I32Clamp; 2951 case Intrinsic::nvvm_suld_2d_array_i8_clamp: 2952 return NVPTXISD::Suld2DArrayI8Clamp; 2953 case Intrinsic::nvvm_suld_2d_array_i16_clamp: 2954 return NVPTXISD::Suld2DArrayI16Clamp; 2955 case Intrinsic::nvvm_suld_2d_array_i32_clamp: 2956 return NVPTXISD::Suld2DArrayI32Clamp; 2957 case Intrinsic::nvvm_suld_2d_array_i64_clamp: 2958 return NVPTXISD::Suld2DArrayI64Clamp; 2959 case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: 2960 return NVPTXISD::Suld2DArrayV2I8Clamp; 2961 case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: 2962 return NVPTXISD::Suld2DArrayV2I16Clamp; 2963 case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: 2964 return NVPTXISD::Suld2DArrayV2I32Clamp; 2965 case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: 2966 return NVPTXISD::Suld2DArrayV2I64Clamp; 2967 case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: 2968 return NVPTXISD::Suld2DArrayV4I8Clamp; 2969 case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: 2970 return NVPTXISD::Suld2DArrayV4I16Clamp; 2971 case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: 2972 return NVPTXISD::Suld2DArrayV4I32Clamp; 2973 case Intrinsic::nvvm_suld_3d_i8_clamp: 2974 return NVPTXISD::Suld3DI8Clamp; 2975 case Intrinsic::nvvm_suld_3d_i16_clamp: 2976 return NVPTXISD::Suld3DI16Clamp; 2977 case Intrinsic::nvvm_suld_3d_i32_clamp: 2978 return NVPTXISD::Suld3DI32Clamp; 2979 case Intrinsic::nvvm_suld_3d_i64_clamp: 2980 return NVPTXISD::Suld3DI64Clamp; 2981 case Intrinsic::nvvm_suld_3d_v2i8_clamp: 2982 return NVPTXISD::Suld3DV2I8Clamp; 2983 case Intrinsic::nvvm_suld_3d_v2i16_clamp: 2984 return NVPTXISD::Suld3DV2I16Clamp; 2985 case Intrinsic::nvvm_suld_3d_v2i32_clamp: 2986 return NVPTXISD::Suld3DV2I32Clamp; 2987 case Intrinsic::nvvm_suld_3d_v2i64_clamp: 2988 return NVPTXISD::Suld3DV2I64Clamp; 2989 case Intrinsic::nvvm_suld_3d_v4i8_clamp: 2990 return 
NVPTXISD::Suld3DV4I8Clamp; 2991 case Intrinsic::nvvm_suld_3d_v4i16_clamp: 2992 return NVPTXISD::Suld3DV4I16Clamp; 2993 case Intrinsic::nvvm_suld_3d_v4i32_clamp: 2994 return NVPTXISD::Suld3DV4I32Clamp; 2995 case Intrinsic::nvvm_suld_1d_i8_trap: 2996 return NVPTXISD::Suld1DI8Trap; 2997 case Intrinsic::nvvm_suld_1d_i16_trap: 2998 return NVPTXISD::Suld1DI16Trap; 2999 case Intrinsic::nvvm_suld_1d_i32_trap: 3000 return NVPTXISD::Suld1DI32Trap; 3001 case Intrinsic::nvvm_suld_1d_i64_trap: 3002 return NVPTXISD::Suld1DI64Trap; 3003 case Intrinsic::nvvm_suld_1d_v2i8_trap: 3004 return NVPTXISD::Suld1DV2I8Trap; 3005 case Intrinsic::nvvm_suld_1d_v2i16_trap: 3006 return NVPTXISD::Suld1DV2I16Trap; 3007 case Intrinsic::nvvm_suld_1d_v2i32_trap: 3008 return NVPTXISD::Suld1DV2I32Trap; 3009 case Intrinsic::nvvm_suld_1d_v2i64_trap: 3010 return NVPTXISD::Suld1DV2I64Trap; 3011 case Intrinsic::nvvm_suld_1d_v4i8_trap: 3012 return NVPTXISD::Suld1DV4I8Trap; 3013 case Intrinsic::nvvm_suld_1d_v4i16_trap: 3014 return NVPTXISD::Suld1DV4I16Trap; 3015 case Intrinsic::nvvm_suld_1d_v4i32_trap: 3016 return NVPTXISD::Suld1DV4I32Trap; 3017 case Intrinsic::nvvm_suld_1d_array_i8_trap: 3018 return NVPTXISD::Suld1DArrayI8Trap; 3019 case Intrinsic::nvvm_suld_1d_array_i16_trap: 3020 return NVPTXISD::Suld1DArrayI16Trap; 3021 case Intrinsic::nvvm_suld_1d_array_i32_trap: 3022 return NVPTXISD::Suld1DArrayI32Trap; 3023 case Intrinsic::nvvm_suld_1d_array_i64_trap: 3024 return NVPTXISD::Suld1DArrayI64Trap; 3025 case Intrinsic::nvvm_suld_1d_array_v2i8_trap: 3026 return NVPTXISD::Suld1DArrayV2I8Trap; 3027 case Intrinsic::nvvm_suld_1d_array_v2i16_trap: 3028 return NVPTXISD::Suld1DArrayV2I16Trap; 3029 case Intrinsic::nvvm_suld_1d_array_v2i32_trap: 3030 return NVPTXISD::Suld1DArrayV2I32Trap; 3031 case Intrinsic::nvvm_suld_1d_array_v2i64_trap: 3032 return NVPTXISD::Suld1DArrayV2I64Trap; 3033 case Intrinsic::nvvm_suld_1d_array_v4i8_trap: 3034 return NVPTXISD::Suld1DArrayV4I8Trap; 3035 case 
Intrinsic::nvvm_suld_1d_array_v4i16_trap: 3036 return NVPTXISD::Suld1DArrayV4I16Trap; 3037 case Intrinsic::nvvm_suld_1d_array_v4i32_trap: 3038 return NVPTXISD::Suld1DArrayV4I32Trap; 3039 case Intrinsic::nvvm_suld_2d_i8_trap: 3040 return NVPTXISD::Suld2DI8Trap; 3041 case Intrinsic::nvvm_suld_2d_i16_trap: 3042 return NVPTXISD::Suld2DI16Trap; 3043 case Intrinsic::nvvm_suld_2d_i32_trap: 3044 return NVPTXISD::Suld2DI32Trap; 3045 case Intrinsic::nvvm_suld_2d_i64_trap: 3046 return NVPTXISD::Suld2DI64Trap; 3047 case Intrinsic::nvvm_suld_2d_v2i8_trap: 3048 return NVPTXISD::Suld2DV2I8Trap; 3049 case Intrinsic::nvvm_suld_2d_v2i16_trap: 3050 return NVPTXISD::Suld2DV2I16Trap; 3051 case Intrinsic::nvvm_suld_2d_v2i32_trap: 3052 return NVPTXISD::Suld2DV2I32Trap; 3053 case Intrinsic::nvvm_suld_2d_v2i64_trap: 3054 return NVPTXISD::Suld2DV2I64Trap; 3055 case Intrinsic::nvvm_suld_2d_v4i8_trap: 3056 return NVPTXISD::Suld2DV4I8Trap; 3057 case Intrinsic::nvvm_suld_2d_v4i16_trap: 3058 return NVPTXISD::Suld2DV4I16Trap; 3059 case Intrinsic::nvvm_suld_2d_v4i32_trap: 3060 return NVPTXISD::Suld2DV4I32Trap; 3061 case Intrinsic::nvvm_suld_2d_array_i8_trap: 3062 return NVPTXISD::Suld2DArrayI8Trap; 3063 case Intrinsic::nvvm_suld_2d_array_i16_trap: 3064 return NVPTXISD::Suld2DArrayI16Trap; 3065 case Intrinsic::nvvm_suld_2d_array_i32_trap: 3066 return NVPTXISD::Suld2DArrayI32Trap; 3067 case Intrinsic::nvvm_suld_2d_array_i64_trap: 3068 return NVPTXISD::Suld2DArrayI64Trap; 3069 case Intrinsic::nvvm_suld_2d_array_v2i8_trap: 3070 return NVPTXISD::Suld2DArrayV2I8Trap; 3071 case Intrinsic::nvvm_suld_2d_array_v2i16_trap: 3072 return NVPTXISD::Suld2DArrayV2I16Trap; 3073 case Intrinsic::nvvm_suld_2d_array_v2i32_trap: 3074 return NVPTXISD::Suld2DArrayV2I32Trap; 3075 case Intrinsic::nvvm_suld_2d_array_v2i64_trap: 3076 return NVPTXISD::Suld2DArrayV2I64Trap; 3077 case Intrinsic::nvvm_suld_2d_array_v4i8_trap: 3078 return NVPTXISD::Suld2DArrayV4I8Trap; 3079 case Intrinsic::nvvm_suld_2d_array_v4i16_trap: 3080 
return NVPTXISD::Suld2DArrayV4I16Trap; 3081 case Intrinsic::nvvm_suld_2d_array_v4i32_trap: 3082 return NVPTXISD::Suld2DArrayV4I32Trap; 3083 case Intrinsic::nvvm_suld_3d_i8_trap: 3084 return NVPTXISD::Suld3DI8Trap; 3085 case Intrinsic::nvvm_suld_3d_i16_trap: 3086 return NVPTXISD::Suld3DI16Trap; 3087 case Intrinsic::nvvm_suld_3d_i32_trap: 3088 return NVPTXISD::Suld3DI32Trap; 3089 case Intrinsic::nvvm_suld_3d_i64_trap: 3090 return NVPTXISD::Suld3DI64Trap; 3091 case Intrinsic::nvvm_suld_3d_v2i8_trap: 3092 return NVPTXISD::Suld3DV2I8Trap; 3093 case Intrinsic::nvvm_suld_3d_v2i16_trap: 3094 return NVPTXISD::Suld3DV2I16Trap; 3095 case Intrinsic::nvvm_suld_3d_v2i32_trap: 3096 return NVPTXISD::Suld3DV2I32Trap; 3097 case Intrinsic::nvvm_suld_3d_v2i64_trap: 3098 return NVPTXISD::Suld3DV2I64Trap; 3099 case Intrinsic::nvvm_suld_3d_v4i8_trap: 3100 return NVPTXISD::Suld3DV4I8Trap; 3101 case Intrinsic::nvvm_suld_3d_v4i16_trap: 3102 return NVPTXISD::Suld3DV4I16Trap; 3103 case Intrinsic::nvvm_suld_3d_v4i32_trap: 3104 return NVPTXISD::Suld3DV4I32Trap; 3105 case Intrinsic::nvvm_suld_1d_i8_zero: 3106 return NVPTXISD::Suld1DI8Zero; 3107 case Intrinsic::nvvm_suld_1d_i16_zero: 3108 return NVPTXISD::Suld1DI16Zero; 3109 case Intrinsic::nvvm_suld_1d_i32_zero: 3110 return NVPTXISD::Suld1DI32Zero; 3111 case Intrinsic::nvvm_suld_1d_i64_zero: 3112 return NVPTXISD::Suld1DI64Zero; 3113 case Intrinsic::nvvm_suld_1d_v2i8_zero: 3114 return NVPTXISD::Suld1DV2I8Zero; 3115 case Intrinsic::nvvm_suld_1d_v2i16_zero: 3116 return NVPTXISD::Suld1DV2I16Zero; 3117 case Intrinsic::nvvm_suld_1d_v2i32_zero: 3118 return NVPTXISD::Suld1DV2I32Zero; 3119 case Intrinsic::nvvm_suld_1d_v2i64_zero: 3120 return NVPTXISD::Suld1DV2I64Zero; 3121 case Intrinsic::nvvm_suld_1d_v4i8_zero: 3122 return NVPTXISD::Suld1DV4I8Zero; 3123 case Intrinsic::nvvm_suld_1d_v4i16_zero: 3124 return NVPTXISD::Suld1DV4I16Zero; 3125 case Intrinsic::nvvm_suld_1d_v4i32_zero: 3126 return NVPTXISD::Suld1DV4I32Zero; 3127 case 
Intrinsic::nvvm_suld_1d_array_i8_zero: 3128 return NVPTXISD::Suld1DArrayI8Zero; 3129 case Intrinsic::nvvm_suld_1d_array_i16_zero: 3130 return NVPTXISD::Suld1DArrayI16Zero; 3131 case Intrinsic::nvvm_suld_1d_array_i32_zero: 3132 return NVPTXISD::Suld1DArrayI32Zero; 3133 case Intrinsic::nvvm_suld_1d_array_i64_zero: 3134 return NVPTXISD::Suld1DArrayI64Zero; 3135 case Intrinsic::nvvm_suld_1d_array_v2i8_zero: 3136 return NVPTXISD::Suld1DArrayV2I8Zero; 3137 case Intrinsic::nvvm_suld_1d_array_v2i16_zero: 3138 return NVPTXISD::Suld1DArrayV2I16Zero; 3139 case Intrinsic::nvvm_suld_1d_array_v2i32_zero: 3140 return NVPTXISD::Suld1DArrayV2I32Zero; 3141 case Intrinsic::nvvm_suld_1d_array_v2i64_zero: 3142 return NVPTXISD::Suld1DArrayV2I64Zero; 3143 case Intrinsic::nvvm_suld_1d_array_v4i8_zero: 3144 return NVPTXISD::Suld1DArrayV4I8Zero; 3145 case Intrinsic::nvvm_suld_1d_array_v4i16_zero: 3146 return NVPTXISD::Suld1DArrayV4I16Zero; 3147 case Intrinsic::nvvm_suld_1d_array_v4i32_zero: 3148 return NVPTXISD::Suld1DArrayV4I32Zero; 3149 case Intrinsic::nvvm_suld_2d_i8_zero: 3150 return NVPTXISD::Suld2DI8Zero; 3151 case Intrinsic::nvvm_suld_2d_i16_zero: 3152 return NVPTXISD::Suld2DI16Zero; 3153 case Intrinsic::nvvm_suld_2d_i32_zero: 3154 return NVPTXISD::Suld2DI32Zero; 3155 case Intrinsic::nvvm_suld_2d_i64_zero: 3156 return NVPTXISD::Suld2DI64Zero; 3157 case Intrinsic::nvvm_suld_2d_v2i8_zero: 3158 return NVPTXISD::Suld2DV2I8Zero; 3159 case Intrinsic::nvvm_suld_2d_v2i16_zero: 3160 return NVPTXISD::Suld2DV2I16Zero; 3161 case Intrinsic::nvvm_suld_2d_v2i32_zero: 3162 return NVPTXISD::Suld2DV2I32Zero; 3163 case Intrinsic::nvvm_suld_2d_v2i64_zero: 3164 return NVPTXISD::Suld2DV2I64Zero; 3165 case Intrinsic::nvvm_suld_2d_v4i8_zero: 3166 return NVPTXISD::Suld2DV4I8Zero; 3167 case Intrinsic::nvvm_suld_2d_v4i16_zero: 3168 return NVPTXISD::Suld2DV4I16Zero; 3169 case Intrinsic::nvvm_suld_2d_v4i32_zero: 3170 return NVPTXISD::Suld2DV4I32Zero; 3171 case Intrinsic::nvvm_suld_2d_array_i8_zero: 3172 return 
NVPTXISD::Suld2DArrayI8Zero; 3173 case Intrinsic::nvvm_suld_2d_array_i16_zero: 3174 return NVPTXISD::Suld2DArrayI16Zero; 3175 case Intrinsic::nvvm_suld_2d_array_i32_zero: 3176 return NVPTXISD::Suld2DArrayI32Zero; 3177 case Intrinsic::nvvm_suld_2d_array_i64_zero: 3178 return NVPTXISD::Suld2DArrayI64Zero; 3179 case Intrinsic::nvvm_suld_2d_array_v2i8_zero: 3180 return NVPTXISD::Suld2DArrayV2I8Zero; 3181 case Intrinsic::nvvm_suld_2d_array_v2i16_zero: 3182 return NVPTXISD::Suld2DArrayV2I16Zero; 3183 case Intrinsic::nvvm_suld_2d_array_v2i32_zero: 3184 return NVPTXISD::Suld2DArrayV2I32Zero; 3185 case Intrinsic::nvvm_suld_2d_array_v2i64_zero: 3186 return NVPTXISD::Suld2DArrayV2I64Zero; 3187 case Intrinsic::nvvm_suld_2d_array_v4i8_zero: 3188 return NVPTXISD::Suld2DArrayV4I8Zero; 3189 case Intrinsic::nvvm_suld_2d_array_v4i16_zero: 3190 return NVPTXISD::Suld2DArrayV4I16Zero; 3191 case Intrinsic::nvvm_suld_2d_array_v4i32_zero: 3192 return NVPTXISD::Suld2DArrayV4I32Zero; 3193 case Intrinsic::nvvm_suld_3d_i8_zero: 3194 return NVPTXISD::Suld3DI8Zero; 3195 case Intrinsic::nvvm_suld_3d_i16_zero: 3196 return NVPTXISD::Suld3DI16Zero; 3197 case Intrinsic::nvvm_suld_3d_i32_zero: 3198 return NVPTXISD::Suld3DI32Zero; 3199 case Intrinsic::nvvm_suld_3d_i64_zero: 3200 return NVPTXISD::Suld3DI64Zero; 3201 case Intrinsic::nvvm_suld_3d_v2i8_zero: 3202 return NVPTXISD::Suld3DV2I8Zero; 3203 case Intrinsic::nvvm_suld_3d_v2i16_zero: 3204 return NVPTXISD::Suld3DV2I16Zero; 3205 case Intrinsic::nvvm_suld_3d_v2i32_zero: 3206 return NVPTXISD::Suld3DV2I32Zero; 3207 case Intrinsic::nvvm_suld_3d_v2i64_zero: 3208 return NVPTXISD::Suld3DV2I64Zero; 3209 case Intrinsic::nvvm_suld_3d_v4i8_zero: 3210 return NVPTXISD::Suld3DV4I8Zero; 3211 case Intrinsic::nvvm_suld_3d_v4i16_zero: 3212 return NVPTXISD::Suld3DV4I16Zero; 3213 case Intrinsic::nvvm_suld_3d_v4i32_zero: 3214 return NVPTXISD::Suld3DV4I32Zero; 3215 } 3216} 3217 3218// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as 3219// 
// TgtMemIntrinsic because we need the information that is only available in
// the "Value" type of the destination pointer. In particular, the address
// space information.
bool NVPTXTargetLowering::getTgtMemIntrinsic(
    IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;

  // Atomic f32 add: both reads and writes the pointed-to f32.
  case Intrinsic::nvvm_atomic_load_add_f32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  // Atomic i32 increment/decrement: both reads and writes the pointed-to i32.
  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  // ldu: read-only load; alignment is carried in the second argument.
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
      Info.memVT = getValueType(I.getType());
    else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
      Info.memVT = getPointerTy();
    else
      Info.memVT = getValueType(I.getType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();

    return true;
  }
  // ldg: read-only load; alignment is carried in the second argument.
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
      Info.memVT = getValueType(I.getType());
    else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
      Info.memVT = getPointerTy();
    else
      Info.memVT = getValueType(I.getType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();

    return true;
  }

  // Texture reads that produce a v4f32 result (align 16).
  case Intrinsic::nvvm_tex_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
    Info.opc = getOpcForTextureInstr(Intrinsic);
    Info.memVT = MVT::v4f32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  // Texture reads that produce a v4i32 result (both signed and unsigned
  // flavors; align 16).
  case Intrinsic::nvvm_tex_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
    Info.opc = getOpcForTextureInstr(Intrinsic);
    Info.memVT = MVT::v4i32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  // Surface loads with an i8 element type (clamp/trap/zero variants).
  case Intrinsic::nvvm_suld_1d_i8_clamp:
  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_i8_clamp:
  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_3d_i8_clamp:
  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_i8_trap:
  case Intrinsic::nvvm_suld_1d_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_array_i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_i8_trap:
  case Intrinsic::nvvm_suld_2d_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_array_i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_3d_i8_trap:
  case Intrinsic::nvvm_suld_3d_v2i8_trap:
  case Intrinsic::nvvm_suld_3d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_i8_zero:
  case Intrinsic::nvvm_suld_1d_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_v4i8_zero:
  case Intrinsic::nvvm_suld_1d_array_i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_i8_zero:
  case Intrinsic::nvvm_suld_2d_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_array_i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_3d_i8_zero:
  case Intrinsic::nvvm_suld_3d_v2i8_zero:
  case Intrinsic::nvvm_suld_3d_v4i8_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i8;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  // Surface loads with an i16 element type (clamp/trap/zero variants).
  case Intrinsic::nvvm_suld_1d_i16_clamp:
  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_i16_clamp:
  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_3d_i16_clamp:
  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_i16_trap:
  case Intrinsic::nvvm_suld_1d_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_array_i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_i16_trap:
  case Intrinsic::nvvm_suld_2d_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_array_i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_3d_i16_trap:
  case Intrinsic::nvvm_suld_3d_v2i16_trap:
  case Intrinsic::nvvm_suld_3d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_i16_zero:
  case Intrinsic::nvvm_suld_1d_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_v4i16_zero:
  case Intrinsic::nvvm_suld_1d_array_i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_i16_zero:
  case Intrinsic::nvvm_suld_2d_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_array_i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_3d_i16_zero:
  case Intrinsic::nvvm_suld_3d_v2i16_zero:
  case Intrinsic::nvvm_suld_3d_v4i16_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i16;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  // Surface loads with an i32 element type (clamp/trap/zero variants).
  case Intrinsic::nvvm_suld_1d_i32_clamp:
  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_i32_clamp:
  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_3d_i32_clamp:
  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_i32_trap:
  case Intrinsic::nvvm_suld_1d_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_array_i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_i32_trap:
  case Intrinsic::nvvm_suld_2d_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_array_i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_3d_i32_trap:
  case Intrinsic::nvvm_suld_3d_v2i32_trap:
  case Intrinsic::nvvm_suld_3d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_i32_zero:
  case Intrinsic::nvvm_suld_1d_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_v4i32_zero:
  case Intrinsic::nvvm_suld_1d_array_i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_i32_zero:
  case Intrinsic::nvvm_suld_2d_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_array_i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_3d_i32_zero:
  case Intrinsic::nvvm_suld_3d_v2i32_zero:
  case Intrinsic::nvvm_suld_3d_v4i32_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  // Surface loads with an i64 element type (no v4 variants; clamp/trap/zero).
  case Intrinsic::nvvm_suld_1d_i64_clamp:
  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_i64_clamp:
  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_3d_i64_clamp:
  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_i64_trap:
  case Intrinsic::nvvm_suld_1d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_array_i64_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_i64_trap:
  case Intrinsic::nvvm_suld_2d_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_array_i64_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_3d_i64_trap:
  case Intrinsic::nvvm_suld_3d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_i64_zero:
  case Intrinsic::nvvm_suld_1d_v2i64_zero:
  case Intrinsic::nvvm_suld_1d_array_i64_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_i64_zero:
  case Intrinsic::nvvm_suld_2d_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_array_i64_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_3d_i64_zero:
  case Intrinsic::nvvm_suld_3d_v2i64_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i64;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  }
  return false;
}

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                                Type *Ty) const {

  // AddrMode - This represents an addressing mode of:
  //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  //
  // The legal address modes are
  // - [avar]
  // - [areg]
  // - [areg+immoff]
  // - [immAddr]

  // A global base must stand alone: no offset, register, or scale.
  if (AM.BaseGV) {
    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
      return false;
    return true;
  }

  switch (AM.Scale) {
  case 0: // "r", "r+i" or "i" is allowed
    break;
  case 1:
    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
      return false;
    // Otherwise we have r+i.
    break;
  default:
    // No scale > 1 is allowed
    return false;
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                         NVPTX Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    // All single-letter constraints recognized below name a register class;
    // the concrete class is chosen in getRegForInlineAsmConstraint.
    case 'b':
    case 'r':
    case 'h':
    case 'c':
    case 'l':
    case 'f':
    case 'd':
    case '0':
    case 'N':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Map a single-letter inline-asm constraint to an NVPTX register class:
/// 'b' -> i1 (predicate), 'c'/'h' -> i16, 'r' -> i32, 'l'/'N' -> i64,
/// 'f' -> f32, 'd' -> f64. Anything else falls back to the generic handling.
std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                  MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'b':
      return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
    case 'c':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'h':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'r':
      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
    case 'l':
    case 'N':
      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
    case 'f':
      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
    case 'd':
      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
3781unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { 3782 return 4; 3783} 3784 3785//===----------------------------------------------------------------------===// 3786// NVPTX DAG Combining 3787//===----------------------------------------------------------------------===// 3788 3789bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, 3790 CodeGenOpt::Level OptLevel) const { 3791 const Function *F = MF.getFunction(); 3792 const TargetOptions &TO = MF.getTarget().Options; 3793 3794 // Always honor command-line argument 3795 if (FMAContractLevelOpt.getNumOccurrences() > 0) { 3796 return FMAContractLevelOpt > 0; 3797 } else if (OptLevel == 0) { 3798 // Do not contract if we're not optimizing the code 3799 return false; 3800 } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) { 3801 // Honor TargetOptions flags that explicitly say fusion is okay 3802 return true; 3803 } else if (F->hasFnAttribute("unsafe-fp-math")) { 3804 // Check for unsafe-fp-math=true coming from Clang 3805 Attribute Attr = F->getFnAttribute("unsafe-fp-math"); 3806 StringRef Val = Attr.getValueAsString(); 3807 if (Val == "true") 3808 return true; 3809 } 3810 3811 // We did not have a clear indication that fusion is allowed, so assume not 3812 return false; 3813} 3814 3815/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 3816/// operands N0 and N1. This is a helper for PerformADDCombine that is 3817/// called with the default operands, and if that fails, with commuted 3818/// operands. 
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                             const NVPTXSubtarget &Subtarget,
                                             CodeGenOpt::Level OptLevel) {
  SelectionDAG &DAG = DCI.DAG;
  // Skip non-integer, non-scalar case
  EVT VT=N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // fold (add (mul a, b), c) -> (mad a, b, c)
  //
  if (N0.getOpcode() == ISD::MUL) {
    assert (VT.isInteger());
    // For integer:
    // Since integer multiply-add costs the same as integer multiply
    // but is more costly than integer add, do the fusion only when
    // the mul is only used in the add.
    if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
        !N0.getNode()->hasOneUse())
      return SDValue();

    // Do the folding: emit the target-specific integer multiply-add node.
    return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }
  else if (N0.getOpcode() == ISD::FMUL) {
    if (VT == MVT::f32 || VT == MVT::f64) {
      const auto *TLI = static_cast<const NVPTXTargetLowering *>(
          &DAG.getTargetLoweringInfo());
      if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
        return SDValue();

      // For floating point:
      // Do the fusion only when the mul has less than 5 uses and all
      // are add.
      // The heuristic is that if a use is not an add, then that use
      // cannot be fused into fma, therefore mul is still needed anyway.
      // If there are more than 4 uses, even if they are all add, fusing
      // them will increase register pressure.
      //
      int numUses = 0;
      int nonAddCount = 0;
      for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                                UE = N0.getNode()->use_end();
           UI != UE; ++UI) {
        numUses++;
        SDNode *User = *UI;
        if (User->getOpcode() != ISD::FADD)
          ++nonAddCount;
      }
      if (numUses >= 5)
        return SDValue();
      if (nonAddCount) {
        int orderNo = N->getIROrder();
        int orderNo2 = N0.getNode()->getIROrder();
        // Simple heuristic for considering potential register pressure: the
        // IR-order difference is used to measure the distance between def and
        // use; a longer distance is more likely to cause register pressure.
        if (orderNo - orderNo2 < 500)
          return SDValue();

        // Now, check if at least one of the FMUL's operands is live beyond the node N,
        // which guarantees that the FMA will not increase register pressure at node N.
        bool opIsLive = false;
        const SDNode *left = N0.getOperand(0).getNode();
        const SDNode *right = N0.getOperand(1).getNode();

        // Constants are always materializable, so treat them as live.
        if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right))
          opIsLive = true;

        // Otherwise, scan the users of each operand for a use that occurs
        // after N in IR order.
        if (!opIsLive)
          for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) {
            SDNode *User = *UI;
            int orderNo3 = User->getIROrder();
            if (orderNo3 > orderNo) {
              opIsLive = true;
              break;
            }
          }

        if (!opIsLive)
          for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) {
            SDNode *User = *UI;
            int orderNo3 = User->getIROrder();
            if (orderNo3 > orderNo) {
              opIsLive = true;
              break;
            }
          }

        if (!opIsLive)
          return SDValue();
      }

      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1), N1);
    }
  }

  return SDValue();
}

/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
3924/// 3925static SDValue PerformADDCombine(SDNode *N, 3926 TargetLowering::DAGCombinerInfo &DCI, 3927 const NVPTXSubtarget &Subtarget, 3928 CodeGenOpt::Level OptLevel) { 3929 SDValue N0 = N->getOperand(0); 3930 SDValue N1 = N->getOperand(1); 3931 3932 // First try with the default operand order. 3933 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, 3934 OptLevel); 3935 if (Result.getNode()) 3936 return Result; 3937 3938 // If that didn't work, try again with the operands commuted. 3939 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); 3940} 3941 3942static SDValue PerformANDCombine(SDNode *N, 3943 TargetLowering::DAGCombinerInfo &DCI) { 3944 // The type legalizer turns a vector load of i8 values into a zextload to i16 3945 // registers, optionally ANY_EXTENDs it (if target type is integer), 3946 // and ANDs off the high 8 bits. Since we turn this load into a 3947 // target-specific DAG node, the DAG combiner fails to eliminate these AND 3948 // nodes. Do that here. 
3949 SDValue Val = N->getOperand(0); 3950 SDValue Mask = N->getOperand(1); 3951 3952 if (isa<ConstantSDNode>(Val)) { 3953 std::swap(Val, Mask); 3954 } 3955 3956 SDValue AExt; 3957 // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and 3958 if (Val.getOpcode() == ISD::ANY_EXTEND) { 3959 AExt = Val; 3960 Val = Val->getOperand(0); 3961 } 3962 3963 if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { 3964 Val = Val->getOperand(0); 3965 } 3966 3967 if (Val->getOpcode() == NVPTXISD::LoadV2 || 3968 Val->getOpcode() == NVPTXISD::LoadV4) { 3969 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); 3970 if (!MaskCnst) { 3971 // Not an AND with a constant 3972 return SDValue(); 3973 } 3974 3975 uint64_t MaskVal = MaskCnst->getZExtValue(); 3976 if (MaskVal != 0xff) { 3977 // Not an AND that chops off top 8 bits 3978 return SDValue(); 3979 } 3980 3981 MemSDNode *Mem = dyn_cast<MemSDNode>(Val); 3982 if (!Mem) { 3983 // Not a MemSDNode?!? 3984 return SDValue(); 3985 } 3986 3987 EVT MemVT = Mem->getMemoryVT(); 3988 if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { 3989 // We only handle the i8 case 3990 return SDValue(); 3991 } 3992 3993 unsigned ExtType = 3994 cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))-> 3995 getZExtValue(); 3996 if (ExtType == ISD::SEXTLOAD) { 3997 // If for some reason the load is a sextload, the and is needed to zero 3998 // out the high 8 bits 3999 return SDValue(); 4000 } 4001 4002 bool AddTo = false; 4003 if (AExt.getNode() != 0) { 4004 // Re-insert the ext as a zext. 4005 Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 4006 AExt.getValueType(), Val); 4007 AddTo = true; 4008 } 4009 4010 // If we get here, the AND is unnecessary. 
Just replace it with the load 4011 DCI.CombineTo(N, Val, AddTo); 4012 } 4013 4014 return SDValue(); 4015} 4016 4017enum OperandSignedness { 4018 Signed = 0, 4019 Unsigned, 4020 Unknown 4021}; 4022 4023/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand 4024/// that can be demoted to \p OptSize bits without loss of information. The 4025/// signedness of the operand, if determinable, is placed in \p S. 4026static bool IsMulWideOperandDemotable(SDValue Op, 4027 unsigned OptSize, 4028 OperandSignedness &S) { 4029 S = Unknown; 4030 4031 if (Op.getOpcode() == ISD::SIGN_EXTEND || 4032 Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { 4033 EVT OrigVT = Op.getOperand(0).getValueType(); 4034 if (OrigVT.getSizeInBits() <= OptSize) { 4035 S = Signed; 4036 return true; 4037 } 4038 } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { 4039 EVT OrigVT = Op.getOperand(0).getValueType(); 4040 if (OrigVT.getSizeInBits() <= OptSize) { 4041 S = Unsigned; 4042 return true; 4043 } 4044 } 4045 4046 return false; 4047} 4048 4049/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can 4050/// be demoted to \p OptSize bits without loss of information. If the operands 4051/// contain a constant, it should appear as the RHS operand. The signedness of 4052/// the operands is placed in \p IsSigned. 
4053static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, 4054 unsigned OptSize, 4055 bool &IsSigned) { 4056 4057 OperandSignedness LHSSign; 4058 4059 // The LHS operand must be a demotable op 4060 if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) 4061 return false; 4062 4063 // We should have been able to determine the signedness from the LHS 4064 if (LHSSign == Unknown) 4065 return false; 4066 4067 IsSigned = (LHSSign == Signed); 4068 4069 // The RHS can be a demotable op or a constant 4070 if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { 4071 APInt Val = CI->getAPIntValue(); 4072 if (LHSSign == Unsigned) { 4073 if (Val.isIntN(OptSize)) { 4074 return true; 4075 } 4076 return false; 4077 } else { 4078 if (Val.isSignedIntN(OptSize)) { 4079 return true; 4080 } 4081 return false; 4082 } 4083 } else { 4084 OperandSignedness RHSSign; 4085 if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) 4086 return false; 4087 4088 if (LHSSign != RHSSign) 4089 return false; 4090 4091 return true; 4092 } 4093} 4094 4095/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply 4096/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform 4097/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift 4098/// amount. 
4099static SDValue TryMULWIDECombine(SDNode *N, 4100 TargetLowering::DAGCombinerInfo &DCI) { 4101 EVT MulType = N->getValueType(0); 4102 if (MulType != MVT::i32 && MulType != MVT::i64) { 4103 return SDValue(); 4104 } 4105 4106 unsigned OptSize = MulType.getSizeInBits() >> 1; 4107 SDValue LHS = N->getOperand(0); 4108 SDValue RHS = N->getOperand(1); 4109 4110 // Canonicalize the multiply so the constant (if any) is on the right 4111 if (N->getOpcode() == ISD::MUL) { 4112 if (isa<ConstantSDNode>(LHS)) { 4113 std::swap(LHS, RHS); 4114 } 4115 } 4116 4117 // If we have a SHL, determine the actual multiply amount 4118 if (N->getOpcode() == ISD::SHL) { 4119 ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); 4120 if (!ShlRHS) { 4121 return SDValue(); 4122 } 4123 4124 APInt ShiftAmt = ShlRHS->getAPIntValue(); 4125 unsigned BitWidth = MulType.getSizeInBits(); 4126 if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { 4127 APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; 4128 RHS = DCI.DAG.getConstant(MulVal, MulType); 4129 } else { 4130 return SDValue(); 4131 } 4132 } 4133 4134 bool Signed; 4135 // Verify that our operands are demotable 4136 if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { 4137 return SDValue(); 4138 } 4139 4140 EVT DemotedVT; 4141 if (MulType == MVT::i32) { 4142 DemotedVT = MVT::i16; 4143 } else { 4144 DemotedVT = MVT::i32; 4145 } 4146 4147 // Truncate the operands to the correct size. Note that these are just for 4148 // type consistency and will (likely) be eliminated in later phases. 
4149 SDValue TruncLHS = 4150 DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS); 4151 SDValue TruncRHS = 4152 DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS); 4153 4154 unsigned Opc; 4155 if (Signed) { 4156 Opc = NVPTXISD::MUL_WIDE_SIGNED; 4157 } else { 4158 Opc = NVPTXISD::MUL_WIDE_UNSIGNED; 4159 } 4160 4161 return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS); 4162} 4163 4164/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. 4165static SDValue PerformMULCombine(SDNode *N, 4166 TargetLowering::DAGCombinerInfo &DCI, 4167 CodeGenOpt::Level OptLevel) { 4168 if (OptLevel > 0) { 4169 // Try mul.wide combining at OptLevel > 0 4170 SDValue Ret = TryMULWIDECombine(N, DCI); 4171 if (Ret.getNode()) 4172 return Ret; 4173 } 4174 4175 return SDValue(); 4176} 4177 4178/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. 4179static SDValue PerformSHLCombine(SDNode *N, 4180 TargetLowering::DAGCombinerInfo &DCI, 4181 CodeGenOpt::Level OptLevel) { 4182 if (OptLevel > 0) { 4183 // Try mul.wide combining at OptLevel > 0 4184 SDValue Ret = TryMULWIDECombine(N, DCI); 4185 if (Ret.getNode()) 4186 return Ret; 4187 } 4188 4189 return SDValue(); 4190} 4191 4192SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, 4193 DAGCombinerInfo &DCI) const { 4194 CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); 4195 switch (N->getOpcode()) { 4196 default: break; 4197 case ISD::ADD: 4198 case ISD::FADD: 4199 return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel); 4200 case ISD::MUL: 4201 return PerformMULCombine(N, DCI, OptLevel); 4202 case ISD::SHL: 4203 return PerformSHLCombine(N, DCI, OptLevel); 4204 case ISD::AND: 4205 return PerformANDCombine(N, DCI); 4206 } 4207 return SDValue(); 4208} 4209 4210/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. 
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                              const DataLayout *TD,
                              SmallVectorImpl<SDValue> &Results) {
  EVT ResVT = N->getValueType(0);
  SDLoc DL(N);

  assert(ResVT.isVector() && "Vector load must have vector type");

  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
  // legal. We can (and should) split that into 2 loads of <2 x double> here
  // but I'm leaving that as a TODO for now.
  assert(ResVT.isSimple() && "Can only handle simple types");
  switch (ResVT.getSimpleVT().SimpleTy) {
  default:
    // Unsupported vector shape: leave the node for default legalization.
    return;
  case MVT::v2i8:
  case MVT::v2i16:
  case MVT::v2i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v2f64:
  case MVT::v4i8:
  case MVT::v4i16:
  case MVT::v4i32:
  case MVT::v4f32:
    // This is a "native" vector type
    break;
  }

  LoadSDNode *LD = cast<LoadSDNode>(N);

  unsigned Align = LD->getAlignment();
  unsigned PrefAlign =
    TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
  if (Align < PrefAlign) {
    // This load is not sufficiently aligned, so bail out and let this vector
    // load be scalarized.  Note that we may still be able to emit smaller
    // vector loads.  For example, if we are loading a <4 x float> with an
    // alignment of 8, this check will fail but the legalizer will try again
    // with 2 x <2 x float>, which will succeed with an alignment of 8.
    return;
  }

  EVT EltVT = ResVT.getVectorElementType();
  unsigned NumElts = ResVT.getVectorNumElements();

  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
  // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
  // loaded type to i16 and propagate the "real" type as the memory type.
  bool NeedTrunc = false;
  if (EltVT.getSizeInBits() < 16) {
    EltVT = MVT::i16;
    NeedTrunc = true;
  }

  unsigned Opcode = 0;
  SDVTList LdResVTs;

  // Pick the multi-result target load node matching the element count.
  switch (NumElts) {
  default:
    return;
  case 2:
    Opcode = NVPTXISD::LoadV2;
    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
    break;
  case 4: {
    Opcode = NVPTXISD::LoadV4;
    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
    LdResVTs = DAG.getVTList(ListVTs);
    break;
  }
  }

  SmallVector<SDValue, 8> OtherOps;

  // Copy regular operands
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    OtherOps.push_back(N->getOperand(i));

  // The select routine does not have access to the LoadSDNode instance, so
  // pass along the extension information
  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));

  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                          LD->getMemoryVT(),
                                          LD->getMemOperand());

  SmallVector<SDValue, 4> ScalarRes;

  // Truncate each widened result back to the original element type if needed.
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Res = NewLD.getValue(i);
    if (NeedTrunc)
      Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
    ScalarRes.push_back(Res);
  }

  SDValue LoadChain = NewLD.getValue(NumElts);

  // Reassemble the scalars into the original vector type for the users.
  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);

  Results.push_back(BuildVec);
  Results.push_back(LoadChain);
}

// Custom-legalize ldg/ldu intrinsic results (vector and i8 forms).
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Chain = N->getOperand(0);
  SDValue Intrin = N->getOperand(1);
  SDLoc DL(N);

  // Get the intrinsic ID
  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
  switch (IntrinNo) {
  default:
    return;
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    EVT ResVT = N->getValueType(0);

    if (ResVT.isVector()) {
      // Vector LDG/LDU

      unsigned NumElts = ResVT.getVectorNumElements();
      EVT EltVT = ResVT.getVectorElementType();

      // Since LDU/LDG are target nodes, we cannot rely on DAG type
      // legalization.
      // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
      // loaded type to i16 and propagate the "real" type as the memory type.
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      // Choose the LDG/LDU vector node from element count and intrinsic kind.
      switch (NumElts) {
      default:
        return;
      case 2:
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs);
        break;
      }
      }

      SmallVector<SDValue, 8> OtherOps;

      // Copy regular operands

      OtherOps.push_back(Chain); // Chain
      // Skip operand 1 (intrinsic ID)
      // Others
      for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
        OtherOps.push_back(N->getOperand(i));

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                              MemSD->getMemoryVT(),
                                              MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      // Truncate widened results back to the real element type if needed.
      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res =
          DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
        ScalarRes.push_back(Res);
      }

      SDValue LoadChain = NewLD.getValue(NumElts);

      // Rebuild the vector the original intrinsic produced.
      SDValue BuildVec =
        DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        Ops.push_back(N->getOperand(i));

      // Force output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
                                MVT::i8, MemSD->getMemOperand());

      // Truncate the i16 result back to the i8 the intrinsic promised.
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                                    NewLD.getValue(0)));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}

// Custom legalization entry point: dispatch illegal-result nodes to the
// NVPTX-specific expanders above.
void NVPTXTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    report_fatal_error("Unhandled custom legalization");
  case ISD::LOAD:
    ReplaceLoadVector(N, DAG, getDataLayout(), Results);
    return;
  case ISD::INTRINSIC_W_CHAIN:
    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
    return;
  }
}

// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
void NVPTXSection::anchor() {}

// Release every section object this object file owns.
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
  delete TextSection;
  delete DataSection;
  delete BSSSection;
  delete ReadOnlySection;

  delete StaticCtorSection;
  delete StaticDtorSection;
  delete LSDASection;
  delete EHFrameSection;
  delete DwarfAbbrevSection;
  delete DwarfInfoSection;
  delete DwarfLineSection;
  delete DwarfFrameSection;
  delete DwarfPubTypesSection;
  delete DwarfDebugInlineSection;
  delete DwarfStrSection;
  delete DwarfLocSection;
  delete DwarfARangesSection;
  delete DwarfRangesSection;
  delete DwarfMacroInfoSection;
}

// All globals live in the single data section regardless of kind.
const MCSection *
NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
                                              SectionKind Kind, Mangler &Mang,
                                              const TargetMachine &TM) const {
  return getDataSection();
}