NVPTXISelLowering.cpp revision 280031
//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelLowering.h"
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>

#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

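// Counter used to generate a unique name per call site for the call
// prototype emitted by getPrototype() below.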
static unsigned int uniqueCallSite = 0;

static cl::opt<bool> sched4reg(
    "nvptx-sched4reg",
    cl::desc("NVPTX Specific: schedule for register pressure"), cl::init(false));

static cl::opt<unsigned>
FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it,"
                             " 1: do it, 2: do it aggressively)"),
                    cl::init(2));

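// Returns true for the vector MVTs that are given custom load/store handling
// in the NVPTXTargetLowering constructor below.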
static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::v2i1:
  case MVT::v4i1:
  case MVT::v2i8:
  case MVT::v4i8:
  case MVT::v2i16:
  case MVT::v4i16:
  case MVT::v2i32:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v4f32:
  case MVT::v2f64:
    return true;
  }
}

/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it.  Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
/// same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
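/// For example, a <4 x float> component contributes four f32 EVTs, at offsets
/// 0, 4, 8, and 12 bytes from the component's own offset.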
static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
                               SmallVectorImpl<EVT> &ValueVTs,
                               SmallVectorImpl<uint64_t> *Offsets = nullptr,
                               uint64_t StartingOffset = 0) {
  SmallVector<EVT, 16> TempVTs;
  SmallVector<uint64_t, 16> TempOffsets;

  ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset);
  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
    EVT VT = TempVTs[i];
    uint64_t Off = TempOffsets[i];
    if (VT.isVector()) {
      for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
        ValueVTs.push_back(VT.getVectorElementType());
        if (Offsets)
          Offsets->push_back(Off + j * VT.getVectorElementType().getStoreSize());
      }
    } else {
      ValueVTs.push_back(VT);
      if (Offsets)
        Offsets->push_back(Off);
    }
  }
}

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
    : TargetLowering(TM), nvTM(&TM),
      nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy, or memmove.
  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // By default, use Source scheduling.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  // Operations not directly supported by NVPTX.
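  // (In LLVM legalization terms: Expand rewrites a node in terms of other
  // operations, Custom routes it to this target's lowering hooks, and Legal
  // leaves it for instruction selection as-is.)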
  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
  // Some SIGN_EXTEND_INREG can be done using cvt instruction.
  // For others we will expand to a SHL/SRA pair.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);

  if (nvptxSubtarget.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (nvptxSubtarget.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fextend.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load/store of predicate registers.
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // This is legal in NVPTX.
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::ADDC, MVT::i64, Expand);
  setOperationAction(ISD::ADDE, MVT::i64, Expand);

  // Register custom handling for vector loads/stores.
  for (MVT VT : MVT::vector_valuetypes()) {
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Custom handling for i8 intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);

  setOperationAction(ISD::CTLZ, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Legal);
  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  // We have some custom DAG combine patterns for these nodes.
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);

  // Now deduce the information based on the above-mentioned actions.
  computeRegisterProperties();
}

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default:
    return nullptr;
  case NVPTXISD::CALL:
    return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG:
    return "NVPTXISD::RET_FLAG";
  case NVPTXISD::Wrapper:
    return "NVPTXISD::Wrapper";
  case NVPTXISD::DeclareParam:
    return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam:
    return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet:
    return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareRetParam:
    return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall:
    return "NVPTXISD::PrintCall";
  case NVPTXISD::LoadParam:
    return "NVPTXISD::LoadParam";
  case NVPTXISD::LoadParamV2:
    return "NVPTXISD::LoadParamV2";
  case NVPTXISD::LoadParamV4:
    return "NVPTXISD::LoadParamV4";
  case NVPTXISD::StoreParam:
    return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamV2:
    return "NVPTXISD::StoreParamV2";
  case NVPTXISD::StoreParamV4:
    return "NVPTXISD::StoreParamV4";
  case NVPTXISD::StoreParamS32:
    return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32:
    return "NVPTXISD::StoreParamU32";
  case NVPTXISD::CallArgBegin:
    return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg:
    return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg:
    return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd:
    return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid:
    return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal:
    return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol:
    return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype:
    return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam:
    return "NVPTXISD::MoveParam";
  case NVPTXISD::StoreRetval:
    return "NVPTXISD::StoreRetval";
  case NVPTXISD::StoreRetvalV2:
    return "NVPTXISD::StoreRetvalV2";
  case NVPTXISD::StoreRetvalV4:
    return "NVPTXISD::StoreRetvalV4";
  case NVPTXISD::PseudoUseParam:
    return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN:
    return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin:
    return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd:
    return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::CallPrototype:
    return "NVPTXISD::CallPrototype";
  case NVPTXISD::LoadV2:
    return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4:
    return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2:
    return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4:
    return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2:
    return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4:
    return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2:
    return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4:
    return "NVPTXISD::StoreV4";
  case NVPTXISD::FUN_SHFL_CLAMP:
    return "NVPTXISD::FUN_SHFL_CLAMP";
  case NVPTXISD::FUN_SHFR_CLAMP:
    return "NVPTXISD::FUN_SHFR_CLAMP";
  case NVPTXISD::IMAD:
    return "NVPTXISD::IMAD";
  case NVPTXISD::MUL_WIDE_SIGNED:
    return "NVPTXISD::MUL_WIDE_SIGNED";
  case NVPTXISD::MUL_WIDE_UNSIGNED:
    return "NVPTXISD::MUL_WIDE_UNSIGNED";
  case NVPTXISD::Tex1DFloatS32:        return "NVPTXISD::Tex1DFloatS32";
  case NVPTXISD::Tex1DFloatFloat:      return "NVPTXISD::Tex1DFloatFloat";
  case NVPTXISD::Tex1DFloatFloatLevel:
    return "NVPTXISD::Tex1DFloatFloatLevel";
  case NVPTXISD::Tex1DFloatFloatGrad:
    return "NVPTXISD::Tex1DFloatFloatGrad";
  case NVPTXISD::Tex1DS32S32:          return "NVPTXISD::Tex1DS32S32";
  case NVPTXISD::Tex1DS32Float:        return "NVPTXISD::Tex1DS32Float";
  case NVPTXISD::Tex1DS32FloatLevel:
    return "NVPTXISD::Tex1DS32FloatLevel";
  case NVPTXISD::Tex1DS32FloatGrad:
    return "NVPTXISD::Tex1DS32FloatGrad";
  case NVPTXISD::Tex1DU32S32:          return "NVPTXISD::Tex1DU32S32";
  case NVPTXISD::Tex1DU32Float:        return "NVPTXISD::Tex1DU32Float";
  case NVPTXISD::Tex1DU32FloatLevel:
    return "NVPTXISD::Tex1DU32FloatLevel";
  case NVPTXISD::Tex1DU32FloatGrad:
    return "NVPTXISD::Tex1DU32FloatGrad";
  case NVPTXISD::Tex1DArrayFloatS32:   return "NVPTXISD::Tex1DArrayFloatS32";
  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
    return "NVPTXISD::Tex1DArrayFloatFloatLevel";
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
    return "NVPTXISD::Tex1DArrayFloatFloatGrad";
  case NVPTXISD::Tex1DArrayS32S32:     return "NVPTXISD::Tex1DArrayS32S32";
  case NVPTXISD::Tex1DArrayS32Float:   return "NVPTXISD::Tex1DArrayS32Float";
  case NVPTXISD::Tex1DArrayS32FloatLevel:
    return "NVPTXISD::Tex1DArrayS32FloatLevel";
  case NVPTXISD::Tex1DArrayS32FloatGrad:
    return "NVPTXISD::Tex1DArrayS32FloatGrad";
  case NVPTXISD::Tex1DArrayU32S32:     return "NVPTXISD::Tex1DArrayU32S32";
  case NVPTXISD::Tex1DArrayU32Float:   return "NVPTXISD::Tex1DArrayU32Float";
  case NVPTXISD::Tex1DArrayU32FloatLevel:
    return "NVPTXISD::Tex1DArrayU32FloatLevel";
  case NVPTXISD::Tex1DArrayU32FloatGrad:
    return "NVPTXISD::Tex1DArrayU32FloatGrad";
  case NVPTXISD::Tex2DFloatS32:        return "NVPTXISD::Tex2DFloatS32";
  case NVPTXISD::Tex2DFloatFloat:      return "NVPTXISD::Tex2DFloatFloat";
  case NVPTXISD::Tex2DFloatFloatLevel:
    return "NVPTXISD::Tex2DFloatFloatLevel";
  case NVPTXISD::Tex2DFloatFloatGrad:
    return "NVPTXISD::Tex2DFloatFloatGrad";
  case NVPTXISD::Tex2DS32S32:          return "NVPTXISD::Tex2DS32S32";
  case NVPTXISD::Tex2DS32Float:        return "NVPTXISD::Tex2DS32Float";
  case NVPTXISD::Tex2DS32FloatLevel:
    return "NVPTXISD::Tex2DS32FloatLevel";
  case NVPTXISD::Tex2DS32FloatGrad:
    return "NVPTXISD::Tex2DS32FloatGrad";
  case NVPTXISD::Tex2DU32S32:          return "NVPTXISD::Tex2DU32S32";
  case NVPTXISD::Tex2DU32Float:        return "NVPTXISD::Tex2DU32Float";
  case NVPTXISD::Tex2DU32FloatLevel:
    return "NVPTXISD::Tex2DU32FloatLevel";
  case NVPTXISD::Tex2DU32FloatGrad:
    return "NVPTXISD::Tex2DU32FloatGrad";
  case NVPTXISD::Tex2DArrayFloatS32:   return "NVPTXISD::Tex2DArrayFloatS32";
  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
    return "NVPTXISD::Tex2DArrayFloatFloatLevel";
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
    return "NVPTXISD::Tex2DArrayFloatFloatGrad";
  case NVPTXISD::Tex2DArrayS32S32:     return "NVPTXISD::Tex2DArrayS32S32";
  case NVPTXISD::Tex2DArrayS32Float:   return "NVPTXISD::Tex2DArrayS32Float";
  case NVPTXISD::Tex2DArrayS32FloatLevel:
    return "NVPTXISD::Tex2DArrayS32FloatLevel";
  case NVPTXISD::Tex2DArrayS32FloatGrad:
    return "NVPTXISD::Tex2DArrayS32FloatGrad";
  case NVPTXISD::Tex2DArrayU32S32:     return "NVPTXISD::Tex2DArrayU32S32";
  case NVPTXISD::Tex2DArrayU32Float:   return "NVPTXISD::Tex2DArrayU32Float";
  case NVPTXISD::Tex2DArrayU32FloatLevel:
    return "NVPTXISD::Tex2DArrayU32FloatLevel";
  case NVPTXISD::Tex2DArrayU32FloatGrad:
    return "NVPTXISD::Tex2DArrayU32FloatGrad";
  case NVPTXISD::Tex3DFloatS32:        return "NVPTXISD::Tex3DFloatS32";
  case NVPTXISD::Tex3DFloatFloat:      return "NVPTXISD::Tex3DFloatFloat";
  case NVPTXISD::Tex3DFloatFloatLevel:
    return "NVPTXISD::Tex3DFloatFloatLevel";
  case NVPTXISD::Tex3DFloatFloatGrad:
    return "NVPTXISD::Tex3DFloatFloatGrad";
  case NVPTXISD::Tex3DS32S32:          return "NVPTXISD::Tex3DS32S32";
  case NVPTXISD::Tex3DS32Float:        return "NVPTXISD::Tex3DS32Float";
  case NVPTXISD::Tex3DS32FloatLevel:
    return "NVPTXISD::Tex3DS32FloatLevel";
  case NVPTXISD::Tex3DS32FloatGrad:
    return "NVPTXISD::Tex3DS32FloatGrad";
  case NVPTXISD::Tex3DU32S32:          return "NVPTXISD::Tex3DU32S32";
  case NVPTXISD::Tex3DU32Float:        return "NVPTXISD::Tex3DU32Float";
  case NVPTXISD::Tex3DU32FloatLevel:
    return "NVPTXISD::Tex3DU32FloatLevel";
  case NVPTXISD::Tex3DU32FloatGrad:
    return "NVPTXISD::Tex3DU32FloatGrad";
  case NVPTXISD::TexCubeFloatFloat:      return "NVPTXISD::TexCubeFloatFloat";
  case NVPTXISD::TexCubeFloatFloatLevel:
    return "NVPTXISD::TexCubeFloatFloatLevel";
  case NVPTXISD::TexCubeS32Float:        return "NVPTXISD::TexCubeS32Float";
  case NVPTXISD::TexCubeS32FloatLevel:
    return "NVPTXISD::TexCubeS32FloatLevel";
  case NVPTXISD::TexCubeU32Float:        return "NVPTXISD::TexCubeU32Float";
  case NVPTXISD::TexCubeU32FloatLevel:
    return "NVPTXISD::TexCubeU32FloatLevel";
  case NVPTXISD::TexCubeArrayFloatFloat:
    return "NVPTXISD::TexCubeArrayFloatFloat";
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexCubeArrayFloatFloatLevel";
  case NVPTXISD::TexCubeArrayS32Float:
    return "NVPTXISD::TexCubeArrayS32Float";
  case NVPTXISD::TexCubeArrayS32FloatLevel:
    return "NVPTXISD::TexCubeArrayS32FloatLevel";
  case NVPTXISD::TexCubeArrayU32Float:
    return "NVPTXISD::TexCubeArrayU32Float";
  case NVPTXISD::TexCubeArrayU32FloatLevel:
    return "NVPTXISD::TexCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4R2DFloatFloat:
    return "NVPTXISD::Tld4R2DFloatFloat";
  case NVPTXISD::Tld4G2DFloatFloat:
    return "NVPTXISD::Tld4G2DFloatFloat";
  case NVPTXISD::Tld4B2DFloatFloat:
    return "NVPTXISD::Tld4B2DFloatFloat";
  case NVPTXISD::Tld4A2DFloatFloat:
    return "NVPTXISD::Tld4A2DFloatFloat";
  case NVPTXISD::Tld4R2DS64Float:
    return "NVPTXISD::Tld4R2DS64Float";
  case NVPTXISD::Tld4G2DS64Float:
    return "NVPTXISD::Tld4G2DS64Float";
  case NVPTXISD::Tld4B2DS64Float:
    return "NVPTXISD::Tld4B2DS64Float";
  case NVPTXISD::Tld4A2DS64Float:
    return "NVPTXISD::Tld4A2DS64Float";
  case NVPTXISD::Tld4R2DU64Float:
    return "NVPTXISD::Tld4R2DU64Float";
  case NVPTXISD::Tld4G2DU64Float:
    return "NVPTXISD::Tld4G2DU64Float";
  case NVPTXISD::Tld4B2DU64Float:
    return "NVPTXISD::Tld4B2DU64Float";
  case NVPTXISD::Tld4A2DU64Float:
    return "NVPTXISD::Tld4A2DU64Float";

  case NVPTXISD::TexUnified1DFloatS32:
    return "NVPTXISD::TexUnified1DFloatS32";
  case NVPTXISD::TexUnified1DFloatFloat:
    return "NVPTXISD::TexUnified1DFloatFloat";
  case NVPTXISD::TexUnified1DFloatFloatLevel:
    return "NVPTXISD::TexUnified1DFloatFloatLevel";
  case NVPTXISD::TexUnified1DFloatFloatGrad:
    return "NVPTXISD::TexUnified1DFloatFloatGrad";
  case NVPTXISD::TexUnified1DS32S32:
    return "NVPTXISD::TexUnified1DS32S32";
  case NVPTXISD::TexUnified1DS32Float:
    return "NVPTXISD::TexUnified1DS32Float";
  case NVPTXISD::TexUnified1DS32FloatLevel:
    return "NVPTXISD::TexUnified1DS32FloatLevel";
  case NVPTXISD::TexUnified1DS32FloatGrad:
    return "NVPTXISD::TexUnified1DS32FloatGrad";
  case NVPTXISD::TexUnified1DU32S32:
    return "NVPTXISD::TexUnified1DU32S32";
  case NVPTXISD::TexUnified1DU32Float:
    return "NVPTXISD::TexUnified1DU32Float";
  case NVPTXISD::TexUnified1DU32FloatLevel:
    return "NVPTXISD::TexUnified1DU32FloatLevel";
  case NVPTXISD::TexUnified1DU32FloatGrad:
    return "NVPTXISD::TexUnified1DU32FloatGrad";
  case NVPTXISD::TexUnified1DArrayFloatS32:
    return "NVPTXISD::TexUnified1DArrayFloatS32";
  case NVPTXISD::TexUnified1DArrayFloatFloat:
    return "NVPTXISD::TexUnified1DArrayFloatFloat";
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified1DArrayS32S32:
    return "NVPTXISD::TexUnified1DArrayS32S32";
  case NVPTXISD::TexUnified1DArrayS32Float:
    return "NVPTXISD::TexUnified1DArrayS32Float";
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
  case NVPTXISD::TexUnified1DArrayU32S32:
    return "NVPTXISD::TexUnified1DArrayU32S32";
  case NVPTXISD::TexUnified1DArrayU32Float:
    return "NVPTXISD::TexUnified1DArrayU32Float";
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
  case NVPTXISD::TexUnified2DFloatS32:
    return "NVPTXISD::TexUnified2DFloatS32";
  case NVPTXISD::TexUnified2DFloatFloat:
    return "NVPTXISD::TexUnified2DFloatFloat";
  case NVPTXISD::TexUnified2DFloatFloatLevel:
    return "NVPTXISD::TexUnified2DFloatFloatLevel";
  case NVPTXISD::TexUnified2DFloatFloatGrad:
    return "NVPTXISD::TexUnified2DFloatFloatGrad";
  case NVPTXISD::TexUnified2DS32S32:
    return "NVPTXISD::TexUnified2DS32S32";
  case NVPTXISD::TexUnified2DS32Float:
    return "NVPTXISD::TexUnified2DS32Float";
  case NVPTXISD::TexUnified2DS32FloatLevel:
    return "NVPTXISD::TexUnified2DS32FloatLevel";
  case NVPTXISD::TexUnified2DS32FloatGrad:
    return "NVPTXISD::TexUnified2DS32FloatGrad";
  case NVPTXISD::TexUnified2DU32S32:
    return "NVPTXISD::TexUnified2DU32S32";
  case NVPTXISD::TexUnified2DU32Float:
    return "NVPTXISD::TexUnified2DU32Float";
  case NVPTXISD::TexUnified2DU32FloatLevel:
    return "NVPTXISD::TexUnified2DU32FloatLevel";
  case NVPTXISD::TexUnified2DU32FloatGrad:
    return "NVPTXISD::TexUnified2DU32FloatGrad";
  case NVPTXISD::TexUnified2DArrayFloatS32:
    return "NVPTXISD::TexUnified2DArrayFloatS32";
  case NVPTXISD::TexUnified2DArrayFloatFloat:
    return "NVPTXISD::TexUnified2DArrayFloatFloat";
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified2DArrayS32S32:
    return "NVPTXISD::TexUnified2DArrayS32S32";
  case NVPTXISD::TexUnified2DArrayS32Float:
    return "NVPTXISD::TexUnified2DArrayS32Float";
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
  case NVPTXISD::TexUnified2DArrayU32S32:
    return "NVPTXISD::TexUnified2DArrayU32S32";
  case NVPTXISD::TexUnified2DArrayU32Float:
    return "NVPTXISD::TexUnified2DArrayU32Float";
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
  case NVPTXISD::TexUnified3DFloatS32:
    return "NVPTXISD::TexUnified3DFloatS32";
  case NVPTXISD::TexUnified3DFloatFloat:
    return "NVPTXISD::TexUnified3DFloatFloat";
  case NVPTXISD::TexUnified3DFloatFloatLevel:
    return "NVPTXISD::TexUnified3DFloatFloatLevel";
  case NVPTXISD::TexUnified3DFloatFloatGrad:
    return "NVPTXISD::TexUnified3DFloatFloatGrad";
  case NVPTXISD::TexUnified3DS32S32:
    return "NVPTXISD::TexUnified3DS32S32";
  case NVPTXISD::TexUnified3DS32Float:
    return "NVPTXISD::TexUnified3DS32Float";
  case NVPTXISD::TexUnified3DS32FloatLevel:
    return "NVPTXISD::TexUnified3DS32FloatLevel";
  case NVPTXISD::TexUnified3DS32FloatGrad:
    return "NVPTXISD::TexUnified3DS32FloatGrad";
  case NVPTXISD::TexUnified3DU32S32:
    return "NVPTXISD::TexUnified3DU32S32";
  case NVPTXISD::TexUnified3DU32Float:
    return "NVPTXISD::TexUnified3DU32Float";
  case NVPTXISD::TexUnified3DU32FloatLevel:
    return "NVPTXISD::TexUnified3DU32FloatLevel";
  case NVPTXISD::TexUnified3DU32FloatGrad:
    return "NVPTXISD::TexUnified3DU32FloatGrad";
  case NVPTXISD::TexUnifiedCubeFloatFloat:
    return "NVPTXISD::TexUnifiedCubeFloatFloat";
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeS32Float:
    return "NVPTXISD::TexUnifiedCubeS32Float";
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeU32Float:
    return "NVPTXISD::TexUnifiedCubeU32Float";
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
    return "NVPTXISD::TexUnifiedCubeArrayS32Float";
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
    return "NVPTXISD::TexUnifiedCubeArrayU32Float";
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
  case NVPTXISD::Tld4UnifiedR2DS64Float:
    return "NVPTXISD::Tld4UnifiedR2DS64Float";
  case NVPTXISD::Tld4UnifiedG2DS64Float:
    return "NVPTXISD::Tld4UnifiedG2DS64Float";
  case NVPTXISD::Tld4UnifiedB2DS64Float:
    return "NVPTXISD::Tld4UnifiedB2DS64Float";
  case NVPTXISD::Tld4UnifiedA2DS64Float:
    return "NVPTXISD::Tld4UnifiedA2DS64Float";
  case NVPTXISD::Tld4UnifiedR2DU64Float:
    return "NVPTXISD::Tld4UnifiedR2DU64Float";
  case NVPTXISD::Tld4UnifiedG2DU64Float:
    return "NVPTXISD::Tld4UnifiedG2DU64Float";
  case NVPTXISD::Tld4UnifiedB2DU64Float:
    return "NVPTXISD::Tld4UnifiedB2DU64Float";
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    return "NVPTXISD::Tld4UnifiedA2DU64Float";

  case NVPTXISD::Suld1DI8Clamp:          return "NVPTXISD::Suld1DI8Clamp";
  case NVPTXISD::Suld1DI16Clamp:         return "NVPTXISD::Suld1DI16Clamp";
  case NVPTXISD::Suld1DI32Clamp:         return "NVPTXISD::Suld1DI32Clamp";
  case NVPTXISD::Suld1DI64Clamp:         return "NVPTXISD::Suld1DI64Clamp";
  case NVPTXISD::Suld1DV2I8Clamp:        return "NVPTXISD::Suld1DV2I8Clamp";
  case NVPTXISD::Suld1DV2I16Clamp:       return "NVPTXISD::Suld1DV2I16Clamp";
  case NVPTXISD::Suld1DV2I32Clamp:       return "NVPTXISD::Suld1DV2I32Clamp";
  case NVPTXISD::Suld1DV2I64Clamp:       return "NVPTXISD::Suld1DV2I64Clamp";
  case NVPTXISD::Suld1DV4I8Clamp:        return "NVPTXISD::Suld1DV4I8Clamp";
  case NVPTXISD::Suld1DV4I16Clamp:       return "NVPTXISD::Suld1DV4I16Clamp";
  case NVPTXISD::Suld1DV4I32Clamp:       return "NVPTXISD::Suld1DV4I32Clamp";

  case NVPTXISD::Suld1DArrayI8Clamp:   return "NVPTXISD::Suld1DArrayI8Clamp";
  case NVPTXISD::Suld1DArrayI16Clamp:  return "NVPTXISD::Suld1DArrayI16Clamp";
  case NVPTXISD::Suld1DArrayI32Clamp:  return "NVPTXISD::Suld1DArrayI32Clamp";
  case NVPTXISD::Suld1DArrayI64Clamp:  return "NVPTXISD::Suld1DArrayI64Clamp";
  case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
  case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
  case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
  case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
  case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
  case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
  case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";

  case NVPTXISD::Suld2DI8Clamp:          return "NVPTXISD::Suld2DI8Clamp";
  case NVPTXISD::Suld2DI16Clamp:         return "NVPTXISD::Suld2DI16Clamp";
  case NVPTXISD::Suld2DI32Clamp:         return "NVPTXISD::Suld2DI32Clamp";
  case NVPTXISD::Suld2DI64Clamp:         return "NVPTXISD::Suld2DI64Clamp";
  case NVPTXISD::Suld2DV2I8Clamp:        return "NVPTXISD::Suld2DV2I8Clamp";
  case NVPTXISD::Suld2DV2I16Clamp:       return "NVPTXISD::Suld2DV2I16Clamp";
  case NVPTXISD::Suld2DV2I32Clamp:       return "NVPTXISD::Suld2DV2I32Clamp";
  case NVPTXISD::Suld2DV2I64Clamp:       return "NVPTXISD::Suld2DV2I64Clamp";
  case NVPTXISD::Suld2DV4I8Clamp:        return "NVPTXISD::Suld2DV4I8Clamp";
  case NVPTXISD::Suld2DV4I16Clamp:       return "NVPTXISD::Suld2DV4I16Clamp";
  case NVPTXISD::Suld2DV4I32Clamp:       return "NVPTXISD::Suld2DV4I32Clamp";

  case NVPTXISD::Suld2DArrayI8Clamp:   return "NVPTXISD::Suld2DArrayI8Clamp";
  case NVPTXISD::Suld2DArrayI16Clamp:  return "NVPTXISD::Suld2DArrayI16Clamp";
  case NVPTXISD::Suld2DArrayI32Clamp:  return "NVPTXISD::Suld2DArrayI32Clamp";
  case NVPTXISD::Suld2DArrayI64Clamp:  return "NVPTXISD::Suld2DArrayI64Clamp";
  case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
  case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
  case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
  case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
  case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
  case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
  case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";

  case NVPTXISD::Suld3DI8Clamp:          return "NVPTXISD::Suld3DI8Clamp";
  case NVPTXISD::Suld3DI16Clamp:         return "NVPTXISD::Suld3DI16Clamp";
  case NVPTXISD::Suld3DI32Clamp:         return "NVPTXISD::Suld3DI32Clamp";
  case NVPTXISD::Suld3DI64Clamp:         return "NVPTXISD::Suld3DI64Clamp";
  case NVPTXISD::Suld3DV2I8Clamp:        return "NVPTXISD::Suld3DV2I8Clamp";
  case NVPTXISD::Suld3DV2I16Clamp:       return "NVPTXISD::Suld3DV2I16Clamp";
  case NVPTXISD::Suld3DV2I32Clamp:       return "NVPTXISD::Suld3DV2I32Clamp";
  case NVPTXISD::Suld3DV2I64Clamp:       return "NVPTXISD::Suld3DV2I64Clamp";
  case NVPTXISD::Suld3DV4I8Clamp:        return "NVPTXISD::Suld3DV4I8Clamp";
  case NVPTXISD::Suld3DV4I16Clamp:       return "NVPTXISD::Suld3DV4I16Clamp";
  case NVPTXISD::Suld3DV4I32Clamp:       return "NVPTXISD::Suld3DV4I32Clamp";

  case NVPTXISD::Suld1DI8Trap:          return "NVPTXISD::Suld1DI8Trap";
  case NVPTXISD::Suld1DI16Trap:         return "NVPTXISD::Suld1DI16Trap";
  case NVPTXISD::Suld1DI32Trap:         return "NVPTXISD::Suld1DI32Trap";
  case NVPTXISD::Suld1DI64Trap:         return "NVPTXISD::Suld1DI64Trap";
  case NVPTXISD::Suld1DV2I8Trap:        return "NVPTXISD::Suld1DV2I8Trap";
  case NVPTXISD::Suld1DV2I16Trap:       return "NVPTXISD::Suld1DV2I16Trap";
  case NVPTXISD::Suld1DV2I32Trap:       return "NVPTXISD::Suld1DV2I32Trap";
  case NVPTXISD::Suld1DV2I64Trap:       return "NVPTXISD::Suld1DV2I64Trap";
  case NVPTXISD::Suld1DV4I8Trap:        return "NVPTXISD::Suld1DV4I8Trap";
  case NVPTXISD::Suld1DV4I16Trap:       return "NVPTXISD::Suld1DV4I16Trap";
  case NVPTXISD::Suld1DV4I32Trap:       return "NVPTXISD::Suld1DV4I32Trap";

  case NVPTXISD::Suld1DArrayI8Trap:     return "NVPTXISD::Suld1DArrayI8Trap";
  case NVPTXISD::Suld1DArrayI16Trap:    return "NVPTXISD::Suld1DArrayI16Trap";
  case NVPTXISD::Suld1DArrayI32Trap:    return "NVPTXISD::Suld1DArrayI32Trap";
  case NVPTXISD::Suld1DArrayI64Trap:    return "NVPTXISD::Suld1DArrayI64Trap";
  case NVPTXISD::Suld1DArrayV2I8Trap:   return "NVPTXISD::Suld1DArrayV2I8Trap";
  case NVPTXISD::Suld1DArrayV2I16Trap:  return "NVPTXISD::Suld1DArrayV2I16Trap";
  case NVPTXISD::Suld1DArrayV2I32Trap:  return "NVPTXISD::Suld1DArrayV2I32Trap";
  case NVPTXISD::Suld1DArrayV2I64Trap:  return "NVPTXISD::Suld1DArrayV2I64Trap";
  case NVPTXISD::Suld1DArrayV4I8Trap:   return "NVPTXISD::Suld1DArrayV4I8Trap";
  case NVPTXISD::Suld1DArrayV4I16Trap:  return "NVPTXISD::Suld1DArrayV4I16Trap";
  case NVPTXISD::Suld1DArrayV4I32Trap:  return "NVPTXISD::Suld1DArrayV4I32Trap";

  case NVPTXISD::Suld2DI8Trap:          return "NVPTXISD::Suld2DI8Trap";
  case NVPTXISD::Suld2DI16Trap:         return "NVPTXISD::Suld2DI16Trap";
  case NVPTXISD::Suld2DI32Trap:         return "NVPTXISD::Suld2DI32Trap";
  case NVPTXISD::Suld2DI64Trap:         return "NVPTXISD::Suld2DI64Trap";
  case NVPTXISD::Suld2DV2I8Trap:        return "NVPTXISD::Suld2DV2I8Trap";
  case NVPTXISD::Suld2DV2I16Trap:       return "NVPTXISD::Suld2DV2I16Trap";
  case NVPTXISD::Suld2DV2I32Trap:       return "NVPTXISD::Suld2DV2I32Trap";
  case NVPTXISD::Suld2DV2I64Trap:       return "NVPTXISD::Suld2DV2I64Trap";
  case NVPTXISD::Suld2DV4I8Trap:        return "NVPTXISD::Suld2DV4I8Trap";
  case NVPTXISD::Suld2DV4I16Trap:       return "NVPTXISD::Suld2DV4I16Trap";
  case NVPTXISD::Suld2DV4I32Trap:       return "NVPTXISD::Suld2DV4I32Trap";

  case NVPTXISD::Suld2DArrayI8Trap:     return "NVPTXISD::Suld2DArrayI8Trap";
  case NVPTXISD::Suld2DArrayI16Trap:    return "NVPTXISD::Suld2DArrayI16Trap";
  case NVPTXISD::Suld2DArrayI32Trap:    return "NVPTXISD::Suld2DArrayI32Trap";
  case NVPTXISD::Suld2DArrayI64Trap:    return "NVPTXISD::Suld2DArrayI64Trap";
  case NVPTXISD::Suld2DArrayV2I8Trap:   return "NVPTXISD::Suld2DArrayV2I8Trap";
  case NVPTXISD::Suld2DArrayV2I16Trap:  return "NVPTXISD::Suld2DArrayV2I16Trap";
  case NVPTXISD::Suld2DArrayV2I32Trap:  return "NVPTXISD::Suld2DArrayV2I32Trap";
  case NVPTXISD::Suld2DArrayV2I64Trap:  return "NVPTXISD::Suld2DArrayV2I64Trap";
  case NVPTXISD::Suld2DArrayV4I8Trap:   return "NVPTXISD::Suld2DArrayV4I8Trap";
  case NVPTXISD::Suld2DArrayV4I16Trap:  return "NVPTXISD::Suld2DArrayV4I16Trap";
  case NVPTXISD::Suld2DArrayV4I32Trap:  return "NVPTXISD::Suld2DArrayV4I32Trap";

  case NVPTXISD::Suld3DI8Trap:          return "NVPTXISD::Suld3DI8Trap";
  case NVPTXISD::Suld3DI16Trap:         return "NVPTXISD::Suld3DI16Trap";
  case NVPTXISD::Suld3DI32Trap:         return "NVPTXISD::Suld3DI32Trap";
  case NVPTXISD::Suld3DI64Trap:         return "NVPTXISD::Suld3DI64Trap";
  case NVPTXISD::Suld3DV2I8Trap:        return "NVPTXISD::Suld3DV2I8Trap";
  case NVPTXISD::Suld3DV2I16Trap:       return "NVPTXISD::Suld3DV2I16Trap";
  case NVPTXISD::Suld3DV2I32Trap:       return "NVPTXISD::Suld3DV2I32Trap";
  case NVPTXISD::Suld3DV2I64Trap:       return "NVPTXISD::Suld3DV2I64Trap";
  case NVPTXISD::Suld3DV4I8Trap:        return "NVPTXISD::Suld3DV4I8Trap";
  case NVPTXISD::Suld3DV4I16Trap:       return "NVPTXISD::Suld3DV4I16Trap";
  case NVPTXISD::Suld3DV4I32Trap:       return "NVPTXISD::Suld3DV4I32Trap";

  case NVPTXISD::Suld1DI8Zero:          return "NVPTXISD::Suld1DI8Zero";
  case NVPTXISD::Suld1DI16Zero:         return "NVPTXISD::Suld1DI16Zero";
  case NVPTXISD::Suld1DI32Zero:         return "NVPTXISD::Suld1DI32Zero";
  case NVPTXISD::Suld1DI64Zero:         return "NVPTXISD::Suld1DI64Zero";
  case NVPTXISD::Suld1DV2I8Zero:        return "NVPTXISD::Suld1DV2I8Zero";
  case NVPTXISD::Suld1DV2I16Zero:       return "NVPTXISD::Suld1DV2I16Zero";
  case NVPTXISD::Suld1DV2I32Zero:       return "NVPTXISD::Suld1DV2I32Zero";
  case NVPTXISD::Suld1DV2I64Zero:       return "NVPTXISD::Suld1DV2I64Zero";
  case NVPTXISD::Suld1DV4I8Zero:        return "NVPTXISD::Suld1DV4I8Zero";
  case NVPTXISD::Suld1DV4I16Zero:       return "NVPTXISD::Suld1DV4I16Zero";
  case NVPTXISD::Suld1DV4I32Zero:       return "NVPTXISD::Suld1DV4I32Zero";

  case NVPTXISD::Suld1DArrayI8Zero:     return "NVPTXISD::Suld1DArrayI8Zero";
  case NVPTXISD::Suld1DArrayI16Zero:    return "NVPTXISD::Suld1DArrayI16Zero";
  case NVPTXISD::Suld1DArrayI32Zero:    return "NVPTXISD::Suld1DArrayI32Zero";
  case NVPTXISD::Suld1DArrayI64Zero:    return "NVPTXISD::Suld1DArrayI64Zero";
  case NVPTXISD::Suld1DArrayV2I8Zero:   return "NVPTXISD::Suld1DArrayV2I8Zero";
  case NVPTXISD::Suld1DArrayV2I16Zero:  return "NVPTXISD::Suld1DArrayV2I16Zero";
  case NVPTXISD::Suld1DArrayV2I32Zero:  return "NVPTXISD::Suld1DArrayV2I32Zero";
  case NVPTXISD::Suld1DArrayV2I64Zero:  return "NVPTXISD::Suld1DArrayV2I64Zero";
  case NVPTXISD::Suld1DArrayV4I8Zero:   return "NVPTXISD::Suld1DArrayV4I8Zero";
  case NVPTXISD::Suld1DArrayV4I16Zero:  return "NVPTXISD::Suld1DArrayV4I16Zero";
  case NVPTXISD::Suld1DArrayV4I32Zero:  return "NVPTXISD::Suld1DArrayV4I32Zero";

  case NVPTXISD::Suld2DI8Zero:          return "NVPTXISD::Suld2DI8Zero";
  case NVPTXISD::Suld2DI16Zero:         return "NVPTXISD::Suld2DI16Zero";
  case NVPTXISD::Suld2DI32Zero:         return "NVPTXISD::Suld2DI32Zero";
  case NVPTXISD::Suld2DI64Zero:         return "NVPTXISD::Suld2DI64Zero";
  case NVPTXISD::Suld2DV2I8Zero:        return "NVPTXISD::Suld2DV2I8Zero";
  case NVPTXISD::Suld2DV2I16Zero:       return "NVPTXISD::Suld2DV2I16Zero";
  case NVPTXISD::Suld2DV2I32Zero:       return "NVPTXISD::Suld2DV2I32Zero";
  case NVPTXISD::Suld2DV2I64Zero:       return "NVPTXISD::Suld2DV2I64Zero";
  case NVPTXISD::Suld2DV4I8Zero:        return "NVPTXISD::Suld2DV4I8Zero";
  case NVPTXISD::Suld2DV4I16Zero:       return "NVPTXISD::Suld2DV4I16Zero";
  case NVPTXISD::Suld2DV4I32Zero:       return "NVPTXISD::Suld2DV4I32Zero";

  case NVPTXISD::Suld2DArrayI8Zero:     return "NVPTXISD::Suld2DArrayI8Zero";
  case NVPTXISD::Suld2DArrayI16Zero:    return "NVPTXISD::Suld2DArrayI16Zero";
  case NVPTXISD::Suld2DArrayI32Zero:    return "NVPTXISD::Suld2DArrayI32Zero";
  case NVPTXISD::Suld2DArrayI64Zero:    return "NVPTXISD::Suld2DArrayI64Zero";
  case NVPTXISD::Suld2DArrayV2I8Zero:   return "NVPTXISD::Suld2DArrayV2I8Zero";
  case NVPTXISD::Suld2DArrayV2I16Zero:  return "NVPTXISD::Suld2DArrayV2I16Zero";
  case NVPTXISD::Suld2DArrayV2I32Zero:  return "NVPTXISD::Suld2DArrayV2I32Zero";
  case NVPTXISD::Suld2DArrayV2I64Zero:  return "NVPTXISD::Suld2DArrayV2I64Zero";
  case NVPTXISD::Suld2DArrayV4I8Zero:   return "NVPTXISD::Suld2DArrayV4I8Zero";
  case NVPTXISD::Suld2DArrayV4I16Zero:  return "NVPTXISD::Suld2DArrayV4I16Zero";
  case NVPTXISD::Suld2DArrayV4I32Zero:  return "NVPTXISD::Suld2DArrayV4I32Zero";

  case NVPTXISD::Suld3DI8Zero:          return "NVPTXISD::Suld3DI8Zero";
  case NVPTXISD::Suld3DI16Zero:         return "NVPTXISD::Suld3DI16Zero";
  case NVPTXISD::Suld3DI32Zero:         return "NVPTXISD::Suld3DI32Zero";
  case NVPTXISD::Suld3DI64Zero:         return "NVPTXISD::Suld3DI64Zero";
  case NVPTXISD::Suld3DV2I8Zero:        return "NVPTXISD::Suld3DV2I8Zero";
  case NVPTXISD::Suld3DV2I16Zero:       return "NVPTXISD::Suld3DV2I16Zero";
  case NVPTXISD::Suld3DV2I32Zero:       return "NVPTXISD::Suld3DV2I32Zero";
  case NVPTXISD::Suld3DV2I64Zero:       return "NVPTXISD::Suld3DV2I64Zero";
  case NVPTXISD::Suld3DV4I8Zero:        return "NVPTXISD::Suld3DV4I8Zero";
  case NVPTXISD::Suld3DV4I16Zero:       return "NVPTXISD::Suld3DV4I16Zero";
  case NVPTXISD::Suld3DV4I32Zero:       return "NVPTXISD::Suld3DV4I32Zero";
  }
}

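// Vectors of i1 are split into their scalar elements instead of being widened
// or promoted; e.g. a <4 x i1> value is legalized as four individual i1
// values.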
TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
    return TypeSplitVector;

  return TargetLoweringBase::getPreferredVectorAction(VT);
}

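// Global addresses are wrapped in an NVPTXISD::Wrapper node so that
// instruction selection can match them as direct address operands.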
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
  return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}

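// getPrototype - Build the PTX ".callprototype" string for an indirect call.
// As an illustration (derived from the logic below, not from the PTX spec),
// a callee of IR type 'float (i32)' at the first call site would yield:
//   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _);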
std::string
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  unsigned retAlignment,
                                  const ImmutableCallSite *CS) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
      unsigned size = 0;
      if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
        if (size < 32)
          size = 32;
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      O << ".param .b" << getPointerTy().getSizeInBits() << " _";
    } else if ((retTy->getTypeID() == Type::StructTyID) ||
               isa<VectorType>(retTy)) {
      O << ".param .align "
        << retAlignment
        << " .b8 _["
        << getDataLayout()->getTypeAllocSize(retTy) << "]";
    } else {
      llvm_unreachable("Unknown return type");
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;
  MVT thePointerTy = getPointerTy();

  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
        const DataLayout *TD = getDataLayout();
        // +1 because index 0 is reserved for return type alignment
        if (!llvm::getAlign(*CallI, i + 1, align))
          align = TD->getABITypeAlignment(Ty);
        unsigned sz = TD->getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // Update the index for Outs.
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
      // i8 types in IR will be i16 types in SDAG.
      assert((getValueType(Ty) == Outs[OIdx].VT ||
              (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // Scalar type.
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}

unsigned
NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
                                          const ImmutableCallSite *CS,
                                          Type *Ty,
                                          unsigned Idx) const {
  const DataLayout *TD = getDataLayout();
  unsigned Align = 0;
  const Value *DirectCallee = CS->getCalledFunction();

  if (!DirectCallee) {
    // We don't have a direct function symbol, but that may be because of
    // constant cast instructions in the call.
    const Instruction *CalleeI = CS->getInstruction();
    assert(CalleeI && "Call target is not a function or derived value?");

    // With bitcast'd call targets, the instruction will be the call.
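    // For instance (a hypothetical IR snippet), the called value below is a
    // ConstantExpr cast that the loop further down peels away:
    //   %r = call float bitcast (float (i64)* @f to float (i32)*)(i32 %x)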
    if (isa<CallInst>(CalleeI)) {
      // Check if we have call alignment metadata.
      if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
        return Align;

      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
      // Ignore any bitcast instructions.
      while (isa<ConstantExpr>(CalleeV)) {
        const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
        if (!CE->isCast())
          break;
        // Look through the bitcast.
        CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
      }

      // We have now looked past all of the bitcasts.  Do we finally have a
      // Function?
      if (isa<Function>(CalleeV))
        DirectCallee = CalleeV;
    }
  }

  // Check for function alignment information if we found that the
  // ultimate target is a Function.
  if (DirectCallee)
    if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
      return Align;

  // Call is indirect or alignment information is not available; fall back to
  // the ABI type alignment.
  return TD->getABITypeAlignment(Ty);
}

SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.getArgs();
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;
  const DataLayout *TD = getDataLayout();
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();

  SDValue tempChain = Chain;
  Chain =
      DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
                           dl);
  SDValue InFlag = Chain.getValue(1);

  unsigned paramCount = 0;
  // Args.size() and Outs.size() need not match.
  // Outs.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Outs)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Outs.
  // So a different index should be used for indexing into Outs/OutVals.
  // See similar issue in LowerFormalArguments.
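  // For example, an argument of IR type {i32, i32} is a single entry in Args
  // but contributes two entries (one per field) to Outs and OutVals.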
  unsigned OIdx = 0;
  // Declare the .param or .reg spaces needed to pass values to the function.
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    EVT VT = Outs[OIdx].VT;
    Type *Ty = Args[i].Ty;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType()) {
        // Aggregate parameter: flatten into primitive components.
        SmallVector<EVT, 16> vtparts;
        SmallVector<uint64_t, 16> Offsets;
        ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0);

        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = TD->getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
                                      DAG.getConstant(paramCount, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
          EVT elemtype = vtparts[j];
          unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
          if (elemtype.isInteger() && (sz < 8))
            sz = 8;
          SDValue StVal = OutVals[OIdx];
          if (elemtype.getSizeInBits() < 16) {
            StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
          }
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(Offsets[j], MVT::i32),
                                     StVal, InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          elemtype, MachinePointerInfo(),
                                          ArgAlign);
          InFlag = Chain.getValue(1);
          ++OIdx;
        }
        if (vtparts.size() > 0)
          --OIdx;
        ++paramCount;
        continue;
      }
1118      if (Ty->isVectorTy()) {
1119        EVT ObjectVT = getValueType(Ty);
1120        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
1121        // declare .param .align <align> .b8 .param<n>[<size>];
1122        unsigned sz = TD->getTypeAllocSize(Ty);
1123        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1124        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
1125                                      DAG.getConstant(paramCount, MVT::i32),
1126                                      DAG.getConstant(sz, MVT::i32), InFlag };
1127        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
1128                            DeclareParamOps);
1129        InFlag = Chain.getValue(1);
1130        unsigned NumElts = ObjectVT.getVectorNumElements();
1131        EVT EltVT = ObjectVT.getVectorElementType();
1132        EVT MemVT = EltVT;
1133        bool NeedExtend = false;
1134        if (EltVT.getSizeInBits() < 16) {
1135          NeedExtend = true;
1136          EltVT = MVT::i16;
1137        }
1138
1139        // V1 store
1140        if (NumElts == 1) {
1141          SDValue Elt = OutVals[OIdx++];
1142          if (NeedExtend)
1143            Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);
1144
1145          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1146          SDValue CopyParamOps[] = { Chain,
1147                                     DAG.getConstant(paramCount, MVT::i32),
1148                                     DAG.getConstant(0, MVT::i32), Elt,
1149                                     InFlag };
1150          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
1151                                          CopyParamVTs, CopyParamOps,
1152                                          MemVT, MachinePointerInfo());
1153          InFlag = Chain.getValue(1);
1154        } else if (NumElts == 2) {
1155          SDValue Elt0 = OutVals[OIdx++];
1156          SDValue Elt1 = OutVals[OIdx++];
1157          if (NeedExtend) {
1158            Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
1159            Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
1160          }
1161
1162          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1163          SDValue CopyParamOps[] = { Chain,
1164                                     DAG.getConstant(paramCount, MVT::i32),
1165                                     DAG.getConstant(0, MVT::i32), Elt0, Elt1,
1166                                     InFlag };
1167          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
1168                                          CopyParamVTs, CopyParamOps,
1169                                          MemVT, MachinePointerInfo());
1170          InFlag = Chain.getValue(1);
1171        } else {
1172          unsigned curOffset = 0;
1173          // V4 stores
1174          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
1175          // the vector will be expanded to a power of 2 elements, so we know
1176          // we can always round up to the next multiple of 4 when creating
1177          // the vector stores.
1179          // e.g.  4 elem => 1 st.v4
1180          //       6 elem => 2 st.v4
1181          //       8 elem => 2 st.v4
1182          //      11 elem => 3 st.v4
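              // Illustratively, a <6 x i32> argument becomes two StoreParamV4
              // nodes, the second padded with two undef lanes.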
1183          unsigned VecSize = 4;
1184          if (EltVT.getSizeInBits() == 64)
1185            VecSize = 2;
1186
1187          // This is potentially only part of a vector, so assume all elements
1188          // are packed together.
1189          unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;
1190
1191          for (unsigned i = 0; i < NumElts; i += VecSize) {
1192            // Get values
1193            SDValue StoreVal;
1194            SmallVector<SDValue, 8> Ops;
1195            Ops.push_back(Chain);
1196            Ops.push_back(DAG.getConstant(paramCount, MVT::i32));
1197            Ops.push_back(DAG.getConstant(curOffset, MVT::i32));
1198
1199            unsigned Opc = NVPTXISD::StoreParamV2;
1200
1201            StoreVal = OutVals[OIdx++];
1202            if (NeedExtend)
1203              StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1204            Ops.push_back(StoreVal);
1205
1206            if (i + 1 < NumElts) {
1207              StoreVal = OutVals[OIdx++];
1208              if (NeedExtend)
1209                StoreVal =
1210                    DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1211            } else {
1212              StoreVal = DAG.getUNDEF(EltVT);
1213            }
1214            Ops.push_back(StoreVal);
1215
1216            if (VecSize == 4) {
1217              Opc = NVPTXISD::StoreParamV4;
1218              if (i + 2 < NumElts) {
1219                StoreVal = OutVals[OIdx++];
1220                if (NeedExtend)
1221                  StoreVal =
1222                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1223              } else {
1224                StoreVal = DAG.getUNDEF(EltVT);
1225              }
1226              Ops.push_back(StoreVal);
1227
1228              if (i + 3 < NumElts) {
1229                StoreVal = OutVals[OIdx++];
1230                if (NeedExtend)
1231                  StoreVal =
1232                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1233              } else {
1234                StoreVal = DAG.getUNDEF(EltVT);
1235              }
1236              Ops.push_back(StoreVal);
1237            }
1238
1239            Ops.push_back(InFlag);
1240
1241            SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1242            Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
1243                                            MemVT, MachinePointerInfo());
1244            InFlag = Chain.getValue(1);
1245            curOffset += PerStoreOffset;
1246          }
1247        }
1248        ++paramCount;
1249        --OIdx;
1250        continue;
1251      }
1252      // Plain scalar
1253      // for ABI,    declare .param .b<size> .param<n>;
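          // e.g. (illustrative) an i32 argument in slot 1 could be declared as:
          //   .param .b32 param1;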
1254      unsigned sz = VT.getSizeInBits();
1255      bool needExtend = false;
1256      if (VT.isInteger()) {
1257        if (sz < 16)
1258          needExtend = true;
1259        if (sz < 32)
1260          sz = 32;
1261      }
1262      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1263      SDValue DeclareParamOps[] = { Chain,
1264                                    DAG.getConstant(paramCount, MVT::i32),
1265                                    DAG.getConstant(sz, MVT::i32),
1266                                    DAG.getConstant(0, MVT::i32), InFlag };
1267      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
1268                          DeclareParamOps);
1269      InFlag = Chain.getValue(1);
1270      SDValue OutV = OutVals[OIdx];
1271      if (needExtend) {
1272        // zext/sext i1 to i16
1273        unsigned opc = ISD::ZERO_EXTEND;
1274        if (Outs[OIdx].Flags.isSExt())
1275          opc = ISD::SIGN_EXTEND;
1276        OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
1277      }
1278      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1279      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
1280                                 DAG.getConstant(0, MVT::i32), OutV, InFlag };
1281
1282      unsigned opcode = NVPTXISD::StoreParam;
1283      if (Outs[OIdx].Flags.isZExt())
1284        opcode = NVPTXISD::StoreParamU32;
1285      else if (Outs[OIdx].Flags.isSExt())
1286        opcode = NVPTXISD::StoreParamS32;
1287      Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
1288                                      VT, MachinePointerInfo());
1289
1290      InFlag = Chain.getValue(1);
1291      ++paramCount;
1292      continue;
1293    }
1294    // struct or vector
1295    SmallVector<EVT, 16> vtparts;
1296    SmallVector<uint64_t, 16> Offsets;
1297    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
1298    assert(PTy && "Type of a byval parameter should be a pointer");
1299    ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);
1300
1301    // declare .param .align <align> .b8 .param<n>[<size>];
1302    unsigned sz = Outs[OIdx].Flags.getByValSize();
1303    SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1304    unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
1305    // The ByValAlign in Outs[OIdx].Flags is always set at this point,
1306    // so we don't need to worry about whether natural alignment applies.
1307    // See TargetLowering::LowerCallTo().
1308    SDValue DeclareParamOps[] = {
1309      Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32),
1310      DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
1311      InFlag
1312    };
1313    Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
1314                        DeclareParamOps);
1315    InFlag = Chain.getValue(1);
1316    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
1317      EVT elemtype = vtparts[j];
1318      int curOffset = Offsets[j];
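          // The element is only known to be aligned to the greatest common
          // divisor of the argument alignment and its offset; e.g.
          // (illustrative) ArgAlign = 8 with curOffset = 4 gives PartAlign = 4.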
1319      unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
1320      SDValue srcAddr =
1321          DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
1322                      DAG.getConstant(curOffset, getPointerTy()));
1323      SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
1324                                   MachinePointerInfo(), false, false, false,
1325                                   PartAlign);
1326      if (elemtype.getSizeInBits() < 16) {
1327        theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
1328      }
1329      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1330      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
1331                                 DAG.getConstant(curOffset, MVT::i32), theVal,
1332                                 InFlag };
1333      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
1334                                      CopyParamOps, elemtype,
1335                                      MachinePointerInfo());
1336
1337      InFlag = Chain.getValue(1);
1338    }
1339    ++paramCount;
1340  }
1341
1342  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
1343  unsigned retAlignment = 0;
1344
1345  // Handle Result
1346  if (Ins.size() > 0) {
1347    SmallVector<EVT, 16> resvtparts;
1348    ComputeValueVTs(*this, retTy, resvtparts);
1349
1350    // Declare
1351    //  .param .align 16 .b8 retval0[<size-in-bytes>], or
1352    //  .param .b<size-in-bits> retval0
1353    unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
1354    // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
1355    // these three types to match the logic in
1356    // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
1357    // Plus, this behavior is consistent with nvcc's.
1358    if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
1359        retTy->isPointerTy()) {
1360      // Scalars need to be at least 32 bits wide
1361      if (resultsz < 32)
1362        resultsz = 32;
1363      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1364      SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
1365                                  DAG.getConstant(resultsz, MVT::i32),
1366                                  DAG.getConstant(0, MVT::i32), InFlag };
1367      Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
1368                          DeclareRetOps);
1369      InFlag = Chain.getValue(1);
1370    } else {
1371      retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
1372      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1373      SDValue DeclareRetOps[] = { Chain,
1374                                  DAG.getConstant(retAlignment, MVT::i32),
1375                                  DAG.getConstant(resultsz / 8, MVT::i32),
1376                                  DAG.getConstant(0, MVT::i32), InFlag };
1377      Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
1378                          DeclareRetOps);
1379      InFlag = Chain.getValue(1);
1380    }
1381  }
1382
1383  if (!Func) {
1384    // This is the indirect function call case: PTX requires a prototype of
1385    // the form
1386    // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
1387    // to be emitted, and the label has to be used as the last argument of
1388    // the call instruction.
1389    // The prototype is embedded in a string and used as the operand of a
1390    // CallPrototype SDNode, which prints out as the value of the string.
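        // As an illustrative sketch (not verbatim output), a callee of type
        // 'float(float, int)' could yield a prototype along the lines of:
        //   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b32 _);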
1391    SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1392    std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
1393    const char *ProtoStr =
1394      nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
1395    SDValue ProtoOps[] = {
1396      Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
1397    };
1398    Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
1399    InFlag = Chain.getValue(1);
1400  }
1401  // Op to just print "call"
1402  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1403  SDValue PrintCallOps[] = {
1404    Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag
1405  };
1406  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
1407                      dl, PrintCallVTs, PrintCallOps);
1408  InFlag = Chain.getValue(1);
1409
1410  // Ops to print out the function name
1411  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1412  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
1413  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
1414  InFlag = Chain.getValue(1);
1415
1416  // Ops to print out the param list
1417  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1418  SDValue CallArgBeginOps[] = { Chain, InFlag };
1419  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
1420                      CallArgBeginOps);
1421  InFlag = Chain.getValue(1);
1422
1423  for (unsigned i = 0, e = paramCount; i != e; ++i) {
1424    unsigned opcode;
1425    if (i == (e - 1))
1426      opcode = NVPTXISD::LastCallArg;
1427    else
1428      opcode = NVPTXISD::CallArg;
1429    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1430    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
1431                             DAG.getConstant(i, MVT::i32), InFlag };
1432    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
1433    InFlag = Chain.getValue(1);
1434  }
1435  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1436  SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
1437                              InFlag };
1438  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
1439  InFlag = Chain.getValue(1);
1440
1441  if (!Func) {
1442    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1443    SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
1444                               InFlag };
1445    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
1446    InFlag = Chain.getValue(1);
1447  }
1448
1449  // Generate loads from param memory/moves from registers for result
1450  if (Ins.size() > 0) {
1451    if (retTy && retTy->isVectorTy()) {
1452      EVT ObjectVT = getValueType(retTy);
1453      unsigned NumElts = ObjectVT.getVectorNumElements();
1454      EVT EltVT = ObjectVT.getVectorElementType();
1455      assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(
1456                 F->getContext(), ObjectVT) == NumElts &&
1457             "Vector was not scalarized");
1458      unsigned sz = EltVT.getSizeInBits();
1459      bool needTruncate = sz < 8;
1460
1461      if (NumElts == 1) {
1462        // Just a simple load
1463        SmallVector<EVT, 4> LoadRetVTs;
1464        if (EltVT == MVT::i1 || EltVT == MVT::i8) {
1465          // If loading i1/i8 result, generate
1466          //   load.b8 i16
1467          //   if i1
1468          //   trunc i16 to i1
1469          LoadRetVTs.push_back(MVT::i16);
1470        } else
1471          LoadRetVTs.push_back(EltVT);
1472        LoadRetVTs.push_back(MVT::Other);
1473        LoadRetVTs.push_back(MVT::Glue);
1474        SmallVector<SDValue, 4> LoadRetOps;
1475        LoadRetOps.push_back(Chain);
1476        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
1477        LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
1478        LoadRetOps.push_back(InFlag);
1479        SDValue retval = DAG.getMemIntrinsicNode(
1480            NVPTXISD::LoadParam, dl,
1481            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
1482        Chain = retval.getValue(1);
1483        InFlag = retval.getValue(2);
1484        SDValue Ret0 = retval;
1485        if (needTruncate)
1486          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
1487        InVals.push_back(Ret0);
1488      } else if (NumElts == 2) {
1489        // LoadV2
1490        SmallVector<EVT, 4> LoadRetVTs;
1491        if (EltVT == MVT::i1 || EltVT == MVT::i8) {
1492          // If loading i1/i8 result, generate
1493          //   load.b8 i16
1494          //   if i1
1495          //   trunc i16 to i1
1496          LoadRetVTs.push_back(MVT::i16);
1497          LoadRetVTs.push_back(MVT::i16);
1498        } else {
1499          LoadRetVTs.push_back(EltVT);
1500          LoadRetVTs.push_back(EltVT);
1501        }
1502        LoadRetVTs.push_back(MVT::Other);
1503        LoadRetVTs.push_back(MVT::Glue);
1504        SmallVector<SDValue, 4> LoadRetOps;
1505        LoadRetOps.push_back(Chain);
1506        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
1507        LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
1508        LoadRetOps.push_back(InFlag);
1509        SDValue retval = DAG.getMemIntrinsicNode(
1510            NVPTXISD::LoadParamV2, dl,
1511            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
1512        Chain = retval.getValue(2);
1513        InFlag = retval.getValue(3);
1514        SDValue Ret0 = retval.getValue(0);
1515        SDValue Ret1 = retval.getValue(1);
1516        if (needTruncate) {
1517          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
1518          InVals.push_back(Ret0);
1519          Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
1520          InVals.push_back(Ret1);
1521        } else {
1522          InVals.push_back(Ret0);
1523          InVals.push_back(Ret1);
1524        }
1525      } else {
1526        // Split into N LoadV4
1527        unsigned Ofst = 0;
1528        unsigned VecSize = 4;
1529        unsigned Opc = NVPTXISD::LoadParamV4;
1530        if (EltVT.getSizeInBits() == 64) {
1531          VecSize = 2;
1532          Opc = NVPTXISD::LoadParamV2;
1533        }
1534        EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
1535        for (unsigned i = 0; i < NumElts; i += VecSize) {
1536          SmallVector<EVT, 8> LoadRetVTs;
1537          if (EltVT == MVT::i1 || EltVT == MVT::i8) {
1538            // If loading i1/i8 result, generate
1539            //   load.b8 i16
1540            //   if i1
1541            //   trunc i16 to i1
1542            for (unsigned j = 0; j < VecSize; ++j)
1543              LoadRetVTs.push_back(MVT::i16);
1544          } else {
1545            for (unsigned j = 0; j < VecSize; ++j)
1546              LoadRetVTs.push_back(EltVT);
1547          }
1548          LoadRetVTs.push_back(MVT::Other);
1549          LoadRetVTs.push_back(MVT::Glue);
1550          SmallVector<SDValue, 4> LoadRetOps;
1551          LoadRetOps.push_back(Chain);
1552          LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
1553          LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32));
1554          LoadRetOps.push_back(InFlag);
1555          SDValue retval = DAG.getMemIntrinsicNode(
1556              Opc, dl, DAG.getVTList(LoadRetVTs),
1557              LoadRetOps, EltVT, MachinePointerInfo());
1558          if (VecSize == 2) {
1559            Chain = retval.getValue(2);
1560            InFlag = retval.getValue(3);
1561          } else {
1562            Chain = retval.getValue(4);
1563            InFlag = retval.getValue(5);
1564          }
1565
1566          for (unsigned j = 0; j < VecSize; ++j) {
1567            if (i + j >= NumElts)
1568              break;
1569            SDValue Elt = retval.getValue(j);
1570            if (needTruncate)
1571              Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
1572            InVals.push_back(Elt);
1573          }
1574          Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
1575        }
1576      }
1577    } else {
1578      SmallVector<EVT, 16> VTs;
1579      SmallVector<uint64_t, 16> Offsets;
1580      ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0);
1581      assert(VTs.size() == Ins.size() && "Bad value decomposition");
1582      unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
1583      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
1584        unsigned sz = VTs[i].getSizeInBits();
1585        unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
1586        bool needTruncate = sz < 8;
1587        if (VTs[i].isInteger() && (sz < 8))
1588          sz = 8;
1589
1590        SmallVector<EVT, 4> LoadRetVTs;
1591        EVT TheLoadType = VTs[i];
1592        if (retTy->isIntegerTy() &&
1593            TD->getTypeAllocSizeInBits(retTy) < 32) {
1594          // This is for integer types only, and specifically not for
1595          // aggregates.
1596          LoadRetVTs.push_back(MVT::i32);
1597          TheLoadType = MVT::i32;
1598        } else if (sz < 16) {
1599          // If loading i1/i8 result, generate
1600          //   load i8 (-> i16)
1601          //   trunc i16 to i1/i8
1602          LoadRetVTs.push_back(MVT::i16);
1603        } else
1604          LoadRetVTs.push_back(Ins[i].VT);
1605        LoadRetVTs.push_back(MVT::Other);
1606        LoadRetVTs.push_back(MVT::Glue);
1607
1608        SmallVector<SDValue, 4> LoadRetOps;
1609        LoadRetOps.push_back(Chain);
1610        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
1611        LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32));
1612        LoadRetOps.push_back(InFlag);
1613        SDValue retval = DAG.getMemIntrinsicNode(
1614            NVPTXISD::LoadParam, dl,
1615            DAG.getVTList(LoadRetVTs), LoadRetOps,
1616            TheLoadType, MachinePointerInfo(), AlignI);
1617        Chain = retval.getValue(1);
1618        InFlag = retval.getValue(2);
1619        SDValue Ret0 = retval.getValue(0);
1620        if (needTruncate)
1621          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
1622        InVals.push_back(Ret0);
1623      }
1624    }
1625  }
1626
1627  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
1628                             DAG.getIntPtrConstant(uniqueCallSite + 1, true),
1629                             InFlag, dl);
1630  uniqueCallSite++;
1631
1632  // set isTailCall to false for now, until we figure out how to express
1633  // tail call optimization in PTX
1634  isTailCall = false;
1635  return Chain;
1636}
1637
1638// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
1639// (see LegalizeDAG.cpp). This is slow and uses local memory.
1640// We use extract/insert/build vector, just as LegalizeOp() did in LLVM 2.5.
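    // Illustratively, concat(<2 x float> A, <2 x float> B) becomes
    // BUILD_VECTOR(A[0], A[1], B[0], B[1]) via four EXTRACT_VECTOR_ELT nodes.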
1641SDValue
1642NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
1643  SDNode *Node = Op.getNode();
1644  SDLoc dl(Node);
1645  SmallVector<SDValue, 8> Ops;
1646  unsigned NumOperands = Node->getNumOperands();
1647  for (unsigned i = 0; i < NumOperands; ++i) {
1648    SDValue SubOp = Node->getOperand(i);
1649    EVT VVT = SubOp.getNode()->getValueType(0);
1650    EVT EltVT = VVT.getVectorElementType();
1651    unsigned NumSubElem = VVT.getVectorNumElements();
1652    for (unsigned j = 0; j < NumSubElem; ++j) {
1653      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
1654                                DAG.getIntPtrConstant(j)));
1655    }
1656  }
1657  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
1658}
1659
1660/// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which
1661/// 1) return two i32 values and take a 2 x i32 value to shift plus a shift
1662///    amount, or
1663/// 2) return two i64 values and take a 2 x i64 value to shift plus a shift
1664///    amount.
1665SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
1666                                                  SelectionDAG &DAG) const {
1667  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
1668  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
1669
1670  EVT VT = Op.getValueType();
1671  unsigned VTBits = VT.getSizeInBits();
1672  SDLoc dl(Op);
1673  SDValue ShOpLo = Op.getOperand(0);
1674  SDValue ShOpHi = Op.getOperand(1);
1675  SDValue ShAmt  = Op.getOperand(2);
1676  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
1677
1678  if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
1679
1680    // For 32-bit on sm_35+, we can use the funnel shift 'shf' instruction.
1681    // {dHi, dLo} = {aHi, aLo} >> Amt
1682    //   dHi = aHi >> Amt
1683    //   dLo = shf.r.clamp aLo, aHi, Amt
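        // A rough sketch of the PTX this may select to (names illustrative):
        //   shr.u32          dHi, aHi, Amt;  // shr.s32 for SRA_PARTS
        //   shf.r.clamp.b32  dLo, aLo, aHi, Amt;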
1684
1685    SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
1686    SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
1687                             ShAmt);
1688
1689    SDValue Ops[2] = { Lo, Hi };
1690    return DAG.getMergeValues(Ops, dl);
1691  } else {
1693
1694    // {dHi, dLo} = {aHi, aLo} >> Amt
1695    // - if (Amt>=size) then
1696    //      dLo = aHi >> (Amt-size)
1697    //      dHi = aHi >> Amt (this is either all 0 or all 1)
1698    //   else
1699    //      dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
1700    //      dHi = aHi >> Amt
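        // Worked example (illustrative, 2 x i32, size = 32):
        //   Amt = 40: dLo = aHi >> 8; dHi is all zero (SRL) or all sign bits
        //             (SRA)
        //   Amt = 8:  dLo = (aLo >>logic 8) | (aHi << 24); dHi = aHi >> 8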
1701
1702    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1703                                   DAG.getConstant(VTBits, MVT::i32), ShAmt);
1704    SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
1705    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1706                                     DAG.getConstant(VTBits, MVT::i32));
1707    SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
1708    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
1709    SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
1710
1711    SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
1712                               DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
1713    SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
1714    SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
1715
1716    SDValue Ops[2] = { Lo, Hi };
1717    return DAG.getMergeValues(Ops, dl);
1718  }
1719}
1720
1721/// LowerShiftLeftParts - Lower SHL_PARTS, which
1722/// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
1723///    amount, or
1724/// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
1725///    amount.
1726SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
1727                                                 SelectionDAG &DAG) const {
1728  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
1729  assert(Op.getOpcode() == ISD::SHL_PARTS);
1730
1731  EVT VT = Op.getValueType();
1732  unsigned VTBits = VT.getSizeInBits();
1733  SDLoc dl(Op);
1734  SDValue ShOpLo = Op.getOperand(0);
1735  SDValue ShOpHi = Op.getOperand(1);
1736  SDValue ShAmt  = Op.getOperand(2);
1737
1738  if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
1739
1740    // For 32-bit on sm_35+, we can use the funnel shift 'shf' instruction.
1741    // {dHi, dLo} = {aHi, aLo} << Amt
1742    //   dHi = shf.l.clamp aLo, aHi, Amt
1743    //   dLo = aLo << Amt
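        // A rough sketch of the PTX this may select to (names illustrative):
        //   shf.l.clamp.b32  dHi, aLo, aHi, Amt;
        //   shl.b32          dLo, aLo, Amt;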
1744
1745    SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
1746                             ShAmt);
1747    SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
1748
1749    SDValue Ops[2] = { Lo, Hi };
1750    return DAG.getMergeValues(Ops, dl);
1751  } else {
1753
1754    // {dHi, dLo} = {aHi, aLo} << Amt
1755    // - if (Amt>=size) then
1756    //      dLo = aLo << Amt (all 0)
1757    //      dHi = aLo << (Amt-size)
1758    //   else
1759    //      dLo = aLo << Amt
1760    //      dHi = (aHi << Amt) | (aLo >> (size-Amt))
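        // Worked example (illustrative, 2 x i32, size = 32):
        //   Amt = 40: dLo = 0; dHi = aLo << 8
        //   Amt = 8:  dLo = aLo << 8; dHi = (aHi << 8) | (aLo >> 24)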
1761
1762    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1763                                   DAG.getConstant(VTBits, MVT::i32), ShAmt);
1764    SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
1765    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1766                                     DAG.getConstant(VTBits, MVT::i32));
1767    SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
1768    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
1769    SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
1770
1771    SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
1772                               DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
1773    SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
1774    SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
1775
1776    SDValue Ops[2] = { Lo, Hi };
1777    return DAG.getMergeValues(Ops, dl);
1778  }
1779}
1780
1781SDValue
1782NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1783  switch (Op.getOpcode()) {
1784  case ISD::RETURNADDR:
1785    return SDValue();
1786  case ISD::FRAMEADDR:
1787    return SDValue();
1788  case ISD::GlobalAddress:
1789    return LowerGlobalAddress(Op, DAG);
1790  case ISD::INTRINSIC_W_CHAIN:
1791    return Op;
1792  case ISD::BUILD_VECTOR:
1793  case ISD::EXTRACT_SUBVECTOR:
1794    return Op;
1795  case ISD::CONCAT_VECTORS:
1796    return LowerCONCAT_VECTORS(Op, DAG);
1797  case ISD::STORE:
1798    return LowerSTORE(Op, DAG);
1799  case ISD::LOAD:
1800    return LowerLOAD(Op, DAG);
1801  case ISD::SHL_PARTS:
1802    return LowerShiftLeftParts(Op, DAG);
1803  case ISD::SRA_PARTS:
1804  case ISD::SRL_PARTS:
1805    return LowerShiftRightParts(Op, DAG);
1806  default:
1807    llvm_unreachable("Custom lowering not defined for operation");
1808  }
1809}
1810
1811SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1812  if (Op.getValueType() == MVT::i1)
1813    return LowerLOADi1(Op, DAG);
1814  else
1815    return SDValue();
1816}
1817
1818// v = ld i1* addr
1819//   =>
1820// v1 = ld i8* addr (-> i16)
1821// v = trunc i16 to i1
1822SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
1823  SDNode *Node = Op.getNode();
1824  LoadSDNode *LD = cast<LoadSDNode>(Node);
1825  SDLoc dl(Node);
1826  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
1827  assert(Node->getValueType(0) == MVT::i1 &&
1828         "Custom lowering for i1 load only");
1829  SDValue newLD =
1830      DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
1831                  LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
1832                  LD->isInvariant(), LD->getAlignment());
1833  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
1834  // The legalizer (the caller) is expecting two values from the legalized
1835  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
1836  // in LegalizeDAG.cpp which also uses MergeValues.
1837  SDValue Ops[] = { result, LD->getChain() };
1838  return DAG.getMergeValues(Ops, dl);
1839}
1840
1841SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1842  EVT ValVT = Op.getOperand(1).getValueType();
1843  if (ValVT == MVT::i1)
1844    return LowerSTOREi1(Op, DAG);
1845  else if (ValVT.isVector())
1846    return LowerSTOREVector(Op, DAG);
1847  else
1848    return SDValue();
1849}
1850
1851SDValue
1852NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
1853  SDNode *N = Op.getNode();
1854  SDValue Val = N->getOperand(1);
1855  SDLoc DL(N);
1856  EVT ValVT = Val.getValueType();
1857
1858  if (ValVT.isVector()) {
1859    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
1860    // legal.  We can (and should) split that into 2 stores of <2 x double> here
1861    // but I'm leaving that as a TODO for now.
1862    if (!ValVT.isSimple())
1863      return SDValue();
1864    switch (ValVT.getSimpleVT().SimpleTy) {
1865    default:
1866      return SDValue();
1867    case MVT::v2i8:
1868    case MVT::v2i16:
1869    case MVT::v2i32:
1870    case MVT::v2i64:
1871    case MVT::v2f32:
1872    case MVT::v2f64:
1873    case MVT::v4i8:
1874    case MVT::v4i16:
1875    case MVT::v4i32:
1876    case MVT::v4f32:
1877      // This is a "native" vector type
1878      break;
1879    }
1880
1881    MemSDNode *MemSD = cast<MemSDNode>(N);
1882    const DataLayout *TD = getDataLayout();
1883
1884    unsigned Align = MemSD->getAlignment();
1885    unsigned PrefAlign =
1886      TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
1887    if (Align < PrefAlign) {
1888      // This store is not sufficiently aligned, so bail out and let this vector
1889      // store be scalarized.  Note that we may still be able to emit smaller
1890      // vector stores.  For example, if we are storing a <4 x float> with an
1891      // alignment of 8, this check will fail but the legalizer will try again
1892      // with 2 x <2 x float>, which will succeed with an alignment of 8.
1893      return SDValue();
1894    }
1895
1896    unsigned Opcode = 0;
1897    EVT EltVT = ValVT.getVectorElementType();
1898    unsigned NumElts = ValVT.getVectorNumElements();
1899
1900    // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
1901    // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
1902    // stored type to i16 and propagate the "real" type as the memory type.
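        // e.g. (illustrative) a <4 x i8> store becomes a StoreV4 of four
        // i16 values with memory VT v4i8, which should select to a .v4 store
        // of byte-sized elements.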
1903    bool NeedExt = false;
1904    if (EltVT.getSizeInBits() < 16)
1905      NeedExt = true;
1906
1907    switch (NumElts) {
1908    default:
1909      return SDValue();
1910    case 2:
1911      Opcode = NVPTXISD::StoreV2;
1912      break;
1913    case 4: {
1914      Opcode = NVPTXISD::StoreV4;
1915      break;
1916    }
1917    }
1918
1919    SmallVector<SDValue, 8> Ops;
1920
1921    // First is the chain
1922    Ops.push_back(N->getOperand(0));
1923
1924    // Then the split values
1925    for (unsigned i = 0; i < NumElts; ++i) {
1926      SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
1927                                   DAG.getIntPtrConstant(i));
1928      if (NeedExt)
1929        ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
1930      Ops.push_back(ExtVal);
1931    }
1932
1933    // Then any remaining arguments
1934    for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
1935      Ops.push_back(N->getOperand(i));
1936    }
1937
1938    SDValue NewSt = DAG.getMemIntrinsicNode(
1939        Opcode, DL, DAG.getVTList(MVT::Other), Ops,
1940        MemSD->getMemoryVT(), MemSD->getMemOperand());
1941
1943    return NewSt;
1944  }
1945
1946  return SDValue();
1947}
1948
1949// st i1 v, addr
1950//    =>
1951// v1 = zxt v to i16
1952// st.u8 i16, addr
1953SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
1954  SDNode *Node = Op.getNode();
1955  SDLoc dl(Node);
1956  StoreSDNode *ST = cast<StoreSDNode>(Node);
1957  SDValue Tmp1 = ST->getChain();
1958  SDValue Tmp2 = ST->getBasePtr();
1959  SDValue Tmp3 = ST->getValue();
1960  assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
1961  unsigned Alignment = ST->getAlignment();
1962  bool isVolatile = ST->isVolatile();
1963  bool isNonTemporal = ST->isNonTemporal();
1964  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
1965  SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
1966                                     ST->getPointerInfo(), MVT::i8, isNonTemporal,
1967                                     isVolatile, Alignment);
1968  return Result;
1969}
1970
1971SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
1972                                        int idx, EVT v) const {
1973  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
1974  std::stringstream suffix;
1975  suffix << idx;
1976  *name += suffix.str();
1977  return DAG.getTargetExternalSymbol(name->c_str(), v);
1978}
1979
1980SDValue
1981NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
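      // Build the external symbol used to name parameter 'idx' in PTX, e.g.
      // (illustrative) idx = 2 in a function "foo" yields "foo_param_2".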
1982  std::string ParamSym;
1983  raw_string_ostream ParamStr(ParamSym);
1984
1985  ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx;
1986  ParamStr.flush();
1987
1988  std::string *SavedStr =
1989    nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str());
1990  return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
1991}
1992
1993SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
1994  return getExtSymb(DAG, ".HLPPARAM", idx);
1995}
1996
1997// Check to see if the kernel argument is image*_t or sampler_t
1998
1999bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
2000  static const char *const specialTypes[] = { "struct._image2d_t",
2001                                              "struct._image3d_t",
2002                                              "struct._sampler_t" };
2003
2004  const Type *Ty = arg->getType();
2005  const PointerType *PTy = dyn_cast<PointerType>(Ty);
2006
2007  if (!PTy)
2008    return false;
2009
2010  if (!context)
2011    return false;
2012
2013  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
2014  const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
2015
2016  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
2017    if (TypeName == specialTypes[i])
2018      return true;
2019
2020  return false;
2021}
2022
2023SDValue NVPTXTargetLowering::LowerFormalArguments(
2024    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2025    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
2026    SmallVectorImpl<SDValue> &InVals) const {
2027  MachineFunction &MF = DAG.getMachineFunction();
2028  const DataLayout *TD = getDataLayout();
2029
2030  const Function *F = MF.getFunction();
2031  const AttributeSet &PAL = F->getAttributes();
2032  const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();
2033
2034  SDValue Root = DAG.getRoot();
2035  std::vector<SDValue> OutChains;
2036
2037  bool isKernel = llvm::isKernelFunction(*F);
2038  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
2039  assert(isABI && "Non-ABI compilation is not supported");
2040  if (!isABI)
2041    return Chain;
2042
2043  std::vector<Type *> argTypes;
2044  std::vector<const Argument *> theArgs;
2045  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
2046       I != E; ++I) {
2047    theArgs.push_back(I);
2048    argTypes.push_back(I->getType());
2049  }
2050  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
2051  // Ins.size() will be larger
2052  //   * if there is an aggregate argument with multiple fields (each field
2053  //     showing up separately in Ins)
2054  //   * if there is a vector argument with more elements than the typical
2055  //     vector length (generally more than 4), where each vector element is
2056  //     individually present in Ins.
2057  // So a different index should be used for indexing into Ins.
2058  // See similar issue in LowerCall.
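      // For instance (illustrative), a single {i32, i32} aggregate argument
      // is one entry in argTypes but contributes two entries to Ins.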
2059  unsigned InsIdx = 0;
2060
2061  int idx = 0;
2062  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
2063    Type *Ty = argTypes[i];
2064
2065    // If the kernel argument is image*_t or sampler_t, convert it to
2066    // an i32 constant holding the parameter position. This can later be
2067    // matched in the AsmPrinter to output the correct mangled name.
2068    if (isImageOrSamplerVal(
2069            theArgs[i],
2070            (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
2071                                     : nullptr))) {
2072      assert(isKernel && "Only kernels can have image/sampler params");
2073      InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
2074      continue;
2075    }
2076
2077    if (theArgs[i]->use_empty()) {
2078      // argument is dead
2079      if (Ty->isAggregateType()) {
2080        SmallVector<EVT, 16> vtparts;
2081
2082        ComputePTXValueVTs(*this, Ty, vtparts);
2083        assert(vtparts.size() > 0 && "empty aggregate type not expected");
2084        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
2085             ++parti) {
2086          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2087          ++InsIdx;
2088        }
2089        if (vtparts.size() > 0)
2090          --InsIdx;
2091        continue;
2092      }
2093      if (Ty->isVectorTy()) {
2094        EVT ObjectVT = getValueType(Ty);
2095        unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
2096        for (unsigned parti = 0; parti < NumRegs; ++parti) {
2097          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2098          ++InsIdx;
2099        }
2100        if (NumRegs > 0)
2101          --InsIdx;
2102        continue;
2103      }
2104      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2105      continue;
2106    }
2107
2108    // In the following cases, assign a node order of "idx+1"
2109    // to newly created nodes. The SDNodes for params have to
2110    // appear in the same order as they appear in the original
2111    // function; "idx+1" holds that order.
2112    if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
2113      if (Ty->isAggregateType()) {
2114        SmallVector<EVT, 16> vtparts;
2115        SmallVector<uint64_t, 16> offsets;
2116
2117        // NOTE: Here, we lose the ability to issue vector loads for vectors
2118        // that are a part of a struct.  This should be investigated in the
2119        // future.
2120        ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0);
2121        assert(vtparts.size() > 0 && "empty aggregate type not expected");
2122        bool aggregateIsPacked = false;
2123        if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
2124          aggregateIsPacked = STy->isPacked();
2125
2126        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
2127        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
2128             ++parti) {
2129          EVT partVT = vtparts[parti];
2130          Value *srcValue = Constant::getNullValue(
2131              PointerType::get(partVT.getTypeForEVT(F->getContext()),
2132                               llvm::ADDRESS_SPACE_PARAM));
2133          SDValue srcAddr =
2134              DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
2135                          DAG.getConstant(offsets[parti], getPointerTy()));
2136          unsigned partAlign =
2137              aggregateIsPacked ? 1
2138                                : TD->getABITypeAlignment(
2139                                      partVT.getTypeForEVT(F->getContext()));
2140          SDValue p;
2141          if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
2142            ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
2143                                     ISD::SEXTLOAD : ISD::ZEXTLOAD;
2144            p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
2145                               MachinePointerInfo(srcValue), partVT, false,
2146                               false, false, partAlign);
2147          } else {
2148            p = DAG.getLoad(partVT, dl, Root, srcAddr,
2149                            MachinePointerInfo(srcValue), false, false, false,
2150                            partAlign);
2151          }
2152          if (p.getNode())
2153            p.getNode()->setIROrder(idx + 1);
2154          InVals.push_back(p);
2155          ++InsIdx;
2156        }
2157        if (vtparts.size() > 0)
2158          --InsIdx;
2159        continue;
2160      }
2161      if (Ty->isVectorTy()) {
2162        EVT ObjectVT = getValueType(Ty);
2163        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
2164        unsigned NumElts = ObjectVT.getVectorNumElements();
2165        assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
2166               "Vector was not scalarized");
2167        EVT EltVT = ObjectVT.getVectorElementType();
2168
2169        // V1 load
2170        // f32 = load ...
2171        if (NumElts == 1) {
2172          // We only have one element, so just directly load it
2173          Value *SrcValue = Constant::getNullValue(PointerType::get(
2174              EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
2175          SDValue P = DAG.getLoad(
2176              EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
2177              false, true,
2178              TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
2179          if (P.getNode())
2180            P.getNode()->setIROrder(idx + 1);
2181
2182          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
2183            P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
2184          InVals.push_back(P);
2185          ++InsIdx;
2186        } else if (NumElts == 2) {
2187          // V2 load
2188          // f32,f32 = load ...
2189          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
2190          Value *SrcValue = Constant::getNullValue(PointerType::get(
2191              VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
2192          SDValue P = DAG.getLoad(
2193              VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
2194              false, true,
2195              TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
2196          if (P.getNode())
2197            P.getNode()->setIROrder(idx + 1);
2198
2199          SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2200                                     DAG.getIntPtrConstant(0));
2201          SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2202                                     DAG.getIntPtrConstant(1));
2203
2204          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
2205            Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
2206            Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
2207          }
2208
2209          InVals.push_back(Elt0);
2210          InVals.push_back(Elt1);
2211          InsIdx += 2;
2212        } else {
2213          // V4 loads
2214          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
2215          // the vector will be expanded to a power of 2 elements, so we know
2216          // we can always round up to the next multiple of 4 when creating
2217          // the vector loads.
2219          // e.g.  4 elem => 1 ld.v4
2220          //       6 elem => 2 ld.v4
2221          //       8 elem => 2 ld.v4
2222          //      11 elem => 3 ld.v4
2223          unsigned VecSize = 4;
2224          if (EltVT.getSizeInBits() == 64) {
2225            VecSize = 2;
2226          }
2227          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
2228          unsigned Ofst = 0;
2229          for (unsigned i = 0; i < NumElts; i += VecSize) {
2230            Value *SrcValue = Constant::getNullValue(
2231                PointerType::get(VecVT.getTypeForEVT(F->getContext()),
2232                                 llvm::ADDRESS_SPACE_PARAM));
2233            SDValue SrcAddr =
2234                DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
2235                            DAG.getConstant(Ofst, getPointerTy()));
2236            SDValue P = DAG.getLoad(
2237                VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
2238                false, true,
2239                TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
2240            if (P.getNode())
2241              P.getNode()->setIROrder(idx + 1);
2242
2243            for (unsigned j = 0; j < VecSize; ++j) {
2244              if (i + j >= NumElts)
2245                break;
2246              SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2247                                        DAG.getIntPtrConstant(j));
2248              if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
2249                Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
2250              InVals.push_back(Elt);
2251            }
2252            Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
2253          }
2254          InsIdx += NumElts;
2255        }
2256
2257        if (NumElts > 0)
2258          --InsIdx;
2259        continue;
2260      }
2261      // A plain scalar.
2262      EVT ObjectVT = getValueType(Ty);
2263      // If ABI, load from the param symbol
2264      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
2265      Value *srcValue = Constant::getNullValue(PointerType::get(
2266          ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
2267      SDValue p;
2268      if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
2269        ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
2270                                       ISD::SEXTLOAD : ISD::ZEXTLOAD;
2271        p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
2272                           MachinePointerInfo(srcValue), ObjectVT, false, false,
2273                           false,
2274        TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
2275      } else {
2276        p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
2277                        MachinePointerInfo(srcValue), false, false, false,
2278        TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
2279      }
2280      if (p.getNode())
2281        p.getNode()->setIROrder(idx + 1);
2282      InVals.push_back(p);
2283      continue;
2284    }
2285
2286    // Param has ByVal attribute
2287    // Return MoveParam(param symbol).
2288    // Ideally, the param symbol could be returned directly,
2289    // but when the SDNode builder decides to use it in a CopyToReg(),
2290    // machine instruction selection fails because TargetExternalSymbol
2291    // (not lowered) is target dependent, and CopyToReg assumes
2292    // the source is lowered.
2293    EVT ObjectVT = getValueType(Ty);
2294    assert(ObjectVT == Ins[InsIdx].VT &&
2295           "Ins type did not match function type");
2296    SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
2297    SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
2298    if (p.getNode())
2299      p.getNode()->setIROrder(idx + 1);
2300    if (isKernel)
2301      InVals.push_back(p);
2302    else {
2303      SDValue p2 = DAG.getNode(
2304          ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
2305          DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
2306      InVals.push_back(p2);
2307    }
2308  }
2309
2310  // Clang will check for explicit varargs and issue an error if any are
2311  // present. However, Clang will let code with implicit varargs, like f(),
2312  // pass. See bug 617733.
2313  // We treat this case as if the arg list is empty.
2314  // if (F.isVarArg()) {
2315  // assert(0 && "VarArg not supported yet!");
2316  //}
2317
2318  if (!OutChains.empty())
2319    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
2320
2321  return Chain;
2322}
2323
2324
2325SDValue
2326NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2327                                 bool isVarArg,
2328                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
2329                                 const SmallVectorImpl<SDValue> &OutVals,
2330                                 SDLoc dl, SelectionDAG &DAG) const {
2331  MachineFunction &MF = DAG.getMachineFunction();
2332  const Function *F = MF.getFunction();
2333  Type *RetTy = F->getReturnType();
2334  const DataLayout *TD = getDataLayout();
2335
2336  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
2337  assert(isABI && "Non-ABI compilation is not supported");
2338  if (!isABI)
2339    return Chain;
2340
2341  if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
2342    // If we have a vector type, the OutVals array will be the scalarized
2343    // components, and we have to combine them into 1 or more vector stores.
2344    unsigned NumElts = VTy->getNumElements();
2345    assert(NumElts == Outs.size() && "Bad scalarization of return value");
2346
2348    EVT EltVT = getValueType(RetTy).getVectorElementType();
2349    bool NeedExtend = false;
2350    if (EltVT.getSizeInBits() < 16)
2351      NeedExtend = true;
2352
2353    // V1 store
2354    if (NumElts == 1) {
2355      SDValue StoreVal = OutVals[0];
2356      // We only have one element, so just directly store it
2357      if (NeedExtend)
2358        StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
2359      SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };
2360      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
2361                                      DAG.getVTList(MVT::Other), Ops,
2362                                      EltVT, MachinePointerInfo());
2363
2364    } else if (NumElts == 2) {
2365      // V2 store
2366      SDValue StoreVal0 = OutVals[0];
2367      SDValue StoreVal1 = OutVals[1];
2368
2369      if (NeedExtend) {
2370        StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
2371        StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
2372      }
2373
2374      SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,
2375                        StoreVal1 };
2376      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
2377                                      DAG.getVTList(MVT::Other), Ops,
2378                                      EltVT, MachinePointerInfo());
2379    } else {
2380      // V4 stores
2381      // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
2382      // vector will be expanded to a power of 2 elements, so we know we can
2383      // always round up to the next multiple of 4 when creating the vector
2384      // stores.
2385      // e.g.  4 elem => 1 st.v4
2386      //       6 elem => 2 st.v4
2387      //       8 elem => 2 st.v4
2388      //      11 elem => 3 st.v4
2389
2390      unsigned VecSize = 4;
2391      if (OutVals[0].getValueType().getSizeInBits() == 64)
2392        VecSize = 2;
2393
2394      unsigned Offset = 0;
2395
2396      EVT VecVT =
2397          EVT::getVectorVT(F->getContext(), EltVT, VecSize);
2398      unsigned PerStoreOffset =
2399          TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
2400
2401      for (unsigned i = 0; i < NumElts; i += VecSize) {
2402        // Get values
2403        SDValue StoreVal;
2404        SmallVector<SDValue, 8> Ops;
2405        Ops.push_back(Chain);
2406        Ops.push_back(DAG.getConstant(Offset, MVT::i32));
2407        unsigned Opc = NVPTXISD::StoreRetvalV2;
2408        EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();
2409
2410        StoreVal = OutVals[i];
2411        if (NeedExtend)
2412          StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2413        Ops.push_back(StoreVal);
2414
2415        if (i + 1 < NumElts) {
2416          StoreVal = OutVals[i + 1];
2417          if (NeedExtend)
2418            StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2419        } else {
2420          StoreVal = DAG.getUNDEF(ExtendedVT);
2421        }
2422        Ops.push_back(StoreVal);
2423
2424        if (VecSize == 4) {
2425          Opc = NVPTXISD::StoreRetvalV4;
2426          if (i + 2 < NumElts) {
2427            StoreVal = OutVals[i + 2];
2428            if (NeedExtend)
2429              StoreVal =
2430                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2431          } else {
2432            StoreVal = DAG.getUNDEF(ExtendedVT);
2433          }
2434          Ops.push_back(StoreVal);
2435
2436          if (i + 3 < NumElts) {
2437            StoreVal = OutVals[i + 3];
2438            if (NeedExtend)
2439              StoreVal =
2440                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2441          } else {
2442            StoreVal = DAG.getUNDEF(ExtendedVT);
2443          }
2444          Ops.push_back(StoreVal);
2445        }
2446
2448        Chain =
2449            DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
2450                                    EltVT, MachinePointerInfo());
2451        Offset += PerStoreOffset;
2452      }
2453    }
2454  } else {
2455    SmallVector<EVT, 16> ValVTs;
2456    SmallVector<uint64_t, 16> Offsets;
2457    ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
2458    assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
2459
2460    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2461      SDValue theVal = OutVals[i];
2462      EVT TheValType = theVal.getValueType();
2463      unsigned numElems = 1;
2464      if (TheValType.isVector())
2465        numElems = TheValType.getVectorNumElements();
2466      for (unsigned j = 0, je = numElems; j != je; ++j) {
2467        SDValue TmpVal = theVal;
2468        if (TheValType.isVector())
2469          TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
2470                               TheValType.getVectorElementType(), TmpVal,
2471                               DAG.getIntPtrConstant(j));
2472        EVT TheStoreType = ValVTs[i];
2473        if (RetTy->isIntegerTy() &&
2474            TD->getTypeAllocSizeInBits(RetTy) < 32) {
2475          // The following zero-extension is for integer types only, and
2476          // specifically not for aggregates.
2477          TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
2478          TheStoreType = MVT::i32;
2479        }
2480        else if (TmpVal.getValueType().getSizeInBits() < 16)
2481          TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
2482
2483        SDValue Ops[] = {
2484          Chain,
2485          DAG.getConstant(Offsets[i], MVT::i32),
2486          TmpVal };
2487        Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
2488                                        DAG.getVTList(MVT::Other), Ops,
2489                                        TheStoreType,
2490                                        MachinePointerInfo());
2491      }
2492    }
2493  }
2494
2495  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
2496}
2497
2498
2499void NVPTXTargetLowering::LowerAsmOperandForConstraint(
2500    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
2501    SelectionDAG &DAG) const {
2502  if (Constraint.length() > 1)
2503    return;
2504  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
2506}
2507
2508// NVPTX supports vectors of legal types of any length in intrinsics, because
2509// the NVPTX-specific type legalizer will legalize them to the PTX-supported
2510// length.
2511bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
2512  if (isTypeLegal(VT))
2513    return true;
2514  if (VT.isVector()) {
2515    MVT eVT = VT.getVectorElementType();
2516    if (isTypeLegal(eVT))
2517      return true;
2518  }
2519  return false;
2520}
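
// A small illustration (hypothetical value, not from the original source):
// an intrinsic operand of type v8i32 is accepted here even though v8i32
// itself is not a legal type, because its element type i32 is legal and the
// type legalizer can later split the vector into PTX-supported pieces.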
2521
2522static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
2523  switch (Intrinsic) {
2524  default:
2525    return 0;
2526
2527  case Intrinsic::nvvm_tex_1d_v4f32_s32:
2528    return NVPTXISD::Tex1DFloatS32;
2529  case Intrinsic::nvvm_tex_1d_v4f32_f32:
2530    return NVPTXISD::Tex1DFloatFloat;
2531  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
2532    return NVPTXISD::Tex1DFloatFloatLevel;
2533  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
2534    return NVPTXISD::Tex1DFloatFloatGrad;
2535  case Intrinsic::nvvm_tex_1d_v4s32_s32:
2536    return NVPTXISD::Tex1DS32S32;
2537  case Intrinsic::nvvm_tex_1d_v4s32_f32:
2538    return NVPTXISD::Tex1DS32Float;
2539  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
2540    return NVPTXISD::Tex1DS32FloatLevel;
2541  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
2542    return NVPTXISD::Tex1DS32FloatGrad;
2543  case Intrinsic::nvvm_tex_1d_v4u32_s32:
2544    return NVPTXISD::Tex1DU32S32;
2545  case Intrinsic::nvvm_tex_1d_v4u32_f32:
2546    return NVPTXISD::Tex1DU32Float;
2547  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
2548    return NVPTXISD::Tex1DU32FloatLevel;
2549  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
2550    return NVPTXISD::Tex1DU32FloatGrad;
2551
2552  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
2553    return NVPTXISD::Tex1DArrayFloatS32;
2554  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
2555    return NVPTXISD::Tex1DArrayFloatFloat;
2556  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
2557    return NVPTXISD::Tex1DArrayFloatFloatLevel;
2558  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
2559    return NVPTXISD::Tex1DArrayFloatFloatGrad;
2560  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
2561    return NVPTXISD::Tex1DArrayS32S32;
2562  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
2563    return NVPTXISD::Tex1DArrayS32Float;
2564  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
2565    return NVPTXISD::Tex1DArrayS32FloatLevel;
2566  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
2567    return NVPTXISD::Tex1DArrayS32FloatGrad;
2568  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
2569    return NVPTXISD::Tex1DArrayU32S32;
2570  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
2571    return NVPTXISD::Tex1DArrayU32Float;
2572  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
2573    return NVPTXISD::Tex1DArrayU32FloatLevel;
2574  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
2575    return NVPTXISD::Tex1DArrayU32FloatGrad;
2576
2577  case Intrinsic::nvvm_tex_2d_v4f32_s32:
2578    return NVPTXISD::Tex2DFloatS32;
2579  case Intrinsic::nvvm_tex_2d_v4f32_f32:
2580    return NVPTXISD::Tex2DFloatFloat;
2581  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
2582    return NVPTXISD::Tex2DFloatFloatLevel;
2583  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
2584    return NVPTXISD::Tex2DFloatFloatGrad;
2585  case Intrinsic::nvvm_tex_2d_v4s32_s32:
2586    return NVPTXISD::Tex2DS32S32;
2587  case Intrinsic::nvvm_tex_2d_v4s32_f32:
2588    return NVPTXISD::Tex2DS32Float;
2589  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
2590    return NVPTXISD::Tex2DS32FloatLevel;
2591  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
2592    return NVPTXISD::Tex2DS32FloatGrad;
2593  case Intrinsic::nvvm_tex_2d_v4u32_s32:
2594    return NVPTXISD::Tex2DU32S32;
2595  case Intrinsic::nvvm_tex_2d_v4u32_f32:
2596    return NVPTXISD::Tex2DU32Float;
2597  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
2598    return NVPTXISD::Tex2DU32FloatLevel;
2599  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
2600    return NVPTXISD::Tex2DU32FloatGrad;
2601
2602  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
2603    return NVPTXISD::Tex2DArrayFloatS32;
2604  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
2605    return NVPTXISD::Tex2DArrayFloatFloat;
2606  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
2607    return NVPTXISD::Tex2DArrayFloatFloatLevel;
2608  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
2609    return NVPTXISD::Tex2DArrayFloatFloatGrad;
2610  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
2611    return NVPTXISD::Tex2DArrayS32S32;
2612  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
2613    return NVPTXISD::Tex2DArrayS32Float;
2614  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
2615    return NVPTXISD::Tex2DArrayS32FloatLevel;
2616  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
2617    return NVPTXISD::Tex2DArrayS32FloatGrad;
2618  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
2619    return NVPTXISD::Tex2DArrayU32S32;
2620  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
2621    return NVPTXISD::Tex2DArrayU32Float;
2622  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
2623    return NVPTXISD::Tex2DArrayU32FloatLevel;
2624  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
2625    return NVPTXISD::Tex2DArrayU32FloatGrad;
2626
2627  case Intrinsic::nvvm_tex_3d_v4f32_s32:
2628    return NVPTXISD::Tex3DFloatS32;
2629  case Intrinsic::nvvm_tex_3d_v4f32_f32:
2630    return NVPTXISD::Tex3DFloatFloat;
2631  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
2632    return NVPTXISD::Tex3DFloatFloatLevel;
2633  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
2634    return NVPTXISD::Tex3DFloatFloatGrad;
2635  case Intrinsic::nvvm_tex_3d_v4s32_s32:
2636    return NVPTXISD::Tex3DS32S32;
2637  case Intrinsic::nvvm_tex_3d_v4s32_f32:
2638    return NVPTXISD::Tex3DS32Float;
2639  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
2640    return NVPTXISD::Tex3DS32FloatLevel;
2641  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
2642    return NVPTXISD::Tex3DS32FloatGrad;
2643  case Intrinsic::nvvm_tex_3d_v4u32_s32:
2644    return NVPTXISD::Tex3DU32S32;
2645  case Intrinsic::nvvm_tex_3d_v4u32_f32:
2646    return NVPTXISD::Tex3DU32Float;
2647  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
2648    return NVPTXISD::Tex3DU32FloatLevel;
2649  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
2650    return NVPTXISD::Tex3DU32FloatGrad;
2651
2652  case Intrinsic::nvvm_tex_cube_v4f32_f32:
2653    return NVPTXISD::TexCubeFloatFloat;
2654  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
2655    return NVPTXISD::TexCubeFloatFloatLevel;
2656  case Intrinsic::nvvm_tex_cube_v4s32_f32:
2657    return NVPTXISD::TexCubeS32Float;
2658  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
2659    return NVPTXISD::TexCubeS32FloatLevel;
2660  case Intrinsic::nvvm_tex_cube_v4u32_f32:
2661    return NVPTXISD::TexCubeU32Float;
2662  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
2663    return NVPTXISD::TexCubeU32FloatLevel;
2664
2665  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
2666    return NVPTXISD::TexCubeArrayFloatFloat;
2667  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
2668    return NVPTXISD::TexCubeArrayFloatFloatLevel;
2669  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
2670    return NVPTXISD::TexCubeArrayS32Float;
2671  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
2672    return NVPTXISD::TexCubeArrayS32FloatLevel;
2673  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
2674    return NVPTXISD::TexCubeArrayU32Float;
2675  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
2676    return NVPTXISD::TexCubeArrayU32FloatLevel;
2677
2678  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
2679    return NVPTXISD::Tld4R2DFloatFloat;
2680  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
2681    return NVPTXISD::Tld4G2DFloatFloat;
2682  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
2683    return NVPTXISD::Tld4B2DFloatFloat;
2684  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
2685    return NVPTXISD::Tld4A2DFloatFloat;
2686  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
2687    return NVPTXISD::Tld4R2DS64Float;
2688  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
2689    return NVPTXISD::Tld4G2DS64Float;
2690  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
2691    return NVPTXISD::Tld4B2DS64Float;
2692  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
2693    return NVPTXISD::Tld4A2DS64Float;
2694  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
2695    return NVPTXISD::Tld4R2DU64Float;
2696  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
2697    return NVPTXISD::Tld4G2DU64Float;
2698  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
2699    return NVPTXISD::Tld4B2DU64Float;
2700  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
2701    return NVPTXISD::Tld4A2DU64Float;
2702
2703  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
2704    return NVPTXISD::TexUnified1DFloatS32;
2705  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
2706    return NVPTXISD::TexUnified1DFloatFloat;
2707  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
2708    return NVPTXISD::TexUnified1DFloatFloatLevel;
2709  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
2710    return NVPTXISD::TexUnified1DFloatFloatGrad;
2711  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
2712    return NVPTXISD::TexUnified1DS32S32;
2713  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
2714    return NVPTXISD::TexUnified1DS32Float;
2715  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
2716    return NVPTXISD::TexUnified1DS32FloatLevel;
2717  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
2718    return NVPTXISD::TexUnified1DS32FloatGrad;
2719  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
2720    return NVPTXISD::TexUnified1DU32S32;
2721  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
2722    return NVPTXISD::TexUnified1DU32Float;
2723  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
2724    return NVPTXISD::TexUnified1DU32FloatLevel;
2725  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
2726    return NVPTXISD::TexUnified1DU32FloatGrad;
2727
2728  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
2729    return NVPTXISD::TexUnified1DArrayFloatS32;
2730  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
2731    return NVPTXISD::TexUnified1DArrayFloatFloat;
2732  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
2733    return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
2734  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
2735    return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
2736  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
2737    return NVPTXISD::TexUnified1DArrayS32S32;
2738  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
2739    return NVPTXISD::TexUnified1DArrayS32Float;
2740  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
2741    return NVPTXISD::TexUnified1DArrayS32FloatLevel;
2742  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
2743    return NVPTXISD::TexUnified1DArrayS32FloatGrad;
2744  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
2745    return NVPTXISD::TexUnified1DArrayU32S32;
2746  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
2747    return NVPTXISD::TexUnified1DArrayU32Float;
2748  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
2749    return NVPTXISD::TexUnified1DArrayU32FloatLevel;
2750  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
2751    return NVPTXISD::TexUnified1DArrayU32FloatGrad;
2752
2753  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
2754    return NVPTXISD::TexUnified2DFloatS32;
2755  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
2756    return NVPTXISD::TexUnified2DFloatFloat;
2757  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
2758    return NVPTXISD::TexUnified2DFloatFloatLevel;
2759  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
2760    return NVPTXISD::TexUnified2DFloatFloatGrad;
2761  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
2762    return NVPTXISD::TexUnified2DS32S32;
2763  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
2764    return NVPTXISD::TexUnified2DS32Float;
2765  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
2766    return NVPTXISD::TexUnified2DS32FloatLevel;
2767  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
2768    return NVPTXISD::TexUnified2DS32FloatGrad;
2769  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
2770    return NVPTXISD::TexUnified2DU32S32;
2771  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
2772    return NVPTXISD::TexUnified2DU32Float;
2773  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
2774    return NVPTXISD::TexUnified2DU32FloatLevel;
2775  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
2776    return NVPTXISD::TexUnified2DU32FloatGrad;
2777
2778  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
2779    return NVPTXISD::TexUnified2DArrayFloatS32;
2780  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
2781    return NVPTXISD::TexUnified2DArrayFloatFloat;
2782  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
2783    return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
2784  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
2785    return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
2786  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
2787    return NVPTXISD::TexUnified2DArrayS32S32;
2788  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
2789    return NVPTXISD::TexUnified2DArrayS32Float;
2790  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
2791    return NVPTXISD::TexUnified2DArrayS32FloatLevel;
2792  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
2793    return NVPTXISD::TexUnified2DArrayS32FloatGrad;
2794  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
2795    return NVPTXISD::TexUnified2DArrayU32S32;
2796  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
2797    return NVPTXISD::TexUnified2DArrayU32Float;
2798  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
2799    return NVPTXISD::TexUnified2DArrayU32FloatLevel;
2800  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
2801    return NVPTXISD::TexUnified2DArrayU32FloatGrad;
2802
2803  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
2804    return NVPTXISD::TexUnified3DFloatS32;
2805  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
2806    return NVPTXISD::TexUnified3DFloatFloat;
2807  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
2808    return NVPTXISD::TexUnified3DFloatFloatLevel;
2809  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
2810    return NVPTXISD::TexUnified3DFloatFloatGrad;
2811  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
2812    return NVPTXISD::TexUnified3DS32S32;
2813  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
2814    return NVPTXISD::TexUnified3DS32Float;
2815  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
2816    return NVPTXISD::TexUnified3DS32FloatLevel;
2817  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
2818    return NVPTXISD::TexUnified3DS32FloatGrad;
2819  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
2820    return NVPTXISD::TexUnified3DU32S32;
2821  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
2822    return NVPTXISD::TexUnified3DU32Float;
2823  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
2824    return NVPTXISD::TexUnified3DU32FloatLevel;
2825  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
2826    return NVPTXISD::TexUnified3DU32FloatGrad;
2827
2828  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
2829    return NVPTXISD::TexUnifiedCubeFloatFloat;
2830  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
2831    return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
2832  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
2833    return NVPTXISD::TexUnifiedCubeS32Float;
2834  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
2835    return NVPTXISD::TexUnifiedCubeS32FloatLevel;
2836  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
2837    return NVPTXISD::TexUnifiedCubeU32Float;
2838  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
2839    return NVPTXISD::TexUnifiedCubeU32FloatLevel;
2840
2841  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
2842    return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
2843  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
2844    return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
2845  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
2846    return NVPTXISD::TexUnifiedCubeArrayS32Float;
2847  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
2848    return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
2849  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
2850    return NVPTXISD::TexUnifiedCubeArrayU32Float;
2851  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
2852    return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
2853
2854  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
2855    return NVPTXISD::Tld4UnifiedR2DFloatFloat;
2856  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
2857    return NVPTXISD::Tld4UnifiedG2DFloatFloat;
2858  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
2859    return NVPTXISD::Tld4UnifiedB2DFloatFloat;
2860  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
2861    return NVPTXISD::Tld4UnifiedA2DFloatFloat;
2862  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
2863    return NVPTXISD::Tld4UnifiedR2DS64Float;
2864  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
2865    return NVPTXISD::Tld4UnifiedG2DS64Float;
2866  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
2867    return NVPTXISD::Tld4UnifiedB2DS64Float;
2868  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
2869    return NVPTXISD::Tld4UnifiedA2DS64Float;
2870  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
2871    return NVPTXISD::Tld4UnifiedR2DU64Float;
2872  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
2873    return NVPTXISD::Tld4UnifiedG2DU64Float;
2874  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
2875    return NVPTXISD::Tld4UnifiedB2DU64Float;
2876  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
2877    return NVPTXISD::Tld4UnifiedA2DU64Float;
2878  }
2879}
2880
2881static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
2882  switch (Intrinsic) {
2883  default:
2884    return 0;
2885  case Intrinsic::nvvm_suld_1d_i8_clamp:
2886    return NVPTXISD::Suld1DI8Clamp;
2887  case Intrinsic::nvvm_suld_1d_i16_clamp:
2888    return NVPTXISD::Suld1DI16Clamp;
2889  case Intrinsic::nvvm_suld_1d_i32_clamp:
2890    return NVPTXISD::Suld1DI32Clamp;
2891  case Intrinsic::nvvm_suld_1d_i64_clamp:
2892    return NVPTXISD::Suld1DI64Clamp;
2893  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
2894    return NVPTXISD::Suld1DV2I8Clamp;
2895  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
2896    return NVPTXISD::Suld1DV2I16Clamp;
2897  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
2898    return NVPTXISD::Suld1DV2I32Clamp;
2899  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
2900    return NVPTXISD::Suld1DV2I64Clamp;
2901  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
2902    return NVPTXISD::Suld1DV4I8Clamp;
2903  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
2904    return NVPTXISD::Suld1DV4I16Clamp;
2905  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
2906    return NVPTXISD::Suld1DV4I32Clamp;
2907  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
2908    return NVPTXISD::Suld1DArrayI8Clamp;
2909  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
2910    return NVPTXISD::Suld1DArrayI16Clamp;
2911  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
2912    return NVPTXISD::Suld1DArrayI32Clamp;
2913  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
2914    return NVPTXISD::Suld1DArrayI64Clamp;
2915  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
2916    return NVPTXISD::Suld1DArrayV2I8Clamp;
2917  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
2918    return NVPTXISD::Suld1DArrayV2I16Clamp;
2919  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
2920    return NVPTXISD::Suld1DArrayV2I32Clamp;
2921  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
2922    return NVPTXISD::Suld1DArrayV2I64Clamp;
2923  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
2924    return NVPTXISD::Suld1DArrayV4I8Clamp;
2925  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
2926    return NVPTXISD::Suld1DArrayV4I16Clamp;
2927  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
2928    return NVPTXISD::Suld1DArrayV4I32Clamp;
2929  case Intrinsic::nvvm_suld_2d_i8_clamp:
2930    return NVPTXISD::Suld2DI8Clamp;
2931  case Intrinsic::nvvm_suld_2d_i16_clamp:
2932    return NVPTXISD::Suld2DI16Clamp;
2933  case Intrinsic::nvvm_suld_2d_i32_clamp:
2934    return NVPTXISD::Suld2DI32Clamp;
2935  case Intrinsic::nvvm_suld_2d_i64_clamp:
2936    return NVPTXISD::Suld2DI64Clamp;
2937  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
2938    return NVPTXISD::Suld2DV2I8Clamp;
2939  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
2940    return NVPTXISD::Suld2DV2I16Clamp;
2941  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
2942    return NVPTXISD::Suld2DV2I32Clamp;
2943  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
2944    return NVPTXISD::Suld2DV2I64Clamp;
2945  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
2946    return NVPTXISD::Suld2DV4I8Clamp;
2947  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
2948    return NVPTXISD::Suld2DV4I16Clamp;
2949  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
2950    return NVPTXISD::Suld2DV4I32Clamp;
2951  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
2952    return NVPTXISD::Suld2DArrayI8Clamp;
2953  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
2954    return NVPTXISD::Suld2DArrayI16Clamp;
2955  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
2956    return NVPTXISD::Suld2DArrayI32Clamp;
2957  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
2958    return NVPTXISD::Suld2DArrayI64Clamp;
2959  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
2960    return NVPTXISD::Suld2DArrayV2I8Clamp;
2961  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
2962    return NVPTXISD::Suld2DArrayV2I16Clamp;
2963  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
2964    return NVPTXISD::Suld2DArrayV2I32Clamp;
2965  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
2966    return NVPTXISD::Suld2DArrayV2I64Clamp;
2967  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
2968    return NVPTXISD::Suld2DArrayV4I8Clamp;
2969  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
2970    return NVPTXISD::Suld2DArrayV4I16Clamp;
2971  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
2972    return NVPTXISD::Suld2DArrayV4I32Clamp;
2973  case Intrinsic::nvvm_suld_3d_i8_clamp:
2974    return NVPTXISD::Suld3DI8Clamp;
2975  case Intrinsic::nvvm_suld_3d_i16_clamp:
2976    return NVPTXISD::Suld3DI16Clamp;
2977  case Intrinsic::nvvm_suld_3d_i32_clamp:
2978    return NVPTXISD::Suld3DI32Clamp;
2979  case Intrinsic::nvvm_suld_3d_i64_clamp:
2980    return NVPTXISD::Suld3DI64Clamp;
2981  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
2982    return NVPTXISD::Suld3DV2I8Clamp;
2983  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
2984    return NVPTXISD::Suld3DV2I16Clamp;
2985  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
2986    return NVPTXISD::Suld3DV2I32Clamp;
2987  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
2988    return NVPTXISD::Suld3DV2I64Clamp;
2989  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
2990    return NVPTXISD::Suld3DV4I8Clamp;
2991  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
2992    return NVPTXISD::Suld3DV4I16Clamp;
2993  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
2994    return NVPTXISD::Suld3DV4I32Clamp;
2995  case Intrinsic::nvvm_suld_1d_i8_trap:
2996    return NVPTXISD::Suld1DI8Trap;
2997  case Intrinsic::nvvm_suld_1d_i16_trap:
2998    return NVPTXISD::Suld1DI16Trap;
2999  case Intrinsic::nvvm_suld_1d_i32_trap:
3000    return NVPTXISD::Suld1DI32Trap;
3001  case Intrinsic::nvvm_suld_1d_i64_trap:
3002    return NVPTXISD::Suld1DI64Trap;
3003  case Intrinsic::nvvm_suld_1d_v2i8_trap:
3004    return NVPTXISD::Suld1DV2I8Trap;
3005  case Intrinsic::nvvm_suld_1d_v2i16_trap:
3006    return NVPTXISD::Suld1DV2I16Trap;
3007  case Intrinsic::nvvm_suld_1d_v2i32_trap:
3008    return NVPTXISD::Suld1DV2I32Trap;
3009  case Intrinsic::nvvm_suld_1d_v2i64_trap:
3010    return NVPTXISD::Suld1DV2I64Trap;
3011  case Intrinsic::nvvm_suld_1d_v4i8_trap:
3012    return NVPTXISD::Suld1DV4I8Trap;
3013  case Intrinsic::nvvm_suld_1d_v4i16_trap:
3014    return NVPTXISD::Suld1DV4I16Trap;
3015  case Intrinsic::nvvm_suld_1d_v4i32_trap:
3016    return NVPTXISD::Suld1DV4I32Trap;
3017  case Intrinsic::nvvm_suld_1d_array_i8_trap:
3018    return NVPTXISD::Suld1DArrayI8Trap;
3019  case Intrinsic::nvvm_suld_1d_array_i16_trap:
3020    return NVPTXISD::Suld1DArrayI16Trap;
3021  case Intrinsic::nvvm_suld_1d_array_i32_trap:
3022    return NVPTXISD::Suld1DArrayI32Trap;
3023  case Intrinsic::nvvm_suld_1d_array_i64_trap:
3024    return NVPTXISD::Suld1DArrayI64Trap;
3025  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
3026    return NVPTXISD::Suld1DArrayV2I8Trap;
3027  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
3028    return NVPTXISD::Suld1DArrayV2I16Trap;
3029  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
3030    return NVPTXISD::Suld1DArrayV2I32Trap;
3031  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
3032    return NVPTXISD::Suld1DArrayV2I64Trap;
3033  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
3034    return NVPTXISD::Suld1DArrayV4I8Trap;
3035  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
3036    return NVPTXISD::Suld1DArrayV4I16Trap;
3037  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
3038    return NVPTXISD::Suld1DArrayV4I32Trap;
3039  case Intrinsic::nvvm_suld_2d_i8_trap:
3040    return NVPTXISD::Suld2DI8Trap;
3041  case Intrinsic::nvvm_suld_2d_i16_trap:
3042    return NVPTXISD::Suld2DI16Trap;
3043  case Intrinsic::nvvm_suld_2d_i32_trap:
3044    return NVPTXISD::Suld2DI32Trap;
3045  case Intrinsic::nvvm_suld_2d_i64_trap:
3046    return NVPTXISD::Suld2DI64Trap;
3047  case Intrinsic::nvvm_suld_2d_v2i8_trap:
3048    return NVPTXISD::Suld2DV2I8Trap;
3049  case Intrinsic::nvvm_suld_2d_v2i16_trap:
3050    return NVPTXISD::Suld2DV2I16Trap;
3051  case Intrinsic::nvvm_suld_2d_v2i32_trap:
3052    return NVPTXISD::Suld2DV2I32Trap;
3053  case Intrinsic::nvvm_suld_2d_v2i64_trap:
3054    return NVPTXISD::Suld2DV2I64Trap;
3055  case Intrinsic::nvvm_suld_2d_v4i8_trap:
3056    return NVPTXISD::Suld2DV4I8Trap;
3057  case Intrinsic::nvvm_suld_2d_v4i16_trap:
3058    return NVPTXISD::Suld2DV4I16Trap;
3059  case Intrinsic::nvvm_suld_2d_v4i32_trap:
3060    return NVPTXISD::Suld2DV4I32Trap;
3061  case Intrinsic::nvvm_suld_2d_array_i8_trap:
3062    return NVPTXISD::Suld2DArrayI8Trap;
3063  case Intrinsic::nvvm_suld_2d_array_i16_trap:
3064    return NVPTXISD::Suld2DArrayI16Trap;
3065  case Intrinsic::nvvm_suld_2d_array_i32_trap:
3066    return NVPTXISD::Suld2DArrayI32Trap;
3067  case Intrinsic::nvvm_suld_2d_array_i64_trap:
3068    return NVPTXISD::Suld2DArrayI64Trap;
3069  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
3070    return NVPTXISD::Suld2DArrayV2I8Trap;
3071  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
3072    return NVPTXISD::Suld2DArrayV2I16Trap;
3073  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
3074    return NVPTXISD::Suld2DArrayV2I32Trap;
3075  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
3076    return NVPTXISD::Suld2DArrayV2I64Trap;
3077  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
3078    return NVPTXISD::Suld2DArrayV4I8Trap;
3079  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
3080    return NVPTXISD::Suld2DArrayV4I16Trap;
3081  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
3082    return NVPTXISD::Suld2DArrayV4I32Trap;
3083  case Intrinsic::nvvm_suld_3d_i8_trap:
3084    return NVPTXISD::Suld3DI8Trap;
3085  case Intrinsic::nvvm_suld_3d_i16_trap:
3086    return NVPTXISD::Suld3DI16Trap;
3087  case Intrinsic::nvvm_suld_3d_i32_trap:
3088    return NVPTXISD::Suld3DI32Trap;
3089  case Intrinsic::nvvm_suld_3d_i64_trap:
3090    return NVPTXISD::Suld3DI64Trap;
3091  case Intrinsic::nvvm_suld_3d_v2i8_trap:
3092    return NVPTXISD::Suld3DV2I8Trap;
3093  case Intrinsic::nvvm_suld_3d_v2i16_trap:
3094    return NVPTXISD::Suld3DV2I16Trap;
3095  case Intrinsic::nvvm_suld_3d_v2i32_trap:
3096    return NVPTXISD::Suld3DV2I32Trap;
3097  case Intrinsic::nvvm_suld_3d_v2i64_trap:
3098    return NVPTXISD::Suld3DV2I64Trap;
3099  case Intrinsic::nvvm_suld_3d_v4i8_trap:
3100    return NVPTXISD::Suld3DV4I8Trap;
3101  case Intrinsic::nvvm_suld_3d_v4i16_trap:
3102    return NVPTXISD::Suld3DV4I16Trap;
3103  case Intrinsic::nvvm_suld_3d_v4i32_trap:
3104    return NVPTXISD::Suld3DV4I32Trap;
3105  case Intrinsic::nvvm_suld_1d_i8_zero:
3106    return NVPTXISD::Suld1DI8Zero;
3107  case Intrinsic::nvvm_suld_1d_i16_zero:
3108    return NVPTXISD::Suld1DI16Zero;
3109  case Intrinsic::nvvm_suld_1d_i32_zero:
3110    return NVPTXISD::Suld1DI32Zero;
3111  case Intrinsic::nvvm_suld_1d_i64_zero:
3112    return NVPTXISD::Suld1DI64Zero;
3113  case Intrinsic::nvvm_suld_1d_v2i8_zero:
3114    return NVPTXISD::Suld1DV2I8Zero;
3115  case Intrinsic::nvvm_suld_1d_v2i16_zero:
3116    return NVPTXISD::Suld1DV2I16Zero;
3117  case Intrinsic::nvvm_suld_1d_v2i32_zero:
3118    return NVPTXISD::Suld1DV2I32Zero;
3119  case Intrinsic::nvvm_suld_1d_v2i64_zero:
3120    return NVPTXISD::Suld1DV2I64Zero;
3121  case Intrinsic::nvvm_suld_1d_v4i8_zero:
3122    return NVPTXISD::Suld1DV4I8Zero;
3123  case Intrinsic::nvvm_suld_1d_v4i16_zero:
3124    return NVPTXISD::Suld1DV4I16Zero;
3125  case Intrinsic::nvvm_suld_1d_v4i32_zero:
3126    return NVPTXISD::Suld1DV4I32Zero;
3127  case Intrinsic::nvvm_suld_1d_array_i8_zero:
3128    return NVPTXISD::Suld1DArrayI8Zero;
3129  case Intrinsic::nvvm_suld_1d_array_i16_zero:
3130    return NVPTXISD::Suld1DArrayI16Zero;
3131  case Intrinsic::nvvm_suld_1d_array_i32_zero:
3132    return NVPTXISD::Suld1DArrayI32Zero;
3133  case Intrinsic::nvvm_suld_1d_array_i64_zero:
3134    return NVPTXISD::Suld1DArrayI64Zero;
3135  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
3136    return NVPTXISD::Suld1DArrayV2I8Zero;
3137  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
3138    return NVPTXISD::Suld1DArrayV2I16Zero;
3139  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
3140    return NVPTXISD::Suld1DArrayV2I32Zero;
3141  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
3142    return NVPTXISD::Suld1DArrayV2I64Zero;
3143  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
3144    return NVPTXISD::Suld1DArrayV4I8Zero;
3145  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
3146    return NVPTXISD::Suld1DArrayV4I16Zero;
3147  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
3148    return NVPTXISD::Suld1DArrayV4I32Zero;
3149  case Intrinsic::nvvm_suld_2d_i8_zero:
3150    return NVPTXISD::Suld2DI8Zero;
3151  case Intrinsic::nvvm_suld_2d_i16_zero:
3152    return NVPTXISD::Suld2DI16Zero;
3153  case Intrinsic::nvvm_suld_2d_i32_zero:
3154    return NVPTXISD::Suld2DI32Zero;
3155  case Intrinsic::nvvm_suld_2d_i64_zero:
3156    return NVPTXISD::Suld2DI64Zero;
3157  case Intrinsic::nvvm_suld_2d_v2i8_zero:
3158    return NVPTXISD::Suld2DV2I8Zero;
3159  case Intrinsic::nvvm_suld_2d_v2i16_zero:
3160    return NVPTXISD::Suld2DV2I16Zero;
3161  case Intrinsic::nvvm_suld_2d_v2i32_zero:
3162    return NVPTXISD::Suld2DV2I32Zero;
3163  case Intrinsic::nvvm_suld_2d_v2i64_zero:
3164    return NVPTXISD::Suld2DV2I64Zero;
3165  case Intrinsic::nvvm_suld_2d_v4i8_zero:
3166    return NVPTXISD::Suld2DV4I8Zero;
3167  case Intrinsic::nvvm_suld_2d_v4i16_zero:
3168    return NVPTXISD::Suld2DV4I16Zero;
3169  case Intrinsic::nvvm_suld_2d_v4i32_zero:
3170    return NVPTXISD::Suld2DV4I32Zero;
3171  case Intrinsic::nvvm_suld_2d_array_i8_zero:
3172    return NVPTXISD::Suld2DArrayI8Zero;
3173  case Intrinsic::nvvm_suld_2d_array_i16_zero:
3174    return NVPTXISD::Suld2DArrayI16Zero;
3175  case Intrinsic::nvvm_suld_2d_array_i32_zero:
3176    return NVPTXISD::Suld2DArrayI32Zero;
3177  case Intrinsic::nvvm_suld_2d_array_i64_zero:
3178    return NVPTXISD::Suld2DArrayI64Zero;
3179  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
3180    return NVPTXISD::Suld2DArrayV2I8Zero;
3181  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
3182    return NVPTXISD::Suld2DArrayV2I16Zero;
3183  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
3184    return NVPTXISD::Suld2DArrayV2I32Zero;
3185  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
3186    return NVPTXISD::Suld2DArrayV2I64Zero;
3187  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
3188    return NVPTXISD::Suld2DArrayV4I8Zero;
3189  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
3190    return NVPTXISD::Suld2DArrayV4I16Zero;
3191  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
3192    return NVPTXISD::Suld2DArrayV4I32Zero;
3193  case Intrinsic::nvvm_suld_3d_i8_zero:
3194    return NVPTXISD::Suld3DI8Zero;
3195  case Intrinsic::nvvm_suld_3d_i16_zero:
3196    return NVPTXISD::Suld3DI16Zero;
3197  case Intrinsic::nvvm_suld_3d_i32_zero:
3198    return NVPTXISD::Suld3DI32Zero;
3199  case Intrinsic::nvvm_suld_3d_i64_zero:
3200    return NVPTXISD::Suld3DI64Zero;
3201  case Intrinsic::nvvm_suld_3d_v2i8_zero:
3202    return NVPTXISD::Suld3DV2I8Zero;
3203  case Intrinsic::nvvm_suld_3d_v2i16_zero:
3204    return NVPTXISD::Suld3DV2I16Zero;
3205  case Intrinsic::nvvm_suld_3d_v2i32_zero:
3206    return NVPTXISD::Suld3DV2I32Zero;
3207  case Intrinsic::nvvm_suld_3d_v2i64_zero:
3208    return NVPTXISD::Suld3DV2I64Zero;
3209  case Intrinsic::nvvm_suld_3d_v4i8_zero:
3210    return NVPTXISD::Suld3DV4I8Zero;
3211  case Intrinsic::nvvm_suld_3d_v4i16_zero:
3212    return NVPTXISD::Suld3DV4I16Zero;
3213  case Intrinsic::nvvm_suld_3d_v4i32_zero:
3214    return NVPTXISD::Suld3DV4I32Zero;
3215  }
3216}
3217
3218// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
3219// TgtMemIntrinsic because we need information that is only available in the
3220// "Value" type of the destination pointer. In particular, we need the
3221// address space information.
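// For example, for the nvvm_atomic_load_add_f32 case below, this fills in
// memVT = f32, records the pointer operand, and marks the intrinsic as both
// reading and writing memory, so the DAG builder can attach the appropriate
// memory operand to the resulting node.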
3223bool NVPTXTargetLowering::getTgtMemIntrinsic(
3224    IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
3225  switch (Intrinsic) {
3226  default:
3227    return false;
3228
3229  case Intrinsic::nvvm_atomic_load_add_f32:
3230    Info.opc = ISD::INTRINSIC_W_CHAIN;
3231    Info.memVT = MVT::f32;
3232    Info.ptrVal = I.getArgOperand(0);
3233    Info.offset = 0;
3234    Info.vol = 0;
3235    Info.readMem = true;
3236    Info.writeMem = true;
3237    Info.align = 0;
3238    return true;
3239
3240  case Intrinsic::nvvm_atomic_load_inc_32:
3241  case Intrinsic::nvvm_atomic_load_dec_32:
3242    Info.opc = ISD::INTRINSIC_W_CHAIN;
3243    Info.memVT = MVT::i32;
3244    Info.ptrVal = I.getArgOperand(0);
3245    Info.offset = 0;
3246    Info.vol = 0;
3247    Info.readMem = true;
3248    Info.writeMem = true;
3249    Info.align = 0;
3250    return true;
3251
3252  case Intrinsic::nvvm_ldu_global_i:
3253  case Intrinsic::nvvm_ldu_global_f:
3254  case Intrinsic::nvvm_ldu_global_p: {
3256    Info.opc = ISD::INTRINSIC_W_CHAIN;
3257    if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
3258      Info.memVT = getPointerTy();
3259    else
3260      Info.memVT = getValueType(I.getType());
3263    Info.ptrVal = I.getArgOperand(0);
3264    Info.offset = 0;
3265    Info.vol = 0;
3266    Info.readMem = true;
3267    Info.writeMem = false;
3268    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3269
3270    return true;
3271  }
3272  case Intrinsic::nvvm_ldg_global_i:
3273  case Intrinsic::nvvm_ldg_global_f:
3274  case Intrinsic::nvvm_ldg_global_p: {
3276    Info.opc = ISD::INTRINSIC_W_CHAIN;
3277    if (Intrinsic == Intrinsic::nvvm_ldg_global_p)
3278      Info.memVT = getPointerTy();
3279    else
3280      Info.memVT = getValueType(I.getType());
3283    Info.ptrVal = I.getArgOperand(0);
3284    Info.offset = 0;
3285    Info.vol = 0;
3286    Info.readMem = true;
3287    Info.writeMem = false;
3288    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3289
3290    return true;
3291  }
3292
3293  case Intrinsic::nvvm_tex_1d_v4f32_s32:
3294  case Intrinsic::nvvm_tex_1d_v4f32_f32:
3295  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
3296  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
3297  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
3298  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
3299  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
3300  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
3301  case Intrinsic::nvvm_tex_2d_v4f32_s32:
3302  case Intrinsic::nvvm_tex_2d_v4f32_f32:
3303  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
3304  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
3305  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
3306  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
3307  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
3308  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
3309  case Intrinsic::nvvm_tex_3d_v4f32_s32:
3310  case Intrinsic::nvvm_tex_3d_v4f32_f32:
3311  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
3312  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
3313  case Intrinsic::nvvm_tex_cube_v4f32_f32:
3314  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
3315  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
3316  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
3317  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
3318  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
3319  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
3320  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
3321  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
3322  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
3323  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
3324  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
3325  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
3326  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
3327  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
3328  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
3329  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
3330  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
3331  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
3332  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
3333  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
3334  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
3335  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
3336  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
3337  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
3338  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
3339  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
3340  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
3341  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
3342  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
3343  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
3344  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
3345  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
3346  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
3347  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
3348  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
3349    Info.opc = getOpcForTextureInstr(Intrinsic);
3350    Info.memVT = MVT::v4f32;
3351    Info.ptrVal = nullptr;
3352    Info.offset = 0;
3353    Info.vol = 0;
3354    Info.readMem = true;
3355    Info.writeMem = false;
3356    Info.align = 16;
3357    return true;
3358  }
3359  case Intrinsic::nvvm_tex_1d_v4s32_s32:
3360  case Intrinsic::nvvm_tex_1d_v4s32_f32:
3361  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
3362  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
3363  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
3364  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
3365  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
3366  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
3367  case Intrinsic::nvvm_tex_2d_v4s32_s32:
3368  case Intrinsic::nvvm_tex_2d_v4s32_f32:
3369  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
3370  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
3371  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
3372  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
3373  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
3374  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
3375  case Intrinsic::nvvm_tex_3d_v4s32_s32:
3376  case Intrinsic::nvvm_tex_3d_v4s32_f32:
3377  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
3378  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
3379  case Intrinsic::nvvm_tex_cube_v4s32_f32:
3380  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
3381  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
3382  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
3383  case Intrinsic::nvvm_tex_cube_v4u32_f32:
3384  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
3385  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
3386  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
3387  case Intrinsic::nvvm_tex_1d_v4u32_s32:
3388  case Intrinsic::nvvm_tex_1d_v4u32_f32:
3389  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
3390  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
3391  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
3392  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
3393  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
3394  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
3395  case Intrinsic::nvvm_tex_2d_v4u32_s32:
3396  case Intrinsic::nvvm_tex_2d_v4u32_f32:
3397  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
3398  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
3399  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
3400  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
3401  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
3402  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
3403  case Intrinsic::nvvm_tex_3d_v4u32_s32:
3404  case Intrinsic::nvvm_tex_3d_v4u32_f32:
3405  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
3406  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
3407  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
3408  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
3409  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
3410  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
3411  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
3412  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
3413  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
3414  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
3415  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
3416  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
3417  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
3418  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
3419  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
3420  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
3421  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
3422  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
3423  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
3424  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
3425  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
3426  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
3427  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
3428  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
3429  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
3430  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
3431  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
3432  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
3433  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
3434  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
3435  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
3436  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
3437  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
3438  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
3439  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
3440  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
3441  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
3442  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
3443  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
3444  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
3445  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
3446  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
3447  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
3448  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
3449  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
3450  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
3451  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
3452  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
3453  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
3454  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
3455  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
3456  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
3457  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
3458  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
3459  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
3460  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
3461  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
3462  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
3463  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
3464  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
3465  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
3466  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
3467  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
3468  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
3469  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
3470  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
3471    Info.opc = getOpcForTextureInstr(Intrinsic);
3472    Info.memVT = MVT::v4i32;
3473    Info.ptrVal = nullptr;
3474    Info.offset = 0;
3475    Info.vol = 0;
3476    Info.readMem = true;
3477    Info.writeMem = false;
3478    Info.align = 16;
3479    return true;
3480  }
3481  case Intrinsic::nvvm_suld_1d_i8_clamp:
3482  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
3483  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
3484  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
3485  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
3486  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
3487  case Intrinsic::nvvm_suld_2d_i8_clamp:
3488  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
3489  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
3490  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
3491  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
3492  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
3493  case Intrinsic::nvvm_suld_3d_i8_clamp:
3494  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
3495  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
3496  case Intrinsic::nvvm_suld_1d_i8_trap:
3497  case Intrinsic::nvvm_suld_1d_v2i8_trap:
3498  case Intrinsic::nvvm_suld_1d_v4i8_trap:
3499  case Intrinsic::nvvm_suld_1d_array_i8_trap:
3500  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
3501  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
3502  case Intrinsic::nvvm_suld_2d_i8_trap:
3503  case Intrinsic::nvvm_suld_2d_v2i8_trap:
3504  case Intrinsic::nvvm_suld_2d_v4i8_trap:
3505  case Intrinsic::nvvm_suld_2d_array_i8_trap:
3506  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
3507  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
3508  case Intrinsic::nvvm_suld_3d_i8_trap:
3509  case Intrinsic::nvvm_suld_3d_v2i8_trap:
3510  case Intrinsic::nvvm_suld_3d_v4i8_trap:
3511  case Intrinsic::nvvm_suld_1d_i8_zero:
3512  case Intrinsic::nvvm_suld_1d_v2i8_zero:
3513  case Intrinsic::nvvm_suld_1d_v4i8_zero:
3514  case Intrinsic::nvvm_suld_1d_array_i8_zero:
3515  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
3516  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
3517  case Intrinsic::nvvm_suld_2d_i8_zero:
3518  case Intrinsic::nvvm_suld_2d_v2i8_zero:
3519  case Intrinsic::nvvm_suld_2d_v4i8_zero:
3520  case Intrinsic::nvvm_suld_2d_array_i8_zero:
3521  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
3522  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
3523  case Intrinsic::nvvm_suld_3d_i8_zero:
3524  case Intrinsic::nvvm_suld_3d_v2i8_zero:
3525  case Intrinsic::nvvm_suld_3d_v4i8_zero: {
3526    Info.opc = getOpcForSurfaceInstr(Intrinsic);
3527    Info.memVT = MVT::i8;
3528    Info.ptrVal = nullptr;
3529    Info.offset = 0;
3530    Info.vol = 0;
3531    Info.readMem = true;
3532    Info.writeMem = false;
3533    Info.align = 16;
3534    return true;
3535  }
3536  case Intrinsic::nvvm_suld_1d_i16_clamp:
3537  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
3538  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
3539  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
3540  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
3541  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
3542  case Intrinsic::nvvm_suld_2d_i16_clamp:
3543  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
3544  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
3545  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
3546  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
3547  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
3548  case Intrinsic::nvvm_suld_3d_i16_clamp:
3549  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
3550  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
3551  case Intrinsic::nvvm_suld_1d_i16_trap:
3552  case Intrinsic::nvvm_suld_1d_v2i16_trap:
3553  case Intrinsic::nvvm_suld_1d_v4i16_trap:
3554  case Intrinsic::nvvm_suld_1d_array_i16_trap:
3555  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
3556  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
3557  case Intrinsic::nvvm_suld_2d_i16_trap:
3558  case Intrinsic::nvvm_suld_2d_v2i16_trap:
3559  case Intrinsic::nvvm_suld_2d_v4i16_trap:
3560  case Intrinsic::nvvm_suld_2d_array_i16_trap:
3561  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
3562  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
3563  case Intrinsic::nvvm_suld_3d_i16_trap:
3564  case Intrinsic::nvvm_suld_3d_v2i16_trap:
3565  case Intrinsic::nvvm_suld_3d_v4i16_trap:
3566  case Intrinsic::nvvm_suld_1d_i16_zero:
3567  case Intrinsic::nvvm_suld_1d_v2i16_zero:
3568  case Intrinsic::nvvm_suld_1d_v4i16_zero:
3569  case Intrinsic::nvvm_suld_1d_array_i16_zero:
3570  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
3571  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
3572  case Intrinsic::nvvm_suld_2d_i16_zero:
3573  case Intrinsic::nvvm_suld_2d_v2i16_zero:
3574  case Intrinsic::nvvm_suld_2d_v4i16_zero:
3575  case Intrinsic::nvvm_suld_2d_array_i16_zero:
3576  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
3577  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
3578  case Intrinsic::nvvm_suld_3d_i16_zero:
3579  case Intrinsic::nvvm_suld_3d_v2i16_zero:
3580  case Intrinsic::nvvm_suld_3d_v4i16_zero: {
3581    Info.opc = getOpcForSurfaceInstr(Intrinsic);
3582    Info.memVT = MVT::i16;
3583    Info.ptrVal = nullptr;
3584    Info.offset = 0;
3585    Info.vol = 0;
3586    Info.readMem = true;
3587    Info.writeMem = false;
3588    Info.align = 16;
3589    return true;
3590  }
3591  case Intrinsic::nvvm_suld_1d_i32_clamp:
3592  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
3593  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
3594  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
3595  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
3596  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
3597  case Intrinsic::nvvm_suld_2d_i32_clamp:
3598  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
3599  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
3600  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
3601  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
3602  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
3603  case Intrinsic::nvvm_suld_3d_i32_clamp:
3604  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
3605  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
3606  case Intrinsic::nvvm_suld_1d_i32_trap:
3607  case Intrinsic::nvvm_suld_1d_v2i32_trap:
3608  case Intrinsic::nvvm_suld_1d_v4i32_trap:
3609  case Intrinsic::nvvm_suld_1d_array_i32_trap:
3610  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
3611  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
3612  case Intrinsic::nvvm_suld_2d_i32_trap:
3613  case Intrinsic::nvvm_suld_2d_v2i32_trap:
3614  case Intrinsic::nvvm_suld_2d_v4i32_trap:
3615  case Intrinsic::nvvm_suld_2d_array_i32_trap:
3616  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
3617  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
3618  case Intrinsic::nvvm_suld_3d_i32_trap:
3619  case Intrinsic::nvvm_suld_3d_v2i32_trap:
3620  case Intrinsic::nvvm_suld_3d_v4i32_trap:
3621  case Intrinsic::nvvm_suld_1d_i32_zero:
3622  case Intrinsic::nvvm_suld_1d_v2i32_zero:
3623  case Intrinsic::nvvm_suld_1d_v4i32_zero:
3624  case Intrinsic::nvvm_suld_1d_array_i32_zero:
3625  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
3626  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
3627  case Intrinsic::nvvm_suld_2d_i32_zero:
3628  case Intrinsic::nvvm_suld_2d_v2i32_zero:
3629  case Intrinsic::nvvm_suld_2d_v4i32_zero:
3630  case Intrinsic::nvvm_suld_2d_array_i32_zero:
3631  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
3632  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
3633  case Intrinsic::nvvm_suld_3d_i32_zero:
3634  case Intrinsic::nvvm_suld_3d_v2i32_zero:
3635  case Intrinsic::nvvm_suld_3d_v4i32_zero: {
3636    Info.opc = getOpcForSurfaceInstr(Intrinsic);
3637    Info.memVT = MVT::i32;
3638    Info.ptrVal = nullptr;
3639    Info.offset = 0;
3640    Info.vol = 0;
3641    Info.readMem = true;
3642    Info.writeMem = false;
3643    Info.align = 16;
3644    return true;
3645  }
3646  case Intrinsic::nvvm_suld_1d_i64_clamp:
3647  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
3648  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
3649  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
3650  case Intrinsic::nvvm_suld_2d_i64_clamp:
3651  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
3652  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
3653  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
3654  case Intrinsic::nvvm_suld_3d_i64_clamp:
3655  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
3656  case Intrinsic::nvvm_suld_1d_i64_trap:
3657  case Intrinsic::nvvm_suld_1d_v2i64_trap:
3658  case Intrinsic::nvvm_suld_1d_array_i64_trap:
3659  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
3660  case Intrinsic::nvvm_suld_2d_i64_trap:
3661  case Intrinsic::nvvm_suld_2d_v2i64_trap:
3662  case Intrinsic::nvvm_suld_2d_array_i64_trap:
3663  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
3664  case Intrinsic::nvvm_suld_3d_i64_trap:
3665  case Intrinsic::nvvm_suld_3d_v2i64_trap:
3666  case Intrinsic::nvvm_suld_1d_i64_zero:
3667  case Intrinsic::nvvm_suld_1d_v2i64_zero:
3668  case Intrinsic::nvvm_suld_1d_array_i64_zero:
3669  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
3670  case Intrinsic::nvvm_suld_2d_i64_zero:
3671  case Intrinsic::nvvm_suld_2d_v2i64_zero:
3672  case Intrinsic::nvvm_suld_2d_array_i64_zero:
3673  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
3674  case Intrinsic::nvvm_suld_3d_i64_zero:
3675  case Intrinsic::nvvm_suld_3d_v2i64_zero: {
3676    Info.opc = getOpcForSurfaceInstr(Intrinsic);
3677    Info.memVT = MVT::i64;
3678    Info.ptrVal = nullptr;
3679    Info.offset = 0;
3680    Info.vol = 0;
3681    Info.readMem = true;
3682    Info.writeMem = false;
3683    Info.align = 16;
3684    return true;
3685  }
3686  }
3687  return false;
3688}
3689
3690/// isLegalAddressingMode - Return true if the addressing mode represented
3691/// by AM is legal for this target, for a load/store of the specified type.
3692/// Used to guide target specific optimizations, like loop strength reduction
3693/// (LoopStrengthReduce.cpp) and memory optimization for address mode
3694/// (CodeGenPrepare.cpp)
3695bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
3696                                                Type *Ty) const {
3697
3698  // AddrMode - This represents an addressing mode of:
3699  //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
3700  //
3701  // The legal address modes are
3702  // - [avar]
3703  // - [areg]
3704  // - [areg+immoff]
3705  // - [immAddr]
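  //
  // A rough PTX-level sketch (the register and symbol names below are made
  // up for illustration):
  //   ld.global.f32 %f0, [gvar];      // [avar]
  //   ld.global.f32 %f0, [%r1];       // [areg]
  //   ld.global.f32 %f0, [%r1+16];    // [areg+immoff]
  //   ld.global.f32 %f0, [4096];      // [immAddr]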
3706
3707  if (AM.BaseGV) {
3708    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
3709      return false;
3710    return true;
3711  }
3712
3713  switch (AM.Scale) {
3714  case 0: // "r", "r+i" or "i" is allowed
3715    break;
3716  case 1:
3717    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
3718      return false;
3719    // Otherwise we have r+i.
3720    break;
3721  default:
3722    // No scale > 1 is allowed
3723    return false;
3724  }
3725  return true;
3726}
3727
3728//===----------------------------------------------------------------------===//
3729//                         NVPTX Inline Assembly Support
3730//===----------------------------------------------------------------------===//
3731
3732/// getConstraintType - Given a constraint letter, return the type of
3733/// constraint it is for this target.
3734NVPTXTargetLowering::ConstraintType
3735NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
3736  if (Constraint.size() == 1) {
3737    switch (Constraint[0]) {
3738    default:
3739      break;
3740    case 'b':
3741    case 'r':
3742    case 'h':
3743    case 'c':
3744    case 'l':
3745    case 'f':
3746    case 'd':
3747    case '0':
3748    case 'N':
3749      return C_RegisterClass;
3750    }
3751  }
3752  return TargetLowering::getConstraintType(Constraint);
3753}
3754
3755std::pair<unsigned, const TargetRegisterClass *>
3756NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3757                                                  MVT VT) const {
3758  if (Constraint.size() == 1) {
3759    switch (Constraint[0]) {
3760    case 'b':
3761      return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
3762    case 'c':
3763      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
3764    case 'h':
3765      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
3766    case 'r':
3767      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
3768    case 'l':
3769    case 'N':
3770      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
3771    case 'f':
3772      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
3773    case 'd':
3774      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
3775    }
3776  }
3777  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3778}
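
// As a usage sketch (hypothetical inline asm, not from this file; the
// constraint letters map to the register classes above, e.g. 'h' -> 16-bit
// and 'r' -> 32-bit integer registers, 'l'/'N' -> 64-bit integer registers,
// 'f'/'d' -> f32/f64 registers):
//
//   int x, y;
//   asm("add.s32 %0, %1, %1;" : "=r"(x) : "r"(y));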
3779
3780/// getFunctionAlignment - Return the Log2 alignment of this function.
3781unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
3782  return 4; // Log2: an alignment of 2^4 = 16 bytes.
3783}
3784
3785//===----------------------------------------------------------------------===//
3786//                         NVPTX DAG Combining
3787//===----------------------------------------------------------------------===//
3788
3789bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
3790                                   CodeGenOpt::Level OptLevel) const {
3791  const Function *F = MF.getFunction();
3792  const TargetOptions &TO = MF.getTarget().Options;
3793
3794  // Always honor command-line argument
3795  if (FMAContractLevelOpt.getNumOccurrences() > 0) {
3796    return FMAContractLevelOpt > 0;
3797  } else if (OptLevel == 0) {
3798    // Do not contract if we're not optimizing the code
3799    return false;
3800  } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
3801    // Honor TargetOptions flags that explicitly say fusion is okay
3802    return true;
3803  } else if (F->hasFnAttribute("unsafe-fp-math")) {
3804    // Check for unsafe-fp-math=true coming from Clang
3805    Attribute Attr = F->getFnAttribute("unsafe-fp-math");
3806    StringRef Val = Attr.getValueAsString();
3807    if (Val == "true")
3808      return true;
3809  }
3810
3811  // We did not have a clear indication that fusion is allowed, so assume not
3812  return false;
3813}
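
// Precedence sketch for the checks above (illustrative llc invocations):
// "-nvptx-fma-level=0" disables fusion even at -O3; without that flag, "-O0"
// disables fusion, while "-fp-contract=fast" (FPOpFusion::Fast) or the
// "unsafe-fp-math" function attribute enables it.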
3814
3815/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
3816/// operands N0 and N1.  This is a helper for PerformADDCombine that is
3817/// called with the default operands, and if that fails, with commuted
3818/// operands.
3819static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
3820                                           TargetLowering::DAGCombinerInfo &DCI,
3821                                             const NVPTXSubtarget &Subtarget,
3822                                             CodeGenOpt::Level OptLevel) {
3823  SelectionDAG &DAG = DCI.DAG;
3824  // Skip the vector case; this combine only handles scalar values.
3825  EVT VT = N0.getValueType();
3826  if (VT.isVector())
3827    return SDValue();
3828
3829  // fold (add (mul a, b), c) -> (mad a, b, c)
3830  //
3831  if (N0.getOpcode() == ISD::MUL) {
3832    assert(VT.isInteger());
3833    // For integers:
3834    // Since an integer multiply-add has the same cost as an integer
3835    // multiply but is more expensive than an integer add, only do the
3836    // fusion when the multiply's sole use is the add.
3837    if (OptLevel == CodeGenOpt::None || VT != MVT::i32 ||
3838        !N0.getNode()->hasOneUse())
3839      return SDValue();
3840
3841    // Do the folding
3842    return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
3843                       N0.getOperand(0), N0.getOperand(1), N1);
3844  }
3845  else if (N0.getOpcode() == ISD::FMUL) {
3846    if (VT == MVT::f32 || VT == MVT::f64) {
3847      const auto *TLI = static_cast<const NVPTXTargetLowering *>(
3848          &DAG.getTargetLoweringInfo());
3849      if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
3850        return SDValue();
3851
3852      // For floating point:
3853      // Do the fusion only when the multiply has fewer than 5 uses
3854      // and all of them are adds.
3855      // The heuristic: if a use is not an add, that use cannot be
3856      // fused into an fma, so the multiply is still needed anyway.
3857      // If there are more than 4 uses, even if they are all adds,
3858      // fusing them would increase register pressure.
3859      //
3860      int numUses = 0;
3861      int nonAddCount = 0;
3862      for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
3863           UE = N0.getNode()->use_end();
3864           UI != UE; ++UI) {
3865        numUses++;
3866        SDNode *User = *UI;
3867        if (User->getOpcode() != ISD::FADD)
3868          ++nonAddCount;
3869      }
3870      if (numUses >= 5)
3871        return SDValue();
3872      if (nonAddCount) {
3873        int orderNo = N->getIROrder();
3874        int orderNo2 = N0.getNode()->getIROrder();
3875        // A simple heuristic for estimating potential register pressure:
3876        // the difference in IR order approximates the distance between
3877        // the def and its use; the longer the distance, the more likely
3878        // it is to cause register pressure.
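        // Worked example (illustrative numbers, not extra logic): if the
        // FMUL has IR order 100 and this FADD has IR order 700, the distance
        // is 600 >= 500, so we proceed to the operand-liveness check below
        // rather than bailing out here.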
3879        if (orderNo - orderNo2 < 500)
3880          return SDValue();
3881
3882        // Now check whether at least one of the FMUL's operands is live
3883        // beyond node N; if so, the FMA will not increase register pressure at N.
3884        bool opIsLive = false;
3885        const SDNode *left = N0.getOperand(0).getNode();
3886        const SDNode *right = N0.getOperand(1).getNode();
3887
3888        if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
3889          opIsLive = true;
3890
3891        if (!opIsLive)
3892          for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) {
3893            SDNode *User = *UI;
3894            int orderNo3 = User->getIROrder();
3895            if (orderNo3 > orderNo) {
3896              opIsLive = true;
3897              break;
3898            }
3899          }
3900
3901        if (!opIsLive)
3902          for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) {
3903            SDNode *User = *UI;
3904            int orderNo3 = User->getIROrder();
3905            if (orderNo3 > orderNo) {
3906              opIsLive = true;
3907              break;
3908            }
3909          }
3910
3911        if (!opIsLive)
3912          return SDValue();
3913      }
3914
3915      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
3916                         N0.getOperand(0), N0.getOperand(1), N1);
3917    }
3918  }
3919
3920  return SDValue();
3921}
3922
3923/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
3924///
3925static SDValue PerformADDCombine(SDNode *N,
3926                                 TargetLowering::DAGCombinerInfo &DCI,
3927                                 const NVPTXSubtarget &Subtarget,
3928                                 CodeGenOpt::Level OptLevel) {
3929  SDValue N0 = N->getOperand(0);
3930  SDValue N1 = N->getOperand(1);
3931
3932  // First try with the default operand order.
3933  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget,
3934                                                 OptLevel);
3935  if (Result.getNode())
3936    return Result;
3937
3938  // If that didn't work, try again with the operands commuted.
3939  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
3940}
3941
3942static SDValue PerformANDCombine(SDNode *N,
3943                                 TargetLowering::DAGCombinerInfo &DCI) {
3944  // The type legalizer turns a vector load of i8 values into a zextload to
3945  // i16 registers, optionally ANY_EXTENDs it (if the target type is an
3946  // integer), and ANDs off the high 8 bits. Since we turn this load into a
3947  // target-specific DAG node, the DAG combiner fails to eliminate these AND
3948  // nodes. Do that here.
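  // Illustrative shape of the DAG this eliminates (a sketch, not exact node
  // syntax):
  //
  //   (and (any_extend (NVPTXISD::LoadV4 ..., memVT=v4i8)), 0xff)
  //     -> (zero_extend (NVPTXISD::LoadV4 ..., memVT=v4i8))
  //
  // The mask is redundant because the load already zero-extends each i8.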
3949  SDValue Val = N->getOperand(0);
3950  SDValue Mask = N->getOperand(1);
3951
3952  if (isa<ConstantSDNode>(Val)) {
3953    std::swap(Val, Mask);
3954  }
3955
3956  SDValue AExt;
3957  // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
3958  if (Val.getOpcode() == ISD::ANY_EXTEND) {
3959    AExt = Val;
3960    Val = Val->getOperand(0);
3961  }
3962
3963  if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
3964    Val = Val->getOperand(0);
3965  }
3966
3967  if (Val->getOpcode() == NVPTXISD::LoadV2 ||
3968      Val->getOpcode() == NVPTXISD::LoadV4) {
3969    ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
3970    if (!MaskCnst) {
3971      // Not an AND with a constant
3972      return SDValue();
3973    }
3974
3975    uint64_t MaskVal = MaskCnst->getZExtValue();
3976    if (MaskVal != 0xff) {
3977      // Not an AND that chops off top 8 bits
3978      return SDValue();
3979    }
3980
3981    MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
3982    if (!Mem) {
3983      // Not a MemSDNode?!?
3984      return SDValue();
3985    }
3986
3987    EVT MemVT = Mem->getMemoryVT();
3988    if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
3989      // We only handle the i8 case
3990      return SDValue();
3991    }
3992
3993    unsigned ExtType =
3994      cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
3995        getZExtValue();
3996    if (ExtType == ISD::SEXTLOAD) {
3997      // If for some reason the load is a sextload, the AND is needed to
3998      // zero out the high 8 bits.
3999      return SDValue();
4000    }
4001
4002    bool AddTo = false;
4003    if (AExt.getNode() != nullptr) {
4004      // Re-insert the ext as a zext.
4005      Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4006                            AExt.getValueType(), Val);
4007      AddTo = true;
4008    }
4009
4010    // If we get here, the AND is unnecessary.  Just replace it with the load.
4011    DCI.CombineTo(N, Val, AddTo);
4012  }
4013
4014  return SDValue();
4015}
4016
4017enum OperandSignedness {
4018  Signed = 0,
4019  Unsigned,
4020  Unknown
4021};
4022
4023/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
4024/// that can be demoted to \p OptSize bits without loss of information. The
4025/// signedness of the operand, if determinable, is placed in \p S.
4026static bool IsMulWideOperandDemotable(SDValue Op,
4027                                      unsigned OptSize,
4028                                      OperandSignedness &S) {
4029  S = Unknown;
4030
4031  if (Op.getOpcode() == ISD::SIGN_EXTEND ||
4032      Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4033    EVT OrigVT = Op.getOperand(0).getValueType();
4034    if (OrigVT.getSizeInBits() <= OptSize) {
4035      S = Signed;
4036      return true;
4037    }
4038  } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
4039    EVT OrigVT = Op.getOperand(0).getValueType();
4040    if (OrigVT.getSizeInBits() <= OptSize) {
4041      S = Unsigned;
4042      return true;
4043    }
4044  }
4045
4046  return false;
4047}
4048
4049/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
4050/// be demoted to \p OptSize bits without loss of information. If the operands
4051/// contain a constant, it should appear as the RHS operand. The signedness of
4052/// the operands is placed in \p IsSigned.
4053static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
4054                                        unsigned OptSize,
4055                                        bool &IsSigned) {
4056
4057  OperandSignedness LHSSign;
4058
4059  // The LHS operand must be a demotable op
4060  if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
4061    return false;
4062
4063  // We should have been able to determine the signedness from the LHS
4064  if (LHSSign == Unknown)
4065    return false;
4066
4067  IsSigned = (LHSSign == Signed);
4068
4069  // The RHS can be a demotable op or a constant
4070  if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
4071    APInt Val = CI->getAPIntValue();
4072    if (LHSSign == Unsigned) {
4073      if (Val.isIntN(OptSize)) {
4074        return true;
4075      }
4076      return false;
4077    } else {
4078      if (Val.isSignedIntN(OptSize)) {
4079        return true;
4080      }
4081      return false;
4082    }
4083  } else {
4084    OperandSignedness RHSSign;
4085    if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
4086      return false;
4087
4088    if (LHSSign != RHSSign)
4089      return false;
4090
4091    return true;
4092  }
4093}
4094
4095/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
4096/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
4097/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
4098/// amount.
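/// For example (illustrative IR and PTX):
///   %x = sext i16 %a to i32
///   %y = sext i16 %b to i32
///   %m = mul i32 %x, %y
/// becomes an NVPTXISD::MUL_WIDE_SIGNED of the two i16 values, which selects
/// to something like "mul.wide.s16 %r0, %rs1, %rs2;".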
4099static SDValue TryMULWIDECombine(SDNode *N,
4100                                 TargetLowering::DAGCombinerInfo &DCI) {
4101  EVT MulType = N->getValueType(0);
4102  if (MulType != MVT::i32 && MulType != MVT::i64) {
4103    return SDValue();
4104  }
4105
4106  unsigned OptSize = MulType.getSizeInBits() >> 1;
4107  SDValue LHS = N->getOperand(0);
4108  SDValue RHS = N->getOperand(1);
4109
4110  // Canonicalize the multiply so the constant (if any) is on the right
4111  if (N->getOpcode() == ISD::MUL) {
4112    if (isa<ConstantSDNode>(LHS)) {
4113      std::swap(LHS, RHS);
4114    }
4115  }
4116
4117  // If we have a SHL, determine the actual multiply amount
4118  if (N->getOpcode() == ISD::SHL) {
4119    ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
4120    if (!ShlRHS) {
4121      return SDValue();
4122    }
4123
4124    APInt ShiftAmt = ShlRHS->getAPIntValue();
4125    unsigned BitWidth = MulType.getSizeInBits();
4126    if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
4127      APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
4128      RHS = DCI.DAG.getConstant(MulVal, MulType);
4129    } else {
4130      return SDValue();
4131    }
4132  }
4133
4134  bool Signed;
4135  // Verify that our operands are demotable
4136  if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
4137    return SDValue();
4138  }
4139
4140  EVT DemotedVT;
4141  if (MulType == MVT::i32) {
4142    DemotedVT = MVT::i16;
4143  } else {
4144    DemotedVT = MVT::i32;
4145  }
4146
4147  // Truncate the operands to the correct size. Note that these are just for
4148  // type consistency and will (likely) be eliminated in later phases.
4149  SDValue TruncLHS =
4150    DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS);
4151  SDValue TruncRHS =
4152    DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS);
4153
4154  unsigned Opc;
4155  if (Signed) {
4156    Opc = NVPTXISD::MUL_WIDE_SIGNED;
4157  } else {
4158    Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
4159  }
4160
4161  return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS);
4162}
4163
4164/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
4165static SDValue PerformMULCombine(SDNode *N,
4166                                 TargetLowering::DAGCombinerInfo &DCI,
4167                                 CodeGenOpt::Level OptLevel) {
4168  if (OptLevel > 0) {
4169    // Try mul.wide combining at OptLevel > 0
4170    SDValue Ret = TryMULWIDECombine(N, DCI);
4171    if (Ret.getNode())
4172      return Ret;
4173  }
4174
4175  return SDValue();
4176}
4177
4178/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
4179static SDValue PerformSHLCombine(SDNode *N,
4180                                 TargetLowering::DAGCombinerInfo &DCI,
4181                                 CodeGenOpt::Level OptLevel) {
4182  if (OptLevel > 0) {
4183    // Try mul.wide combining at OptLevel > 0
4184    SDValue Ret = TryMULWIDECombine(N, DCI);
4185    if (Ret.getNode())
4186      return Ret;
4187  }
4188
4189  return SDValue();
4190}
4191
4192SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
4193                                               DAGCombinerInfo &DCI) const {
4194  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
4195  switch (N->getOpcode()) {
4196    default: break;
4197    case ISD::ADD:
4198    case ISD::FADD:
4199      return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel);
4200    case ISD::MUL:
4201      return PerformMULCombine(N, DCI, OptLevel);
4202    case ISD::SHL:
4203      return PerformSHLCombine(N, DCI, OptLevel);
4204    case ISD::AND:
4205      return PerformANDCombine(N, DCI);
4206  }
4207  return SDValue();
4208}
4209
4210/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
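/// For example (illustrative): a sufficiently aligned load of <2 x float>
/// becomes one NVPTXISD::LoadV2 node with three results (two f32 values plus
/// the chain), which are recombined into a vector with BUILD_VECTOR below.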
4211static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
4212                              const DataLayout *TD,
4213                              SmallVectorImpl<SDValue> &Results) {
4214  EVT ResVT = N->getValueType(0);
4215  SDLoc DL(N);
4216
4217  assert(ResVT.isVector() && "Vector load must have vector type");
4218
4219  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
4220  // legal.  We can (and should) split that into two loads of <2 x double>
4221  // here, but that is left as a TODO for now.
4222  assert(ResVT.isSimple() && "Can only handle simple types");
4223  switch (ResVT.getSimpleVT().SimpleTy) {
4224  default:
4225    return;
4226  case MVT::v2i8:
4227  case MVT::v2i16:
4228  case MVT::v2i32:
4229  case MVT::v2i64:
4230  case MVT::v2f32:
4231  case MVT::v2f64:
4232  case MVT::v4i8:
4233  case MVT::v4i16:
4234  case MVT::v4i32:
4235  case MVT::v4f32:
4236    // This is a "native" vector type
4237    break;
4238  }
4239
4240  LoadSDNode *LD = cast<LoadSDNode>(N);
4241
4242  unsigned Align = LD->getAlignment();
4243  unsigned PrefAlign =
4244    TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
4245  if (Align < PrefAlign) {
4246    // This load is not sufficiently aligned, so bail out and let this vector
4247    // load be scalarized.  Note that we may still be able to emit smaller
4248    // vector loads.  For example, if we are loading a <4 x float> with an
4249    // alignment of 8, this check will fail but the legalizer will try again
4250    // with 2 x <2 x float>, which will succeed with an alignment of 8.
4251    return;
4252  }
4253
4254  EVT EltVT = ResVT.getVectorElementType();
4255  unsigned NumElts = ResVT.getVectorNumElements();
4256
4257  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
4258  // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
4259  // loaded type to i16 and propagate the "real" type as the memory type.
4260  bool NeedTrunc = false;
4261  if (EltVT.getSizeInBits() < 16) {
4262    EltVT = MVT::i16;
4263    NeedTrunc = true;
4264  }
4265
4266  unsigned Opcode = 0;
4267  SDVTList LdResVTs;
4268
4269  switch (NumElts) {
4270  default:
4271    return;
4272  case 2:
4273    Opcode = NVPTXISD::LoadV2;
4274    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
4275    break;
4276  case 4: {
4277    Opcode = NVPTXISD::LoadV4;
4278    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
4279    LdResVTs = DAG.getVTList(ListVTs);
4280    break;
4281  }
4282  }
4283
4284  SmallVector<SDValue, 8> OtherOps;
4285
4286  // Copy regular operands
4287  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
4288    OtherOps.push_back(N->getOperand(i));
4289
4290  // The select routine does not have access to the LoadSDNode instance, so
4291  // pass along the extension information
4292  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
4293
4294  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
4295                                          LD->getMemoryVT(),
4296                                          LD->getMemOperand());
4297
4298  SmallVector<SDValue, 4> ScalarRes;
4299
4300  for (unsigned i = 0; i < NumElts; ++i) {
4301    SDValue Res = NewLD.getValue(i);
4302    if (NeedTrunc)
4303      Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
4304    ScalarRes.push_back(Res);
4305  }
4306
4307  SDValue LoadChain = NewLD.getValue(NumElts);
4308
4309  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
4310
4311  Results.push_back(BuildVec);
4312  Results.push_back(LoadChain);
4313}
4314
4315static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
4316                                     SmallVectorImpl<SDValue> &Results) {
4317  SDValue Chain = N->getOperand(0);
4318  SDValue Intrin = N->getOperand(1);
4319  SDLoc DL(N);
4320
4321  // Get the intrinsic ID
4322  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
4323  switch (IntrinNo) {
4324  default:
4325    return;
4326  case Intrinsic::nvvm_ldg_global_i:
4327  case Intrinsic::nvvm_ldg_global_f:
4328  case Intrinsic::nvvm_ldg_global_p:
4329  case Intrinsic::nvvm_ldu_global_i:
4330  case Intrinsic::nvvm_ldu_global_f:
4331  case Intrinsic::nvvm_ldu_global_p: {
4332    EVT ResVT = N->getValueType(0);
4333
4334    if (ResVT.isVector()) {
4335      // Vector LDG/LDU
4336
4337      unsigned NumElts = ResVT.getVectorNumElements();
4338      EVT EltVT = ResVT.getVectorElementType();
4339
4340      // Since LDU/LDG are target nodes, we cannot rely on DAG type
4341      // legalization. Therefore, we must ensure the type is legal.
4342      // For i1 and i8, we set the loaded type to i16 and propagate the
4343      // "real" type as the memory type.
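      // For example (a sketch): an ldg of <2 x i8> becomes NVPTXISD::LDGV2
      // producing two i16 results with memory type v2i8; the TRUNCATE nodes
      // emitted below restore the i8 element type.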
4344      bool NeedTrunc = false;
4345      if (EltVT.getSizeInBits() < 16) {
4346        EltVT = MVT::i16;
4347        NeedTrunc = true;
4348      }
4349
4350      unsigned Opcode = 0;
4351      SDVTList LdResVTs;
4352
4353      switch (NumElts) {
4354      default:
4355        return;
4356      case 2:
4357        switch (IntrinNo) {
4358        default:
4359          return;
4360        case Intrinsic::nvvm_ldg_global_i:
4361        case Intrinsic::nvvm_ldg_global_f:
4362        case Intrinsic::nvvm_ldg_global_p:
4363          Opcode = NVPTXISD::LDGV2;
4364          break;
4365        case Intrinsic::nvvm_ldu_global_i:
4366        case Intrinsic::nvvm_ldu_global_f:
4367        case Intrinsic::nvvm_ldu_global_p:
4368          Opcode = NVPTXISD::LDUV2;
4369          break;
4370        }
4371        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
4372        break;
4373      case 4: {
4374        switch (IntrinNo) {
4375        default:
4376          return;
4377        case Intrinsic::nvvm_ldg_global_i:
4378        case Intrinsic::nvvm_ldg_global_f:
4379        case Intrinsic::nvvm_ldg_global_p:
4380          Opcode = NVPTXISD::LDGV4;
4381          break;
4382        case Intrinsic::nvvm_ldu_global_i:
4383        case Intrinsic::nvvm_ldu_global_f:
4384        case Intrinsic::nvvm_ldu_global_p:
4385          Opcode = NVPTXISD::LDUV4;
4386          break;
4387        }
4388        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
4389        LdResVTs = DAG.getVTList(ListVTs);
4390        break;
4391      }
4392      }
4393
4394      SmallVector<SDValue, 8> OtherOps;
4395
4396      // Copy regular operands
4397
4398      OtherOps.push_back(Chain); // Chain
4399                                 // Skip operand 1 (intrinsic ID)
4400      // Others
4401      for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
4402        OtherOps.push_back(N->getOperand(i));
4403
4404      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4405
4406      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
4407                                              MemSD->getMemoryVT(),
4408                                              MemSD->getMemOperand());
4409
4410      SmallVector<SDValue, 4> ScalarRes;
4411
4412      for (unsigned i = 0; i < NumElts; ++i) {
4413        SDValue Res = NewLD.getValue(i);
4414        if (NeedTrunc)
4415          Res =
4416              DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
4417        ScalarRes.push_back(Res);
4418      }
4419
4420      SDValue LoadChain = NewLD.getValue(NumElts);
4421
4422      SDValue BuildVec =
4423          DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
4424
4425      Results.push_back(BuildVec);
4426      Results.push_back(LoadChain);
4427    } else {
4428      // i8 LDG/LDU
4429      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
4430             "Custom handling of non-i8 ldu/ldg?");
4431
4432      // Just copy all operands as-is
4433      SmallVector<SDValue, 4> Ops;
4434      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
4435        Ops.push_back(N->getOperand(i));
4436
4437      // Force output to i16
4438      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
4439
4440      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4441
4442      // We make sure the memory type is i8, which will be used during isel
4443      // to select the proper instruction.
4444      SDValue NewLD =
4445          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
4446                                  MVT::i8, MemSD->getMemOperand());
4447
4448      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
4449                                    NewLD.getValue(0)));
4450      Results.push_back(NewLD.getValue(1));
4451    }
4452  }
4453  }
4454}
4455
4456void NVPTXTargetLowering::ReplaceNodeResults(
4457    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4458  switch (N->getOpcode()) {
4459  default:
4460    report_fatal_error("Unhandled custom legalization");
4461  case ISD::LOAD:
4462    ReplaceLoadVector(N, DAG, getDataLayout(), Results);
4463    return;
4464  case ISD::INTRINSIC_W_CHAIN:
4465    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
4466    return;
4467  }
4468}
4469
4470// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
4471void NVPTXSection::anchor() {}
4472
4473NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
4474  delete TextSection;
4475  delete DataSection;
4476  delete BSSSection;
4477  delete ReadOnlySection;
4478
4479  delete StaticCtorSection;
4480  delete StaticDtorSection;
4481  delete LSDASection;
4482  delete EHFrameSection;
4483  delete DwarfAbbrevSection;
4484  delete DwarfInfoSection;
4485  delete DwarfLineSection;
4486  delete DwarfFrameSection;
4487  delete DwarfPubTypesSection;
4488  delete DwarfDebugInlineSection;
4489  delete DwarfStrSection;
4490  delete DwarfLocSection;
4491  delete DwarfARangesSection;
4492  delete DwarfRangesSection;
4493  delete DwarfMacroInfoSection;
4494}
4495
4496const MCSection *
4497NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
4498                                              SectionKind Kind, Mangler &Mang,
4499                                              const TargetMachine &TM) const {
4500  return getDataSection();
4501}
4502