NVPTXISelLowering.cpp revision 243830
1//
2//                     The LLVM Compiler Infrastructure
3//
4// This file is distributed under the University of Illinois Open Source
5// License. See LICENSE.TXT for details.
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that NVPTX uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14
15#include "NVPTX.h"
16#include "NVPTXISelLowering.h"
17#include "NVPTXTargetMachine.h"
18#include "NVPTXTargetObjectFile.h"
19#include "NVPTXUtilities.h"
20#include "llvm/Intrinsics.h"
21#include "llvm/IntrinsicInst.h"
22#include "llvm/Support/CommandLine.h"
23#include "llvm/DerivedTypes.h"
24#include "llvm/GlobalValue.h"
25#include "llvm/Module.h"
26#include "llvm/Function.h"
27#include "llvm/CodeGen/Analysis.h"
28#include "llvm/CodeGen/MachineFrameInfo.h"
29#include "llvm/CodeGen/MachineFunction.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/MachineRegisterInfo.h"
32#include "llvm/Support/CallSite.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/raw_ostream.h"
36#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
37#include "llvm/MC/MCSectionELF.h"
38#include <sstream>
39
40#undef DEBUG_TYPE
41#define DEBUG_TYPE "nvptx-lower"
42
43using namespace llvm;
44
45static unsigned int uniqueCallSite = 0;
46
47static cl::opt<bool>
48RetainVectorOperands("nvptx-codegen-vectors",
49     cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
50                     cl::init(true));
51
52static cl::opt<bool>
53sched4reg("nvptx-sched4reg",
54          cl::desc("NVPTX Specific: schedule for register pressue"),
55          cl::init(false));
56
57// NVPTXTargetLowering Constructor.
58NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
59: TargetLowering(TM, new NVPTXTargetObjectFile()),
60  nvTM(&TM),
61  nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
62
63  // always lower memset, memcpy, and memmove intrinsics to load/store
64  // instructions, rather
65  // then generating calls to memset, mempcy or memmove.
66  maxStoresPerMemset = (unsigned)0xFFFFFFFF;
67  maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
68  maxStoresPerMemmove = (unsigned)0xFFFFFFFF;
69
70  setBooleanContents(ZeroOrNegativeOneBooleanContent);
71
72  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
73  // condition branches.
74  setJumpIsExpensive(true);
75
76  // By default, use the Source scheduling
77  if (sched4reg)
78    setSchedulingPreference(Sched::RegPressure);
79  else
80    setSchedulingPreference(Sched::Source);
81
82  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
83  addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
84  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
85  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
86  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
87  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
88  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
89
90  if (RetainVectorOperands) {
91    addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
92    addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
93    addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
94    addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
95    addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
96    addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
97    addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
98    addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
99    addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
100    addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
101
102    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32  , Custom);
103    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32  , Custom);
104    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16  , Custom);
105    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8   , Custom);
106    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64  , Custom);
107    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64  , Custom);
108    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32  , Custom);
109    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32  , Custom);
110    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16  , Custom);
111    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8   , Custom);
112
113    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32  , Custom);
114    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32  , Custom);
115    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16  , Custom);
116    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8   , Custom);
117    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64  , Custom);
118    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64  , Custom);
119    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32  , Custom);
120    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32  , Custom);
121    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16  , Custom);
122    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8   , Custom);
123  }
124
125  // Operations not directly supported by NVPTX.
126  setOperationAction(ISD::SELECT_CC,         MVT::Other, Expand);
127  setOperationAction(ISD::BR_CC,             MVT::Other, Expand);
128  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
129  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
130  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
131  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
132  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
133
134  if (nvptxSubtarget.hasROT64()) {
135    setOperationAction(ISD::ROTL , MVT::i64, Legal);
136    setOperationAction(ISD::ROTR , MVT::i64, Legal);
137  }
138  else {
139    setOperationAction(ISD::ROTL , MVT::i64, Expand);
140    setOperationAction(ISD::ROTR , MVT::i64, Expand);
141  }
142  if (nvptxSubtarget.hasROT32()) {
143    setOperationAction(ISD::ROTL , MVT::i32, Legal);
144    setOperationAction(ISD::ROTR , MVT::i32, Legal);
145  }
146  else {
147    setOperationAction(ISD::ROTL , MVT::i32, Expand);
148    setOperationAction(ISD::ROTR , MVT::i32, Expand);
149  }
150
151  setOperationAction(ISD::ROTL , MVT::i16, Expand);
152  setOperationAction(ISD::ROTR , MVT::i16, Expand);
153  setOperationAction(ISD::ROTL , MVT::i8, Expand);
154  setOperationAction(ISD::ROTR , MVT::i8, Expand);
155  setOperationAction(ISD::BSWAP , MVT::i16, Expand);
156  setOperationAction(ISD::BSWAP , MVT::i32, Expand);
157  setOperationAction(ISD::BSWAP , MVT::i64, Expand);
158
159  // Indirect branch is not supported.
160  // This also disables Jump Table creation.
161  setOperationAction(ISD::BR_JT,             MVT::Other, Expand);
162  setOperationAction(ISD::BRIND,             MVT::Other, Expand);
163
164  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
165  setOperationAction(ISD::GlobalAddress   , MVT::i64  , Custom);
166
167  // We want to legalize constant related memmove and memcopy
168  // intrinsics.
169  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
170
171  // Turn FP extload into load/fextend
172  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
173  // Turn FP truncstore into trunc + store.
174  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
175
176  // PTX does not support load / store predicate registers
177  setOperationAction(ISD::LOAD, MVT::i1, Custom);
178  setOperationAction(ISD::STORE, MVT::i1, Custom);
179
180  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
181  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
182  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
183  setTruncStoreAction(MVT::i32, MVT::i1, Expand);
184  setTruncStoreAction(MVT::i16, MVT::i1, Expand);
185  setTruncStoreAction(MVT::i8, MVT::i1, Expand);
186
187  // This is legal in NVPTX
188  setOperationAction(ISD::ConstantFP,         MVT::f64, Legal);
189  setOperationAction(ISD::ConstantFP,         MVT::f32, Legal);
190
191  // TRAP can be lowered to PTX trap
192  setOperationAction(ISD::TRAP,               MVT::Other, Legal);
193
194  // By default, CONCAT_VECTORS is implemented via store/load
195  // through stack. It is slow and uses local memory. We need
196  // to custom-lowering them.
197  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32  , Custom);
198  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32  , Custom);
199  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16  , Custom);
200  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8   , Custom);
201  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64  , Custom);
202  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64  , Custom);
203  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32  , Custom);
204  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32  , Custom);
205  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16  , Custom);
206  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8   , Custom);
207
208  // Expand vector int to float and float to int conversions
209  // - For SINT_TO_FP and UINT_TO_FP, the src type
210  //   (Node->getOperand(0).getValueType())
211  //   is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
212  //   the dest type (Node->getValueType(0)) is used.
213  //
214  //   See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
215  //   case, and
216  //   SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
217  //
218  //   That is why v4i32 or v2i32 are used here.
219  //
220  //   The expansion for vectors happens in VectorLegalizer::LegalizeOp()
221  //   (LegalizeVectorOps.cpp).
222  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
223  setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
224  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
225  setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
226  setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
227  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
228  setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
229  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
230
231  // Now deduce the information based on the above mentioned
232  // actions
233  computeRegisterProperties();
234}
235
236
237const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
238  switch (Opcode) {
239  default: return 0;
240  case NVPTXISD::CALL:            return "NVPTXISD::CALL";
241  case NVPTXISD::RET_FLAG:        return "NVPTXISD::RET_FLAG";
242  case NVPTXISD::Wrapper:         return "NVPTXISD::Wrapper";
243  case NVPTXISD::NVBuiltin:       return "NVPTXISD::NVBuiltin";
244  case NVPTXISD::DeclareParam:    return "NVPTXISD::DeclareParam";
245  case NVPTXISD::DeclareScalarParam:
246    return "NVPTXISD::DeclareScalarParam";
247  case NVPTXISD::DeclareRet:      return "NVPTXISD::DeclareRet";
248  case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
249  case NVPTXISD::PrintCall:       return "NVPTXISD::PrintCall";
250  case NVPTXISD::LoadParam:       return "NVPTXISD::LoadParam";
251  case NVPTXISD::StoreParam:      return "NVPTXISD::StoreParam";
252  case NVPTXISD::StoreParamS32:   return "NVPTXISD::StoreParamS32";
253  case NVPTXISD::StoreParamU32:   return "NVPTXISD::StoreParamU32";
254  case NVPTXISD::MoveToParam:     return "NVPTXISD::MoveToParam";
255  case NVPTXISD::CallArgBegin:    return "NVPTXISD::CallArgBegin";
256  case NVPTXISD::CallArg:         return "NVPTXISD::CallArg";
257  case NVPTXISD::LastCallArg:     return "NVPTXISD::LastCallArg";
258  case NVPTXISD::CallArgEnd:      return "NVPTXISD::CallArgEnd";
259  case NVPTXISD::CallVoid:        return "NVPTXISD::CallVoid";
260  case NVPTXISD::CallVal:         return "NVPTXISD::CallVal";
261  case NVPTXISD::CallSymbol:      return "NVPTXISD::CallSymbol";
262  case NVPTXISD::Prototype:       return "NVPTXISD::Prototype";
263  case NVPTXISD::MoveParam:       return "NVPTXISD::MoveParam";
264  case NVPTXISD::MoveRetval:      return "NVPTXISD::MoveRetval";
265  case NVPTXISD::MoveToRetval:    return "NVPTXISD::MoveToRetval";
266  case NVPTXISD::StoreRetval:     return "NVPTXISD::StoreRetval";
267  case NVPTXISD::PseudoUseParam:  return "NVPTXISD::PseudoUseParam";
268  case NVPTXISD::RETURN:          return "NVPTXISD::RETURN";
269  case NVPTXISD::CallSeqBegin:    return "NVPTXISD::CallSeqBegin";
270  case NVPTXISD::CallSeqEnd:      return "NVPTXISD::CallSeqEnd";
271  }
272}
273
274
275SDValue
276NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
277  DebugLoc dl = Op.getDebugLoc();
278  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
279  Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
280  return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
281}
282
283std::string NVPTXTargetLowering::getPrototype(Type *retTy,
284                                              const ArgListTy &Args,
285                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
286                                              unsigned retAlignment) const {
287
288  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
289
290  std::stringstream O;
291  O << "prototype_" << uniqueCallSite << " : .callprototype ";
292
293  if (retTy->getTypeID() == Type::VoidTyID)
294    O << "()";
295  else {
296    O << "(";
297    if (isABI) {
298      if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
299        unsigned size = 0;
300        if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
301          size = ITy->getBitWidth();
302          if (size < 32) size = 32;
303        }
304        else {
305          assert(retTy->isFloatingPointTy() &&
306                 "Floating point type expected here");
307          size = retTy->getPrimitiveSizeInBits();
308        }
309
310        O << ".param .b" << size << " _";
311      }
312      else if (isa<PointerType>(retTy))
313        O << ".param .b" << getPointerTy().getSizeInBits()
314        << " _";
315      else {
316        if ((retTy->getTypeID() == Type::StructTyID) ||
317            isa<VectorType>(retTy)) {
318          SmallVector<EVT, 16> vtparts;
319          ComputeValueVTs(*this, retTy, vtparts);
320          unsigned totalsz = 0;
321          for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
322            unsigned elems = 1;
323            EVT elemtype = vtparts[i];
324            if (vtparts[i].isVector()) {
325              elems = vtparts[i].getVectorNumElements();
326              elemtype = vtparts[i].getVectorElementType();
327            }
328            for (unsigned j=0, je=elems; j!=je; ++j) {
329              unsigned sz = elemtype.getSizeInBits();
330              if (elemtype.isInteger() && (sz < 8)) sz = 8;
331              totalsz += sz/8;
332            }
333          }
334          O << ".param .align "
335              << retAlignment
336              << " .b8 _["
337              << totalsz << "]";
338        }
339        else {
340          assert(false &&
341                 "Unknown return type");
342        }
343      }
344    }
345    else {
346      SmallVector<EVT, 16> vtparts;
347      ComputeValueVTs(*this, retTy, vtparts);
348      unsigned idx = 0;
349      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
350        unsigned elems = 1;
351        EVT elemtype = vtparts[i];
352        if (vtparts[i].isVector()) {
353          elems = vtparts[i].getVectorNumElements();
354          elemtype = vtparts[i].getVectorElementType();
355        }
356
357        for (unsigned j=0, je=elems; j!=je; ++j) {
358          unsigned sz = elemtype.getSizeInBits();
359          if (elemtype.isInteger() && (sz < 32)) sz = 32;
360          O << ".reg .b" << sz << " _";
361          if (j<je-1) O << ", ";
362          ++idx;
363        }
364        if (i < e-1)
365          O << ", ";
366      }
367    }
368    O << ") ";
369  }
370  O << "_ (";
371
372  bool first = true;
373  MVT thePointerTy = getPointerTy();
374
375  for (unsigned i=0,e=Args.size(); i!=e; ++i) {
376    const Type *Ty = Args[i].Ty;
377    if (!first) {
378      O << ", ";
379    }
380    first = false;
381
382    if (Outs[i].Flags.isByVal() == false) {
383      unsigned sz = 0;
384      if (isa<IntegerType>(Ty)) {
385        sz = cast<IntegerType>(Ty)->getBitWidth();
386        if (sz < 32) sz = 32;
387      }
388      else if (isa<PointerType>(Ty))
389        sz = thePointerTy.getSizeInBits();
390      else
391        sz = Ty->getPrimitiveSizeInBits();
392      if (isABI)
393        O << ".param .b" << sz << " ";
394      else
395        O << ".reg .b" << sz << " ";
396      O << "_";
397      continue;
398    }
399    const PointerType *PTy = dyn_cast<PointerType>(Ty);
400    assert(PTy &&
401           "Param with byval attribute should be a pointer type");
402    Type *ETy = PTy->getElementType();
403
404    if (isABI) {
405      unsigned align = Outs[i].Flags.getByValAlign();
406      unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
407      O << ".param .align " << align
408          << " .b8 ";
409      O << "_";
410      O << "[" << sz << "]";
411      continue;
412    }
413    else {
414      SmallVector<EVT, 16> vtparts;
415      ComputeValueVTs(*this, ETy, vtparts);
416      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
417        unsigned elems = 1;
418        EVT elemtype = vtparts[i];
419        if (vtparts[i].isVector()) {
420          elems = vtparts[i].getVectorNumElements();
421          elemtype = vtparts[i].getVectorElementType();
422        }
423
424        for (unsigned j=0,je=elems; j!=je; ++j) {
425          unsigned sz = elemtype.getSizeInBits();
426          if (elemtype.isInteger() && (sz < 32)) sz = 32;
427          O << ".reg .b" << sz << " ";
428          O << "_";
429          if (j<je-1) O << ", ";
430        }
431        if (i<e-1)
432          O << ", ";
433      }
434      continue;
435    }
436  }
437  O << ");";
438  return O.str();
439}
440
441
442SDValue
443NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
444                               SmallVectorImpl<SDValue> &InVals) const {
445  SelectionDAG &DAG                     = CLI.DAG;
446  DebugLoc &dl                          = CLI.DL;
447  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
448  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
449  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
450  SDValue Chain                         = CLI.Chain;
451  SDValue Callee                        = CLI.Callee;
452  bool &isTailCall                      = CLI.IsTailCall;
453  ArgListTy &Args                       = CLI.Args;
454  Type *retTy                           = CLI.RetTy;
455  ImmutableCallSite *CS                 = CLI.CS;
456
457  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
458
459  SDValue tempChain = Chain;
460  Chain = DAG.getCALLSEQ_START(Chain,
461                               DAG.getIntPtrConstant(uniqueCallSite, true));
462  SDValue InFlag = Chain.getValue(1);
463
464  assert((Outs.size() == Args.size()) &&
465         "Unexpected number of arguments to function call");
466  unsigned paramCount = 0;
467  // Declare the .params or .reg need to pass values
468  // to the function
469  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
470    EVT VT = Outs[i].VT;
471
472    if (Outs[i].Flags.isByVal() == false) {
473      // Plain scalar
474      // for ABI,    declare .param .b<size> .param<n>;
475      // for nonABI, declare .reg .b<size> .param<n>;
476      unsigned isReg = 1;
477      if (isABI)
478        isReg = 0;
479      unsigned sz = VT.getSizeInBits();
480      if (VT.isInteger() && (sz < 32)) sz = 32;
481      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
482      SDValue DeclareParamOps[] = { Chain,
483                                    DAG.getConstant(paramCount, MVT::i32),
484                                    DAG.getConstant(sz, MVT::i32),
485                                    DAG.getConstant(isReg, MVT::i32),
486                                    InFlag };
487      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
488                          DeclareParamOps, 5);
489      InFlag = Chain.getValue(1);
490      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
491      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
492                             DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };
493
494      unsigned opcode = NVPTXISD::StoreParam;
495      if (isReg)
496        opcode = NVPTXISD::MoveToParam;
497      else {
498        if (Outs[i].Flags.isZExt())
499          opcode = NVPTXISD::StoreParamU32;
500        else if (Outs[i].Flags.isSExt())
501          opcode = NVPTXISD::StoreParamS32;
502      }
503      Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);
504
505      InFlag = Chain.getValue(1);
506      ++paramCount;
507      continue;
508    }
509    // struct or vector
510    SmallVector<EVT, 16> vtparts;
511    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
512    assert(PTy &&
513           "Type of a byval parameter should be pointer");
514    ComputeValueVTs(*this, PTy->getElementType(), vtparts);
515
516    if (isABI) {
517      // declare .param .align 16 .b8 .param<n>[<size>];
518      unsigned sz = Outs[i].Flags.getByValSize();
519      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      // The ByValAlign in Outs[i].Flags is always set at this point, so we
      // don't need to worry about whether the alignment is natural or not.
      // See TargetLowering::LowerCallTo().
523      SDValue DeclareParamOps[] = { Chain,
524                       DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
525                                    DAG.getConstant(paramCount, MVT::i32),
526                                    DAG.getConstant(sz, MVT::i32),
527                                    InFlag };
528      Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
529                          DeclareParamOps, 5);
530      InFlag = Chain.getValue(1);
531      unsigned curOffset = 0;
532      for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
533        unsigned elems = 1;
534        EVT elemtype = vtparts[j];
535        if (vtparts[j].isVector()) {
536          elems = vtparts[j].getVectorNumElements();
537          elemtype = vtparts[j].getVectorElementType();
538        }
539        for (unsigned k=0,ke=elems; k!=ke; ++k) {
540          unsigned sz = elemtype.getSizeInBits();
541          if (elemtype.isInteger() && (sz < 8)) sz = 8;
542          SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
543                                        OutVals[i],
544                                        DAG.getConstant(curOffset,
545                                                        getPointerTy()));
546          SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
547                                MachinePointerInfo(), false, false, false, 0);
548          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
549          SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
550                                                            MVT::i32),
551                                           DAG.getConstant(curOffset, MVT::i32),
552                                                            theVal, InFlag };
553          Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
554                              CopyParamOps, 5);
555          InFlag = Chain.getValue(1);
556          curOffset += sz/8;
557        }
558      }
559      ++paramCount;
560      continue;
561    }
562    // Non-abi, struct or vector
563    // Declare a bunch or .reg .b<size> .param<n>
564    unsigned curOffset = 0;
565    for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
566      unsigned elems = 1;
567      EVT elemtype = vtparts[j];
568      if (vtparts[j].isVector()) {
569        elems = vtparts[j].getVectorNumElements();
570        elemtype = vtparts[j].getVectorElementType();
571      }
572      for (unsigned k=0,ke=elems; k!=ke; ++k) {
573        unsigned sz = elemtype.getSizeInBits();
574        if (elemtype.isInteger() && (sz < 32)) sz = 32;
575        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
576        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
577                                                             MVT::i32),
578                                                  DAG.getConstant(sz, MVT::i32),
579                                                   DAG.getConstant(1, MVT::i32),
580                                                             InFlag };
581        Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
582                            DeclareParamOps, 5);
583        InFlag = Chain.getValue(1);
584        SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
585                                      DAG.getConstant(curOffset,
586                                                      getPointerTy()));
587        SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
588                                  MachinePointerInfo(), false, false, false, 0);
589        SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
590        SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
591                                   DAG.getConstant(0, MVT::i32), theVal,
592                                   InFlag };
593        Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
594                            CopyParamOps, 5);
595        InFlag = Chain.getValue(1);
596        ++paramCount;
597      }
598    }
599  }
600
601  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
602  unsigned retAlignment = 0;
603
604  // Handle Result
605  unsigned retCount = 0;
606  if (Ins.size() > 0) {
607    SmallVector<EVT, 16> resvtparts;
608    ComputeValueVTs(*this, retTy, resvtparts);
609
610    // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
611    // individual .reg .b<size> func_retval<0..> for non ABI
612    unsigned resultsz = 0;
613    for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
614      unsigned elems = 1;
615      EVT elemtype = resvtparts[i];
616      if (resvtparts[i].isVector()) {
617        elems = resvtparts[i].getVectorNumElements();
618        elemtype = resvtparts[i].getVectorElementType();
619      }
620      for (unsigned j=0,je=elems; j!=je; ++j) {
621        unsigned sz = elemtype.getSizeInBits();
622        if (isABI == false) {
623          if (elemtype.isInteger() && (sz < 32)) sz = 32;
624        }
625        else {
626          if (elemtype.isInteger() && (sz < 8)) sz = 8;
627        }
628        if (isABI == false) {
629          SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
630          SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
631                                      DAG.getConstant(sz, MVT::i32),
632                                      DAG.getConstant(retCount, MVT::i32),
633                                      InFlag };
634          Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
635                              DeclareRetOps, 5);
636          InFlag = Chain.getValue(1);
637          ++retCount;
638        }
639        resultsz += sz;
640      }
641    }
642    if (isABI) {
643      if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
644          retTy->isPointerTy() ) {
645        // Scalar needs to be at least 32bit wide
646        if (resultsz < 32)
647          resultsz = 32;
648        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
649        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
650                                    DAG.getConstant(resultsz, MVT::i32),
651                                    DAG.getConstant(0, MVT::i32), InFlag };
652        Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
653                            DeclareRetOps, 5);
654        InFlag = Chain.getValue(1);
655      }
656      else {
657        if (Func) { // direct call
658          if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
659            retAlignment = getDataLayout()->getABITypeAlignment(retTy);
660        } else { // indirect call
661          const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
662          if (!llvm::getAlign(*CallI, 0, retAlignment))
663            retAlignment = getDataLayout()->getABITypeAlignment(retTy);
664        }
665        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
666        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
667                                                           MVT::i32),
668                                          DAG.getConstant(resultsz/8, MVT::i32),
669                                         DAG.getConstant(0, MVT::i32), InFlag };
670        Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
671                            DeclareRetOps, 5);
672        InFlag = Chain.getValue(1);
673      }
674    }
675  }
676
677  if (!Func) {
    // This is the indirect function call case: PTX requires a prototype of
    // the form
    //   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _);
    // to be emitted, and the label has to be used as the last argument of the
    // call instruction.
    // The prototype is embedded in a string and passed as the operand of an
    // INLINEASM SDNode.
685    SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
686    std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
687    const char *asmstr = nvTM->getManagedStrPool()->
688        getManagedString(proto_string.c_str())->c_str();
689    SDValue InlineAsmOps[] = { Chain,
690                               DAG.getTargetExternalSymbol(asmstr,
691                                                           getPointerTy()),
692                                                           DAG.getMDNode(0),
693                                   DAG.getTargetConstant(0, MVT::i32), InFlag };
694    Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
695    InFlag = Chain.getValue(1);
696  }
697  // Op to just print "call"
698  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
699  SDValue PrintCallOps[] = { Chain,
700                             DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
701                                 : retCount, MVT::i32),
702                                   InFlag };
703  Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
704      PrintCallVTs, PrintCallOps, 3);
705  InFlag = Chain.getValue(1);
706
707  // Ops to print out the function name
708  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
709  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
710  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
711  InFlag = Chain.getValue(1);
712
713  // Ops to print out the param list
714  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
715  SDValue CallArgBeginOps[] = { Chain, InFlag };
716  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
717                      CallArgBeginOps, 2);
718  InFlag = Chain.getValue(1);
719
720  for (unsigned i=0, e=paramCount; i!=e; ++i) {
721    unsigned opcode;
722    if (i==(e-1))
723      opcode = NVPTXISD::LastCallArg;
724    else
725      opcode = NVPTXISD::CallArg;
726    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
727    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
728                             DAG.getConstant(i, MVT::i32),
729                             InFlag };
730    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
731    InFlag = Chain.getValue(1);
732  }
733  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
734  SDValue CallArgEndOps[] = { Chain,
735                              DAG.getConstant(Func ? 1 : 0, MVT::i32),
736                              InFlag };
737  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
738                      3);
739  InFlag = Chain.getValue(1);
740
741  if (!Func) {
742    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
743    SDValue PrototypeOps[] = { Chain,
744                               DAG.getConstant(uniqueCallSite, MVT::i32),
745                               InFlag };
746    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
747    InFlag = Chain.getValue(1);
748  }
749
750  // Generate loads from param memory/moves from registers for result
751  if (Ins.size() > 0) {
752    if (isABI) {
753      unsigned resoffset = 0;
754      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
755        unsigned sz = Ins[i].VT.getSizeInBits();
756        if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
757        std::vector<EVT> LoadRetVTs;
758        LoadRetVTs.push_back(Ins[i].VT);
759        LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue);
760        std::vector<SDValue> LoadRetOps;
761        LoadRetOps.push_back(Chain);
762        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
763        LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
764        LoadRetOps.push_back(InFlag);
765        SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
766                                     &LoadRetOps[0], LoadRetOps.size());
767        Chain = retval.getValue(1);
768        InFlag = retval.getValue(2);
769        InVals.push_back(retval);
770        resoffset += sz/8;
771      }
772    }
773    else {
774      SmallVector<EVT, 16> resvtparts;
775      ComputeValueVTs(*this, retTy, resvtparts);
776
777      assert(Ins.size() == resvtparts.size() &&
778             "Unexpected number of return values in non-ABI case");
779      unsigned paramNum = 0;
780      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
781        assert(EVT(Ins[i].VT) == resvtparts[i] &&
782               "Unexpected EVT type in non-ABI case");
783        unsigned numelems = 1;
784        EVT elemtype = Ins[i].VT;
785        if (Ins[i].VT.isVector()) {
786          numelems = Ins[i].VT.getVectorNumElements();
787          elemtype = Ins[i].VT.getVectorElementType();
788        }
789        std::vector<SDValue> tempRetVals;
790        for (unsigned j=0; j<numelems; ++j) {
791          std::vector<EVT> MoveRetVTs;
792          MoveRetVTs.push_back(elemtype);
793          MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue);
794          std::vector<SDValue> MoveRetOps;
795          MoveRetOps.push_back(Chain);
796          MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
797          MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
798          MoveRetOps.push_back(InFlag);
799          SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
800                                       &MoveRetOps[0], MoveRetOps.size());
801          Chain = retval.getValue(1);
802          InFlag = retval.getValue(2);
803          tempRetVals.push_back(retval);
804          ++paramNum;
805        }
806        if (Ins[i].VT.isVector())
807          InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
808                                       &tempRetVals[0], tempRetVals.size()));
809        else
810          InVals.push_back(tempRetVals[0]);
811      }
812    }
813  }
814  Chain = DAG.getCALLSEQ_END(Chain,
815                             DAG.getIntPtrConstant(uniqueCallSite, true),
816                             DAG.getIntPtrConstant(uniqueCallSite+1, true),
817                             InFlag);
818  uniqueCallSite++;
819
820  // set isTailCall to false for now, until we figure out how to express
821  // tail call optimization in PTX
822  isTailCall = false;
823  return Chain;
824}
825
826// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
827// (see LegalizeDAG.cpp). This is slow and uses local memory.
828// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
829SDValue NVPTXTargetLowering::
830LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
831  SDNode *Node = Op.getNode();
832  DebugLoc dl = Node->getDebugLoc();
833  SmallVector<SDValue, 8> Ops;
834  unsigned NumOperands = Node->getNumOperands();
835  for (unsigned i=0; i < NumOperands; ++i) {
836    SDValue SubOp = Node->getOperand(i);
837    EVT VVT = SubOp.getNode()->getValueType(0);
838    EVT EltVT = VVT.getVectorElementType();
839    unsigned NumSubElem = VVT.getVectorNumElements();
840    for (unsigned j=0; j < NumSubElem; ++j) {
841      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
842                                DAG.getIntPtrConstant(j)));
843    }
844  }
845  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
846                     &Ops[0], Ops.size());
847}
848
849SDValue NVPTXTargetLowering::
850LowerOperation(SDValue Op, SelectionDAG &DAG) const {
851  switch (Op.getOpcode()) {
852  case ISD::RETURNADDR: return SDValue();
853  case ISD::FRAMEADDR:  return SDValue();
854  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
855  case ISD::INTRINSIC_W_CHAIN: return Op;
856  case ISD::BUILD_VECTOR:
857  case ISD::EXTRACT_SUBVECTOR:
858    return Op;
859  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
860  case ISD::STORE: return LowerSTORE(Op, DAG);
861  case ISD::LOAD: return LowerLOAD(Op, DAG);
862  default:
863    llvm_unreachable("Custom lowering not defined for operation");
864  }
865}
866
867
868// v = ld i1* addr
869//   =>
870// v1 = ld i8* addr
871// v = trunc v1 to i1
872SDValue NVPTXTargetLowering::
873LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
874  SDNode *Node = Op.getNode();
875  LoadSDNode *LD = cast<LoadSDNode>(Node);
876  DebugLoc dl = Node->getDebugLoc();
877  assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ;
878  assert(Node->getValueType(0) == MVT::i1 &&
879         "Custom lowering for i1 load only");
880  SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
881                              LD->getPointerInfo(),
882                              LD->isVolatile(), LD->isNonTemporal(),
883                              LD->isInvariant(),
884                              LD->getAlignment());
885  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
886  // The legalizer (the caller) is expecting two values from the legalized
887  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
888  // in LegalizeDAG.cpp which also uses MergeValues.
889  SDValue Ops[] = {result, LD->getChain()};
890  return DAG.getMergeValues(Ops, 2, dl);
891}
892
893// st i1 v, addr
894//    =>
895// v1 = zxt v to i8
896// st i8, addr
897SDValue NVPTXTargetLowering::
898LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
899  SDNode *Node = Op.getNode();
900  DebugLoc dl = Node->getDebugLoc();
901  StoreSDNode *ST = cast<StoreSDNode>(Node);
902  SDValue Tmp1 = ST->getChain();
903  SDValue Tmp2 = ST->getBasePtr();
904  SDValue Tmp3 = ST->getValue();
905  assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
906  unsigned Alignment = ST->getAlignment();
907  bool isVolatile = ST->isVolatile();
908  bool isNonTemporal = ST->isNonTemporal();
909  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
910                     MVT::i8, Tmp3);
911  SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
912                                ST->getPointerInfo(), isVolatile,
913                                isNonTemporal, Alignment);
914  return Result;
915}
916
917
918SDValue
919NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
920                                EVT v) const {
921  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
922  std::stringstream suffix;
923  suffix << idx;
924  *name += suffix.str();
925  return DAG.getTargetExternalSymbol(name->c_str(), v);
926}
927
928SDValue
929NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
930  return getExtSymb(DAG, ".PARAM", idx, v);
931}
932
933SDValue
934NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
935  return getExtSymb(DAG, ".HLPPARAM", idx);
936}
937
938// Check to see if the kernel argument is image*_t or sampler_t
939
940bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
941  static const char *const specialTypes[] = {
942                                             "struct._image2d_t",
943                                             "struct._image3d_t",
944                                             "struct._sampler_t"
945  };
946
947  const Type *Ty = arg->getType();
948  const PointerType *PTy = dyn_cast<PointerType>(Ty);
949
950  if (!PTy)
951    return false;
952
953  if (!context)
954    return false;
955
956  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
957  const std::string TypeName = STy ? STy->getName() : "";
958
959  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
960    if (TypeName == specialTypes[i])
961      return true;
962
963  return false;
964}
965
966SDValue
967NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
968                                        CallingConv::ID CallConv, bool isVarArg,
969                                      const SmallVectorImpl<ISD::InputArg> &Ins,
970                                          DebugLoc dl, SelectionDAG &DAG,
971                                       SmallVectorImpl<SDValue> &InVals) const {
972  MachineFunction &MF = DAG.getMachineFunction();
973  const DataLayout *TD = getDataLayout();
974
975  const Function *F = MF.getFunction();
976  const AttrListPtr &PAL = F->getAttributes();
977
978  SDValue Root = DAG.getRoot();
979  std::vector<SDValue> OutChains;
980
981  bool isKernel = llvm::isKernelFunction(*F);
982  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
983
984  std::vector<Type *> argTypes;
985  std::vector<const Argument *> theArgs;
986  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
987      I != E; ++I) {
988    theArgs.push_back(I);
989    argTypes.push_back(I->getType());
990  }
991  assert(argTypes.size() == Ins.size() &&
992         "Ins types and function types did not match");
993
994  int idx = 0;
995  for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
996    Type *Ty = argTypes[i];
997    EVT ObjectVT = getValueType(Ty);
998    assert(ObjectVT == Ins[i].VT &&
999           "Ins type did not match function type");
1000
1001    // If the kernel argument is image*_t or sampler_t, convert it to
1002    // a i32 constant holding the parameter position. This can later
1003    // matched in the AsmPrinter to output the correct mangled name.
1004    if (isImageOrSamplerVal(theArgs[i],
1005                           (theArgs[i]->getParent() ?
1006                               theArgs[i]->getParent()->getParent() : 0))) {
1007      assert(isKernel && "Only kernels can have image/sampler params");
1008      InVals.push_back(DAG.getConstant(i+1, MVT::i32));
1009      continue;
1010    }
1011
1012    if (theArgs[i]->use_empty()) {
1013      // argument is dead
1014      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
1015      continue;
1016    }
1017
1018    // In the following cases, assign a node order of "idx+1"
1019    // to newly created nodes. The SDNOdes for params have to
1020    // appear in the same order as their order of appearance
1021    // in the original function. "idx+1" holds that order.
1022    if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) {
1023      // A plain scalar.
1024      if (isABI || isKernel) {
1025        // If ABI, load from the param symbol
1026        SDValue Arg = getParamSymbol(DAG, idx);
1027        Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT(
1028            F->getContext()),
1029            llvm::ADDRESS_SPACE_PARAM));
1030        SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
1031                                MachinePointerInfo(srcValue), false, false,
1032                                false,
1033                                TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
1034                                  F->getContext())));
1035        if (p.getNode())
1036          DAG.AssignOrdering(p.getNode(), idx+1);
1037        InVals.push_back(p);
1038      }
1039      else {
1040        // If no ABI, just move the param symbol
1041        SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
1042        SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
1043        if (p.getNode())
1044          DAG.AssignOrdering(p.getNode(), idx+1);
1045        InVals.push_back(p);
1046      }
1047      continue;
1048    }
1049
1050    // Param has ByVal attribute
1051    if (isABI || isKernel) {
1052      // Return MoveParam(param symbol).
1053      // Ideally, the param symbol can be returned directly,
1054      // but when SDNode builder decides to use it in a CopyToReg(),
1055      // machine instruction fails because TargetExternalSymbol
1056      // (not lowered) is target dependent, and CopyToReg assumes
1057      // the source is lowered.
1058      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
1059      SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
1060      if (p.getNode())
1061        DAG.AssignOrdering(p.getNode(), idx+1);
1062      if (isKernel)
1063        InVals.push_back(p);
1064      else {
1065        SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
1066                    DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
1067                                 p);
1068        InVals.push_back(p2);
1069      }
1070    } else {
1071      // Have to move a set of param symbols to registers and
1072      // store them locally and return the local pointer in InVals
1073      const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
1074      assert(elemPtrType &&
1075             "Byval parameter should be a pointer type");
1076      Type *elemType = elemPtrType->getElementType();
1077      // Compute the constituent parts
1078      SmallVector<EVT, 16> vtparts;
1079      SmallVector<uint64_t, 16> offsets;
1080      ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
1081      unsigned totalsize = 0;
1082      for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
1083        totalsize += vtparts[j].getStoreSizeInBits();
1084      SDValue localcopy =  DAG.getFrameIndex(MF.getFrameInfo()->
1085                                      CreateStackObject(totalsize/8, 16, false),
1086                                             getPointerTy());
1087      unsigned sizesofar = 0;
1088      std::vector<SDValue> theChains;
1089      for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
1090        unsigned numElems = 1;
1091        if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
1092        for (unsigned k=0, ke=numElems; k!=ke; ++k) {
1093          EVT tmpvt = vtparts[j];
1094          if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
1095          SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
1096                                    getParamSymbol(DAG, idx, tmpvt));
1097          SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
1098                                    DAG.getConstant(sizesofar, getPointerTy()));
1099          theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
1100                                        MachinePointerInfo(), false, false, 0));
1101          sizesofar += tmpvt.getStoreSizeInBits()/8;
1102          ++idx;
1103        }
1104      }
1105      --idx;
1106      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
1107                          theChains.size());
1108      InVals.push_back(localcopy);
1109    }
1110  }
1111
1112  // Clang will check explicit VarArg and issue error if any. However, Clang
1113  // will let code with
1114  // implicit var arg like f() pass.
1115  // We treat this case as if the arg list is empty.
1116  //if (F.isVarArg()) {
1117  // assert(0 && "VarArg not supported yet!");
1118  //}
1119
1120  if (!OutChains.empty())
1121    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1122                            &OutChains[0], OutChains.size()));
1123
1124  return Chain;
1125}
1126
1127SDValue
1128NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1129                                 bool isVarArg,
1130                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
1131                                 const SmallVectorImpl<SDValue> &OutVals,
1132                                 DebugLoc dl, SelectionDAG &DAG) const {
1133
1134  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
1135
1136  unsigned sizesofar = 0;
1137  unsigned idx = 0;
1138  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
1139    SDValue theVal = OutVals[i];
1140    EVT theValType = theVal.getValueType();
1141    unsigned numElems = 1;
1142    if (theValType.isVector()) numElems = theValType.getVectorNumElements();
1143    for (unsigned j=0,je=numElems; j!=je; ++j) {
1144      SDValue tmpval = theVal;
1145      if (theValType.isVector())
1146        tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
1147                             theValType.getVectorElementType(),
1148                             tmpval, DAG.getIntPtrConstant(j));
1149      Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval,
1150          dl, MVT::Other,
1151          Chain,
1152          DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
1153          tmpval);
1154      if (theValType.isVector())
1155        sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
1156      else
1157        sizesofar += theValType.getStoreSizeInBits()/8;
1158      ++idx;
1159    }
1160  }
1161
1162  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
1163}
1164
1165void
1166NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
1167                                                  std::string &Constraint,
1168                                                  std::vector<SDValue> &Ops,
1169                                                  SelectionDAG &DAG) const
1170{
1171  if (Constraint.length() > 1)
1172    return;
1173  else
1174    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1175}
1176
1177// NVPTX suuport vector of legal types of any length in Intrinsics because the
1178// NVPTX specific type legalizer
1179// will legalize them to the PTX supported length.
1180bool
1181NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
1182  if (isTypeLegal(VT))
1183    return true;
1184  if (VT.isVector()) {
1185    MVT eVT = VT.getVectorElementType();
1186    if (isTypeLegal(eVT))
1187      return true;
1188  }
1189  return false;
1190}
1191
1192
1193// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
1194// TgtMemIntrinsic
1195// because we need the information that is only available in the "Value" type
1196// of destination
1197// pointer. In particular, the address space information.
1198bool
1199NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
1200                                        unsigned Intrinsic) const {
1201  switch (Intrinsic) {
1202  default:
1203    return false;
1204
1205  case Intrinsic::nvvm_atomic_load_add_f32:
1206    Info.opc = ISD::INTRINSIC_W_CHAIN;
1207    Info.memVT = MVT::f32;
1208    Info.ptrVal = I.getArgOperand(0);
1209    Info.offset = 0;
1210    Info.vol = 0;
1211    Info.readMem = true;
1212    Info.writeMem = true;
1213    Info.align = 0;
1214    return true;
1215
1216  case Intrinsic::nvvm_atomic_load_inc_32:
1217  case Intrinsic::nvvm_atomic_load_dec_32:
1218    Info.opc = ISD::INTRINSIC_W_CHAIN;
1219    Info.memVT = MVT::i32;
1220    Info.ptrVal = I.getArgOperand(0);
1221    Info.offset = 0;
1222    Info.vol = 0;
1223    Info.readMem = true;
1224    Info.writeMem = true;
1225    Info.align = 0;
1226    return true;
1227
1228  case Intrinsic::nvvm_ldu_global_i:
1229  case Intrinsic::nvvm_ldu_global_f:
1230  case Intrinsic::nvvm_ldu_global_p:
1231
1232    Info.opc = ISD::INTRINSIC_W_CHAIN;
1233    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
1234      Info.memVT = MVT::i32;
1235    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
1236      Info.memVT = getPointerTy();
1237    else
1238      Info.memVT = MVT::f32;
1239    Info.ptrVal = I.getArgOperand(0);
1240    Info.offset = 0;
1241    Info.vol = 0;
1242    Info.readMem = true;
1243    Info.writeMem = false;
1244    Info.align = 0;
1245    return true;
1246
1247  }
1248  return false;
1249}
1250
1251/// isLegalAddressingMode - Return true if the addressing mode represented
1252/// by AM is legal for this target, for a load/store of the specified type.
1253/// Used to guide target specific optimizations, like loop strength reduction
1254/// (LoopStrengthReduce.cpp) and memory optimization for address mode
1255/// (CodeGenPrepare.cpp)
1256bool
1257NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
1258                                           Type *Ty) const {
1259
1260  // AddrMode - This represents an addressing mode of:
1261  //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
1262  //
1263  // The legal address modes are
1264  // - [avar]
1265  // - [areg]
1266  // - [areg+immoff]
1267  // - [immAddr]
1268
1269  if (AM.BaseGV) {
1270    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
1271      return false;
1272    return true;
1273  }
1274
1275  switch (AM.Scale) {
1276  case 0:  // "r", "r+i" or "i" is allowed
1277    break;
1278  case 1:
1279    if (AM.HasBaseReg)  // "r+r+i" or "r+r" is not allowed.
1280      return false;
1281    // Otherwise we have r+i.
1282    break;
1283  default:
1284    // No scale > 1 is allowed
1285    return false;
1286  }
1287  return true;
1288}
1289
1290//===----------------------------------------------------------------------===//
1291//                         NVPTX Inline Assembly Support
1292//===----------------------------------------------------------------------===//
1293
1294/// getConstraintType - Given a constraint letter, return the type of
1295/// constraint it is for this target.
1296NVPTXTargetLowering::ConstraintType
1297NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
1298  if (Constraint.size() == 1) {
1299    switch (Constraint[0]) {
1300    default:
1301      break;
1302    case 'r':
1303    case 'h':
1304    case 'c':
1305    case 'l':
1306    case 'f':
1307    case 'd':
1308    case '0':
1309    case 'N':
1310      return C_RegisterClass;
1311    }
1312  }
1313  return TargetLowering::getConstraintType(Constraint);
1314}
1315
1316
1317std::pair<unsigned, const TargetRegisterClass*>
1318NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
1319                                                  EVT VT) const {
1320  if (Constraint.size() == 1) {
1321    switch (Constraint[0]) {
1322    case 'c':
1323      return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
1324    case 'h':
1325      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
1326    case 'r':
1327      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
1328    case 'l':
1329    case 'N':
1330      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
1331    case 'f':
1332      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
1333    case 'd':
1334      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
1335    }
1336  }
1337  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
1338}
1339
1340
1341
1342/// getFunctionAlignment - Return the Log2 alignment of this function.
1343unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
1344  return 4;
1345}
1346