NVPTXISelDAGToDAG.cpp revision 249423
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//
1368651Skris
1468651Skris#include "NVPTXISelDAGToDAG.h"
1568651Skris#include "llvm/IR/GlobalValue.h"
1668651Skris#include "llvm/IR/Instructions.h"
1768651Skris#include "llvm/Support/CommandLine.h"
1868651Skris#include "llvm/Support/Debug.h"
1968651Skris#include "llvm/Support/ErrorHandling.h"
2076866Skris#include "llvm/Support/raw_ostream.h"
2168651Skris#include "llvm/Target/TargetIntrinsicInfo.h"
2268651Skris
23120631Snectar#undef DEBUG_TYPE
24120631Snectar#define DEBUG_TYPE "nvptx-isel"
25120631Snectar
26120631Snectarusing namespace llvm;
27120631Snectar
28120631Snectarstatic cl::opt<bool> UseFMADInstruction(
29120631Snectar    "nvptx-mad-enable", cl::ZeroOrMore,
30120631Snectar    cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
31120631Snectar    cl::init(false));
3268651Skris
3368651Skrisstatic cl::opt<int>
3468651SkrisFMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
3568651Skris                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
3689837Skris                          " 1: do it  2: do it aggressively"),
3789837Skris                 cl::init(2));
38120631Snectar
39120631Snectarstatic cl::opt<int> UsePrecDivF32(
4068651Skris    "nvptx-prec-divf32", cl::ZeroOrMore,
4168651Skris    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
42             " IEEE Compliant F32 div.rnd if avaiable."),
43    cl::init(2));
44
45/// createNVPTXISelDag - This pass converts a legalized DAG into a
46/// NVPTX-specific DAG, ready for instruction scheduling.
47FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
48                                       llvm::CodeGenOpt::Level OptLevel) {
49  return new NVPTXDAGToDAGISel(TM, OptLevel);
50}
51
52NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
53                                     CodeGenOpt::Level OptLevel)
54    : SelectionDAGISel(tm, OptLevel),
55      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
56  // Always do fma.f32 fpcontract if the target supports the instruction.
57  // Always do fma.f64 fpcontract if the target supports the instruction.
58  // Do mad.f32 is nvptx-mad-enable is specified and the target does not
59  // support fma.f32.
60
61  doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
62  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
63  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
64  doFMAF32AGG =
65      (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
66  doFMAF64AGG =
67      (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
68
69  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
70
71  UseF32FTZ = false;
72
73  doMulWide = (OptLevel > 0);
74
75  // Decide how to translate f32 div
76  do_DIVF32_PREC = UsePrecDivF32;
77  // sm less than sm_20 does not support div.rnd. Use div.full.
78  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
79    do_DIVF32_PREC = 1;
80
81}
82
83/// Select - Select instructions not customized! Used for
84/// expanded, promoted and normal instructions.
85SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
86
87  if (N->isMachineOpcode())
88    return NULL; // Already selected.
89
90  SDNode *ResNode = NULL;
91  switch (N->getOpcode()) {
92  case ISD::LOAD:
93    ResNode = SelectLoad(N);
94    break;
95  case ISD::STORE:
96    ResNode = SelectStore(N);
97    break;
98  case NVPTXISD::LoadV2:
99  case NVPTXISD::LoadV4:
100    ResNode = SelectLoadVector(N);
101    break;
102  case NVPTXISD::LDGV2:
103  case NVPTXISD::LDGV4:
104  case NVPTXISD::LDUV2:
105  case NVPTXISD::LDUV4:
106    ResNode = SelectLDGLDUVector(N);
107    break;
108  case NVPTXISD::StoreV2:
109  case NVPTXISD::StoreV4:
110    ResNode = SelectStoreVector(N);
111    break;
112  default:
113    break;
114  }
115  if (ResNode)
116    return ResNode;
117  return SelectCode(N);
118}
119
120static unsigned int getCodeAddrSpace(MemSDNode *N,
121                                     const NVPTXSubtarget &Subtarget) {
122  const Value *Src = N->getSrcValue();
123  if (!Src)
124    return NVPTX::PTXLdStInstCode::LOCAL;
125
126  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
127    switch (PT->getAddressSpace()) {
128    case llvm::ADDRESS_SPACE_LOCAL:
129      return NVPTX::PTXLdStInstCode::LOCAL;
130    case llvm::ADDRESS_SPACE_GLOBAL:
131      return NVPTX::PTXLdStInstCode::GLOBAL;
132    case llvm::ADDRESS_SPACE_SHARED:
133      return NVPTX::PTXLdStInstCode::SHARED;
134    case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
135      return NVPTX::PTXLdStInstCode::CONSTANT;
136    case llvm::ADDRESS_SPACE_GENERIC:
137      return NVPTX::PTXLdStInstCode::GENERIC;
138    case llvm::ADDRESS_SPACE_PARAM:
139      return NVPTX::PTXLdStInstCode::PARAM;
140    case llvm::ADDRESS_SPACE_CONST:
141      // If the arch supports generic address space, translate it to GLOBAL
142      // for correctness.
143      // If the arch does not support generic address space, then the arch
144      // does not really support ADDRESS_SPACE_CONST, translate it to
145      // to CONSTANT for better performance.
146      if (Subtarget.hasGenericLdSt())
147        return NVPTX::PTXLdStInstCode::GLOBAL;
148      else
149        return NVPTX::PTXLdStInstCode::CONSTANT;
150    default:
151      break;
152    }
153  }
154  return NVPTX::PTXLdStInstCode::LOCAL;
155}
156
157SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
158  DebugLoc dl = N->getDebugLoc();
159  LoadSDNode *LD = cast<LoadSDNode>(N);
160  EVT LoadedVT = LD->getMemoryVT();
161  SDNode *NVPTXLD = NULL;
162
163  // do not support pre/post inc/dec
164  if (LD->isIndexed())
165    return NULL;
166
167  if (!LoadedVT.isSimple())
168    return NULL;
169
170  // Address Space Setting
171  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
172
173  // Volatile Setting
174  // - .volatile is only availalble for .global and .shared
175  bool isVolatile = LD->isVolatile();
176  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
177      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
178      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
179    isVolatile = false;
180
181  // Vector Setting
182  MVT SimpleVT = LoadedVT.getSimpleVT();
183  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
184  if (SimpleVT.isVector()) {
185    unsigned num = SimpleVT.getVectorNumElements();
186    if (num == 2)
187      vecType = NVPTX::PTXLdStInstCode::V2;
188    else if (num == 4)
189      vecType = NVPTX::PTXLdStInstCode::V4;
190    else
191      return NULL;
192  }
193
194  // Type Setting: fromType + fromTypeWidth
195  //
196  // Sign   : ISD::SEXTLOAD
197  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
198  //          type is integer
199  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
200  MVT ScalarVT = SimpleVT.getScalarType();
201  unsigned fromTypeWidth = ScalarVT.getSizeInBits();
202  unsigned int fromType;
203  if ((LD->getExtensionType() == ISD::SEXTLOAD))
204    fromType = NVPTX::PTXLdStInstCode::Signed;
205  else if (ScalarVT.isFloatingPoint())
206    fromType = NVPTX::PTXLdStInstCode::Float;
207  else
208    fromType = NVPTX::PTXLdStInstCode::Unsigned;
209
210  // Create the machine instruction DAG
211  SDValue Chain = N->getOperand(0);
212  SDValue N1 = N->getOperand(1);
213  SDValue Addr;
214  SDValue Offset, Base;
215  unsigned Opcode;
216  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
217
218  if (SelectDirectAddr(N1, Addr)) {
219    switch (TargetVT) {
220    case MVT::i8:
221      Opcode = NVPTX::LD_i8_avar;
222      break;
223    case MVT::i16:
224      Opcode = NVPTX::LD_i16_avar;
225      break;
226    case MVT::i32:
227      Opcode = NVPTX::LD_i32_avar;
228      break;
229    case MVT::i64:
230      Opcode = NVPTX::LD_i64_avar;
231      break;
232    case MVT::f32:
233      Opcode = NVPTX::LD_f32_avar;
234      break;
235    case MVT::f64:
236      Opcode = NVPTX::LD_f64_avar;
237      break;
238    default:
239      return NULL;
240    }
241    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
242                      getI32Imm(vecType), getI32Imm(fromType),
243                      getI32Imm(fromTypeWidth), Addr, Chain };
244    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
245  } else if (Subtarget.is64Bit()
246                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
247                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
248    switch (TargetVT) {
249    case MVT::i8:
250      Opcode = NVPTX::LD_i8_asi;
251      break;
252    case MVT::i16:
253      Opcode = NVPTX::LD_i16_asi;
254      break;
255    case MVT::i32:
256      Opcode = NVPTX::LD_i32_asi;
257      break;
258    case MVT::i64:
259      Opcode = NVPTX::LD_i64_asi;
260      break;
261    case MVT::f32:
262      Opcode = NVPTX::LD_f32_asi;
263      break;
264    case MVT::f64:
265      Opcode = NVPTX::LD_f64_asi;
266      break;
267    default:
268      return NULL;
269    }
270    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
271                      getI32Imm(vecType), getI32Imm(fromType),
272                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
273    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
274  } else if (Subtarget.is64Bit()
275                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
276                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
277    if (Subtarget.is64Bit()) {
278      switch (TargetVT) {
279      case MVT::i8:
280        Opcode = NVPTX::LD_i8_ari_64;
281        break;
282      case MVT::i16:
283        Opcode = NVPTX::LD_i16_ari_64;
284        break;
285      case MVT::i32:
286        Opcode = NVPTX::LD_i32_ari_64;
287        break;
288      case MVT::i64:
289        Opcode = NVPTX::LD_i64_ari_64;
290        break;
291      case MVT::f32:
292        Opcode = NVPTX::LD_f32_ari_64;
293        break;
294      case MVT::f64:
295        Opcode = NVPTX::LD_f64_ari_64;
296        break;
297      default:
298        return NULL;
299      }
300    } else {
301      switch (TargetVT) {
302      case MVT::i8:
303        Opcode = NVPTX::LD_i8_ari;
304        break;
305      case MVT::i16:
306        Opcode = NVPTX::LD_i16_ari;
307        break;
308      case MVT::i32:
309        Opcode = NVPTX::LD_i32_ari;
310        break;
311      case MVT::i64:
312        Opcode = NVPTX::LD_i64_ari;
313        break;
314      case MVT::f32:
315        Opcode = NVPTX::LD_f32_ari;
316        break;
317      case MVT::f64:
318        Opcode = NVPTX::LD_f64_ari;
319        break;
320      default:
321        return NULL;
322      }
323    }
324    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
325                      getI32Imm(vecType), getI32Imm(fromType),
326                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
327    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
328  } else {
329    if (Subtarget.is64Bit()) {
330      switch (TargetVT) {
331      case MVT::i8:
332        Opcode = NVPTX::LD_i8_areg_64;
333        break;
334      case MVT::i16:
335        Opcode = NVPTX::LD_i16_areg_64;
336        break;
337      case MVT::i32:
338        Opcode = NVPTX::LD_i32_areg_64;
339        break;
340      case MVT::i64:
341        Opcode = NVPTX::LD_i64_areg_64;
342        break;
343      case MVT::f32:
344        Opcode = NVPTX::LD_f32_areg_64;
345        break;
346      case MVT::f64:
347        Opcode = NVPTX::LD_f64_areg_64;
348        break;
349      default:
350        return NULL;
351      }
352    } else {
353      switch (TargetVT) {
354      case MVT::i8:
355        Opcode = NVPTX::LD_i8_areg;
356        break;
357      case MVT::i16:
358        Opcode = NVPTX::LD_i16_areg;
359        break;
360      case MVT::i32:
361        Opcode = NVPTX::LD_i32_areg;
362        break;
363      case MVT::i64:
364        Opcode = NVPTX::LD_i64_areg;
365        break;
366      case MVT::f32:
367        Opcode = NVPTX::LD_f32_areg;
368        break;
369      case MVT::f64:
370        Opcode = NVPTX::LD_f64_areg;
371        break;
372      default:
373        return NULL;
374      }
375    }
376    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
377                      getI32Imm(vecType), getI32Imm(fromType),
378                      getI32Imm(fromTypeWidth), N1, Chain };
379    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
380  }
381
382  if (NVPTXLD != NULL) {
383    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
384    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
385    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
386  }
387
388  return NVPTXLD;
389}
390
391SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
392
393  SDValue Chain = N->getOperand(0);
394  SDValue Op1 = N->getOperand(1);
395  SDValue Addr, Offset, Base;
396  unsigned Opcode;
397  DebugLoc DL = N->getDebugLoc();
398  SDNode *LD;
399  MemSDNode *MemSD = cast<MemSDNode>(N);
400  EVT LoadedVT = MemSD->getMemoryVT();
401
402  if (!LoadedVT.isSimple())
403    return NULL;
404
405  // Address Space Setting
406  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
407
408  // Volatile Setting
409  // - .volatile is only availalble for .global and .shared
410  bool IsVolatile = MemSD->isVolatile();
411  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
412      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
413      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
414    IsVolatile = false;
415
416  // Vector Setting
417  MVT SimpleVT = LoadedVT.getSimpleVT();
418
419  // Type Setting: fromType + fromTypeWidth
420  //
421  // Sign   : ISD::SEXTLOAD
422  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
423  //          type is integer
424  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
425  MVT ScalarVT = SimpleVT.getScalarType();
426  unsigned FromTypeWidth = ScalarVT.getSizeInBits();
427  unsigned int FromType;
428  // The last operand holds the original LoadSDNode::getExtensionType() value
429  unsigned ExtensionType = cast<ConstantSDNode>(
430      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
431  if (ExtensionType == ISD::SEXTLOAD)
432    FromType = NVPTX::PTXLdStInstCode::Signed;
433  else if (ScalarVT.isFloatingPoint())
434    FromType = NVPTX::PTXLdStInstCode::Float;
435  else
436    FromType = NVPTX::PTXLdStInstCode::Unsigned;
437
438  unsigned VecType;
439
440  switch (N->getOpcode()) {
441  case NVPTXISD::LoadV2:
442    VecType = NVPTX::PTXLdStInstCode::V2;
443    break;
444  case NVPTXISD::LoadV4:
445    VecType = NVPTX::PTXLdStInstCode::V4;
446    break;
447  default:
448    return NULL;
449  }
450
451  EVT EltVT = N->getValueType(0);
452
453  if (SelectDirectAddr(Op1, Addr)) {
454    switch (N->getOpcode()) {
455    default:
456      return NULL;
457    case NVPTXISD::LoadV2:
458      switch (EltVT.getSimpleVT().SimpleTy) {
459      default:
460        return NULL;
461      case MVT::i8:
462        Opcode = NVPTX::LDV_i8_v2_avar;
463        break;
464      case MVT::i16:
465        Opcode = NVPTX::LDV_i16_v2_avar;
466        break;
467      case MVT::i32:
468        Opcode = NVPTX::LDV_i32_v2_avar;
469        break;
470      case MVT::i64:
471        Opcode = NVPTX::LDV_i64_v2_avar;
472        break;
473      case MVT::f32:
474        Opcode = NVPTX::LDV_f32_v2_avar;
475        break;
476      case MVT::f64:
477        Opcode = NVPTX::LDV_f64_v2_avar;
478        break;
479      }
480      break;
481    case NVPTXISD::LoadV4:
482      switch (EltVT.getSimpleVT().SimpleTy) {
483      default:
484        return NULL;
485      case MVT::i8:
486        Opcode = NVPTX::LDV_i8_v4_avar;
487        break;
488      case MVT::i16:
489        Opcode = NVPTX::LDV_i16_v4_avar;
490        break;
491      case MVT::i32:
492        Opcode = NVPTX::LDV_i32_v4_avar;
493        break;
494      case MVT::f32:
495        Opcode = NVPTX::LDV_f32_v4_avar;
496        break;
497      }
498      break;
499    }
500
501    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
502                      getI32Imm(VecType), getI32Imm(FromType),
503                      getI32Imm(FromTypeWidth), Addr, Chain };
504    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
505  } else if (Subtarget.is64Bit()
506                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
507                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
508    switch (N->getOpcode()) {
509    default:
510      return NULL;
511    case NVPTXISD::LoadV2:
512      switch (EltVT.getSimpleVT().SimpleTy) {
513      default:
514        return NULL;
515      case MVT::i8:
516        Opcode = NVPTX::LDV_i8_v2_asi;
517        break;
518      case MVT::i16:
519        Opcode = NVPTX::LDV_i16_v2_asi;
520        break;
521      case MVT::i32:
522        Opcode = NVPTX::LDV_i32_v2_asi;
523        break;
524      case MVT::i64:
525        Opcode = NVPTX::LDV_i64_v2_asi;
526        break;
527      case MVT::f32:
528        Opcode = NVPTX::LDV_f32_v2_asi;
529        break;
530      case MVT::f64:
531        Opcode = NVPTX::LDV_f64_v2_asi;
532        break;
533      }
534      break;
535    case NVPTXISD::LoadV4:
536      switch (EltVT.getSimpleVT().SimpleTy) {
537      default:
538        return NULL;
539      case MVT::i8:
540        Opcode = NVPTX::LDV_i8_v4_asi;
541        break;
542      case MVT::i16:
543        Opcode = NVPTX::LDV_i16_v4_asi;
544        break;
545      case MVT::i32:
546        Opcode = NVPTX::LDV_i32_v4_asi;
547        break;
548      case MVT::f32:
549        Opcode = NVPTX::LDV_f32_v4_asi;
550        break;
551      }
552      break;
553    }
554
555    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
556                      getI32Imm(VecType), getI32Imm(FromType),
557                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
558    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
559  } else if (Subtarget.is64Bit()
560                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
561                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
562    if (Subtarget.is64Bit()) {
563      switch (N->getOpcode()) {
564      default:
565        return NULL;
566      case NVPTXISD::LoadV2:
567        switch (EltVT.getSimpleVT().SimpleTy) {
568        default:
569          return NULL;
570        case MVT::i8:
571          Opcode = NVPTX::LDV_i8_v2_ari_64;
572          break;
573        case MVT::i16:
574          Opcode = NVPTX::LDV_i16_v2_ari_64;
575          break;
576        case MVT::i32:
577          Opcode = NVPTX::LDV_i32_v2_ari_64;
578          break;
579        case MVT::i64:
580          Opcode = NVPTX::LDV_i64_v2_ari_64;
581          break;
582        case MVT::f32:
583          Opcode = NVPTX::LDV_f32_v2_ari_64;
584          break;
585        case MVT::f64:
586          Opcode = NVPTX::LDV_f64_v2_ari_64;
587          break;
588        }
589        break;
590      case NVPTXISD::LoadV4:
591        switch (EltVT.getSimpleVT().SimpleTy) {
592        default:
593          return NULL;
594        case MVT::i8:
595          Opcode = NVPTX::LDV_i8_v4_ari_64;
596          break;
597        case MVT::i16:
598          Opcode = NVPTX::LDV_i16_v4_ari_64;
599          break;
600        case MVT::i32:
601          Opcode = NVPTX::LDV_i32_v4_ari_64;
602          break;
603        case MVT::f32:
604          Opcode = NVPTX::LDV_f32_v4_ari_64;
605          break;
606        }
607        break;
608      }
609    } else {
610      switch (N->getOpcode()) {
611      default:
612        return NULL;
613      case NVPTXISD::LoadV2:
614        switch (EltVT.getSimpleVT().SimpleTy) {
615        default:
616          return NULL;
617        case MVT::i8:
618          Opcode = NVPTX::LDV_i8_v2_ari;
619          break;
620        case MVT::i16:
621          Opcode = NVPTX::LDV_i16_v2_ari;
622          break;
623        case MVT::i32:
624          Opcode = NVPTX::LDV_i32_v2_ari;
625          break;
626        case MVT::i64:
627          Opcode = NVPTX::LDV_i64_v2_ari;
628          break;
629        case MVT::f32:
630          Opcode = NVPTX::LDV_f32_v2_ari;
631          break;
632        case MVT::f64:
633          Opcode = NVPTX::LDV_f64_v2_ari;
634          break;
635        }
636        break;
637      case NVPTXISD::LoadV4:
638        switch (EltVT.getSimpleVT().SimpleTy) {
639        default:
640          return NULL;
641        case MVT::i8:
642          Opcode = NVPTX::LDV_i8_v4_ari;
643          break;
644        case MVT::i16:
645          Opcode = NVPTX::LDV_i16_v4_ari;
646          break;
647        case MVT::i32:
648          Opcode = NVPTX::LDV_i32_v4_ari;
649          break;
650        case MVT::f32:
651          Opcode = NVPTX::LDV_f32_v4_ari;
652          break;
653        }
654        break;
655      }
656    }
657
658    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
659                      getI32Imm(VecType), getI32Imm(FromType),
660                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
661
662    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
663  } else {
664    if (Subtarget.is64Bit()) {
665      switch (N->getOpcode()) {
666      default:
667        return NULL;
668      case NVPTXISD::LoadV2:
669        switch (EltVT.getSimpleVT().SimpleTy) {
670        default:
671          return NULL;
672        case MVT::i8:
673          Opcode = NVPTX::LDV_i8_v2_areg_64;
674          break;
675        case MVT::i16:
676          Opcode = NVPTX::LDV_i16_v2_areg_64;
677          break;
678        case MVT::i32:
679          Opcode = NVPTX::LDV_i32_v2_areg_64;
680          break;
681        case MVT::i64:
682          Opcode = NVPTX::LDV_i64_v2_areg_64;
683          break;
684        case MVT::f32:
685          Opcode = NVPTX::LDV_f32_v2_areg_64;
686          break;
687        case MVT::f64:
688          Opcode = NVPTX::LDV_f64_v2_areg_64;
689          break;
690        }
691        break;
692      case NVPTXISD::LoadV4:
693        switch (EltVT.getSimpleVT().SimpleTy) {
694        default:
695          return NULL;
696        case MVT::i8:
697          Opcode = NVPTX::LDV_i8_v4_areg_64;
698          break;
699        case MVT::i16:
700          Opcode = NVPTX::LDV_i16_v4_areg_64;
701          break;
702        case MVT::i32:
703          Opcode = NVPTX::LDV_i32_v4_areg_64;
704          break;
705        case MVT::f32:
706          Opcode = NVPTX::LDV_f32_v4_areg_64;
707          break;
708        }
709        break;
710      }
711    } else {
712      switch (N->getOpcode()) {
713      default:
714        return NULL;
715      case NVPTXISD::LoadV2:
716        switch (EltVT.getSimpleVT().SimpleTy) {
717        default:
718          return NULL;
719        case MVT::i8:
720          Opcode = NVPTX::LDV_i8_v2_areg;
721          break;
722        case MVT::i16:
723          Opcode = NVPTX::LDV_i16_v2_areg;
724          break;
725        case MVT::i32:
726          Opcode = NVPTX::LDV_i32_v2_areg;
727          break;
728        case MVT::i64:
729          Opcode = NVPTX::LDV_i64_v2_areg;
730          break;
731        case MVT::f32:
732          Opcode = NVPTX::LDV_f32_v2_areg;
733          break;
734        case MVT::f64:
735          Opcode = NVPTX::LDV_f64_v2_areg;
736          break;
737        }
738        break;
739      case NVPTXISD::LoadV4:
740        switch (EltVT.getSimpleVT().SimpleTy) {
741        default:
742          return NULL;
743        case MVT::i8:
744          Opcode = NVPTX::LDV_i8_v4_areg;
745          break;
746        case MVT::i16:
747          Opcode = NVPTX::LDV_i16_v4_areg;
748          break;
749        case MVT::i32:
750          Opcode = NVPTX::LDV_i32_v4_areg;
751          break;
752        case MVT::f32:
753          Opcode = NVPTX::LDV_f32_v4_areg;
754          break;
755        }
756        break;
757      }
758    }
759
760    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
761                      getI32Imm(VecType), getI32Imm(FromType),
762                      getI32Imm(FromTypeWidth), Op1, Chain };
763    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
764  }
765
766  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
767  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
768  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
769
770  return LD;
771}
772
773SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
774
775  SDValue Chain = N->getOperand(0);
776  SDValue Op1 = N->getOperand(1);
777  unsigned Opcode;
778  DebugLoc DL = N->getDebugLoc();
779  SDNode *LD;
780
781  EVT RetVT = N->getValueType(0);
782
783  // Select opcode
784  if (Subtarget.is64Bit()) {
785    switch (N->getOpcode()) {
786    default:
787      return NULL;
788    case NVPTXISD::LDGV2:
789      switch (RetVT.getSimpleVT().SimpleTy) {
790      default:
791        return NULL;
792      case MVT::i8:
793        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
794        break;
795      case MVT::i16:
796        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
797        break;
798      case MVT::i32:
799        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
800        break;
801      case MVT::i64:
802        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
803        break;
804      case MVT::f32:
805        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
806        break;
807      case MVT::f64:
808        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
809        break;
810      }
811      break;
812    case NVPTXISD::LDGV4:
813      switch (RetVT.getSimpleVT().SimpleTy) {
814      default:
815        return NULL;
816      case MVT::i8:
817        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
818        break;
819      case MVT::i16:
820        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
821        break;
822      case MVT::i32:
823        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
824        break;
825      case MVT::f32:
826        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
827        break;
828      }
829      break;
830    case NVPTXISD::LDUV2:
831      switch (RetVT.getSimpleVT().SimpleTy) {
832      default:
833        return NULL;
834      case MVT::i8:
835        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
836        break;
837      case MVT::i16:
838        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
839        break;
840      case MVT::i32:
841        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
842        break;
843      case MVT::i64:
844        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
845        break;
846      case MVT::f32:
847        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
848        break;
849      case MVT::f64:
850        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
851        break;
852      }
853      break;
854    case NVPTXISD::LDUV4:
855      switch (RetVT.getSimpleVT().SimpleTy) {
856      default:
857        return NULL;
858      case MVT::i8:
859        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
860        break;
861      case MVT::i16:
862        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
863        break;
864      case MVT::i32:
865        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
866        break;
867      case MVT::f32:
868        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
869        break;
870      }
871      break;
872    }
873  } else {
874    switch (N->getOpcode()) {
875    default:
876      return NULL;
877    case NVPTXISD::LDGV2:
878      switch (RetVT.getSimpleVT().SimpleTy) {
879      default:
880        return NULL;
881      case MVT::i8:
882        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
883        break;
884      case MVT::i16:
885        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
886        break;
887      case MVT::i32:
888        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
889        break;
890      case MVT::i64:
891        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
892        break;
893      case MVT::f32:
894        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
895        break;
896      case MVT::f64:
897        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
898        break;
899      }
900      break;
901    case NVPTXISD::LDGV4:
902      switch (RetVT.getSimpleVT().SimpleTy) {
903      default:
904        return NULL;
905      case MVT::i8:
906        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
907        break;
908      case MVT::i16:
909        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
910        break;
911      case MVT::i32:
912        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
913        break;
914      case MVT::f32:
915        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
916        break;
917      }
918      break;
919    case NVPTXISD::LDUV2:
920      switch (RetVT.getSimpleVT().SimpleTy) {
921      default:
922        return NULL;
923      case MVT::i8:
924        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
925        break;
926      case MVT::i16:
927        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
928        break;
929      case MVT::i32:
930        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
931        break;
932      case MVT::i64:
933        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
934        break;
935      case MVT::f32:
936        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
937        break;
938      case MVT::f64:
939        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
940        break;
941      }
942      break;
943    case NVPTXISD::LDUV4:
944      switch (RetVT.getSimpleVT().SimpleTy) {
945      default:
946        return NULL;
947      case MVT::i8:
948        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
949        break;
950      case MVT::i16:
951        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
952        break;
953      case MVT::i32:
954        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
955        break;
956      case MVT::f32:
957        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
958        break;
959      }
960      break;
961    }
962  }
963
964  SDValue Ops[] = { Op1, Chain };
965  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
966
967  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
968  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
969  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
970
971  return LD;
972}
973
974SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
975  DebugLoc dl = N->getDebugLoc();
976  StoreSDNode *ST = cast<StoreSDNode>(N);
977  EVT StoreVT = ST->getMemoryVT();
978  SDNode *NVPTXST = NULL;
979
980  // do not support pre/post inc/dec
981  if (ST->isIndexed())
982    return NULL;
983
984  if (!StoreVT.isSimple())
985    return NULL;
986
987  // Address Space Setting
988  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
989
990  // Volatile Setting
991  // - .volatile is only availalble for .global and .shared
992  bool isVolatile = ST->isVolatile();
993  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
994      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
995      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
996    isVolatile = false;
997
998  // Vector Setting
999  MVT SimpleVT = StoreVT.getSimpleVT();
1000  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1001  if (SimpleVT.isVector()) {
1002    unsigned num = SimpleVT.getVectorNumElements();
1003    if (num == 2)
1004      vecType = NVPTX::PTXLdStInstCode::V2;
1005    else if (num == 4)
1006      vecType = NVPTX::PTXLdStInstCode::V4;
1007    else
1008      return NULL;
1009  }
1010
1011  // Type Setting: toType + toTypeWidth
1012  // - for integer type, always use 'u'
1013  //
1014  MVT ScalarVT = SimpleVT.getScalarType();
1015  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1016  unsigned int toType;
1017  if (ScalarVT.isFloatingPoint())
1018    toType = NVPTX::PTXLdStInstCode::Float;
1019  else
1020    toType = NVPTX::PTXLdStInstCode::Unsigned;
1021
1022  // Create the machine instruction DAG
1023  SDValue Chain = N->getOperand(0);
1024  SDValue N1 = N->getOperand(1);
1025  SDValue N2 = N->getOperand(2);
1026  SDValue Addr;
1027  SDValue Offset, Base;
1028  unsigned Opcode;
1029  MVT::SimpleValueType SourceVT =
1030      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
1031
1032  if (SelectDirectAddr(N2, Addr)) {
1033    switch (SourceVT) {
1034    case MVT::i8:
1035      Opcode = NVPTX::ST_i8_avar;
1036      break;
1037    case MVT::i16:
1038      Opcode = NVPTX::ST_i16_avar;
1039      break;
1040    case MVT::i32:
1041      Opcode = NVPTX::ST_i32_avar;
1042      break;
1043    case MVT::i64:
1044      Opcode = NVPTX::ST_i64_avar;
1045      break;
1046    case MVT::f32:
1047      Opcode = NVPTX::ST_f32_avar;
1048      break;
1049    case MVT::f64:
1050      Opcode = NVPTX::ST_f64_avar;
1051      break;
1052    default:
1053      return NULL;
1054    }
1055    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1056                      getI32Imm(vecType), getI32Imm(toType),
1057                      getI32Imm(toTypeWidth), Addr, Chain };
1058    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
1059  } else if (Subtarget.is64Bit()
1060                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1061                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1062    switch (SourceVT) {
1063    case MVT::i8:
1064      Opcode = NVPTX::ST_i8_asi;
1065      break;
1066    case MVT::i16:
1067      Opcode = NVPTX::ST_i16_asi;
1068      break;
1069    case MVT::i32:
1070      Opcode = NVPTX::ST_i32_asi;
1071      break;
1072    case MVT::i64:
1073      Opcode = NVPTX::ST_i64_asi;
1074      break;
1075    case MVT::f32:
1076      Opcode = NVPTX::ST_f32_asi;
1077      break;
1078    case MVT::f64:
1079      Opcode = NVPTX::ST_f64_asi;
1080      break;
1081    default:
1082      return NULL;
1083    }
1084    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1085                      getI32Imm(vecType), getI32Imm(toType),
1086                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1087    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
1088  } else if (Subtarget.is64Bit()
1089                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1090                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1091    if (Subtarget.is64Bit()) {
1092      switch (SourceVT) {
1093      case MVT::i8:
1094        Opcode = NVPTX::ST_i8_ari_64;
1095        break;
1096      case MVT::i16:
1097        Opcode = NVPTX::ST_i16_ari_64;
1098        break;
1099      case MVT::i32:
1100        Opcode = NVPTX::ST_i32_ari_64;
1101        break;
1102      case MVT::i64:
1103        Opcode = NVPTX::ST_i64_ari_64;
1104        break;
1105      case MVT::f32:
1106        Opcode = NVPTX::ST_f32_ari_64;
1107        break;
1108      case MVT::f64:
1109        Opcode = NVPTX::ST_f64_ari_64;
1110        break;
1111      default:
1112        return NULL;
1113      }
1114    } else {
1115      switch (SourceVT) {
1116      case MVT::i8:
1117        Opcode = NVPTX::ST_i8_ari;
1118        break;
1119      case MVT::i16:
1120        Opcode = NVPTX::ST_i16_ari;
1121        break;
1122      case MVT::i32:
1123        Opcode = NVPTX::ST_i32_ari;
1124        break;
1125      case MVT::i64:
1126        Opcode = NVPTX::ST_i64_ari;
1127        break;
1128      case MVT::f32:
1129        Opcode = NVPTX::ST_f32_ari;
1130        break;
1131      case MVT::f64:
1132        Opcode = NVPTX::ST_f64_ari;
1133        break;
1134      default:
1135        return NULL;
1136      }
1137    }
1138    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1139                      getI32Imm(vecType), getI32Imm(toType),
1140                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1141    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
1142  } else {
1143    if (Subtarget.is64Bit()) {
1144      switch (SourceVT) {
1145      case MVT::i8:
1146        Opcode = NVPTX::ST_i8_areg_64;
1147        break;
1148      case MVT::i16:
1149        Opcode = NVPTX::ST_i16_areg_64;
1150        break;
1151      case MVT::i32:
1152        Opcode = NVPTX::ST_i32_areg_64;
1153        break;
1154      case MVT::i64:
1155        Opcode = NVPTX::ST_i64_areg_64;
1156        break;
1157      case MVT::f32:
1158        Opcode = NVPTX::ST_f32_areg_64;
1159        break;
1160      case MVT::f64:
1161        Opcode = NVPTX::ST_f64_areg_64;
1162        break;
1163      default:
1164        return NULL;
1165      }
1166    } else {
1167      switch (SourceVT) {
1168      case MVT::i8:
1169        Opcode = NVPTX::ST_i8_areg;
1170        break;
1171      case MVT::i16:
1172        Opcode = NVPTX::ST_i16_areg;
1173        break;
1174      case MVT::i32:
1175        Opcode = NVPTX::ST_i32_areg;
1176        break;
1177      case MVT::i64:
1178        Opcode = NVPTX::ST_i64_areg;
1179        break;
1180      case MVT::f32:
1181        Opcode = NVPTX::ST_f32_areg;
1182        break;
1183      case MVT::f64:
1184        Opcode = NVPTX::ST_f64_areg;
1185        break;
1186      default:
1187        return NULL;
1188      }
1189    }
1190    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1191                      getI32Imm(vecType), getI32Imm(toType),
1192                      getI32Imm(toTypeWidth), N2, Chain };
1193    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
1194  }
1195
1196  if (NVPTXST != NULL) {
1197    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1198    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1199    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1200  }
1201
1202  return NVPTXST;
1203}
1204
1205SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1206  SDValue Chain = N->getOperand(0);
1207  SDValue Op1 = N->getOperand(1);
1208  SDValue Addr, Offset, Base;
1209  unsigned Opcode;
1210  DebugLoc DL = N->getDebugLoc();
1211  SDNode *ST;
1212  EVT EltVT = Op1.getValueType();
1213  MemSDNode *MemSD = cast<MemSDNode>(N);
1214  EVT StoreVT = MemSD->getMemoryVT();
1215
1216  // Address Space Setting
1217  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
1218
1219  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1220    report_fatal_error("Cannot store to pointer that points to constant "
1221                       "memory space");
1222  }
1223
1224  // Volatile Setting
1225  // - .volatile is only availalble for .global and .shared
1226  bool IsVolatile = MemSD->isVolatile();
1227  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1228      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1229      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1230    IsVolatile = false;
1231
1232  // Type Setting: toType + toTypeWidth
1233  // - for integer type, always use 'u'
1234  assert(StoreVT.isSimple() && "Store value is not simple");
1235  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1236  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1237  unsigned ToType;
1238  if (ScalarVT.isFloatingPoint())
1239    ToType = NVPTX::PTXLdStInstCode::Float;
1240  else
1241    ToType = NVPTX::PTXLdStInstCode::Unsigned;
1242
1243  SmallVector<SDValue, 12> StOps;
1244  SDValue N2;
1245  unsigned VecType;
1246
1247  switch (N->getOpcode()) {
1248  case NVPTXISD::StoreV2:
1249    VecType = NVPTX::PTXLdStInstCode::V2;
1250    StOps.push_back(N->getOperand(1));
1251    StOps.push_back(N->getOperand(2));
1252    N2 = N->getOperand(3);
1253    break;
1254  case NVPTXISD::StoreV4:
1255    VecType = NVPTX::PTXLdStInstCode::V4;
1256    StOps.push_back(N->getOperand(1));
1257    StOps.push_back(N->getOperand(2));
1258    StOps.push_back(N->getOperand(3));
1259    StOps.push_back(N->getOperand(4));
1260    N2 = N->getOperand(5);
1261    break;
1262  default:
1263    return NULL;
1264  }
1265
1266  StOps.push_back(getI32Imm(IsVolatile));
1267  StOps.push_back(getI32Imm(CodeAddrSpace));
1268  StOps.push_back(getI32Imm(VecType));
1269  StOps.push_back(getI32Imm(ToType));
1270  StOps.push_back(getI32Imm(ToTypeWidth));
1271
1272  if (SelectDirectAddr(N2, Addr)) {
1273    switch (N->getOpcode()) {
1274    default:
1275      return NULL;
1276    case NVPTXISD::StoreV2:
1277      switch (EltVT.getSimpleVT().SimpleTy) {
1278      default:
1279        return NULL;
1280      case MVT::i8:
1281        Opcode = NVPTX::STV_i8_v2_avar;
1282        break;
1283      case MVT::i16:
1284        Opcode = NVPTX::STV_i16_v2_avar;
1285        break;
1286      case MVT::i32:
1287        Opcode = NVPTX::STV_i32_v2_avar;
1288        break;
1289      case MVT::i64:
1290        Opcode = NVPTX::STV_i64_v2_avar;
1291        break;
1292      case MVT::f32:
1293        Opcode = NVPTX::STV_f32_v2_avar;
1294        break;
1295      case MVT::f64:
1296        Opcode = NVPTX::STV_f64_v2_avar;
1297        break;
1298      }
1299      break;
1300    case NVPTXISD::StoreV4:
1301      switch (EltVT.getSimpleVT().SimpleTy) {
1302      default:
1303        return NULL;
1304      case MVT::i8:
1305        Opcode = NVPTX::STV_i8_v4_avar;
1306        break;
1307      case MVT::i16:
1308        Opcode = NVPTX::STV_i16_v4_avar;
1309        break;
1310      case MVT::i32:
1311        Opcode = NVPTX::STV_i32_v4_avar;
1312        break;
1313      case MVT::f32:
1314        Opcode = NVPTX::STV_f32_v4_avar;
1315        break;
1316      }
1317      break;
1318    }
1319    StOps.push_back(Addr);
1320  } else if (Subtarget.is64Bit()
1321                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1322                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1323    switch (N->getOpcode()) {
1324    default:
1325      return NULL;
1326    case NVPTXISD::StoreV2:
1327      switch (EltVT.getSimpleVT().SimpleTy) {
1328      default:
1329        return NULL;
1330      case MVT::i8:
1331        Opcode = NVPTX::STV_i8_v2_asi;
1332        break;
1333      case MVT::i16:
1334        Opcode = NVPTX::STV_i16_v2_asi;
1335        break;
1336      case MVT::i32:
1337        Opcode = NVPTX::STV_i32_v2_asi;
1338        break;
1339      case MVT::i64:
1340        Opcode = NVPTX::STV_i64_v2_asi;
1341        break;
1342      case MVT::f32:
1343        Opcode = NVPTX::STV_f32_v2_asi;
1344        break;
1345      case MVT::f64:
1346        Opcode = NVPTX::STV_f64_v2_asi;
1347        break;
1348      }
1349      break;
1350    case NVPTXISD::StoreV4:
1351      switch (EltVT.getSimpleVT().SimpleTy) {
1352      default:
1353        return NULL;
1354      case MVT::i8:
1355        Opcode = NVPTX::STV_i8_v4_asi;
1356        break;
1357      case MVT::i16:
1358        Opcode = NVPTX::STV_i16_v4_asi;
1359        break;
1360      case MVT::i32:
1361        Opcode = NVPTX::STV_i32_v4_asi;
1362        break;
1363      case MVT::f32:
1364        Opcode = NVPTX::STV_f32_v4_asi;
1365        break;
1366      }
1367      break;
1368    }
1369    StOps.push_back(Base);
1370    StOps.push_back(Offset);
1371  } else if (Subtarget.is64Bit()
1372                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1373                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1374    if (Subtarget.is64Bit()) {
1375      switch (N->getOpcode()) {
1376      default:
1377        return NULL;
1378      case NVPTXISD::StoreV2:
1379        switch (EltVT.getSimpleVT().SimpleTy) {
1380        default:
1381          return NULL;
1382        case MVT::i8:
1383          Opcode = NVPTX::STV_i8_v2_ari_64;
1384          break;
1385        case MVT::i16:
1386          Opcode = NVPTX::STV_i16_v2_ari_64;
1387          break;
1388        case MVT::i32:
1389          Opcode = NVPTX::STV_i32_v2_ari_64;
1390          break;
1391        case MVT::i64:
1392          Opcode = NVPTX::STV_i64_v2_ari_64;
1393          break;
1394        case MVT::f32:
1395          Opcode = NVPTX::STV_f32_v2_ari_64;
1396          break;
1397        case MVT::f64:
1398          Opcode = NVPTX::STV_f64_v2_ari_64;
1399          break;
1400        }
1401        break;
1402      case NVPTXISD::StoreV4:
1403        switch (EltVT.getSimpleVT().SimpleTy) {
1404        default:
1405          return NULL;
1406        case MVT::i8:
1407          Opcode = NVPTX::STV_i8_v4_ari_64;
1408          break;
1409        case MVT::i16:
1410          Opcode = NVPTX::STV_i16_v4_ari_64;
1411          break;
1412        case MVT::i32:
1413          Opcode = NVPTX::STV_i32_v4_ari_64;
1414          break;
1415        case MVT::f32:
1416          Opcode = NVPTX::STV_f32_v4_ari_64;
1417          break;
1418        }
1419        break;
1420      }
1421    } else {
1422      switch (N->getOpcode()) {
1423      default:
1424        return NULL;
1425      case NVPTXISD::StoreV2:
1426        switch (EltVT.getSimpleVT().SimpleTy) {
1427        default:
1428          return NULL;
1429        case MVT::i8:
1430          Opcode = NVPTX::STV_i8_v2_ari;
1431          break;
1432        case MVT::i16:
1433          Opcode = NVPTX::STV_i16_v2_ari;
1434          break;
1435        case MVT::i32:
1436          Opcode = NVPTX::STV_i32_v2_ari;
1437          break;
1438        case MVT::i64:
1439          Opcode = NVPTX::STV_i64_v2_ari;
1440          break;
1441        case MVT::f32:
1442          Opcode = NVPTX::STV_f32_v2_ari;
1443          break;
1444        case MVT::f64:
1445          Opcode = NVPTX::STV_f64_v2_ari;
1446          break;
1447        }
1448        break;
1449      case NVPTXISD::StoreV4:
1450        switch (EltVT.getSimpleVT().SimpleTy) {
1451        default:
1452          return NULL;
1453        case MVT::i8:
1454          Opcode = NVPTX::STV_i8_v4_ari;
1455          break;
1456        case MVT::i16:
1457          Opcode = NVPTX::STV_i16_v4_ari;
1458          break;
1459        case MVT::i32:
1460          Opcode = NVPTX::STV_i32_v4_ari;
1461          break;
1462        case MVT::f32:
1463          Opcode = NVPTX::STV_f32_v4_ari;
1464          break;
1465        }
1466        break;
1467      }
1468    }
1469    StOps.push_back(Base);
1470    StOps.push_back(Offset);
1471  } else {
1472    if (Subtarget.is64Bit()) {
1473      switch (N->getOpcode()) {
1474      default:
1475        return NULL;
1476      case NVPTXISD::StoreV2:
1477        switch (EltVT.getSimpleVT().SimpleTy) {
1478        default:
1479          return NULL;
1480        case MVT::i8:
1481          Opcode = NVPTX::STV_i8_v2_areg_64;
1482          break;
1483        case MVT::i16:
1484          Opcode = NVPTX::STV_i16_v2_areg_64;
1485          break;
1486        case MVT::i32:
1487          Opcode = NVPTX::STV_i32_v2_areg_64;
1488          break;
1489        case MVT::i64:
1490          Opcode = NVPTX::STV_i64_v2_areg_64;
1491          break;
1492        case MVT::f32:
1493          Opcode = NVPTX::STV_f32_v2_areg_64;
1494          break;
1495        case MVT::f64:
1496          Opcode = NVPTX::STV_f64_v2_areg_64;
1497          break;
1498        }
1499        break;
1500      case NVPTXISD::StoreV4:
1501        switch (EltVT.getSimpleVT().SimpleTy) {
1502        default:
1503          return NULL;
1504        case MVT::i8:
1505          Opcode = NVPTX::STV_i8_v4_areg_64;
1506          break;
1507        case MVT::i16:
1508          Opcode = NVPTX::STV_i16_v4_areg_64;
1509          break;
1510        case MVT::i32:
1511          Opcode = NVPTX::STV_i32_v4_areg_64;
1512          break;
1513        case MVT::f32:
1514          Opcode = NVPTX::STV_f32_v4_areg_64;
1515          break;
1516        }
1517        break;
1518      }
1519    } else {
1520      switch (N->getOpcode()) {
1521      default:
1522        return NULL;
1523      case NVPTXISD::StoreV2:
1524        switch (EltVT.getSimpleVT().SimpleTy) {
1525        default:
1526          return NULL;
1527        case MVT::i8:
1528          Opcode = NVPTX::STV_i8_v2_areg;
1529          break;
1530        case MVT::i16:
1531          Opcode = NVPTX::STV_i16_v2_areg;
1532          break;
1533        case MVT::i32:
1534          Opcode = NVPTX::STV_i32_v2_areg;
1535          break;
1536        case MVT::i64:
1537          Opcode = NVPTX::STV_i64_v2_areg;
1538          break;
1539        case MVT::f32:
1540          Opcode = NVPTX::STV_f32_v2_areg;
1541          break;
1542        case MVT::f64:
1543          Opcode = NVPTX::STV_f64_v2_areg;
1544          break;
1545        }
1546        break;
1547      case NVPTXISD::StoreV4:
1548        switch (EltVT.getSimpleVT().SimpleTy) {
1549        default:
1550          return NULL;
1551        case MVT::i8:
1552          Opcode = NVPTX::STV_i8_v4_areg;
1553          break;
1554        case MVT::i16:
1555          Opcode = NVPTX::STV_i16_v4_areg;
1556          break;
1557        case MVT::i32:
1558          Opcode = NVPTX::STV_i32_v4_areg;
1559          break;
1560        case MVT::f32:
1561          Opcode = NVPTX::STV_f32_v4_areg;
1562          break;
1563        }
1564        break;
1565      }
1566    }
1567    StOps.push_back(N2);
1568  }
1569
1570  StOps.push_back(Chain);
1571
1572  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
1573
1574  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1575  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1576  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1577
1578  return ST;
1579}
1580
1581// SelectDirectAddr - Match a direct address for DAG.
1582// A direct address could be a globaladdress or externalsymbol.
1583bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
1584  // Return true if TGA or ES.
1585  if (N.getOpcode() == ISD::TargetGlobalAddress ||
1586      N.getOpcode() == ISD::TargetExternalSymbol) {
1587    Address = N;
1588    return true;
1589  }
1590  if (N.getOpcode() == NVPTXISD::Wrapper) {
1591    Address = N.getOperand(0);
1592    return true;
1593  }
1594  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
1595    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
1596    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
1597      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
1598        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
1599  }
1600  return false;
1601}
1602
1603// symbol+offset
1604bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
1605    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
1606  if (Addr.getOpcode() == ISD::ADD) {
1607    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
1608      SDValue base = Addr.getOperand(0);
1609      if (SelectDirectAddr(base, Base)) {
1610        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
1611        return true;
1612      }
1613    }
1614  }
1615  return false;
1616}
1617
1618// symbol+offset
1619bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
1620                                     SDValue &Base, SDValue &Offset) {
1621  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
1622}
1623
1624// symbol+offset
1625bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
1626                                       SDValue &Base, SDValue &Offset) {
1627  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
1628}
1629
1630// register+offset
1631bool NVPTXDAGToDAGISel::SelectADDRri_imp(
1632    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
1633  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
1634    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
1635    Offset = CurDAG->getTargetConstant(0, mvt);
1636    return true;
1637  }
1638  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
1639      Addr.getOpcode() == ISD::TargetGlobalAddress)
1640    return false; // direct calls.
1641
1642  if (Addr.getOpcode() == ISD::ADD) {
1643    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
1644      return false;
1645    }
1646    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
1647      if (FrameIndexSDNode *FIN =
1648              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
1649        // Constant offset from frame ref.
1650        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
1651      else
1652        Base = Addr.getOperand(0);
1653      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
1654      return true;
1655    }
1656  }
1657  return false;
1658}
1659
1660// register+offset
1661bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
1662                                     SDValue &Base, SDValue &Offset) {
1663  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
1664}
1665
1666// register+offset
1667bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
1668                                       SDValue &Base, SDValue &Offset) {
1669  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
1670}
1671
1672bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
1673                                                 unsigned int spN) const {
1674  const Value *Src = NULL;
1675  // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
1676  // the classof() for MemSDNode does not include MemIntrinsicSDNode
1677  // (See SelectionDAGNodes.h). So we need to check for both.
1678  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
1679    Src = mN->getSrcValue();
1680  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
1681    Src = mN->getSrcValue();
1682  }
1683  if (!Src)
1684    return false;
1685  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
1686    return (PT->getAddressSpace() == spN);
1687  return false;
1688}
1689
1690/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
1691/// inline asm expressions.
1692bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
1693    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
1694  SDValue Op0, Op1;
1695  switch (ConstraintCode) {
1696  default:
1697    return true;
1698  case 'm': // memory
1699    if (SelectDirectAddr(Op, Op0)) {
1700      OutOps.push_back(Op0);
1701      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
1702      return false;
1703    }
1704    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
1705      OutOps.push_back(Op0);
1706      OutOps.push_back(Op1);
1707      return false;
1708    }
1709    break;
1710  }
1711  return true;
1712}
1713
1714// Return true if N is a undef or a constant.
1715// If N was undef, return a (i8imm 0) in Retval
1716// If N was imm, convert it to i8imm and return in Retval
1717// Note: The convert to i8imm is required, otherwise the
1718// pattern matcher inserts a bunch of IMOVi8rr to convert
1719// the imm to i8imm, and this causes instruction selection
1720// to fail.
1721bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
1722  if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
1723    return false;
1724
1725  if (N.getOpcode() == ISD::UNDEF)
1726    Retval = CurDAG->getTargetConstant(0, MVT::i8);
1727  else {
1728    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
1729    unsigned retval = cn->getZExtValue();
1730    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
1731  }
1732  return true;
1733}
1734