NVPTXISelDAGToDAG.cpp revision 239310
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//
1361452Sdfr
1461452Sdfr
1561452Sdfr#include "llvm/Instructions.h"
1661452Sdfr#include "llvm/Support/raw_ostream.h"
1761452Sdfr#include "NVPTXISelDAGToDAG.h"
1861452Sdfr#include "llvm/Support/Debug.h"
1961452Sdfr#include "llvm/Support/ErrorHandling.h"
2061452Sdfr#include "llvm/Support/CommandLine.h"
2161452Sdfr#include "llvm/Target/TargetIntrinsicInfo.h"
2261452Sdfr#include "llvm/GlobalValue.h"
2361452Sdfr
2461452Sdfr#undef DEBUG_TYPE
2561452Sdfr#define DEBUG_TYPE "nvptx-isel"
2661452Sdfr
2761452Sdfrusing namespace llvm;
2861452Sdfr
2961452Sdfr
3061452Sdfrstatic cl::opt<bool>
3161452SdfrUseFMADInstruction("nvptx-mad-enable",
3261452Sdfr                   cl::ZeroOrMore,
3361452Sdfr                cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
3461452Sdfr                   cl::init(false));
3561452Sdfr
3661452Sdfrstatic cl::opt<int>
37102480SbdeFMAContractLevel("nvptx-fma-level",
3876827Salfred                 cl::ZeroOrMore,
3979339Sjhb                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
4061452Sdfr                     " 1: do it  2: do it aggressively"),
4161452Sdfr                     cl::init(2));
4261452Sdfr
4361452Sdfr
4461452Sdfrstatic cl::opt<int>
4561452SdfrUsePrecDivF32("nvptx-prec-divf32",
4661452Sdfr              cl::ZeroOrMore,
4761452Sdfr             cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
4861452Sdfr                  " IEEE Compliant F32 div.rnd if avaiable."),
4961452Sdfr                  cl::init(2));
5061452Sdfr
5161452Sdfr/// createNVPTXISelDag - This pass converts a legalized DAG into a
5261452Sdfr/// NVPTX-specific DAG, ready for instruction scheduling.
5361501SdfrFunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
5461501Sdfr                                       llvm::CodeGenOpt::Level OptLevel) {
5561452Sdfr  return new NVPTXDAGToDAGISel(TM, OptLevel);
5661452Sdfr}
5761452Sdfr
5861452Sdfr
5961452SdfrNVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
6061501Sdfr                                     CodeGenOpt::Level OptLevel)
6161501Sdfr: SelectionDAGISel(tm, OptLevel),
6287479Scokane  Subtarget(tm.getSubtarget<NVPTXSubtarget>())
6387479Scokane{
6461501Sdfr  // Always do fma.f32 fpcontract if the target supports the instruction.
6561501Sdfr  // Always do fma.f64 fpcontract if the target supports the instruction.
6661501Sdfr  // Do mad.f32 is nvptx-mad-enable is specified and the target does not
6761501Sdfr  // support fma.f32.
6861452Sdfr
6961452Sdfr  doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
7061452Sdfr  doFMAF32 =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
7161452Sdfr      (FMAContractLevel>=1);
7261452Sdfr  doFMAF64 =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
7361452Sdfr      (FMAContractLevel>=1);
7461501Sdfr  doFMAF32AGG =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
7561452Sdfr      (FMAContractLevel==2);
7661452Sdfr  doFMAF64AGG =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
7761501Sdfr      (FMAContractLevel==2);
7861501Sdfr
7961501Sdfr  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
8061501Sdfr
8161501Sdfr  UseF32FTZ = false;
8261501Sdfr
8387479Scokane  doMulWide = (OptLevel > 0);
8461501Sdfr
8561501Sdfr  // Decide how to translate f32 div
8661501Sdfr  do_DIVF32_PREC = UsePrecDivF32;
8761501Sdfr  // sm less than sm_20 does not support div.rnd. Use div.full.
8861501Sdfr  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
8961501Sdfr    do_DIVF32_PREC = 1;
9061501Sdfr
9161501Sdfr}
9261501Sdfr
9361501Sdfr/// Select - Select instructions not customized! Used for
9461501Sdfr/// expanded, promoted and normal instructions.
9561501SdfrSDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
9661501Sdfr
9787479Scokane  if (N->isMachineOpcode())
9887479Scokane    return NULL;   // Already selected.
9961501Sdfr
10061501Sdfr  SDNode *ResNode = NULL;
10161501Sdfr  switch (N->getOpcode()) {
10261501Sdfr  case ISD::LOAD:
10361501Sdfr    ResNode = SelectLoad(N);
10461501Sdfr    break;
10561501Sdfr  case ISD::STORE:
10661501Sdfr    ResNode = SelectStore(N);
10761501Sdfr    break;
10861501Sdfr  }
10961501Sdfr  if (ResNode)
11061501Sdfr    return ResNode;
11161501Sdfr  return SelectCode(N);
11261501Sdfr}
11361501Sdfr
11461501Sdfr
11561501Sdfrstatic unsigned int
11661501SdfrgetCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
11761501Sdfr{
11861501Sdfr  const Value *Src = N->getSrcValue();
11961501Sdfr  if (!Src)
12061501Sdfr    return NVPTX::PTXLdStInstCode::LOCAL;
12161501Sdfr
12261501Sdfr  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
12394790Scokane    switch (PT->getAddressSpace()) {
12494790Scokane    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
12561501Sdfr    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
12687479Scokane    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
127105145Smarcel    case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
128105145Smarcel      return NVPTX::PTXLdStInstCode::CONSTANT;
12987479Scokane    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
13087479Scokane    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
13187479Scokane    case llvm::ADDRESS_SPACE_CONST:
13287479Scokane      // If the arch supports generic address space, translate it to GLOBAL
13387479Scokane      // for correctness.
13487479Scokane      // If the arch does not support generic address space, then the arch
13587479Scokane      // does not really support ADDRESS_SPACE_CONST, translate it to
13694790Scokane      // to CONSTANT for better performance.
13787479Scokane      if (Subtarget.hasGenericLdSt())
13861501Sdfr        return NVPTX::PTXLdStInstCode::GLOBAL;
13961501Sdfr      else
14061501Sdfr        return NVPTX::PTXLdStInstCode::CONSTANT;
14187479Scokane    default: break;
14287479Scokane    }
14387479Scokane  }
14487479Scokane  return NVPTX::PTXLdStInstCode::LOCAL;
14587479Scokane}
14661501Sdfr
14787479Scokane
14887479ScokaneSDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
14987479Scokane  DebugLoc dl = N->getDebugLoc();
15061501Sdfr  LoadSDNode *LD = cast<LoadSDNode>(N);
15161501Sdfr  EVT LoadedVT = LD->getMemoryVT();
15287479Scokane  SDNode *NVPTXLD= NULL;
15361501Sdfr
15461501Sdfr  // do not support pre/post inc/dec
15561501Sdfr  if (LD->isIndexed())
15661501Sdfr    return NULL;
15761501Sdfr
15861501Sdfr  if (!LoadedVT.isSimple())
15987479Scokane    return NULL;
16061501Sdfr
16161501Sdfr  // Address Space Setting
16261501Sdfr  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
16361501Sdfr
16461501Sdfr  // Volatile Setting
16561501Sdfr  // - .volatile is only availalble for .global and .shared
16661501Sdfr  bool isVolatile = LD->isVolatile();
16761501Sdfr  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
16861501Sdfr      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
16961501Sdfr      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
17061501Sdfr    isVolatile = false;
17161501Sdfr
17261501Sdfr  // Vector Setting
17361501Sdfr  MVT SimpleVT = LoadedVT.getSimpleVT();
17461501Sdfr  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
17561501Sdfr  if (SimpleVT.isVector()) {
17661501Sdfr    unsigned num = SimpleVT.getVectorNumElements();
17761501Sdfr    if (num == 2)
17861452Sdfr      vecType = NVPTX::PTXLdStInstCode::V2;
17961452Sdfr    else if (num == 4)
18061452Sdfr      vecType = NVPTX::PTXLdStInstCode::V4;
18161452Sdfr    else
18261452Sdfr      return NULL;
18361452Sdfr  }
18461452Sdfr
18561452Sdfr  // Type Setting: fromType + fromTypeWidth
18661452Sdfr  //
18761452Sdfr  // Sign   : ISD::SEXTLOAD
18861452Sdfr  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
18987479Scokane  //          type is integer
19083699Scokane  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
19183699Scokane  MVT ScalarVT = SimpleVT.getScalarType();
19287479Scokane  unsigned fromTypeWidth =  ScalarVT.getSizeInBits();
19361452Sdfr  unsigned int fromType;
19461452Sdfr  if ((LD->getExtensionType() == ISD::SEXTLOAD))
19587479Scokane    fromType = NVPTX::PTXLdStInstCode::Signed;
19687479Scokane  else if (ScalarVT.isFloatingPoint())
19787479Scokane    fromType = NVPTX::PTXLdStInstCode::Float;
19887479Scokane  else
19961452Sdfr    fromType = NVPTX::PTXLdStInstCode::Unsigned;
20061452Sdfr
20161452Sdfr  // Create the machine instruction DAG
20261452Sdfr  SDValue Chain = N->getOperand(0);
20361452Sdfr  SDValue N1 = N->getOperand(1);
20461452Sdfr  SDValue Addr;
20561452Sdfr  SDValue Offset, Base;
20661452Sdfr  unsigned Opcode;
20761452Sdfr  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
20861452Sdfr
20961452Sdfr  if (SelectDirectAddr(N1, Addr)) {
21061452Sdfr    switch (TargetVT) {
21161452Sdfr    case MVT::i8:    Opcode = NVPTX::LD_i8_avar; break;
21261452Sdfr    case MVT::i16:   Opcode = NVPTX::LD_i16_avar; break;
21361452Sdfr    case MVT::i32:   Opcode = NVPTX::LD_i32_avar; break;
21461452Sdfr    case MVT::i64:   Opcode = NVPTX::LD_i64_avar; break;
21561452Sdfr    case MVT::f32:   Opcode = NVPTX::LD_f32_avar; break;
21661452Sdfr    case MVT::f64:   Opcode = NVPTX::LD_f64_avar; break;
21761452Sdfr    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_avar; break;
21861452Sdfr    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
21961452Sdfr    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
22061452Sdfr    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
22161452Sdfr    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
22261452Sdfr    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
22361501Sdfr    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_avar; break;
22461452Sdfr    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
22561452Sdfr    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
22661452Sdfr    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
22761452Sdfr    default: return NULL;
22861452Sdfr    }
22961452Sdfr    SDValue Ops[] = { getI32Imm(isVolatile),
23061452Sdfr                      getI32Imm(codeAddrSpace),
23161452Sdfr                      getI32Imm(vecType),
23261452Sdfr                      getI32Imm(fromType),
23361452Sdfr                      getI32Imm(fromTypeWidth),
23461452Sdfr                      Addr, Chain };
23561452Sdfr    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
23661452Sdfr                                     MVT::Other, Ops, 7);
23761452Sdfr  } else if (Subtarget.is64Bit()?
23861452Sdfr      SelectADDRsi64(N1.getNode(), N1, Base, Offset):
23961452Sdfr      SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
24061452Sdfr    switch (TargetVT) {
24161452Sdfr    case MVT::i8:    Opcode = NVPTX::LD_i8_asi; break;
24261452Sdfr    case MVT::i16:   Opcode = NVPTX::LD_i16_asi; break;
24361452Sdfr    case MVT::i32:   Opcode = NVPTX::LD_i32_asi; break;
24461501Sdfr    case MVT::i64:   Opcode = NVPTX::LD_i64_asi; break;
24561452Sdfr    case MVT::f32:   Opcode = NVPTX::LD_f32_asi; break;
24661452Sdfr    case MVT::f64:   Opcode = NVPTX::LD_f64_asi; break;
24761452Sdfr    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_asi; break;
24861452Sdfr    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
24961452Sdfr    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
25061452Sdfr    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
25161452Sdfr    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
25261452Sdfr    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
25361452Sdfr    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_asi; break;
25461452Sdfr    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
25561452Sdfr    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
25661452Sdfr    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
25761452Sdfr    default: return NULL;
25861501Sdfr    }
25961452Sdfr    SDValue Ops[] = { getI32Imm(isVolatile),
26061452Sdfr                      getI32Imm(codeAddrSpace),
26161501Sdfr                      getI32Imm(vecType),
26261501Sdfr                      getI32Imm(fromType),
26361501Sdfr                      getI32Imm(fromTypeWidth),
26461452Sdfr                      Base, Offset, Chain };
26561501Sdfr    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
26661501Sdfr                                     MVT::Other, Ops, 8);
26761501Sdfr  } else if (Subtarget.is64Bit()?
26861501Sdfr      SelectADDRri64(N1.getNode(), N1, Base, Offset):
26961501Sdfr      SelectADDRri(N1.getNode(), N1, Base, Offset)) {
27061452Sdfr    switch (TargetVT) {
27161452Sdfr    case MVT::i8:    Opcode = NVPTX::LD_i8_ari; break;
27261452Sdfr    case MVT::i16:   Opcode = NVPTX::LD_i16_ari; break;
27361452Sdfr    case MVT::i32:   Opcode = NVPTX::LD_i32_ari; break;
27461452Sdfr    case MVT::i64:   Opcode = NVPTX::LD_i64_ari; break;
27561501Sdfr    case MVT::f32:   Opcode = NVPTX::LD_f32_ari; break;
27661452Sdfr    case MVT::f64:   Opcode = NVPTX::LD_f64_ari; break;
27761452Sdfr    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_ari; break;
27861452Sdfr    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
27961452Sdfr    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
28061452Sdfr    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
28161452Sdfr    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
28261503Sdfr    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
28361452Sdfr    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_ari; break;
28461503Sdfr    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
28561503Sdfr    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
28661503Sdfr    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
28761503Sdfr    default: return NULL;
28861452Sdfr    }
28961452Sdfr    SDValue Ops[] = { getI32Imm(isVolatile),
29061452Sdfr                      getI32Imm(codeAddrSpace),
29161452Sdfr                      getI32Imm(vecType),
29261452Sdfr                      getI32Imm(fromType),
29361452Sdfr                      getI32Imm(fromTypeWidth),
29461452Sdfr                      Base, Offset, Chain };
29561452Sdfr    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
29661452Sdfr                                     MVT::Other, Ops, 8);
29761452Sdfr  }
29861452Sdfr  else {
29961452Sdfr    switch (TargetVT) {
30061452Sdfr    case MVT::i8:    Opcode = NVPTX::LD_i8_areg; break;
30161501Sdfr    case MVT::i16:   Opcode = NVPTX::LD_i16_areg; break;
30261503Sdfr    case MVT::i32:   Opcode = NVPTX::LD_i32_areg; break;
30361503Sdfr    case MVT::i64:   Opcode = NVPTX::LD_i64_areg; break;
30461503Sdfr    case MVT::f32:   Opcode = NVPTX::LD_f32_areg; break;
30561503Sdfr    case MVT::f64:   Opcode = NVPTX::LD_f64_areg; break;
30661452Sdfr    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_areg; break;
30761452Sdfr    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
30861452Sdfr    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
30961452Sdfr    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
31061452Sdfr    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
31161452Sdfr    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
31261452Sdfr    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_areg; break;
31361452Sdfr    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
31461452Sdfr    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
31561452Sdfr    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
31661452Sdfr    default: return NULL;
31761452Sdfr    }
31861452Sdfr    SDValue Ops[] = { getI32Imm(isVolatile),
31961452Sdfr                      getI32Imm(codeAddrSpace),
32061452Sdfr                      getI32Imm(vecType),
32161452Sdfr                      getI32Imm(fromType),
32261452Sdfr                      getI32Imm(fromTypeWidth),
32361452Sdfr                      N1, Chain };
32461452Sdfr    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
32561452Sdfr                                     MVT::Other, Ops, 7);
32661452Sdfr  }
32761452Sdfr
32861452Sdfr  if (NVPTXLD != NULL) {
32961452Sdfr    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
33061452Sdfr    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
33161452Sdfr    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
33261452Sdfr  }
33361452Sdfr
33461452Sdfr  return NVPTXLD;
33561452Sdfr}
33661452Sdfr
33787479ScokaneSDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
33887479Scokane  DebugLoc dl = N->getDebugLoc();
33987479Scokane  StoreSDNode *ST = cast<StoreSDNode>(N);
34087479Scokane  EVT StoreVT = ST->getMemoryVT();
34161452Sdfr  SDNode *NVPTXST = NULL;
34287479Scokane
34387479Scokane  // do not support pre/post inc/dec
34461452Sdfr  if (ST->isIndexed())
34561452Sdfr    return NULL;
34661452Sdfr
34761452Sdfr  if (!StoreVT.isSimple())
34861452Sdfr    return NULL;
34961452Sdfr
35061452Sdfr  // Address Space Setting
35161452Sdfr  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
35261452Sdfr
35361452Sdfr  // Volatile Setting
35461452Sdfr  // - .volatile is only availalble for .global and .shared
35561452Sdfr  bool isVolatile = ST->isVolatile();
35661452Sdfr  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
35787479Scokane      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
35887479Scokane      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
35987479Scokane    isVolatile = false;
36061452Sdfr
36161452Sdfr  // Vector Setting
36261452Sdfr  MVT SimpleVT = StoreVT.getSimpleVT();
36361452Sdfr  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
36461452Sdfr  if (SimpleVT.isVector()) {
36561452Sdfr    unsigned num = SimpleVT.getVectorNumElements();
36661452Sdfr    if (num == 2)
36761452Sdfr      vecType = NVPTX::PTXLdStInstCode::V2;
36861452Sdfr    else if (num == 4)
36961452Sdfr      vecType = NVPTX::PTXLdStInstCode::V4;
37061452Sdfr    else
37161452Sdfr      return NULL;
37261452Sdfr  }
37361452Sdfr
37461452Sdfr  // Type Setting: toType + toTypeWidth
37561452Sdfr  // - for integer type, always use 'u'
37661452Sdfr  //
37761452Sdfr  MVT ScalarVT = SimpleVT.getScalarType();
37861452Sdfr  unsigned toTypeWidth =  ScalarVT.getSizeInBits();
37961452Sdfr  unsigned int toType;
38061452Sdfr  if (ScalarVT.isFloatingPoint())
38161452Sdfr    toType = NVPTX::PTXLdStInstCode::Float;
38261452Sdfr  else
38361452Sdfr    toType = NVPTX::PTXLdStInstCode::Unsigned;
38461452Sdfr
38561452Sdfr  // Create the machine instruction DAG
38661452Sdfr  SDValue Chain = N->getOperand(0);
38761452Sdfr  SDValue N1 = N->getOperand(1);
38861452Sdfr  SDValue N2 = N->getOperand(2);
38961452Sdfr  SDValue Addr;
39061452Sdfr  SDValue Offset, Base;
39161452Sdfr  unsigned Opcode;
39261452Sdfr  MVT::SimpleValueType SourceVT =
39361452Sdfr      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
39461452Sdfr
39561452Sdfr  if (SelectDirectAddr(N2, Addr)) {
39661452Sdfr    switch (SourceVT) {
39761452Sdfr    case MVT::i8:    Opcode = NVPTX::ST_i8_avar; break;
39861452Sdfr    case MVT::i16:   Opcode = NVPTX::ST_i16_avar; break;
39961452Sdfr    case MVT::i32:   Opcode = NVPTX::ST_i32_avar; break;
40061452Sdfr    case MVT::i64:   Opcode = NVPTX::ST_i64_avar; break;
40161452Sdfr    case MVT::f32:   Opcode = NVPTX::ST_f32_avar; break;
40261452Sdfr    case MVT::f64:   Opcode = NVPTX::ST_f64_avar; break;
40361452Sdfr    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_avar; break;
40461452Sdfr    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
40561452Sdfr    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
40661452Sdfr    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
40761452Sdfr    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
40861452Sdfr    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
40961452Sdfr    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_avar; break;
41061452Sdfr    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
41161452Sdfr    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
41261452Sdfr    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
41361452Sdfr    default: return NULL;
41461452Sdfr    }
41561452Sdfr    SDValue Ops[] = { N1,
41661452Sdfr                      getI32Imm(isVolatile),
41761452Sdfr                      getI32Imm(codeAddrSpace),
41861452Sdfr                      getI32Imm(vecType),
41961452Sdfr                      getI32Imm(toType),
420113506Smdodd                      getI32Imm(toTypeWidth),
421113506Smdodd                      Addr, Chain };
422    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
423                                     MVT::Other, Ops, 8);
424  } else if (Subtarget.is64Bit()?
425      SelectADDRsi64(N2.getNode(), N2, Base, Offset):
426      SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
427    switch (SourceVT) {
428    case MVT::i8:    Opcode = NVPTX::ST_i8_asi; break;
429    case MVT::i16:   Opcode = NVPTX::ST_i16_asi; break;
430    case MVT::i32:   Opcode = NVPTX::ST_i32_asi; break;
431    case MVT::i64:   Opcode = NVPTX::ST_i64_asi; break;
432    case MVT::f32:   Opcode = NVPTX::ST_f32_asi; break;
433    case MVT::f64:   Opcode = NVPTX::ST_f64_asi; break;
434    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_asi; break;
435    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
436    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
437    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
438    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
439    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
440    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_asi; break;
441    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
442    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
443    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
444    default: return NULL;
445    }
446    SDValue Ops[] = { N1,
447                      getI32Imm(isVolatile),
448                      getI32Imm(codeAddrSpace),
449                      getI32Imm(vecType),
450                      getI32Imm(toType),
451                      getI32Imm(toTypeWidth),
452                      Base, Offset, Chain };
453    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
454                                     MVT::Other, Ops, 9);
455  } else if (Subtarget.is64Bit()?
456      SelectADDRri64(N2.getNode(), N2, Base, Offset):
457      SelectADDRri(N2.getNode(), N2, Base, Offset)) {
458    switch (SourceVT) {
459    case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
460    case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
461    case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
462    case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
463    case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
464    case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
465    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_ari; break;
466    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
467    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
468    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
469    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
470    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
471    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_ari; break;
472    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
473    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
474    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
475    default: return NULL;
476    }
477    SDValue Ops[] = { N1,
478                      getI32Imm(isVolatile),
479                      getI32Imm(codeAddrSpace),
480                      getI32Imm(vecType),
481                      getI32Imm(toType),
482                      getI32Imm(toTypeWidth),
483                      Base, Offset, Chain };
484    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
485                                     MVT::Other, Ops, 9);
486  } else {
487    switch (SourceVT) {
488    case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
489    case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
490    case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
491    case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
492    case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
493    case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
494    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_areg; break;
495    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
496    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
497    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
498    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
499    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
500    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_areg; break;
501    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
502    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
503    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
504    default: return NULL;
505    }
506    SDValue Ops[] = { N1,
507                      getI32Imm(isVolatile),
508                      getI32Imm(codeAddrSpace),
509                      getI32Imm(vecType),
510                      getI32Imm(toType),
511                      getI32Imm(toTypeWidth),
512                      N2, Chain };
513    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
514                                     MVT::Other, Ops, 8);
515  }
516
517  if (NVPTXST != NULL) {
518    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
519    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
520    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
521  }
522
523  return NVPTXST;
524}
525
526// SelectDirectAddr - Match a direct address for DAG.
527// A direct address could be a globaladdress or externalsymbol.
528bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
529  // Return true if TGA or ES.
530  if (N.getOpcode() == ISD::TargetGlobalAddress
531      || N.getOpcode() == ISD::TargetExternalSymbol) {
532    Address = N;
533    return true;
534  }
535  if (N.getOpcode() == NVPTXISD::Wrapper) {
536    Address = N.getOperand(0);
537    return true;
538  }
539  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
540    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
541    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
542      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
543        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
544  }
545  return false;
546}
547
548// symbol+offset
549bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
550                                         SDValue &Base, SDValue &Offset,
551                                         MVT mvt) {
552  if (Addr.getOpcode() == ISD::ADD) {
553    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
554      SDValue base=Addr.getOperand(0);
555      if (SelectDirectAddr(base, Base)) {
556        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
557        return true;
558      }
559    }
560  }
561  return false;
562}
563
564// symbol+offset
565bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
566                                     SDValue &Base, SDValue &Offset) {
567  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
568}
569
570// symbol+offset
571bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
572                                       SDValue &Base, SDValue &Offset) {
573  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
574}
575
576// register+offset
577bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
578                                         SDValue &Base, SDValue &Offset,
579                                         MVT mvt) {
580  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
581    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
582    Offset = CurDAG->getTargetConstant(0, mvt);
583    return true;
584  }
585  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
586      Addr.getOpcode() == ISD::TargetGlobalAddress)
587    return false;  // direct calls.
588
589  if (Addr.getOpcode() == ISD::ADD) {
590    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
591      return false;
592    }
593    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
594      if (FrameIndexSDNode *FIN =
595          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
596        // Constant offset from frame ref.
597        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
598      else
599        Base = Addr.getOperand(0);
600      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
601      return true;
602    }
603  }
604  return false;
605}
606
607// register+offset
608bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
609                                     SDValue &Base, SDValue &Offset) {
610  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
611}
612
613// register+offset
614bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
615                                       SDValue &Base, SDValue &Offset) {
616  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
617}
618
619bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
620                                                 unsigned int spN) const {
621  const Value *Src = NULL;
622  // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
623  // the classof() for MemSDNode does not include MemIntrinsicSDNode
624  // (See SelectionDAGNodes.h). So we need to check for both.
625  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
626    Src = mN->getSrcValue();
627  }
628  else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
629    Src = mN->getSrcValue();
630  }
631  if (!Src)
632    return false;
633  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
634    return (PT->getAddressSpace() == spN);
635  return false;
636}
637
638/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
639/// inline asm expressions.
640bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
641                                                     char ConstraintCode,
642                                                 std::vector<SDValue> &OutOps) {
643  SDValue Op0, Op1;
644  switch (ConstraintCode) {
645  default: return true;
646  case 'm':   // memory
647    if (SelectDirectAddr(Op, Op0)) {
648      OutOps.push_back(Op0);
649      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
650      return false;
651    }
652    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
653      OutOps.push_back(Op0);
654      OutOps.push_back(Op1);
655      return false;
656    }
657    break;
658  }
659  return true;
660}
661
662// Return true if N is a undef or a constant.
663// If N was undef, return a (i8imm 0) in Retval
664// If N was imm, convert it to i8imm and return in Retval
665// Note: The convert to i8imm is required, otherwise the
666// pattern matcher inserts a bunch of IMOVi8rr to convert
667// the imm to i8imm, and this causes instruction selection
668// to fail.
669bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
670                                   SDValue &Retval) {
671  if (!(N.getOpcode() == ISD::UNDEF) &&
672      !(N.getOpcode() == ISD::Constant))
673    return false;
674
675  if (N.getOpcode() == ISD::UNDEF)
676    Retval = CurDAG->getTargetConstant(0, MVT::i8);
677  else {
678    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
679    unsigned retval = cn->getZExtValue();
680    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
681  }
682  return true;
683}
684