1284677Sdim//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
2284677Sdim//
3284677Sdim//                     The LLVM Compiler Infrastructure
4284677Sdim//
5284677Sdim// This file is distributed under the University of Illinois Open Source
6284677Sdim// License. See LICENSE.TXT for details.
7284677Sdim//
8284677Sdim//==-----------------------------------------------------------------------===//
9284677Sdim//
10284677Sdim/// \file
11284677Sdim/// \brief Defines an instruction selector for the AMDGPU target.
12284677Sdim//
13284677Sdim//===----------------------------------------------------------------------===//
14296417Sdim
15296417Sdim#include "AMDGPUDiagnosticInfoUnsupported.h"
16284677Sdim#include "AMDGPUInstrInfo.h"
17284677Sdim#include "AMDGPUISelLowering.h" // For AMDGPUISD
18284677Sdim#include "AMDGPURegisterInfo.h"
19284677Sdim#include "AMDGPUSubtarget.h"
20284677Sdim#include "R600InstrInfo.h"
21284677Sdim#include "SIDefines.h"
22284677Sdim#include "SIISelLowering.h"
23284677Sdim#include "SIMachineFunctionInfo.h"
24284677Sdim#include "llvm/CodeGen/FunctionLoweringInfo.h"
25284677Sdim#include "llvm/CodeGen/MachineFrameInfo.h"
26284677Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
27296417Sdim#include "llvm/CodeGen/PseudoSourceValue.h"
28284677Sdim#include "llvm/CodeGen/SelectionDAG.h"
29284677Sdim#include "llvm/CodeGen/SelectionDAGISel.h"
30284677Sdim#include "llvm/IR/Function.h"
31284677Sdim
32284677Sdimusing namespace llvm;
33284677Sdim
34284677Sdim//===----------------------------------------------------------------------===//
35284677Sdim// Instruction Selector Implementation
36284677Sdim//===----------------------------------------------------------------------===//
37284677Sdim
38284677Sdimnamespace {
39284677Sdim/// AMDGPU specific code to select AMDGPU machine instructions for
40284677Sdim/// SelectionDAG operations.
41284677Sdimclass AMDGPUDAGToDAGISel : public SelectionDAGISel {
42284677Sdim  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
43284677Sdim  // make the right decision when generating code for different targets.
44284677Sdim  const AMDGPUSubtarget *Subtarget;
45296417Sdim
46284677Sdimpublic:
47284677Sdim  AMDGPUDAGToDAGISel(TargetMachine &TM);
48284677Sdim  virtual ~AMDGPUDAGToDAGISel();
49284677Sdim  bool runOnMachineFunction(MachineFunction &MF) override;
50284677Sdim  SDNode *Select(SDNode *N) override;
51284677Sdim  const char *getPassName() const override;
52296417Sdim  void PreprocessISelDAG() override;
53284677Sdim  void PostprocessISelDAG() override;
54284677Sdim
55284677Sdimprivate:
56284677Sdim  bool isInlineImmediate(SDNode *N) const;
57284677Sdim  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
58284677Sdim                   const R600InstrInfo *TII);
59284677Sdim  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
60284677Sdim  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
61284677Sdim
62284677Sdim  // Complex pattern selectors
63284677Sdim  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
64284677Sdim  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
65284677Sdim  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
66284677Sdim
67284677Sdim  static bool checkType(const Value *ptr, unsigned int addrspace);
68284677Sdim  static bool checkPrivateAddress(const MachineMemOperand *Op);
69284677Sdim
70284677Sdim  static bool isGlobalStore(const StoreSDNode *N);
71284677Sdim  static bool isFlatStore(const StoreSDNode *N);
72284677Sdim  static bool isPrivateStore(const StoreSDNode *N);
73284677Sdim  static bool isLocalStore(const StoreSDNode *N);
74284677Sdim  static bool isRegionStore(const StoreSDNode *N);
75284677Sdim
76284677Sdim  bool isCPLoad(const LoadSDNode *N) const;
77284677Sdim  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
78284677Sdim  bool isGlobalLoad(const LoadSDNode *N) const;
79284677Sdim  bool isFlatLoad(const LoadSDNode *N) const;
80284677Sdim  bool isParamLoad(const LoadSDNode *N) const;
81284677Sdim  bool isPrivateLoad(const LoadSDNode *N) const;
82284677Sdim  bool isLocalLoad(const LoadSDNode *N) const;
83284677Sdim  bool isRegionLoad(const LoadSDNode *N) const;
84284677Sdim
85284677Sdim  SDNode *glueCopyToM0(SDNode *N) const;
86284677Sdim
87284677Sdim  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
88284677Sdim  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
89284677Sdim  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
90284677Sdim                                       SDValue& Offset);
91284677Sdim  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
92284677Sdim  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
93284677Sdim  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
94284677Sdim                       unsigned OffsetBits) const;
95284677Sdim  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
96284677Sdim  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
97284677Sdim                                 SDValue &Offset1) const;
98296417Sdim  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
99284677Sdim                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
100284677Sdim                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
101284677Sdim                   SDValue &TFE) const;
102284677Sdim  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
103284677Sdim                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
104284677Sdim                         SDValue &SLC, SDValue &TFE) const;
105284677Sdim  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
106284677Sdim                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
107284677Sdim                         SDValue &SLC) const;
108284677Sdim  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
109284677Sdim                          SDValue &SOffset, SDValue &ImmOffset) const;
110284677Sdim  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
111284677Sdim                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
112284677Sdim                         SDValue &TFE) const;
113284677Sdim  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
114284677Sdim                         SDValue &Offset, SDValue &GLC) const;
115296417Sdim  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
116296417Sdim                        bool &Imm) const;
117296417Sdim  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
118296417Sdim                  bool &Imm) const;
119296417Sdim  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
120296417Sdim  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
121296417Sdim  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
122296417Sdim  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
123296417Sdim  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
124296417Sdim  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
125284677Sdim  SDNode *SelectAddrSpaceCast(SDNode *N);
126284677Sdim  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
127286684Sdim  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
128284677Sdim  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
129284677Sdim                       SDValue &Clamp, SDValue &Omod) const;
130286684Sdim  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
131286684Sdim                         SDValue &Clamp, SDValue &Omod) const;
132284677Sdim
133284677Sdim  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
134284677Sdim                            SDValue &Omod) const;
135284677Sdim  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
136284677Sdim                                 SDValue &Clamp,
137284677Sdim                                 SDValue &Omod) const;
138284677Sdim
139284677Sdim  SDNode *SelectADD_SUB_I64(SDNode *N);
140284677Sdim  SDNode *SelectDIV_SCALE(SDNode *N);
141284677Sdim
142284677Sdim  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
143284677Sdim                   uint32_t Offset, uint32_t Width);
144284677Sdim  SDNode *SelectS_BFEFromShifts(SDNode *N);
145284677Sdim  SDNode *SelectS_BFE(SDNode *N);
146284677Sdim
147284677Sdim  // Include the pieces autogenerated from the target description.
148284677Sdim#include "AMDGPUGenDAGISel.inc"
149284677Sdim};
150284677Sdim}  // end anonymous namespace
151284677Sdim
152284677Sdim/// \brief This pass converts a legalized DAG into a AMDGPU-specific
153284677Sdim// DAG, ready for instruction scheduling.
154284677SdimFunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
155284677Sdim  return new AMDGPUDAGToDAGISel(TM);
156284677Sdim}
157284677Sdim
158284677SdimAMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
159284677Sdim    : SelectionDAGISel(TM) {}
160284677Sdim
161284677Sdimbool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
162284677Sdim  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
163284677Sdim  return SelectionDAGISel::runOnMachineFunction(MF);
164284677Sdim}
165284677Sdim
166284677SdimAMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
167284677Sdim}
168284677Sdim
169284677Sdimbool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
170284677Sdim  const SITargetLowering *TL
171284677Sdim      = static_cast<const SITargetLowering *>(getTargetLowering());
172284677Sdim  return TL->analyzeImmediate(N) == 0;
173284677Sdim}
174284677Sdim
175284677Sdim/// \brief Determine the register class for \p OpNo
176284677Sdim/// \returns The register class of the virtual register that will be used for
177284677Sdim/// the given operand number \OpNo or NULL if the register class cannot be
178284677Sdim/// determined.
179284677Sdimconst TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
180284677Sdim                                                          unsigned OpNo) const {
181284677Sdim  if (!N->isMachineOpcode())
182284677Sdim    return nullptr;
183284677Sdim
184284677Sdim  switch (N->getMachineOpcode()) {
185284677Sdim  default: {
186284677Sdim    const MCInstrDesc &Desc =
187284677Sdim        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
188284677Sdim    unsigned OpIdx = Desc.getNumDefs() + OpNo;
189284677Sdim    if (OpIdx >= Desc.getNumOperands())
190284677Sdim      return nullptr;
191284677Sdim    int RegClass = Desc.OpInfo[OpIdx].RegClass;
192284677Sdim    if (RegClass == -1)
193284677Sdim      return nullptr;
194284677Sdim
195284677Sdim    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
196284677Sdim  }
197284677Sdim  case AMDGPU::REG_SEQUENCE: {
198284677Sdim    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
199284677Sdim    const TargetRegisterClass *SuperRC =
200284677Sdim        Subtarget->getRegisterInfo()->getRegClass(RCID);
201284677Sdim
202284677Sdim    SDValue SubRegOp = N->getOperand(OpNo + 1);
203284677Sdim    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
204284677Sdim    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
205284677Sdim                                                              SubRegIdx);
206284677Sdim  }
207284677Sdim  }
208284677Sdim}
209284677Sdim
210284677Sdimbool AMDGPUDAGToDAGISel::SelectADDRParam(
211284677Sdim  SDValue Addr, SDValue& R1, SDValue& R2) {
212284677Sdim
213284677Sdim  if (Addr.getOpcode() == ISD::FrameIndex) {
214284677Sdim    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
215284677Sdim      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
216284677Sdim      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
217284677Sdim    } else {
218284677Sdim      R1 = Addr;
219284677Sdim      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
220284677Sdim    }
221284677Sdim  } else if (Addr.getOpcode() == ISD::ADD) {
222284677Sdim    R1 = Addr.getOperand(0);
223284677Sdim    R2 = Addr.getOperand(1);
224284677Sdim  } else {
225284677Sdim    R1 = Addr;
226284677Sdim    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
227284677Sdim  }
228284677Sdim  return true;
229284677Sdim}
230284677Sdim
231284677Sdimbool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
232284677Sdim  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
233284677Sdim      Addr.getOpcode() == ISD::TargetGlobalAddress) {
234284677Sdim    return false;
235284677Sdim  }
236284677Sdim  return SelectADDRParam(Addr, R1, R2);
237284677Sdim}
238284677Sdim
239284677Sdim
240284677Sdimbool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
241284677Sdim  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
242284677Sdim      Addr.getOpcode() == ISD::TargetGlobalAddress) {
243284677Sdim    return false;
244284677Sdim  }
245284677Sdim
246284677Sdim  if (Addr.getOpcode() == ISD::FrameIndex) {
247284677Sdim    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
248284677Sdim      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
249284677Sdim      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
250284677Sdim    } else {
251284677Sdim      R1 = Addr;
252284677Sdim      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
253284677Sdim    }
254284677Sdim  } else if (Addr.getOpcode() == ISD::ADD) {
255284677Sdim    R1 = Addr.getOperand(0);
256284677Sdim    R2 = Addr.getOperand(1);
257284677Sdim  } else {
258284677Sdim    R1 = Addr;
259284677Sdim    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
260284677Sdim  }
261284677Sdim  return true;
262284677Sdim}
263284677Sdim
264284677SdimSDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
265284677Sdim  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
266284677Sdim      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
267284677Sdim                 AMDGPUAS::LOCAL_ADDRESS))
268284677Sdim    return N;
269284677Sdim
270284677Sdim  const SITargetLowering& Lowering =
271284677Sdim      *static_cast<const SITargetLowering*>(getTargetLowering());
272284677Sdim
273284677Sdim  // Write max value to m0 before each load operation
274284677Sdim
275284677Sdim  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
276284677Sdim                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
277284677Sdim
278284677Sdim  SDValue Glue = M0.getValue(1);
279284677Sdim
280284677Sdim  SmallVector <SDValue, 8> Ops;
281284677Sdim  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
282284677Sdim     Ops.push_back(N->getOperand(i));
283284677Sdim  }
284284677Sdim  Ops.push_back(Glue);
285284677Sdim  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
286284677Sdim
287284677Sdim  return N;
288284677Sdim}
289284677Sdim
290296417Sdimstatic unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
291296417Sdim  switch (NumVectorElts) {
292296417Sdim  case 1:
293296417Sdim    return AMDGPU::SReg_32RegClassID;
294296417Sdim  case 2:
295296417Sdim    return AMDGPU::SReg_64RegClassID;
296296417Sdim  case 4:
297296417Sdim    return AMDGPU::SReg_128RegClassID;
298296417Sdim  case 8:
299296417Sdim    return AMDGPU::SReg_256RegClassID;
300296417Sdim  case 16:
301296417Sdim    return AMDGPU::SReg_512RegClassID;
302296417Sdim  }
303296417Sdim
304296417Sdim  llvm_unreachable("invalid vector size");
305296417Sdim}
306296417Sdim
307284677SdimSDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
308284677Sdim  unsigned int Opc = N->getOpcode();
309284677Sdim  if (N->isMachineOpcode()) {
310284677Sdim    N->setNodeId(-1);
311284677Sdim    return nullptr;   // Already selected.
312284677Sdim  }
313284677Sdim
314284677Sdim  if (isa<AtomicSDNode>(N))
315284677Sdim    N = glueCopyToM0(N);
316284677Sdim
317284677Sdim  switch (Opc) {
318284677Sdim  default: break;
319284677Sdim  // We are selecting i64 ADD here instead of custom lower it during
320284677Sdim  // DAG legalization, so we can fold some i64 ADDs used for address
321284677Sdim  // calculation into the LOAD and STORE instructions.
322284677Sdim  case ISD::ADD:
323284677Sdim  case ISD::SUB: {
324284677Sdim    if (N->getValueType(0) != MVT::i64 ||
325284677Sdim        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
326284677Sdim      break;
327284677Sdim
328284677Sdim    return SelectADD_SUB_I64(N);
329284677Sdim  }
330284677Sdim  case ISD::SCALAR_TO_VECTOR:
331284677Sdim  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
332284677Sdim  case ISD::BUILD_VECTOR: {
333284677Sdim    unsigned RegClassID;
334284677Sdim    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
335284677Sdim    EVT VT = N->getValueType(0);
336284677Sdim    unsigned NumVectorElts = VT.getVectorNumElements();
337284677Sdim    EVT EltVT = VT.getVectorElementType();
338284677Sdim    assert(EltVT.bitsEq(MVT::i32));
339284677Sdim    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
340296417Sdim      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
341284677Sdim    } else {
342284677Sdim      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
343284677Sdim      // that adds a 128 bits reg copy when going through TwoAddressInstructions
344284677Sdim      // pass. We want to avoid 128 bits copies as much as possible because they
345284677Sdim      // can't be bundled by our scheduler.
346284677Sdim      switch(NumVectorElts) {
347284677Sdim      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
348284677Sdim      case 4:
349284677Sdim        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
350284677Sdim          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
351284677Sdim        else
352284677Sdim          RegClassID = AMDGPU::R600_Reg128RegClassID;
353284677Sdim        break;
354284677Sdim      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
355284677Sdim      }
356284677Sdim    }
357284677Sdim
358284677Sdim    SDLoc DL(N);
359284677Sdim    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
360284677Sdim
361284677Sdim    if (NumVectorElts == 1) {
362284677Sdim      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
363284677Sdim                                  N->getOperand(0), RegClass);
364284677Sdim    }
365284677Sdim
366284677Sdim    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
367284677Sdim                                  "supported yet");
368284677Sdim    // 16 = Max Num Vector Elements
369284677Sdim    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
370284677Sdim    // 1 = Vector Register Class
371284677Sdim    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
372284677Sdim
373284677Sdim    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
374284677Sdim    bool IsRegSeq = true;
375284677Sdim    unsigned NOps = N->getNumOperands();
376284677Sdim    for (unsigned i = 0; i < NOps; i++) {
377284677Sdim      // XXX: Why is this here?
378284677Sdim      if (isa<RegisterSDNode>(N->getOperand(i))) {
379284677Sdim        IsRegSeq = false;
380284677Sdim        break;
381284677Sdim      }
382284677Sdim      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
383284677Sdim      RegSeqArgs[1 + (2 * i) + 1] =
384284677Sdim              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
385284677Sdim                                        MVT::i32);
386284677Sdim    }
387284677Sdim
388284677Sdim    if (NOps != NumVectorElts) {
389284677Sdim      // Fill in the missing undef elements if this was a scalar_to_vector.
390284677Sdim      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
391284677Sdim
392284677Sdim      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
393284677Sdim                                                     DL, EltVT);
394284677Sdim      for (unsigned i = NOps; i < NumVectorElts; ++i) {
395284677Sdim        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
396284677Sdim        RegSeqArgs[1 + (2 * i) + 1] =
397284677Sdim          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
398284677Sdim      }
399284677Sdim    }
400284677Sdim
401284677Sdim    if (!IsRegSeq)
402284677Sdim      break;
403284677Sdim    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
404284677Sdim                                RegSeqArgs);
405284677Sdim  }
406284677Sdim  case ISD::BUILD_PAIR: {
407284677Sdim    SDValue RC, SubReg0, SubReg1;
408284677Sdim    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
409284677Sdim      break;
410284677Sdim    }
411284677Sdim    SDLoc DL(N);
412284677Sdim    if (N->getValueType(0) == MVT::i128) {
413284677Sdim      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
414284677Sdim      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
415284677Sdim      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
416284677Sdim    } else if (N->getValueType(0) == MVT::i64) {
417284677Sdim      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
418284677Sdim      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
419284677Sdim      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
420284677Sdim    } else {
421284677Sdim      llvm_unreachable("Unhandled value type for BUILD_PAIR");
422284677Sdim    }
423284677Sdim    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
424284677Sdim                            N->getOperand(1), SubReg1 };
425284677Sdim    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
426284677Sdim                                  DL, N->getValueType(0), Ops);
427284677Sdim  }
428284677Sdim
429284677Sdim  case ISD::Constant:
430284677Sdim  case ISD::ConstantFP: {
431284677Sdim    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
432284677Sdim        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
433284677Sdim      break;
434284677Sdim
435284677Sdim    uint64_t Imm;
436284677Sdim    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
437284677Sdim      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
438284677Sdim    else {
439284677Sdim      ConstantSDNode *C = cast<ConstantSDNode>(N);
440284677Sdim      Imm = C->getZExtValue();
441284677Sdim    }
442284677Sdim
443284677Sdim    SDLoc DL(N);
444284677Sdim    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
445284677Sdim                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
446284677Sdim                                                    MVT::i32));
447284677Sdim    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
448284677Sdim                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
449284677Sdim    const SDValue Ops[] = {
450284677Sdim      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
451284677Sdim      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
452284677Sdim      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
453284677Sdim    };
454284677Sdim
455284677Sdim    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
456284677Sdim                                  N->getValueType(0), Ops);
457284677Sdim  }
458296417Sdim  case ISD::LOAD:
459284677Sdim  case ISD::STORE: {
460284677Sdim    N = glueCopyToM0(N);
461284677Sdim    break;
462284677Sdim  }
463284677Sdim
464284677Sdim  case AMDGPUISD::BFE_I32:
465284677Sdim  case AMDGPUISD::BFE_U32: {
466284677Sdim    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
467284677Sdim      break;
468284677Sdim
469284677Sdim    // There is a scalar version available, but unlike the vector version which
470284677Sdim    // has a separate operand for the offset and width, the scalar version packs
471284677Sdim    // the width and offset into a single operand. Try to move to the scalar
472284677Sdim    // version if the offsets are constant, so that we can try to keep extended
473284677Sdim    // loads of kernel arguments in SGPRs.
474284677Sdim
475284677Sdim    // TODO: Technically we could try to pattern match scalar bitshifts of
476284677Sdim    // dynamic values, but it's probably not useful.
477284677Sdim    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
478284677Sdim    if (!Offset)
479284677Sdim      break;
480284677Sdim
481284677Sdim    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
482284677Sdim    if (!Width)
483284677Sdim      break;
484284677Sdim
485284677Sdim    bool Signed = Opc == AMDGPUISD::BFE_I32;
486284677Sdim
487284677Sdim    uint32_t OffsetVal = Offset->getZExtValue();
488284677Sdim    uint32_t WidthVal = Width->getZExtValue();
489284677Sdim
490284677Sdim    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
491284677Sdim                    N->getOperand(0), OffsetVal, WidthVal);
492284677Sdim  }
493284677Sdim  case AMDGPUISD::DIV_SCALE: {
494284677Sdim    return SelectDIV_SCALE(N);
495284677Sdim  }
496284677Sdim  case ISD::CopyToReg: {
497284677Sdim    const SITargetLowering& Lowering =
498284677Sdim      *static_cast<const SITargetLowering*>(getTargetLowering());
499284677Sdim    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
500284677Sdim    break;
501284677Sdim  }
502284677Sdim  case ISD::ADDRSPACECAST:
503284677Sdim    return SelectAddrSpaceCast(N);
504284677Sdim  case ISD::AND:
505284677Sdim  case ISD::SRL:
506284677Sdim  case ISD::SRA:
507284677Sdim    if (N->getValueType(0) != MVT::i32 ||
508284677Sdim        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
509284677Sdim      break;
510284677Sdim
511284677Sdim    return SelectS_BFE(N);
512284677Sdim  }
513284677Sdim
514284677Sdim  return SelectCode(N);
515284677Sdim}
516284677Sdim
517284677Sdimbool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
518284677Sdim  assert(AS != 0 && "Use checkPrivateAddress instead.");
519284677Sdim  if (!Ptr)
520284677Sdim    return false;
521284677Sdim
522284677Sdim  return Ptr->getType()->getPointerAddressSpace() == AS;
523284677Sdim}
524284677Sdim
525284677Sdimbool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
526284677Sdim  if (Op->getPseudoValue())
527284677Sdim    return true;
528284677Sdim
529284677Sdim  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
530284677Sdim    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
531284677Sdim
532284677Sdim  return false;
533284677Sdim}
534284677Sdim
535284677Sdimbool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
536284677Sdim  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
537284677Sdim}
538284677Sdim
539284677Sdimbool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
540284677Sdim  const Value *MemVal = N->getMemOperand()->getValue();
541284677Sdim  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
542284677Sdim          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
543284677Sdim          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
544284677Sdim}
545284677Sdim
546284677Sdimbool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
547284677Sdim  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
548284677Sdim}
549284677Sdim
550284677Sdimbool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
551284677Sdim  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
552284677Sdim}
553284677Sdim
554284677Sdimbool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
555284677Sdim  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
556284677Sdim}
557284677Sdim
558284677Sdimbool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
559284677Sdim  const Value *MemVal = N->getMemOperand()->getValue();
560284677Sdim  if (CbId == -1)
561284677Sdim    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
562284677Sdim
563284677Sdim  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
564284677Sdim}
565284677Sdim
566284677Sdimbool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
567284677Sdim  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
568284677Sdim    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
569284677Sdim        N->getMemoryVT().bitsLT(MVT::i32))
570284677Sdim      return true;
571284677Sdim
572284677Sdim  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
573284677Sdim}
574284677Sdim
575284677Sdimbool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
576284677Sdim  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
577284677Sdim}
578284677Sdim
579284677Sdimbool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
580284677Sdim  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
581284677Sdim}
582284677Sdim
583284677Sdimbool AMDGPUDAGToDAGISel::isFlatLoad(const  LoadSDNode *N) const {
584284677Sdim  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
585284677Sdim}
586284677Sdim
587284677Sdimbool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
588284677Sdim  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
589284677Sdim}
590284677Sdim
591284677Sdimbool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
592284677Sdim  MachineMemOperand *MMO = N->getMemOperand();
593284677Sdim  if (checkPrivateAddress(N->getMemOperand())) {
594284677Sdim    if (MMO) {
595284677Sdim      const PseudoSourceValue *PSV = MMO->getPseudoValue();
596296417Sdim      if (PSV && PSV->isConstantPool()) {
597284677Sdim        return true;
598284677Sdim      }
599284677Sdim    }
600284677Sdim  }
601284677Sdim  return false;
602284677Sdim}
603284677Sdim
604284677Sdimbool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
605284677Sdim  if (checkPrivateAddress(N->getMemOperand())) {
606284677Sdim    // Check to make sure we are not a constant pool load or a constant load
607284677Sdim    // that is marked as a private load
608284677Sdim    if (isCPLoad(N) || isConstantLoad(N, -1)) {
609284677Sdim      return false;
610284677Sdim    }
611284677Sdim  }
612284677Sdim
613284677Sdim  const Value *MemVal = N->getMemOperand()->getValue();
614284677Sdim  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
615284677Sdim      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
616284677Sdim      !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
617284677Sdim      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
618284677Sdim      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
619284677Sdim      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
620284677Sdim      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
621284677Sdim    return true;
622284677Sdim  }
623284677Sdim  return false;
624284677Sdim}
625284677Sdim
626284677Sdimconst char *AMDGPUDAGToDAGISel::getPassName() const {
627284677Sdim  return "AMDGPU DAG->DAG Pattern Instruction Selection";
628284677Sdim}
629284677Sdim
630284677Sdim#ifdef DEBUGTMP
631284677Sdim#undef INT64_C
632284677Sdim#endif
633284677Sdim#undef DEBUGTMP
634284677Sdim
635284677Sdim//===----------------------------------------------------------------------===//
636284677Sdim// Complex Patterns
637284677Sdim//===----------------------------------------------------------------------===//
638284677Sdim
639284677Sdimbool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
640284677Sdim                                                         SDValue& IntPtr) {
641284677Sdim  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
642284677Sdim    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
643284677Sdim                                       true);
644284677Sdim    return true;
645284677Sdim  }
646284677Sdim  return false;
647284677Sdim}
648284677Sdim
649284677Sdimbool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
650284677Sdim    SDValue& BaseReg, SDValue &Offset) {
651284677Sdim  if (!isa<ConstantSDNode>(Addr)) {
652284677Sdim    BaseReg = Addr;
653284677Sdim    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
654284677Sdim    return true;
655284677Sdim  }
656284677Sdim  return false;
657284677Sdim}
658284677Sdim
659284677Sdimbool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
660284677Sdim                                           SDValue &Offset) {
661284677Sdim  ConstantSDNode *IMMOffset;
662284677Sdim
663284677Sdim  if (Addr.getOpcode() == ISD::ADD
664284677Sdim      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
665284677Sdim      && isInt<16>(IMMOffset->getZExtValue())) {
666284677Sdim
667284677Sdim      Base = Addr.getOperand(0);
668284677Sdim      Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
669284677Sdim                                         MVT::i32);
670284677Sdim      return true;
671284677Sdim  // If the pointer address is constant, we can move it to the offset field.
672284677Sdim  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
673284677Sdim             && isInt<16>(IMMOffset->getZExtValue())) {
674284677Sdim    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
675284677Sdim                                  SDLoc(CurDAG->getEntryNode()),
676284677Sdim                                  AMDGPU::ZERO, MVT::i32);
677284677Sdim    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
678284677Sdim                                       MVT::i32);
679284677Sdim    return true;
680284677Sdim  }
681284677Sdim
682284677Sdim  // Default case, no offset
683284677Sdim  Base = Addr;
684284677Sdim  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
685284677Sdim  return true;
686284677Sdim}
687284677Sdim
688284677Sdimbool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
689284677Sdim                                            SDValue &Offset) {
690284677Sdim  ConstantSDNode *C;
691284677Sdim  SDLoc DL(Addr);
692284677Sdim
693284677Sdim  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
694284677Sdim    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
695284677Sdim    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
696284677Sdim  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
697284677Sdim            (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
698284677Sdim    Base = Addr.getOperand(0);
699284677Sdim    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
700284677Sdim  } else {
701284677Sdim    Base = Addr;
702284677Sdim    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
703284677Sdim  }
704284677Sdim
705284677Sdim  return true;
706284677Sdim}
707284677Sdim
708284677SdimSDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
709284677Sdim  SDLoc DL(N);
710284677Sdim  SDValue LHS = N->getOperand(0);
711284677Sdim  SDValue RHS = N->getOperand(1);
712284677Sdim
713284677Sdim  bool IsAdd = (N->getOpcode() == ISD::ADD);
714284677Sdim
715284677Sdim  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
716284677Sdim  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
717284677Sdim
718284677Sdim  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
719284677Sdim                                       DL, MVT::i32, LHS, Sub0);
720284677Sdim  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
721284677Sdim                                       DL, MVT::i32, LHS, Sub1);
722284677Sdim
723284677Sdim  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
724284677Sdim                                       DL, MVT::i32, RHS, Sub0);
725284677Sdim  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
726284677Sdim                                       DL, MVT::i32, RHS, Sub1);
727284677Sdim
728284677Sdim  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
729284677Sdim  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
730284677Sdim
731284677Sdim
732284677Sdim  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
733284677Sdim  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
734284677Sdim
735284677Sdim  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
736284677Sdim  SDValue Carry(AddLo, 1);
737284677Sdim  SDNode *AddHi
738284677Sdim    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
739284677Sdim                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
740284677Sdim
741284677Sdim  SDValue Args[5] = {
742284677Sdim    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
743284677Sdim    SDValue(AddLo,0),
744284677Sdim    Sub0,
745284677Sdim    SDValue(AddHi,0),
746284677Sdim    Sub1,
747284677Sdim  };
748284677Sdim  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
749284677Sdim}
750284677Sdim
751284677Sdim// We need to handle this here because tablegen doesn't support matching
752284677Sdim// instructions with multiple outputs.
753284677SdimSDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
754284677Sdim  SDLoc SL(N);
755284677Sdim  EVT VT = N->getValueType(0);
756284677Sdim
757284677Sdim  assert(VT == MVT::f32 || VT == MVT::f64);
758284677Sdim
759284677Sdim  unsigned Opc
760284677Sdim    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
761284677Sdim
762296417Sdim  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
763296417Sdim  // omod
764284677Sdim  SDValue Ops[8];
765284677Sdim
766284677Sdim  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
767284677Sdim  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
768284677Sdim  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
769284677Sdim  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
770284677Sdim}
771284677Sdim
772284677Sdimbool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
773284677Sdim                                         unsigned OffsetBits) const {
774284677Sdim  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
775284677Sdim      (OffsetBits == 8 && !isUInt<8>(Offset)))
776284677Sdim    return false;
777284677Sdim
778286684Sdim  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
779286684Sdim      Subtarget->unsafeDSOffsetFoldingEnabled())
780284677Sdim    return true;
781284677Sdim
782284677Sdim  // On Southern Islands instruction with a negative base value and an offset
783284677Sdim  // don't seem to work.
784284677Sdim  return CurDAG->SignBitIsZero(Base);
785284677Sdim}
786284677Sdim
787284677Sdimbool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
788284677Sdim                                              SDValue &Offset) const {
789284677Sdim  if (CurDAG->isBaseWithConstantOffset(Addr)) {
790284677Sdim    SDValue N0 = Addr.getOperand(0);
791284677Sdim    SDValue N1 = Addr.getOperand(1);
792284677Sdim    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
793284677Sdim    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
794284677Sdim      // (add n0, c0)
795284677Sdim      Base = N0;
796284677Sdim      Offset = N1;
797284677Sdim      return true;
798284677Sdim    }
799296417Sdim  } else if (Addr.getOpcode() == ISD::SUB) {
800296417Sdim    // sub C, x -> add (sub 0, x), C
801296417Sdim    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
802296417Sdim      int64_t ByteOffset = C->getSExtValue();
803296417Sdim      if (isUInt<16>(ByteOffset)) {
804296417Sdim        SDLoc DL(Addr);
805296417Sdim        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
806284677Sdim
807296417Sdim        // XXX - This is kind of hacky. Create a dummy sub node so we can check
808296417Sdim        // the known bits in isDSOffsetLegal. We need to emit the selected node
809296417Sdim        // here, so this is thrown away.
810296417Sdim        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
811296417Sdim                                      Zero, Addr.getOperand(1));
812284677Sdim
813296417Sdim        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
814296417Sdim          MachineSDNode *MachineSub
815296417Sdim            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
816296417Sdim                                     Zero, Addr.getOperand(1));
817296417Sdim
818296417Sdim          Base = SDValue(MachineSub, 0);
819296417Sdim          Offset = Addr.getOperand(0);
820296417Sdim          return true;
821296417Sdim        }
822296417Sdim      }
823296417Sdim    }
824296417Sdim  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
825296417Sdim    // If we have a constant address, prefer to put the constant into the
826296417Sdim    // offset. This can save moves to load the constant address since multiple
827296417Sdim    // operations can share the zero base address register, and enables merging
828296417Sdim    // into read2 / write2 instructions.
829296417Sdim
830296417Sdim    SDLoc DL(Addr);
831296417Sdim
832284677Sdim    if (isUInt<16>(CAddr->getZExtValue())) {
833284677Sdim      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
834284677Sdim      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
835284677Sdim                                 DL, MVT::i32, Zero);
836284677Sdim      Base = SDValue(MovZero, 0);
837284677Sdim      Offset = Addr;
838284677Sdim      return true;
839284677Sdim    }
840284677Sdim  }
841284677Sdim
842284677Sdim  // default case
843284677Sdim  Base = Addr;
844296417Sdim  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
845284677Sdim  return true;
846284677Sdim}
847284677Sdim
848296417Sdim// TODO: If offset is too big, put low 16-bit into offset.
849284677Sdimbool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
850284677Sdim                                                   SDValue &Offset0,
851284677Sdim                                                   SDValue &Offset1) const {
852284677Sdim  SDLoc DL(Addr);
853284677Sdim
854284677Sdim  if (CurDAG->isBaseWithConstantOffset(Addr)) {
855284677Sdim    SDValue N0 = Addr.getOperand(0);
856284677Sdim    SDValue N1 = Addr.getOperand(1);
857284677Sdim    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
858284677Sdim    unsigned DWordOffset0 = C1->getZExtValue() / 4;
859284677Sdim    unsigned DWordOffset1 = DWordOffset0 + 1;
860284677Sdim    // (add n0, c0)
861284677Sdim    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
862284677Sdim      Base = N0;
863284677Sdim      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
864284677Sdim      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
865284677Sdim      return true;
866284677Sdim    }
867296417Sdim  } else if (Addr.getOpcode() == ISD::SUB) {
868296417Sdim    // sub C, x -> add (sub 0, x), C
869296417Sdim    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
870296417Sdim      unsigned DWordOffset0 = C->getZExtValue() / 4;
871296417Sdim      unsigned DWordOffset1 = DWordOffset0 + 1;
872284677Sdim
873296417Sdim      if (isUInt<8>(DWordOffset0)) {
874296417Sdim        SDLoc DL(Addr);
875296417Sdim        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
876296417Sdim
877296417Sdim        // XXX - This is kind of hacky. Create a dummy sub node so we can check
878296417Sdim        // the known bits in isDSOffsetLegal. We need to emit the selected node
879296417Sdim        // here, so this is thrown away.
880296417Sdim        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
881296417Sdim                                      Zero, Addr.getOperand(1));
882296417Sdim
883296417Sdim        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
884296417Sdim          MachineSDNode *MachineSub
885296417Sdim            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
886296417Sdim                                     Zero, Addr.getOperand(1));
887296417Sdim
888296417Sdim          Base = SDValue(MachineSub, 0);
889296417Sdim          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
890296417Sdim          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
891296417Sdim          return true;
892296417Sdim        }
893296417Sdim      }
894296417Sdim    }
895296417Sdim  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
896284677Sdim    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
897284677Sdim    unsigned DWordOffset1 = DWordOffset0 + 1;
898284677Sdim    assert(4 * DWordOffset0 == CAddr->getZExtValue());
899284677Sdim
900284677Sdim    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
901284677Sdim      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
902284677Sdim      MachineSDNode *MovZero
903284677Sdim        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
904284677Sdim                                 DL, MVT::i32, Zero);
905284677Sdim      Base = SDValue(MovZero, 0);
906284677Sdim      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
907284677Sdim      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
908284677Sdim      return true;
909284677Sdim    }
910284677Sdim  }
911284677Sdim
912284677Sdim  // default case
913284677Sdim  Base = Addr;
914284677Sdim  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
915284677Sdim  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
916284677Sdim  return true;
917284677Sdim}
918284677Sdim
919284677Sdimstatic bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
920284677Sdim  return isUInt<12>(Imm->getZExtValue());
921284677Sdim}
922284677Sdim
923296417Sdimbool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
924284677Sdim                                     SDValue &VAddr, SDValue &SOffset,
925284677Sdim                                     SDValue &Offset, SDValue &Offen,
926284677Sdim                                     SDValue &Idxen, SDValue &Addr64,
927284677Sdim                                     SDValue &GLC, SDValue &SLC,
928284677Sdim                                     SDValue &TFE) const {
929296417Sdim  // Subtarget prefers to use flat instruction
930296417Sdim  if (Subtarget->useFlatForGlobal())
931296417Sdim    return false;
932296417Sdim
933284677Sdim  SDLoc DL(Addr);
934284677Sdim
935284677Sdim  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
936284677Sdim  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
937284677Sdim  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
938284677Sdim
939284677Sdim  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
940284677Sdim  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
941284677Sdim  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
942284677Sdim  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
943284677Sdim
944284677Sdim  if (CurDAG->isBaseWithConstantOffset(Addr)) {
945284677Sdim    SDValue N0 = Addr.getOperand(0);
946284677Sdim    SDValue N1 = Addr.getOperand(1);
947284677Sdim    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
948284677Sdim
949284677Sdim    if (N0.getOpcode() == ISD::ADD) {
950284677Sdim      // (add (add N2, N3), C1) -> addr64
951284677Sdim      SDValue N2 = N0.getOperand(0);
952284677Sdim      SDValue N3 = N0.getOperand(1);
953284677Sdim      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
954284677Sdim      Ptr = N2;
955284677Sdim      VAddr = N3;
956284677Sdim    } else {
957284677Sdim
958284677Sdim      // (add N0, C1) -> offset
959284677Sdim      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
960284677Sdim      Ptr = N0;
961284677Sdim    }
962284677Sdim
963284677Sdim    if (isLegalMUBUFImmOffset(C1)) {
964284677Sdim        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
965296417Sdim        return true;
966284677Sdim    } else if (isUInt<32>(C1->getZExtValue())) {
967284677Sdim      // Illegal offset, store it in soffset.
968284677Sdim      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
969284677Sdim      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
970284677Sdim                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
971284677Sdim                        0);
972296417Sdim      return true;
973284677Sdim    }
974284677Sdim  }
975284677Sdim
976284677Sdim  if (Addr.getOpcode() == ISD::ADD) {
977284677Sdim    // (add N0, N1) -> addr64
978284677Sdim    SDValue N0 = Addr.getOperand(0);
979284677Sdim    SDValue N1 = Addr.getOperand(1);
980284677Sdim    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
981284677Sdim    Ptr = N0;
982284677Sdim    VAddr = N1;
983284677Sdim    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
984296417Sdim    return true;
985284677Sdim  }
986284677Sdim
987284677Sdim  // default case -> offset
988284677Sdim  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
989284677Sdim  Ptr = Addr;
990284677Sdim  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
991284677Sdim
992296417Sdim  return true;
993284677Sdim}
994284677Sdim
995284677Sdimbool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
996284677Sdim                                           SDValue &VAddr, SDValue &SOffset,
997284677Sdim                                           SDValue &Offset, SDValue &GLC,
998284677Sdim                                           SDValue &SLC, SDValue &TFE) const {
999284677Sdim  SDValue Ptr, Offen, Idxen, Addr64;
1000284677Sdim
1001287521Sdim  // addr64 bit was removed for volcanic islands.
1002287521Sdim  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1003287521Sdim    return false;
1004287521Sdim
1005296417Sdim  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1006296417Sdim              GLC, SLC, TFE))
1007296417Sdim    return false;
1008284677Sdim
1009284677Sdim  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1010284677Sdim  if (C->getSExtValue()) {
1011284677Sdim    SDLoc DL(Addr);
1012284677Sdim
1013284677Sdim    const SITargetLowering& Lowering =
1014284677Sdim      *static_cast<const SITargetLowering*>(getTargetLowering());
1015284677Sdim
1016284677Sdim    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1017284677Sdim    return true;
1018284677Sdim  }
1019284677Sdim
1020284677Sdim  return false;
1021284677Sdim}
1022284677Sdim
1023284677Sdimbool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1024284677Sdim                                           SDValue &VAddr, SDValue &SOffset,
1025296417Sdim                                           SDValue &Offset,
1026296417Sdim                                           SDValue &SLC) const {
1027284677Sdim  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1028284677Sdim  SDValue GLC, TFE;
1029284677Sdim
1030284677Sdim  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1031284677Sdim}
1032284677Sdim
1033284677Sdimbool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
1034284677Sdim                                            SDValue &VAddr, SDValue &SOffset,
1035284677Sdim                                            SDValue &ImmOffset) const {
1036284677Sdim
1037284677Sdim  SDLoc DL(Addr);
1038284677Sdim  MachineFunction &MF = CurDAG->getMachineFunction();
1039296417Sdim  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1040284677Sdim
1041296417Sdim  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1042296417Sdim  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
1043284677Sdim
1044284677Sdim  // (add n0, c1)
1045284677Sdim  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1046287521Sdim    SDValue N0 = Addr.getOperand(0);
1047284677Sdim    SDValue N1 = Addr.getOperand(1);
1048287521Sdim    // Offsets in vaddr must be positive.
1049287521Sdim    if (CurDAG->SignBitIsZero(N0)) {
1050287521Sdim      ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1051287521Sdim      if (isLegalMUBUFImmOffset(C1)) {
1052287521Sdim        VAddr = N0;
1053287521Sdim        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1054287521Sdim        return true;
1055287521Sdim      }
1056284677Sdim    }
1057284677Sdim  }
1058284677Sdim
1059284677Sdim  // (node)
1060284677Sdim  VAddr = Addr;
1061284677Sdim  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1062284677Sdim  return true;
1063284677Sdim}
1064284677Sdim
1065284677Sdimbool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1066284677Sdim                                           SDValue &SOffset, SDValue &Offset,
1067284677Sdim                                           SDValue &GLC, SDValue &SLC,
1068284677Sdim                                           SDValue &TFE) const {
1069284677Sdim  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1070284677Sdim  const SIInstrInfo *TII =
1071284677Sdim    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1072284677Sdim
1073296417Sdim  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1074296417Sdim              GLC, SLC, TFE))
1075296417Sdim    return false;
1076284677Sdim
1077284677Sdim  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1078284677Sdim      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1079284677Sdim      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1080284677Sdim    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1081284677Sdim                    APInt::getAllOnesValue(32).getZExtValue(); // Size
1082284677Sdim    SDLoc DL(Addr);
1083284677Sdim
1084284677Sdim    const SITargetLowering& Lowering =
1085284677Sdim      *static_cast<const SITargetLowering*>(getTargetLowering());
1086284677Sdim
1087284677Sdim    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1088284677Sdim    return true;
1089284677Sdim  }
1090284677Sdim  return false;
1091284677Sdim}
1092284677Sdim
1093284677Sdimbool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1094284677Sdim                                           SDValue &Soffset, SDValue &Offset,
1095284677Sdim                                           SDValue &GLC) const {
1096284677Sdim  SDValue SLC, TFE;
1097284677Sdim
1098284677Sdim  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1099284677Sdim}
1100284677Sdim
1101296417Sdim///
1102296417Sdim/// \param EncodedOffset This is the immediate value that will be encoded
1103296417Sdim///        directly into the instruction.  On SI/CI the \p EncodedOffset
1104296417Sdim///        will be in units of dwords and on VI+ it will be units of bytes.
1105296417Sdimstatic bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1106296417Sdim                                 int64_t EncodedOffset) {
1107296417Sdim  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1108296417Sdim     isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1109296417Sdim}
1110296417Sdim
1111296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1112296417Sdim                                          SDValue &Offset, bool &Imm) const {
1113296417Sdim
1114296417Sdim  // FIXME: Handle non-constant offsets.
1115296417Sdim  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1116296417Sdim  if (!C)
1117296417Sdim    return false;
1118296417Sdim
1119296417Sdim  SDLoc SL(ByteOffsetNode);
1120296417Sdim  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1121296417Sdim  int64_t ByteOffset = C->getSExtValue();
1122296417Sdim  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1123296417Sdim      ByteOffset >> 2 : ByteOffset;
1124296417Sdim
1125296417Sdim  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1126296417Sdim    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1127296417Sdim    Imm = true;
1128296417Sdim    return true;
1129296417Sdim  }
1130296417Sdim
1131296417Sdim  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1132296417Sdim    return false;
1133296417Sdim
1134296417Sdim  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1135296417Sdim    // 32-bit Immediates are supported on Sea Islands.
1136296417Sdim    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1137296417Sdim  } else {
1138296417Sdim    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1139296417Sdim    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1140296417Sdim                                            C32Bit), 0);
1141296417Sdim  }
1142296417Sdim  Imm = false;
1143296417Sdim  return true;
1144296417Sdim}
1145296417Sdim
1146296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1147296417Sdim                                     SDValue &Offset, bool &Imm) const {
1148296417Sdim
1149296417Sdim  SDLoc SL(Addr);
1150296417Sdim  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1151296417Sdim    SDValue N0 = Addr.getOperand(0);
1152296417Sdim    SDValue N1 = Addr.getOperand(1);
1153296417Sdim
1154296417Sdim    if (SelectSMRDOffset(N1, Offset, Imm)) {
1155296417Sdim      SBase = N0;
1156296417Sdim      return true;
1157296417Sdim    }
1158296417Sdim  }
1159296417Sdim  SBase = Addr;
1160296417Sdim  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1161296417Sdim  Imm = true;
1162296417Sdim  return true;
1163296417Sdim}
1164296417Sdim
1165296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1166296417Sdim                                       SDValue &Offset) const {
1167296417Sdim  bool Imm;
1168296417Sdim  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1169296417Sdim}
1170296417Sdim
1171296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1172296417Sdim                                         SDValue &Offset) const {
1173296417Sdim
1174296417Sdim  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1175296417Sdim    return false;
1176296417Sdim
1177296417Sdim  bool Imm;
1178296417Sdim  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1179296417Sdim    return false;
1180296417Sdim
1181296417Sdim  return !Imm && isa<ConstantSDNode>(Offset);
1182296417Sdim}
1183296417Sdim
1184296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1185296417Sdim                                        SDValue &Offset) const {
1186296417Sdim  bool Imm;
1187296417Sdim  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1188296417Sdim         !isa<ConstantSDNode>(Offset);
1189296417Sdim}
1190296417Sdim
1191296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1192296417Sdim                                             SDValue &Offset) const {
1193296417Sdim  bool Imm;
1194296417Sdim  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1195296417Sdim}
1196296417Sdim
1197296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1198296417Sdim                                               SDValue &Offset) const {
1199296417Sdim  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1200296417Sdim    return false;
1201296417Sdim
1202296417Sdim  bool Imm;
1203296417Sdim  if (!SelectSMRDOffset(Addr, Offset, Imm))
1204296417Sdim    return false;
1205296417Sdim
1206296417Sdim  return !Imm && isa<ConstantSDNode>(Offset);
1207296417Sdim}
1208296417Sdim
1209296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1210296417Sdim                                              SDValue &Offset) const {
1211296417Sdim  bool Imm;
1212296417Sdim  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1213296417Sdim         !isa<ConstantSDNode>(Offset);
1214296417Sdim}
1215296417Sdim
1216284677Sdim// FIXME: This is incorrect and only enough to be able to compile.
1217284677SdimSDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
1218284677Sdim  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
1219284677Sdim  SDLoc DL(N);
1220284677Sdim
1221296417Sdim  const MachineFunction &MF = CurDAG->getMachineFunction();
1222296417Sdim  DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(),
1223296417Sdim                                           "addrspacecast not implemented");
1224296417Sdim  CurDAG->getContext()->diagnose(NotImplemented);
1225296417Sdim
1226284677Sdim  assert(Subtarget->hasFlatAddressSpace() &&
1227284677Sdim         "addrspacecast only supported with flat address space!");
1228284677Sdim
1229284677Sdim  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
1230284677Sdim          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
1231284677Sdim         "Can only cast to / from flat address space!");
1232284677Sdim
1233284677Sdim  // The flat instructions read the address as the index of the VGPR holding the
1234284677Sdim  // address, so casting should just be reinterpreting the base VGPR, so just
1235284677Sdim  // insert trunc / bitcast / zext.
1236284677Sdim
1237284677Sdim  SDValue Src = ASC->getOperand(0);
1238284677Sdim  EVT DestVT = ASC->getValueType(0);
1239284677Sdim  EVT SrcVT = Src.getValueType();
1240284677Sdim
1241284677Sdim  unsigned SrcSize = SrcVT.getSizeInBits();
1242284677Sdim  unsigned DestSize = DestVT.getSizeInBits();
1243284677Sdim
1244284677Sdim  if (SrcSize > DestSize) {
1245284677Sdim    assert(SrcSize == 64 && DestSize == 32);
1246284677Sdim    return CurDAG->getMachineNode(
1247284677Sdim      TargetOpcode::EXTRACT_SUBREG,
1248284677Sdim      DL,
1249284677Sdim      DestVT,
1250284677Sdim      Src,
1251284677Sdim      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
1252284677Sdim  }
1253284677Sdim
1254284677Sdim  if (DestSize > SrcSize) {
1255284677Sdim    assert(SrcSize == 32 && DestSize == 64);
1256284677Sdim
1257284677Sdim    // FIXME: This is probably wrong, we should never be defining
1258284677Sdim    // a register class with both VGPRs and SGPRs
1259284677Sdim    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
1260284677Sdim                                           MVT::i32);
1261284677Sdim
1262284677Sdim    const SDValue Ops[] = {
1263284677Sdim      RC,
1264284677Sdim      Src,
1265284677Sdim      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
1266284677Sdim      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1267284677Sdim                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
1268284677Sdim      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
1269284677Sdim    };
1270284677Sdim
1271284677Sdim    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
1272284677Sdim                                  DL, N->getValueType(0), Ops);
1273284677Sdim  }
1274284677Sdim
1275284677Sdim  assert(SrcSize == 64 && DestSize == 64);
1276284677Sdim  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
1277284677Sdim}
1278284677Sdim
1279284677SdimSDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
1280284677Sdim                                     uint32_t Offset, uint32_t Width) {
1281284677Sdim  // Transformation function, pack the offset and width of a BFE into
1282284677Sdim  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1283284677Sdim  // source, bits [5:0] contain the offset and bits [22:16] the width.
1284284677Sdim  uint32_t PackedVal = Offset | (Width << 16);
1285284677Sdim  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1286284677Sdim
1287284677Sdim  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1288284677Sdim}
1289284677Sdim
1290284677SdimSDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1291284677Sdim  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1292284677Sdim  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1293284677Sdim  // Predicate: 0 < b <= c < 32
1294284677Sdim
1295284677Sdim  const SDValue &Shl = N->getOperand(0);
1296284677Sdim  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1297284677Sdim  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1298284677Sdim
1299284677Sdim  if (B && C) {
1300284677Sdim    uint32_t BVal = B->getZExtValue();
1301284677Sdim    uint32_t CVal = C->getZExtValue();
1302284677Sdim
1303284677Sdim    if (0 < BVal && BVal <= CVal && CVal < 32) {
1304284677Sdim      bool Signed = N->getOpcode() == ISD::SRA;
1305284677Sdim      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1306284677Sdim
1307284677Sdim      return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
1308284677Sdim                      CVal - BVal, 32 - CVal);
1309284677Sdim    }
1310284677Sdim  }
1311284677Sdim  return SelectCode(N);
1312284677Sdim}
1313284677Sdim
1314284677SdimSDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1315284677Sdim  switch (N->getOpcode()) {
1316284677Sdim  case ISD::AND:
1317284677Sdim    if (N->getOperand(0).getOpcode() == ISD::SRL) {
1318284677Sdim      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1319284677Sdim      // Predicate: isMask(mask)
1320284677Sdim      const SDValue &Srl = N->getOperand(0);
1321284677Sdim      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1322284677Sdim      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1323284677Sdim
1324284677Sdim      if (Shift && Mask) {
1325284677Sdim        uint32_t ShiftVal = Shift->getZExtValue();
1326284677Sdim        uint32_t MaskVal = Mask->getZExtValue();
1327284677Sdim
1328284677Sdim        if (isMask_32(MaskVal)) {
1329284677Sdim          uint32_t WidthVal = countPopulation(MaskVal);
1330284677Sdim
1331284677Sdim          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
1332284677Sdim                          ShiftVal, WidthVal);
1333284677Sdim        }
1334284677Sdim      }
1335284677Sdim    }
1336284677Sdim    break;
1337284677Sdim  case ISD::SRL:
1338284677Sdim    if (N->getOperand(0).getOpcode() == ISD::AND) {
1339284677Sdim      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1340284677Sdim      // Predicate: isMask(mask >> b)
1341284677Sdim      const SDValue &And = N->getOperand(0);
1342284677Sdim      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1343284677Sdim      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1344284677Sdim
1345284677Sdim      if (Shift && Mask) {
1346284677Sdim        uint32_t ShiftVal = Shift->getZExtValue();
1347284677Sdim        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1348284677Sdim
1349284677Sdim        if (isMask_32(MaskVal)) {
1350284677Sdim          uint32_t WidthVal = countPopulation(MaskVal);
1351284677Sdim
1352284677Sdim          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
1353284677Sdim                          ShiftVal, WidthVal);
1354284677Sdim        }
1355284677Sdim      }
1356284677Sdim    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
1357284677Sdim      return SelectS_BFEFromShifts(N);
1358284677Sdim    break;
1359284677Sdim  case ISD::SRA:
1360284677Sdim    if (N->getOperand(0).getOpcode() == ISD::SHL)
1361284677Sdim      return SelectS_BFEFromShifts(N);
1362284677Sdim    break;
1363284677Sdim  }
1364284677Sdim
1365284677Sdim  return SelectCode(N);
1366284677Sdim}
1367284677Sdim
1368284677Sdimbool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1369284677Sdim                                        SDValue &SrcMods) const {
1370284677Sdim
1371284677Sdim  unsigned Mods = 0;
1372284677Sdim
1373284677Sdim  Src = In;
1374284677Sdim
1375284677Sdim  if (Src.getOpcode() == ISD::FNEG) {
1376284677Sdim    Mods |= SISrcMods::NEG;
1377284677Sdim    Src = Src.getOperand(0);
1378284677Sdim  }
1379284677Sdim
1380284677Sdim  if (Src.getOpcode() == ISD::FABS) {
1381284677Sdim    Mods |= SISrcMods::ABS;
1382284677Sdim    Src = Src.getOperand(0);
1383284677Sdim  }
1384284677Sdim
1385284677Sdim  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1386284677Sdim
1387284677Sdim  return true;
1388284677Sdim}
1389284677Sdim
1390286684Sdimbool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1391286684Sdim                                         SDValue &SrcMods) const {
1392286684Sdim  bool Res = SelectVOP3Mods(In, Src, SrcMods);
1393286684Sdim  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1394286684Sdim}
1395286684Sdim
1396284677Sdimbool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1397284677Sdim                                         SDValue &SrcMods, SDValue &Clamp,
1398284677Sdim                                         SDValue &Omod) const {
1399284677Sdim  SDLoc DL(In);
1400284677Sdim  // FIXME: Handle Clamp and Omod
1401284677Sdim  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1402284677Sdim  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1403284677Sdim
1404284677Sdim  return SelectVOP3Mods(In, Src, SrcMods);
1405284677Sdim}
1406284677Sdim
1407286684Sdimbool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1408286684Sdim                                           SDValue &SrcMods, SDValue &Clamp,
1409286684Sdim                                           SDValue &Omod) const {
1410286684Sdim  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1411286684Sdim
1412286684Sdim  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1413286684Sdim                cast<ConstantSDNode>(Clamp)->isNullValue() &&
1414286684Sdim                cast<ConstantSDNode>(Omod)->isNullValue();
1415286684Sdim}
1416286684Sdim
1417284677Sdimbool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1418284677Sdim                                              SDValue &SrcMods,
1419284677Sdim                                              SDValue &Omod) const {
1420284677Sdim  // FIXME: Handle Omod
1421284677Sdim  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1422284677Sdim
1423284677Sdim  return SelectVOP3Mods(In, Src, SrcMods);
1424284677Sdim}
1425284677Sdim
1426284677Sdimbool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1427284677Sdim                                                   SDValue &SrcMods,
1428284677Sdim                                                   SDValue &Clamp,
1429284677Sdim                                                   SDValue &Omod) const {
1430284677Sdim  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1431284677Sdim  return SelectVOP3Mods(In, Src, SrcMods);
1432284677Sdim}
1433284677Sdim
1434296417Sdimvoid AMDGPUDAGToDAGISel::PreprocessISelDAG() {
1435296417Sdim  bool Modified = false;
1436296417Sdim
1437296417Sdim  // XXX - Other targets seem to be able to do this without a worklist.
1438296417Sdim  SmallVector<LoadSDNode *, 8> LoadsToReplace;
1439296417Sdim  SmallVector<StoreSDNode *, 8> StoresToReplace;
1440296417Sdim
1441296417Sdim  for (SDNode &Node : CurDAG->allnodes()) {
1442296417Sdim    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
1443296417Sdim      EVT VT = LD->getValueType(0);
1444296417Sdim      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
1445296417Sdim        continue;
1446296417Sdim
1447296417Sdim      // To simplify the TableGen patters, we replace all i64 loads with v2i32
1448296417Sdim      // loads.  Alternatively, we could promote i64 loads to v2i32 during DAG
1449296417Sdim      // legalization, however, so places (ExpandUnalignedLoad) in the DAG
1450296417Sdim      // legalizer assume that if i64 is legal, so doing this promotion early
1451296417Sdim      // can cause problems.
1452296417Sdim      LoadsToReplace.push_back(LD);
1453296417Sdim    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
1454296417Sdim      // Handle i64 stores here for the same reason mentioned above for loads.
1455296417Sdim      SDValue Value = ST->getValue();
1456296417Sdim      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
1457296417Sdim        continue;
1458296417Sdim      StoresToReplace.push_back(ST);
1459296417Sdim    }
1460296417Sdim  }
1461296417Sdim
1462296417Sdim  for (LoadSDNode *LD : LoadsToReplace) {
1463296417Sdim    SDLoc SL(LD);
1464296417Sdim
1465296417Sdim    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
1466296417Sdim                                      LD->getBasePtr(), LD->getMemOperand());
1467296417Sdim    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
1468296417Sdim                                      MVT::i64, NewLoad);
1469296417Sdim    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
1470296417Sdim    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
1471296417Sdim    Modified = true;
1472296417Sdim  }
1473296417Sdim
1474296417Sdim  for (StoreSDNode *ST : StoresToReplace) {
1475296417Sdim    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
1476296417Sdim                                       MVT::v2i32, ST->getValue());
1477296417Sdim    const SDValue StoreOps[] = {
1478296417Sdim      ST->getChain(),
1479296417Sdim      NewValue,
1480296417Sdim      ST->getBasePtr(),
1481296417Sdim      ST->getOffset()
1482296417Sdim    };
1483296417Sdim
1484296417Sdim    CurDAG->UpdateNodeOperands(ST, StoreOps);
1485296417Sdim    Modified = true;
1486296417Sdim  }
1487296417Sdim
1488296417Sdim  // XXX - Is this necessary?
1489296417Sdim  if (Modified)
1490296417Sdim    CurDAG->RemoveDeadNodes();
1491296417Sdim}
1492296417Sdim
1493284677Sdimvoid AMDGPUDAGToDAGISel::PostprocessISelDAG() {
1494284677Sdim  const AMDGPUTargetLowering& Lowering =
1495284677Sdim    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
1496284677Sdim  bool IsModified = false;
1497284677Sdim  do {
1498284677Sdim    IsModified = false;
1499284677Sdim    // Go over all selected nodes and try to fold them a bit more
1500286684Sdim    for (SDNode &Node : CurDAG->allnodes()) {
1501286684Sdim      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
1502284677Sdim      if (!MachineNode)
1503284677Sdim        continue;
1504284677Sdim
1505284677Sdim      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
1506286684Sdim      if (ResNode != &Node) {
1507286684Sdim        ReplaceUses(&Node, ResNode);
1508284677Sdim        IsModified = true;
1509284677Sdim      }
1510284677Sdim    }
1511284677Sdim    CurDAG->RemoveDeadNodes();
1512284677Sdim  } while (IsModified);
1513284677Sdim}
1514