1249259Sdim//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2249259Sdim//
3249259Sdim//                     The LLVM Compiler Infrastructure
4249259Sdim//
5249259Sdim// This file is distributed under the University of Illinois Open Source
6249259Sdim// License. See LICENSE.TXT for details.
7249259Sdim//
8249259Sdim//==-----------------------------------------------------------------------===//
9249259Sdim//
10249259Sdim/// \file
11249259Sdim/// \brief TargetLowering functions borrowed from AMDIL.
12249259Sdim//
13249259Sdim//===----------------------------------------------------------------------===//
14249259Sdim
15249259Sdim#include "AMDGPUISelLowering.h"
16249259Sdim#include "AMDGPURegisterInfo.h"
17249259Sdim#include "AMDGPUSubtarget.h"
18249259Sdim#include "AMDILIntrinsicInfo.h"
19249259Sdim#include "llvm/CodeGen/MachineFrameInfo.h"
20249259Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
21249259Sdim#include "llvm/CodeGen/PseudoSourceValue.h"
22249259Sdim#include "llvm/CodeGen/SelectionDAG.h"
23249259Sdim#include "llvm/CodeGen/SelectionDAGNodes.h"
24249259Sdim#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
25249259Sdim#include "llvm/IR/CallingConv.h"
26249259Sdim#include "llvm/IR/DerivedTypes.h"
27249259Sdim#include "llvm/IR/Instructions.h"
28249259Sdim#include "llvm/IR/Intrinsics.h"
29249259Sdim#include "llvm/Support/raw_ostream.h"
30249259Sdim#include "llvm/Target/TargetInstrInfo.h"
31249259Sdim#include "llvm/Target/TargetOptions.h"
32249259Sdim
33249259Sdimusing namespace llvm;
34249259Sdim//===----------------------------------------------------------------------===//
35249259Sdim// TargetLowering Implementation Help Functions End
36249259Sdim//===----------------------------------------------------------------------===//
37249259Sdim
38249259Sdim//===----------------------------------------------------------------------===//
39249259Sdim// TargetLowering Class Implementation Begins
40249259Sdim//===----------------------------------------------------------------------===//
41249259Sdimvoid AMDGPUTargetLowering::InitAMDILLowering() {
42263509Sdim  static const int types[] = {
43249259Sdim    (int)MVT::i8,
44249259Sdim    (int)MVT::i16,
45249259Sdim    (int)MVT::i32,
46249259Sdim    (int)MVT::f32,
47249259Sdim    (int)MVT::f64,
48249259Sdim    (int)MVT::i64,
49249259Sdim    (int)MVT::v2i8,
50249259Sdim    (int)MVT::v4i8,
51249259Sdim    (int)MVT::v2i16,
52249259Sdim    (int)MVT::v4i16,
53249259Sdim    (int)MVT::v4f32,
54249259Sdim    (int)MVT::v4i32,
55249259Sdim    (int)MVT::v2f32,
56249259Sdim    (int)MVT::v2i32,
57249259Sdim    (int)MVT::v2f64,
58249259Sdim    (int)MVT::v2i64
59249259Sdim  };
60249259Sdim
61263509Sdim  static const int IntTypes[] = {
62249259Sdim    (int)MVT::i8,
63249259Sdim    (int)MVT::i16,
64249259Sdim    (int)MVT::i32,
65249259Sdim    (int)MVT::i64
66249259Sdim  };
67249259Sdim
68263509Sdim  static const int FloatTypes[] = {
69249259Sdim    (int)MVT::f32,
70249259Sdim    (int)MVT::f64
71249259Sdim  };
72249259Sdim
73263509Sdim  static const int VectorTypes[] = {
74249259Sdim    (int)MVT::v2i8,
75249259Sdim    (int)MVT::v4i8,
76249259Sdim    (int)MVT::v2i16,
77249259Sdim    (int)MVT::v4i16,
78249259Sdim    (int)MVT::v4f32,
79249259Sdim    (int)MVT::v4i32,
80249259Sdim    (int)MVT::v2f32,
81249259Sdim    (int)MVT::v2i32,
82249259Sdim    (int)MVT::v2f64,
83249259Sdim    (int)MVT::v2i64
84249259Sdim  };
85263509Sdim  const size_t NumTypes = array_lengthof(types);
86263509Sdim  const size_t NumFloatTypes = array_lengthof(FloatTypes);
87263509Sdim  const size_t NumIntTypes = array_lengthof(IntTypes);
88263509Sdim  const size_t NumVectorTypes = array_lengthof(VectorTypes);
89249259Sdim
90249259Sdim  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
91249259Sdim  // These are the current register classes that are
92249259Sdim  // supported
93249259Sdim
94249259Sdim  for (unsigned int x  = 0; x < NumTypes; ++x) {
95249259Sdim    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
96249259Sdim
97249259Sdim    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
98249259Sdim    // We cannot sextinreg, expand to shifts
99249259Sdim    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
100249259Sdim    setOperationAction(ISD::SUBE, VT, Expand);
101249259Sdim    setOperationAction(ISD::SUBC, VT, Expand);
102249259Sdim    setOperationAction(ISD::ADDE, VT, Expand);
103249259Sdim    setOperationAction(ISD::ADDC, VT, Expand);
104249259Sdim    setOperationAction(ISD::BRCOND, VT, Custom);
105249259Sdim    setOperationAction(ISD::BR_JT, VT, Expand);
106249259Sdim    setOperationAction(ISD::BRIND, VT, Expand);
107249259Sdim    // TODO: Implement custom UREM/SREM routines
108249259Sdim    setOperationAction(ISD::SREM, VT, Expand);
109249259Sdim    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
110249259Sdim    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
111249259Sdim    if (VT != MVT::i64 && VT != MVT::v2i64) {
112249259Sdim      setOperationAction(ISD::SDIV, VT, Custom);
113249259Sdim    }
114249259Sdim  }
115249259Sdim  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
116249259Sdim    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
117249259Sdim
118249259Sdim    // IL does not have these operations for floating point types
119249259Sdim    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
120249259Sdim    setOperationAction(ISD::SETOLT, VT, Expand);
121249259Sdim    setOperationAction(ISD::SETOGE, VT, Expand);
122249259Sdim    setOperationAction(ISD::SETOGT, VT, Expand);
123249259Sdim    setOperationAction(ISD::SETOLE, VT, Expand);
124249259Sdim    setOperationAction(ISD::SETULT, VT, Expand);
125249259Sdim    setOperationAction(ISD::SETUGE, VT, Expand);
126249259Sdim    setOperationAction(ISD::SETUGT, VT, Expand);
127249259Sdim    setOperationAction(ISD::SETULE, VT, Expand);
128249259Sdim  }
129249259Sdim
130249259Sdim  for (unsigned int x = 0; x < NumIntTypes; ++x) {
131249259Sdim    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
132249259Sdim
133249259Sdim    // GPU also does not have divrem function for signed or unsigned
134249259Sdim    setOperationAction(ISD::SDIVREM, VT, Expand);
135249259Sdim
136249259Sdim    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
137249259Sdim    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
138249259Sdim    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
139249259Sdim
140249259Sdim    setOperationAction(ISD::BSWAP, VT, Expand);
141249259Sdim
142249259Sdim    // GPU doesn't have any counting operators
143249259Sdim    setOperationAction(ISD::CTPOP, VT, Expand);
144249259Sdim    setOperationAction(ISD::CTTZ, VT, Expand);
145249259Sdim    setOperationAction(ISD::CTLZ, VT, Expand);
146249259Sdim  }
147249259Sdim
148249259Sdim  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
149249259Sdim    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
150249259Sdim
151249259Sdim    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
152249259Sdim    setOperationAction(ISD::SDIVREM, VT, Expand);
153249259Sdim    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
154249259Sdim    // setOperationAction(ISD::VSETCC, VT, Expand);
155249259Sdim    setOperationAction(ISD::SELECT_CC, VT, Expand);
156249259Sdim
157249259Sdim  }
158263509Sdim  setOperationAction(ISD::MULHU, MVT::i64, Expand);
159263509Sdim  setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
160263509Sdim  setOperationAction(ISD::MULHS, MVT::i64, Expand);
161263509Sdim  setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
162263509Sdim  setOperationAction(ISD::ADD, MVT::v2i64, Expand);
163263509Sdim  setOperationAction(ISD::SREM, MVT::v2i64, Expand);
164263509Sdim  setOperationAction(ISD::Constant          , MVT::i64  , Legal);
165263509Sdim  setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
166263509Sdim  setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
167263509Sdim  setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
168263509Sdim  setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
169263509Sdim  setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
170263509Sdim  if (STM.hasHWFP64()) {
171249259Sdim    // we support loading/storing v2f64 but not operations on the type
172249259Sdim    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
173249259Sdim    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
174249259Sdim    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
175249259Sdim    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
176249259Sdim    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
177249259Sdim    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
178249259Sdim    // We want to expand vector conversions into their scalar
179249259Sdim    // counterparts.
180249259Sdim    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
181249259Sdim    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
182249259Sdim    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
183249259Sdim    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
184249259Sdim    setOperationAction(ISD::FABS, MVT::f64, Expand);
185249259Sdim    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
186249259Sdim  }
187249259Sdim  // TODO: Fix the UDIV24 algorithm so it works for these
188249259Sdim  // types correctly. This needs vector comparisons
189249259Sdim  // for this to work correctly.
190249259Sdim  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
191249259Sdim  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
192249259Sdim  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
193249259Sdim  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
194249259Sdim  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
195249259Sdim  setOperationAction(ISD::SUBC, MVT::Other, Expand);
196249259Sdim  setOperationAction(ISD::ADDE, MVT::Other, Expand);
197249259Sdim  setOperationAction(ISD::ADDC, MVT::Other, Expand);
198249259Sdim  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
199249259Sdim  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
200249259Sdim  setOperationAction(ISD::BRIND, MVT::Other, Expand);
201249259Sdim  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
202249259Sdim
203249259Sdim
204249259Sdim  // Use the default implementation.
205249259Sdim  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
206249259Sdim  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
207249259Sdim
208249259Sdim  setSchedulingPreference(Sched::RegPressure);
209249259Sdim  setPow2DivIsCheap(false);
210249259Sdim  setSelectIsExpensive(true);
211249259Sdim  setJumpIsExpensive(true);
212249259Sdim
213249259Sdim  MaxStoresPerMemcpy  = 4096;
214249259Sdim  MaxStoresPerMemmove = 4096;
215249259Sdim  MaxStoresPerMemset  = 4096;
216249259Sdim
217249259Sdim}
218249259Sdim
219249259Sdimbool
220249259SdimAMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
221249259Sdim    const CallInst &I, unsigned Intrinsic) const {
222249259Sdim  return false;
223249259Sdim}
224249259Sdim
225249259Sdim// The backend supports 32 and 64 bit floating point immediates
226249259Sdimbool
227249259SdimAMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
228249259Sdim  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
229249259Sdim      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
230249259Sdim    return true;
231249259Sdim  } else {
232249259Sdim    return false;
233249259Sdim  }
234249259Sdim}
235249259Sdim
236249259Sdimbool
237249259SdimAMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
238249259Sdim  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
239249259Sdim      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
240249259Sdim    return false;
241249259Sdim  } else {
242249259Sdim    return true;
243249259Sdim  }
244249259Sdim}
245249259Sdim
246249259Sdim
247249259Sdim// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
248249259Sdim// be zero. Op is expected to be a target specific node. Used by DAG
249249259Sdim// combiner.
250249259Sdim
251249259Sdimvoid
252249259SdimAMDGPUTargetLowering::computeMaskedBitsForTargetNode(
253249259Sdim    const SDValue Op,
254249259Sdim    APInt &KnownZero,
255249259Sdim    APInt &KnownOne,
256249259Sdim    const SelectionDAG &DAG,
257249259Sdim    unsigned Depth) const {
258249259Sdim  APInt KnownZero2;
259249259Sdim  APInt KnownOne2;
260249259Sdim  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
261249259Sdim  switch (Op.getOpcode()) {
262249259Sdim    default: break;
263249259Sdim    case ISD::SELECT_CC:
264249259Sdim             DAG.ComputeMaskedBits(
265249259Sdim                 Op.getOperand(1),
266249259Sdim                 KnownZero,
267249259Sdim                 KnownOne,
268249259Sdim                 Depth + 1
269249259Sdim                 );
270249259Sdim             DAG.ComputeMaskedBits(
271249259Sdim                 Op.getOperand(0),
272249259Sdim                 KnownZero2,
273249259Sdim                 KnownOne2
274249259Sdim                 );
275249259Sdim             assert((KnownZero & KnownOne) == 0
276249259Sdim                 && "Bits known to be one AND zero?");
277249259Sdim             assert((KnownZero2 & KnownOne2) == 0
278249259Sdim                 && "Bits known to be one AND zero?");
279249259Sdim             // Only known if known in both the LHS and RHS
280249259Sdim             KnownOne &= KnownOne2;
281249259Sdim             KnownZero &= KnownZero2;
282249259Sdim             break;
283249259Sdim  };
284249259Sdim}
285249259Sdim
286249259Sdim//===----------------------------------------------------------------------===//
287249259Sdim//                           Other Lowering Hooks
288249259Sdim//===----------------------------------------------------------------------===//
289249259Sdim
290249259SdimSDValue
291249259SdimAMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
292249259Sdim  EVT OVT = Op.getValueType();
293249259Sdim  SDValue DST;
294249259Sdim  if (OVT.getScalarType() == MVT::i64) {
295249259Sdim    DST = LowerSDIV64(Op, DAG);
296249259Sdim  } else if (OVT.getScalarType() == MVT::i32) {
297249259Sdim    DST = LowerSDIV32(Op, DAG);
298249259Sdim  } else if (OVT.getScalarType() == MVT::i16
299249259Sdim      || OVT.getScalarType() == MVT::i8) {
300249259Sdim    DST = LowerSDIV24(Op, DAG);
301249259Sdim  } else {
302249259Sdim    DST = SDValue(Op.getNode(), 0);
303249259Sdim  }
304249259Sdim  return DST;
305249259Sdim}
306249259Sdim
307249259SdimSDValue
308249259SdimAMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
309249259Sdim  EVT OVT = Op.getValueType();
310249259Sdim  SDValue DST;
311249259Sdim  if (OVT.getScalarType() == MVT::i64) {
312249259Sdim    DST = LowerSREM64(Op, DAG);
313249259Sdim  } else if (OVT.getScalarType() == MVT::i32) {
314249259Sdim    DST = LowerSREM32(Op, DAG);
315249259Sdim  } else if (OVT.getScalarType() == MVT::i16) {
316249259Sdim    DST = LowerSREM16(Op, DAG);
317249259Sdim  } else if (OVT.getScalarType() == MVT::i8) {
318249259Sdim    DST = LowerSREM8(Op, DAG);
319249259Sdim  } else {
320249259Sdim    DST = SDValue(Op.getNode(), 0);
321249259Sdim  }
322249259Sdim  return DST;
323249259Sdim}
324249259Sdim
325249259SdimSDValue
326249259SdimAMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
327249259Sdim  SDValue Data = Op.getOperand(0);
328249259Sdim  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
329263509Sdim  SDLoc DL(Op);
330249259Sdim  EVT DVT = Data.getValueType();
331249259Sdim  EVT BVT = BaseType->getVT();
332249259Sdim  unsigned baseBits = BVT.getScalarType().getSizeInBits();
333249259Sdim  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
334249259Sdim  unsigned shiftBits = srcBits - baseBits;
335249259Sdim  if (srcBits < 32) {
336249259Sdim    // If the op is less than 32 bits, then it needs to extend to 32bits
337249259Sdim    // so it can properly keep the upper bits valid.
338249259Sdim    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
339249259Sdim    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
340249259Sdim    shiftBits = 32 - baseBits;
341249259Sdim    DVT = IVT;
342249259Sdim  }
343249259Sdim  SDValue Shift = DAG.getConstant(shiftBits, DVT);
344249259Sdim  // Shift left by 'Shift' bits.
345249259Sdim  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
346249259Sdim  // Signed shift Right by 'Shift' bits.
347249259Sdim  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
348249259Sdim  if (srcBits < 32) {
349249259Sdim    // Once the sign extension is done, the op needs to be converted to
350249259Sdim    // its original type.
351249259Sdim    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
352249259Sdim  }
353249259Sdim  return Data;
354249259Sdim}
355249259SdimEVT
356249259SdimAMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
357249259Sdim  int iSize = (size * numEle);
358249259Sdim  int vEle = (iSize >> ((size == 64) ? 6 : 5));
359249259Sdim  if (!vEle) {
360249259Sdim    vEle = 1;
361249259Sdim  }
362249259Sdim  if (size == 64) {
363249259Sdim    if (vEle == 1) {
364249259Sdim      return EVT(MVT::i64);
365249259Sdim    } else {
366249259Sdim      return EVT(MVT::getVectorVT(MVT::i64, vEle));
367249259Sdim    }
368249259Sdim  } else {
369249259Sdim    if (vEle == 1) {
370249259Sdim      return EVT(MVT::i32);
371249259Sdim    } else {
372249259Sdim      return EVT(MVT::getVectorVT(MVT::i32, vEle));
373249259Sdim    }
374249259Sdim  }
375249259Sdim}
376249259Sdim
377249259SdimSDValue
378249259SdimAMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
379249259Sdim  SDValue Chain = Op.getOperand(0);
380249259Sdim  SDValue Cond  = Op.getOperand(1);
381249259Sdim  SDValue Jump  = Op.getOperand(2);
382249259Sdim  SDValue Result;
383249259Sdim  Result = DAG.getNode(
384249259Sdim      AMDGPUISD::BRANCH_COND,
385263509Sdim      SDLoc(Op),
386249259Sdim      Op.getValueType(),
387249259Sdim      Chain, Jump, Cond);
388249259Sdim  return Result;
389249259Sdim}
390249259Sdim
391249259SdimSDValue
392249259SdimAMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
393263509Sdim  SDLoc DL(Op);
394249259Sdim  EVT OVT = Op.getValueType();
395249259Sdim  SDValue LHS = Op.getOperand(0);
396249259Sdim  SDValue RHS = Op.getOperand(1);
397249259Sdim  MVT INTTY;
398249259Sdim  MVT FLTTY;
399249259Sdim  if (!OVT.isVector()) {
400249259Sdim    INTTY = MVT::i32;
401249259Sdim    FLTTY = MVT::f32;
402249259Sdim  } else if (OVT.getVectorNumElements() == 2) {
403249259Sdim    INTTY = MVT::v2i32;
404249259Sdim    FLTTY = MVT::v2f32;
405249259Sdim  } else if (OVT.getVectorNumElements() == 4) {
406249259Sdim    INTTY = MVT::v4i32;
407249259Sdim    FLTTY = MVT::v4f32;
408249259Sdim  }
409249259Sdim  unsigned bitsize = OVT.getScalarType().getSizeInBits();
410249259Sdim  // char|short jq = ia ^ ib;
411249259Sdim  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
412249259Sdim
413249259Sdim  // jq = jq >> (bitsize - 2)
414249259Sdim  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
415249259Sdim
416249259Sdim  // jq = jq | 0x1
417249259Sdim  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
418249259Sdim
419249259Sdim  // jq = (int)jq
420249259Sdim  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
421249259Sdim
422249259Sdim  // int ia = (int)LHS;
423249259Sdim  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
424249259Sdim
425249259Sdim  // int ib, (int)RHS;
426249259Sdim  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
427249259Sdim
428249259Sdim  // float fa = (float)ia;
429249259Sdim  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
430249259Sdim
431249259Sdim  // float fb = (float)ib;
432249259Sdim  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
433249259Sdim
434249259Sdim  // float fq = native_divide(fa, fb);
435249259Sdim  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
436249259Sdim
437249259Sdim  // fq = trunc(fq);
438249259Sdim  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
439249259Sdim
440249259Sdim  // float fqneg = -fq;
441249259Sdim  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
442249259Sdim
443249259Sdim  // float fr = mad(fqneg, fb, fa);
444249259Sdim  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
445249259Sdim      DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
446249259Sdim
447249259Sdim  // int iq = (int)fq;
448249259Sdim  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
449249259Sdim
450249259Sdim  // fr = fabs(fr);
451249259Sdim  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
452249259Sdim
453249259Sdim  // fb = fabs(fb);
454249259Sdim  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
455249259Sdim
456249259Sdim  // int cv = fr >= fb;
457249259Sdim  SDValue cv;
458249259Sdim  if (INTTY == MVT::i32) {
459249259Sdim    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
460249259Sdim  } else {
461249259Sdim    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
462249259Sdim  }
463249259Sdim  // jq = (cv ? jq : 0);
464249259Sdim  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
465249259Sdim      DAG.getConstant(0, OVT));
466249259Sdim  // dst = iq + jq;
467249259Sdim  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
468249259Sdim  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
469249259Sdim  return iq;
470249259Sdim}
471249259Sdim
472249259SdimSDValue
473249259SdimAMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
474263509Sdim  SDLoc DL(Op);
475249259Sdim  EVT OVT = Op.getValueType();
476249259Sdim  SDValue LHS = Op.getOperand(0);
477249259Sdim  SDValue RHS = Op.getOperand(1);
478249259Sdim  // The LowerSDIV32 function generates equivalent to the following IL.
479249259Sdim  // mov r0, LHS
480249259Sdim  // mov r1, RHS
481249259Sdim  // ilt r10, r0, 0
482249259Sdim  // ilt r11, r1, 0
483249259Sdim  // iadd r0, r0, r10
484249259Sdim  // iadd r1, r1, r11
485249259Sdim  // ixor r0, r0, r10
486249259Sdim  // ixor r1, r1, r11
487249259Sdim  // udiv r0, r0, r1
488249259Sdim  // ixor r10, r10, r11
489249259Sdim  // iadd r0, r0, r10
490249259Sdim  // ixor DST, r0, r10
491249259Sdim
492249259Sdim  // mov r0, LHS
493249259Sdim  SDValue r0 = LHS;
494249259Sdim
495249259Sdim  // mov r1, RHS
496249259Sdim  SDValue r1 = RHS;
497249259Sdim
498249259Sdim  // ilt r10, r0, 0
499249259Sdim  SDValue r10 = DAG.getSelectCC(DL,
500249259Sdim      r0, DAG.getConstant(0, OVT),
501249259Sdim      DAG.getConstant(-1, MVT::i32),
502249259Sdim      DAG.getConstant(0, MVT::i32),
503249259Sdim      ISD::SETLT);
504249259Sdim
505249259Sdim  // ilt r11, r1, 0
506249259Sdim  SDValue r11 = DAG.getSelectCC(DL,
507249259Sdim      r1, DAG.getConstant(0, OVT),
508249259Sdim      DAG.getConstant(-1, MVT::i32),
509249259Sdim      DAG.getConstant(0, MVT::i32),
510249259Sdim      ISD::SETLT);
511249259Sdim
512249259Sdim  // iadd r0, r0, r10
513249259Sdim  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
514249259Sdim
515249259Sdim  // iadd r1, r1, r11
516249259Sdim  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
517249259Sdim
518249259Sdim  // ixor r0, r0, r10
519249259Sdim  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
520249259Sdim
521249259Sdim  // ixor r1, r1, r11
522249259Sdim  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
523249259Sdim
524249259Sdim  // udiv r0, r0, r1
525249259Sdim  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
526249259Sdim
527249259Sdim  // ixor r10, r10, r11
528249259Sdim  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
529249259Sdim
530249259Sdim  // iadd r0, r0, r10
531249259Sdim  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
532249259Sdim
533249259Sdim  // ixor DST, r0, r10
534249259Sdim  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
535249259Sdim  return DST;
536249259Sdim}
537249259Sdim
538249259SdimSDValue
539249259SdimAMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
540249259Sdim  return SDValue(Op.getNode(), 0);
541249259Sdim}
542249259Sdim
543249259SdimSDValue
544249259SdimAMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
545263509Sdim  SDLoc DL(Op);
546249259Sdim  EVT OVT = Op.getValueType();
547249259Sdim  MVT INTTY = MVT::i32;
548249259Sdim  if (OVT == MVT::v2i8) {
549249259Sdim    INTTY = MVT::v2i32;
550249259Sdim  } else if (OVT == MVT::v4i8) {
551249259Sdim    INTTY = MVT::v4i32;
552249259Sdim  }
553249259Sdim  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
554249259Sdim  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
555249259Sdim  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
556249259Sdim  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
557249259Sdim  return LHS;
558249259Sdim}
559249259Sdim
560249259SdimSDValue
561249259SdimAMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
562263509Sdim  SDLoc DL(Op);
563249259Sdim  EVT OVT = Op.getValueType();
564249259Sdim  MVT INTTY = MVT::i32;
565249259Sdim  if (OVT == MVT::v2i16) {
566249259Sdim    INTTY = MVT::v2i32;
567249259Sdim  } else if (OVT == MVT::v4i16) {
568249259Sdim    INTTY = MVT::v4i32;
569249259Sdim  }
570249259Sdim  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
571249259Sdim  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
572249259Sdim  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
573249259Sdim  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
574249259Sdim  return LHS;
575249259Sdim}
576249259Sdim
577249259SdimSDValue
578249259SdimAMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
579263509Sdim  SDLoc DL(Op);
580249259Sdim  EVT OVT = Op.getValueType();
581249259Sdim  SDValue LHS = Op.getOperand(0);
582249259Sdim  SDValue RHS = Op.getOperand(1);
583249259Sdim  // The LowerSREM32 function generates equivalent to the following IL.
584249259Sdim  // mov r0, LHS
585249259Sdim  // mov r1, RHS
586249259Sdim  // ilt r10, r0, 0
587249259Sdim  // ilt r11, r1, 0
588249259Sdim  // iadd r0, r0, r10
589249259Sdim  // iadd r1, r1, r11
590249259Sdim  // ixor r0, r0, r10
591249259Sdim  // ixor r1, r1, r11
592249259Sdim  // udiv r20, r0, r1
593249259Sdim  // umul r20, r20, r1
594249259Sdim  // sub r0, r0, r20
595249259Sdim  // iadd r0, r0, r10
596249259Sdim  // ixor DST, r0, r10
597249259Sdim
598249259Sdim  // mov r0, LHS
599249259Sdim  SDValue r0 = LHS;
600249259Sdim
601249259Sdim  // mov r1, RHS
602249259Sdim  SDValue r1 = RHS;
603249259Sdim
604249259Sdim  // ilt r10, r0, 0
605249259Sdim  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
606249259Sdim
607249259Sdim  // ilt r11, r1, 0
608249259Sdim  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
609249259Sdim
610249259Sdim  // iadd r0, r0, r10
611249259Sdim  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
612249259Sdim
613249259Sdim  // iadd r1, r1, r11
614249259Sdim  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
615249259Sdim
616249259Sdim  // ixor r0, r0, r10
617249259Sdim  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
618249259Sdim
619249259Sdim  // ixor r1, r1, r11
620249259Sdim  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
621249259Sdim
622249259Sdim  // udiv r20, r0, r1
623249259Sdim  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
624249259Sdim
625249259Sdim  // umul r20, r20, r1
626249259Sdim  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
627249259Sdim
628249259Sdim  // sub r0, r0, r20
629249259Sdim  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
630249259Sdim
631249259Sdim  // iadd r0, r0, r10
632249259Sdim  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
633249259Sdim
634249259Sdim  // ixor DST, r0, r10
635249259Sdim  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
636249259Sdim  return DST;
637249259Sdim}
638249259Sdim
639249259SdimSDValue
640249259SdimAMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
641249259Sdim  return SDValue(Op.getNode(), 0);
642249259Sdim}
643