1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief TargetLowering functions borrowed from AMDIL.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUISelLowering.h"
16#include "AMDGPURegisterInfo.h"
17#include "AMDGPUSubtarget.h"
18#include "AMDILIntrinsicInfo.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/PseudoSourceValue.h"
22#include "llvm/CodeGen/SelectionDAG.h"
23#include "llvm/CodeGen/SelectionDAGNodes.h"
24#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
25#include "llvm/IR/CallingConv.h"
26#include "llvm/IR/DerivedTypes.h"
27#include "llvm/IR/Instructions.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/Support/raw_ostream.h"
30#include "llvm/Target/TargetInstrInfo.h"
31#include "llvm/Target/TargetOptions.h"
32
33using namespace llvm;
34//===----------------------------------------------------------------------===//
35// TargetLowering Implementation Help Functions End
36//===----------------------------------------------------------------------===//
37
38//===----------------------------------------------------------------------===//
39// TargetLowering Class Implementation Begins
40//===----------------------------------------------------------------------===//
41void AMDGPUTargetLowering::InitAMDILLowering() {
42  static const int types[] = {
43    (int)MVT::i8,
44    (int)MVT::i16,
45    (int)MVT::i32,
46    (int)MVT::f32,
47    (int)MVT::f64,
48    (int)MVT::i64,
49    (int)MVT::v2i8,
50    (int)MVT::v4i8,
51    (int)MVT::v2i16,
52    (int)MVT::v4i16,
53    (int)MVT::v4f32,
54    (int)MVT::v4i32,
55    (int)MVT::v2f32,
56    (int)MVT::v2i32,
57    (int)MVT::v2f64,
58    (int)MVT::v2i64
59  };
60
61  static const int IntTypes[] = {
62    (int)MVT::i8,
63    (int)MVT::i16,
64    (int)MVT::i32,
65    (int)MVT::i64
66  };
67
68  static const int FloatTypes[] = {
69    (int)MVT::f32,
70    (int)MVT::f64
71  };
72
73  static const int VectorTypes[] = {
74    (int)MVT::v2i8,
75    (int)MVT::v4i8,
76    (int)MVT::v2i16,
77    (int)MVT::v4i16,
78    (int)MVT::v4f32,
79    (int)MVT::v4i32,
80    (int)MVT::v2f32,
81    (int)MVT::v2i32,
82    (int)MVT::v2f64,
83    (int)MVT::v2i64
84  };
85  const size_t NumTypes = array_lengthof(types);
86  const size_t NumFloatTypes = array_lengthof(FloatTypes);
87  const size_t NumIntTypes = array_lengthof(IntTypes);
88  const size_t NumVectorTypes = array_lengthof(VectorTypes);
89
90  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
91  // These are the current register classes that are
92  // supported
93
94  for (unsigned int x  = 0; x < NumTypes; ++x) {
95    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
96
97    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
98    // We cannot sextinreg, expand to shifts
99    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
100    setOperationAction(ISD::SUBE, VT, Expand);
101    setOperationAction(ISD::SUBC, VT, Expand);
102    setOperationAction(ISD::ADDE, VT, Expand);
103    setOperationAction(ISD::ADDC, VT, Expand);
104    setOperationAction(ISD::BRCOND, VT, Custom);
105    setOperationAction(ISD::BR_JT, VT, Expand);
106    setOperationAction(ISD::BRIND, VT, Expand);
107    // TODO: Implement custom UREM/SREM routines
108    setOperationAction(ISD::SREM, VT, Expand);
109    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
110    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
111    if (VT != MVT::i64 && VT != MVT::v2i64) {
112      setOperationAction(ISD::SDIV, VT, Custom);
113    }
114  }
115  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
116    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
117
118    // IL does not have these operations for floating point types
119    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
120    setOperationAction(ISD::SETOLT, VT, Expand);
121    setOperationAction(ISD::SETOGE, VT, Expand);
122    setOperationAction(ISD::SETOGT, VT, Expand);
123    setOperationAction(ISD::SETOLE, VT, Expand);
124    setOperationAction(ISD::SETULT, VT, Expand);
125    setOperationAction(ISD::SETUGE, VT, Expand);
126    setOperationAction(ISD::SETUGT, VT, Expand);
127    setOperationAction(ISD::SETULE, VT, Expand);
128  }
129
130  for (unsigned int x = 0; x < NumIntTypes; ++x) {
131    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
132
133    // GPU also does not have divrem function for signed or unsigned
134    setOperationAction(ISD::SDIVREM, VT, Expand);
135
136    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
137    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
138    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
139
140    setOperationAction(ISD::BSWAP, VT, Expand);
141
142    // GPU doesn't have any counting operators
143    setOperationAction(ISD::CTPOP, VT, Expand);
144    setOperationAction(ISD::CTTZ, VT, Expand);
145    setOperationAction(ISD::CTLZ, VT, Expand);
146  }
147
148  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
149    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
150
151    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
152    setOperationAction(ISD::SDIVREM, VT, Expand);
153    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
154    // setOperationAction(ISD::VSETCC, VT, Expand);
155    setOperationAction(ISD::SELECT_CC, VT, Expand);
156
157  }
158  setOperationAction(ISD::MULHU, MVT::i64, Expand);
159  setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
160  setOperationAction(ISD::MULHS, MVT::i64, Expand);
161  setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
162  setOperationAction(ISD::ADD, MVT::v2i64, Expand);
163  setOperationAction(ISD::SREM, MVT::v2i64, Expand);
164  setOperationAction(ISD::Constant          , MVT::i64  , Legal);
165  setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
166  setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
167  setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
168  setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
169  setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
170  if (STM.hasHWFP64()) {
171    // we support loading/storing v2f64 but not operations on the type
172    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
173    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
174    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
175    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
176    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
177    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
178    // We want to expand vector conversions into their scalar
179    // counterparts.
180    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
181    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
182    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
183    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
184    setOperationAction(ISD::FABS, MVT::f64, Expand);
185    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
186  }
187  // TODO: Fix the UDIV24 algorithm so it works for these
188  // types correctly. This needs vector comparisons
189  // for this to work correctly.
190  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
191  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
192  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
193  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
194  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
195  setOperationAction(ISD::SUBC, MVT::Other, Expand);
196  setOperationAction(ISD::ADDE, MVT::Other, Expand);
197  setOperationAction(ISD::ADDC, MVT::Other, Expand);
198  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
199  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
200  setOperationAction(ISD::BRIND, MVT::Other, Expand);
201  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
202
203
204  // Use the default implementation.
205  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
206  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
207
208  setSchedulingPreference(Sched::RegPressure);
209  setPow2DivIsCheap(false);
210  setSelectIsExpensive(true);
211  setJumpIsExpensive(true);
212
213  MaxStoresPerMemcpy  = 4096;
214  MaxStoresPerMemmove = 4096;
215  MaxStoresPerMemset  = 4096;
216
217}
218
219bool
220AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
221    const CallInst &I, unsigned Intrinsic) const {
222  return false;
223}
224
225// The backend supports 32 and 64 bit floating point immediates
226bool
227AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
228  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
229      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
230    return true;
231  } else {
232    return false;
233  }
234}
235
236bool
237AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
238  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
239      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
240    return false;
241  } else {
242    return true;
243  }
244}
245
246
247// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
248// be zero. Op is expected to be a target specific node. Used by DAG
249// combiner.
250
251void
252AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
253    const SDValue Op,
254    APInt &KnownZero,
255    APInt &KnownOne,
256    const SelectionDAG &DAG,
257    unsigned Depth) const {
258  APInt KnownZero2;
259  APInt KnownOne2;
260  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
261  switch (Op.getOpcode()) {
262    default: break;
263    case ISD::SELECT_CC:
264             DAG.ComputeMaskedBits(
265                 Op.getOperand(1),
266                 KnownZero,
267                 KnownOne,
268                 Depth + 1
269                 );
270             DAG.ComputeMaskedBits(
271                 Op.getOperand(0),
272                 KnownZero2,
273                 KnownOne2
274                 );
275             assert((KnownZero & KnownOne) == 0
276                 && "Bits known to be one AND zero?");
277             assert((KnownZero2 & KnownOne2) == 0
278                 && "Bits known to be one AND zero?");
279             // Only known if known in both the LHS and RHS
280             KnownOne &= KnownOne2;
281             KnownZero &= KnownZero2;
282             break;
283  };
284}
285
286//===----------------------------------------------------------------------===//
287//                           Other Lowering Hooks
288//===----------------------------------------------------------------------===//
289
290SDValue
291AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
292  EVT OVT = Op.getValueType();
293  SDValue DST;
294  if (OVT.getScalarType() == MVT::i64) {
295    DST = LowerSDIV64(Op, DAG);
296  } else if (OVT.getScalarType() == MVT::i32) {
297    DST = LowerSDIV32(Op, DAG);
298  } else if (OVT.getScalarType() == MVT::i16
299      || OVT.getScalarType() == MVT::i8) {
300    DST = LowerSDIV24(Op, DAG);
301  } else {
302    DST = SDValue(Op.getNode(), 0);
303  }
304  return DST;
305}
306
307SDValue
308AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
309  EVT OVT = Op.getValueType();
310  SDValue DST;
311  if (OVT.getScalarType() == MVT::i64) {
312    DST = LowerSREM64(Op, DAG);
313  } else if (OVT.getScalarType() == MVT::i32) {
314    DST = LowerSREM32(Op, DAG);
315  } else if (OVT.getScalarType() == MVT::i16) {
316    DST = LowerSREM16(Op, DAG);
317  } else if (OVT.getScalarType() == MVT::i8) {
318    DST = LowerSREM8(Op, DAG);
319  } else {
320    DST = SDValue(Op.getNode(), 0);
321  }
322  return DST;
323}
324
325SDValue
326AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
327  SDValue Data = Op.getOperand(0);
328  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
329  SDLoc DL(Op);
330  EVT DVT = Data.getValueType();
331  EVT BVT = BaseType->getVT();
332  unsigned baseBits = BVT.getScalarType().getSizeInBits();
333  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
334  unsigned shiftBits = srcBits - baseBits;
335  if (srcBits < 32) {
336    // If the op is less than 32 bits, then it needs to extend to 32bits
337    // so it can properly keep the upper bits valid.
338    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
339    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
340    shiftBits = 32 - baseBits;
341    DVT = IVT;
342  }
343  SDValue Shift = DAG.getConstant(shiftBits, DVT);
344  // Shift left by 'Shift' bits.
345  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
346  // Signed shift Right by 'Shift' bits.
347  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
348  if (srcBits < 32) {
349    // Once the sign extension is done, the op needs to be converted to
350    // its original type.
351    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
352  }
353  return Data;
354}
355EVT
356AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
357  int iSize = (size * numEle);
358  int vEle = (iSize >> ((size == 64) ? 6 : 5));
359  if (!vEle) {
360    vEle = 1;
361  }
362  if (size == 64) {
363    if (vEle == 1) {
364      return EVT(MVT::i64);
365    } else {
366      return EVT(MVT::getVectorVT(MVT::i64, vEle));
367    }
368  } else {
369    if (vEle == 1) {
370      return EVT(MVT::i32);
371    } else {
372      return EVT(MVT::getVectorVT(MVT::i32, vEle));
373    }
374  }
375}
376
377SDValue
378AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
379  SDValue Chain = Op.getOperand(0);
380  SDValue Cond  = Op.getOperand(1);
381  SDValue Jump  = Op.getOperand(2);
382  SDValue Result;
383  Result = DAG.getNode(
384      AMDGPUISD::BRANCH_COND,
385      SDLoc(Op),
386      Op.getValueType(),
387      Chain, Jump, Cond);
388  return Result;
389}
390
391SDValue
392AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
393  SDLoc DL(Op);
394  EVT OVT = Op.getValueType();
395  SDValue LHS = Op.getOperand(0);
396  SDValue RHS = Op.getOperand(1);
397  MVT INTTY;
398  MVT FLTTY;
399  if (!OVT.isVector()) {
400    INTTY = MVT::i32;
401    FLTTY = MVT::f32;
402  } else if (OVT.getVectorNumElements() == 2) {
403    INTTY = MVT::v2i32;
404    FLTTY = MVT::v2f32;
405  } else if (OVT.getVectorNumElements() == 4) {
406    INTTY = MVT::v4i32;
407    FLTTY = MVT::v4f32;
408  }
409  unsigned bitsize = OVT.getScalarType().getSizeInBits();
410  // char|short jq = ia ^ ib;
411  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
412
413  // jq = jq >> (bitsize - 2)
414  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
415
416  // jq = jq | 0x1
417  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
418
419  // jq = (int)jq
420  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
421
422  // int ia = (int)LHS;
423  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
424
425  // int ib, (int)RHS;
426  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
427
428  // float fa = (float)ia;
429  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
430
431  // float fb = (float)ib;
432  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
433
434  // float fq = native_divide(fa, fb);
435  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
436
437  // fq = trunc(fq);
438  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
439
440  // float fqneg = -fq;
441  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
442
443  // float fr = mad(fqneg, fb, fa);
444  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
445      DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
446
447  // int iq = (int)fq;
448  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
449
450  // fr = fabs(fr);
451  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
452
453  // fb = fabs(fb);
454  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
455
456  // int cv = fr >= fb;
457  SDValue cv;
458  if (INTTY == MVT::i32) {
459    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
460  } else {
461    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
462  }
463  // jq = (cv ? jq : 0);
464  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
465      DAG.getConstant(0, OVT));
466  // dst = iq + jq;
467  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
468  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
469  return iq;
470}
471
472SDValue
473AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
474  SDLoc DL(Op);
475  EVT OVT = Op.getValueType();
476  SDValue LHS = Op.getOperand(0);
477  SDValue RHS = Op.getOperand(1);
478  // The LowerSDIV32 function generates equivalent to the following IL.
479  // mov r0, LHS
480  // mov r1, RHS
481  // ilt r10, r0, 0
482  // ilt r11, r1, 0
483  // iadd r0, r0, r10
484  // iadd r1, r1, r11
485  // ixor r0, r0, r10
486  // ixor r1, r1, r11
487  // udiv r0, r0, r1
488  // ixor r10, r10, r11
489  // iadd r0, r0, r10
490  // ixor DST, r0, r10
491
492  // mov r0, LHS
493  SDValue r0 = LHS;
494
495  // mov r1, RHS
496  SDValue r1 = RHS;
497
498  // ilt r10, r0, 0
499  SDValue r10 = DAG.getSelectCC(DL,
500      r0, DAG.getConstant(0, OVT),
501      DAG.getConstant(-1, MVT::i32),
502      DAG.getConstant(0, MVT::i32),
503      ISD::SETLT);
504
505  // ilt r11, r1, 0
506  SDValue r11 = DAG.getSelectCC(DL,
507      r1, DAG.getConstant(0, OVT),
508      DAG.getConstant(-1, MVT::i32),
509      DAG.getConstant(0, MVT::i32),
510      ISD::SETLT);
511
512  // iadd r0, r0, r10
513  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
514
515  // iadd r1, r1, r11
516  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
517
518  // ixor r0, r0, r10
519  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
520
521  // ixor r1, r1, r11
522  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
523
524  // udiv r0, r0, r1
525  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
526
527  // ixor r10, r10, r11
528  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
529
530  // iadd r0, r0, r10
531  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
532
533  // ixor DST, r0, r10
534  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
535  return DST;
536}
537
538SDValue
539AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
540  return SDValue(Op.getNode(), 0);
541}
542
543SDValue
544AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
545  SDLoc DL(Op);
546  EVT OVT = Op.getValueType();
547  MVT INTTY = MVT::i32;
548  if (OVT == MVT::v2i8) {
549    INTTY = MVT::v2i32;
550  } else if (OVT == MVT::v4i8) {
551    INTTY = MVT::v4i32;
552  }
553  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
554  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
555  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
556  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
557  return LHS;
558}
559
560SDValue
561AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
562  SDLoc DL(Op);
563  EVT OVT = Op.getValueType();
564  MVT INTTY = MVT::i32;
565  if (OVT == MVT::v2i16) {
566    INTTY = MVT::v2i32;
567  } else if (OVT == MVT::v4i16) {
568    INTTY = MVT::v4i32;
569  }
570  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
571  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
572  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
573  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
574  return LHS;
575}
576
577SDValue
578AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
579  SDLoc DL(Op);
580  EVT OVT = Op.getValueType();
581  SDValue LHS = Op.getOperand(0);
582  SDValue RHS = Op.getOperand(1);
583  // The LowerSREM32 function generates equivalent to the following IL.
584  // mov r0, LHS
585  // mov r1, RHS
586  // ilt r10, r0, 0
587  // ilt r11, r1, 0
588  // iadd r0, r0, r10
589  // iadd r1, r1, r11
590  // ixor r0, r0, r10
591  // ixor r1, r1, r11
592  // udiv r20, r0, r1
593  // umul r20, r20, r1
594  // sub r0, r0, r20
595  // iadd r0, r0, r10
596  // ixor DST, r0, r10
597
598  // mov r0, LHS
599  SDValue r0 = LHS;
600
601  // mov r1, RHS
602  SDValue r1 = RHS;
603
604  // ilt r10, r0, 0
605  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
606
607  // ilt r11, r1, 0
608  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
609
610  // iadd r0, r0, r10
611  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
612
613  // iadd r1, r1, r11
614  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
615
616  // ixor r0, r0, r10
617  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
618
619  // ixor r1, r1, r11
620  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
621
622  // udiv r20, r0, r1
623  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
624
625  // umul r20, r20, r1
626  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
627
628  // sub r0, r0, r20
629  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
630
631  // iadd r0, r0, r10
632  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
633
634  // ixor DST, r0, r10
635  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
636  return DST;
637}
638
639SDValue
640AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
641  return SDValue(Op.getNode(), 0);
642}
643