1249259Sdim//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// 2249259Sdim// 3249259Sdim// The LLVM Compiler Infrastructure 4249259Sdim// 5249259Sdim// This file is distributed under the University of Illinois Open Source 6249259Sdim// License. See LICENSE.TXT for details. 7249259Sdim// 8249259Sdim//==-----------------------------------------------------------------------===// 9249259Sdim// 10249259Sdim/// \file 11249259Sdim/// \brief TargetLowering functions borrowed from AMDIL. 12249259Sdim// 13249259Sdim//===----------------------------------------------------------------------===// 14249259Sdim 15249259Sdim#include "AMDGPUISelLowering.h" 16249259Sdim#include "AMDGPURegisterInfo.h" 17249259Sdim#include "AMDGPUSubtarget.h" 18249259Sdim#include "AMDILIntrinsicInfo.h" 19249259Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 20249259Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 21249259Sdim#include "llvm/CodeGen/PseudoSourceValue.h" 22249259Sdim#include "llvm/CodeGen/SelectionDAG.h" 23249259Sdim#include "llvm/CodeGen/SelectionDAGNodes.h" 24249259Sdim#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 25249259Sdim#include "llvm/IR/CallingConv.h" 26249259Sdim#include "llvm/IR/DerivedTypes.h" 27249259Sdim#include "llvm/IR/Instructions.h" 28249259Sdim#include "llvm/IR/Intrinsics.h" 29249259Sdim#include "llvm/Support/raw_ostream.h" 30249259Sdim#include "llvm/Target/TargetInstrInfo.h" 31249259Sdim#include "llvm/Target/TargetOptions.h" 32249259Sdim 33249259Sdimusing namespace llvm; 34249259Sdim//===----------------------------------------------------------------------===// 35249259Sdim// TargetLowering Implementation Help Functions End 36249259Sdim//===----------------------------------------------------------------------===// 37249259Sdim 38249259Sdim//===----------------------------------------------------------------------===// 39249259Sdim// TargetLowering Class Implementation Begins 40249259Sdim//===----------------------------------------------------------------------===// 41249259Sdimvoid AMDGPUTargetLowering::InitAMDILLowering() { 42263509Sdim static const int types[] = { 43249259Sdim (int)MVT::i8, 44249259Sdim (int)MVT::i16, 45249259Sdim (int)MVT::i32, 46249259Sdim (int)MVT::f32, 47249259Sdim (int)MVT::f64, 48249259Sdim (int)MVT::i64, 49249259Sdim (int)MVT::v2i8, 50249259Sdim (int)MVT::v4i8, 51249259Sdim (int)MVT::v2i16, 52249259Sdim (int)MVT::v4i16, 53249259Sdim (int)MVT::v4f32, 54249259Sdim (int)MVT::v4i32, 55249259Sdim (int)MVT::v2f32, 56249259Sdim (int)MVT::v2i32, 57249259Sdim (int)MVT::v2f64, 58249259Sdim (int)MVT::v2i64 59249259Sdim }; 60249259Sdim 61263509Sdim static const int IntTypes[] = { 62249259Sdim (int)MVT::i8, 63249259Sdim (int)MVT::i16, 64249259Sdim (int)MVT::i32, 65249259Sdim (int)MVT::i64 66249259Sdim }; 67249259Sdim 68263509Sdim static const int FloatTypes[] = { 69249259Sdim (int)MVT::f32, 70249259Sdim (int)MVT::f64 71249259Sdim }; 72249259Sdim 73263509Sdim static const int VectorTypes[] = { 74249259Sdim (int)MVT::v2i8, 75249259Sdim (int)MVT::v4i8, 76249259Sdim (int)MVT::v2i16, 77249259Sdim (int)MVT::v4i16, 78249259Sdim (int)MVT::v4f32, 79249259Sdim (int)MVT::v4i32, 80249259Sdim (int)MVT::v2f32, 81249259Sdim (int)MVT::v2i32, 82249259Sdim (int)MVT::v2f64, 83249259Sdim (int)MVT::v2i64 84249259Sdim }; 85263509Sdim const size_t NumTypes = array_lengthof(types); 86263509Sdim const size_t NumFloatTypes = array_lengthof(FloatTypes); 87263509Sdim const size_t NumIntTypes = array_lengthof(IntTypes); 88263509Sdim const size_t NumVectorTypes = array_lengthof(VectorTypes); 89249259Sdim 90249259Sdim const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>(); 91249259Sdim // These are the current register classes that are 92249259Sdim // supported 93249259Sdim 94249259Sdim for (unsigned int x = 0; x < NumTypes; ++x) { 95249259Sdim MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; 96249259Sdim 97249259Sdim //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types 98249259Sdim // We cannot sextinreg, expand to shifts 99249259Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); 100249259Sdim setOperationAction(ISD::SUBE, VT, Expand); 101249259Sdim setOperationAction(ISD::SUBC, VT, Expand); 102249259Sdim setOperationAction(ISD::ADDE, VT, Expand); 103249259Sdim setOperationAction(ISD::ADDC, VT, Expand); 104249259Sdim setOperationAction(ISD::BRCOND, VT, Custom); 105249259Sdim setOperationAction(ISD::BR_JT, VT, Expand); 106249259Sdim setOperationAction(ISD::BRIND, VT, Expand); 107249259Sdim // TODO: Implement custom UREM/SREM routines 108249259Sdim setOperationAction(ISD::SREM, VT, Expand); 109249259Sdim setOperationAction(ISD::SMUL_LOHI, VT, Expand); 110249259Sdim setOperationAction(ISD::UMUL_LOHI, VT, Expand); 111249259Sdim if (VT != MVT::i64 && VT != MVT::v2i64) { 112249259Sdim setOperationAction(ISD::SDIV, VT, Custom); 113249259Sdim } 114249259Sdim } 115249259Sdim for (unsigned int x = 0; x < NumFloatTypes; ++x) { 116249259Sdim MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; 117249259Sdim 118249259Sdim // IL does not have these operations for floating point types 119249259Sdim setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); 120249259Sdim setOperationAction(ISD::SETOLT, VT, Expand); 121249259Sdim setOperationAction(ISD::SETOGE, VT, Expand); 122249259Sdim setOperationAction(ISD::SETOGT, VT, Expand); 123249259Sdim setOperationAction(ISD::SETOLE, VT, Expand); 124249259Sdim setOperationAction(ISD::SETULT, VT, Expand); 125249259Sdim setOperationAction(ISD::SETUGE, VT, Expand); 126249259Sdim setOperationAction(ISD::SETUGT, VT, Expand); 127249259Sdim setOperationAction(ISD::SETULE, VT, Expand); 128249259Sdim } 129249259Sdim 130249259Sdim for (unsigned int x = 0; x < NumIntTypes; ++x) { 131249259Sdim MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; 132249259Sdim 133249259Sdim // GPU also does not have divrem function for signed or unsigned 134249259Sdim setOperationAction(ISD::SDIVREM, VT, Expand); 135249259Sdim 136249259Sdim // GPU does not have [S|U]MUL_LOHI functions as a single instruction 137249259Sdim setOperationAction(ISD::SMUL_LOHI, VT, Expand); 138249259Sdim setOperationAction(ISD::UMUL_LOHI, VT, Expand); 139249259Sdim 140249259Sdim setOperationAction(ISD::BSWAP, VT, Expand); 141249259Sdim 142249259Sdim // GPU doesn't have any counting operators 143249259Sdim setOperationAction(ISD::CTPOP, VT, Expand); 144249259Sdim setOperationAction(ISD::CTTZ, VT, Expand); 145249259Sdim setOperationAction(ISD::CTLZ, VT, Expand); 146249259Sdim } 147249259Sdim 148249259Sdim for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) { 149249259Sdim MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; 150249259Sdim 151249259Sdim setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); 152249259Sdim setOperationAction(ISD::SDIVREM, VT, Expand); 153249259Sdim setOperationAction(ISD::SMUL_LOHI, VT, Expand); 154249259Sdim // setOperationAction(ISD::VSETCC, VT, Expand); 155249259Sdim setOperationAction(ISD::SELECT_CC, VT, Expand); 156249259Sdim 157249259Sdim } 158263509Sdim setOperationAction(ISD::MULHU, MVT::i64, Expand); 159263509Sdim setOperationAction(ISD::MULHU, MVT::v2i64, Expand); 160263509Sdim setOperationAction(ISD::MULHS, MVT::i64, Expand); 161263509Sdim setOperationAction(ISD::MULHS, MVT::v2i64, Expand); 162263509Sdim setOperationAction(ISD::ADD, MVT::v2i64, Expand); 163263509Sdim setOperationAction(ISD::SREM, MVT::v2i64, Expand); 164263509Sdim setOperationAction(ISD::Constant , MVT::i64 , Legal); 165263509Sdim setOperationAction(ISD::SDIV, MVT::v2i64, Expand); 166263509Sdim setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); 167263509Sdim setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); 168263509Sdim setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); 169263509Sdim setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); 170263509Sdim if (STM.hasHWFP64()) { 171249259Sdim // we support loading/storing v2f64 but not operations on the type 172249259Sdim setOperationAction(ISD::FADD, MVT::v2f64, Expand); 173249259Sdim setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 174249259Sdim setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 175249259Sdim setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); 176249259Sdim setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); 177249259Sdim setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); 178249259Sdim // We want to expand vector conversions into their scalar 179249259Sdim // counterparts. 180249259Sdim setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); 181249259Sdim setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); 182249259Sdim setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); 183249259Sdim setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); 184249259Sdim setOperationAction(ISD::FABS, MVT::f64, Expand); 185249259Sdim setOperationAction(ISD::FABS, MVT::v2f64, Expand); 186249259Sdim } 187249259Sdim // TODO: Fix the UDIV24 algorithm so it works for these 188249259Sdim // types correctly. This needs vector comparisons 189249259Sdim // for this to work correctly. 190249259Sdim setOperationAction(ISD::UDIV, MVT::v2i8, Expand); 191249259Sdim setOperationAction(ISD::UDIV, MVT::v4i8, Expand); 192249259Sdim setOperationAction(ISD::UDIV, MVT::v2i16, Expand); 193249259Sdim setOperationAction(ISD::UDIV, MVT::v4i16, Expand); 194249259Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); 195249259Sdim setOperationAction(ISD::SUBC, MVT::Other, Expand); 196249259Sdim setOperationAction(ISD::ADDE, MVT::Other, Expand); 197249259Sdim setOperationAction(ISD::ADDC, MVT::Other, Expand); 198249259Sdim setOperationAction(ISD::BRCOND, MVT::Other, Custom); 199249259Sdim setOperationAction(ISD::BR_JT, MVT::Other, Expand); 200249259Sdim setOperationAction(ISD::BRIND, MVT::Other, Expand); 201249259Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); 202249259Sdim 203249259Sdim 204249259Sdim // Use the default implementation. 205249259Sdim setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); 206249259Sdim setOperationAction(ISD::Constant , MVT::i32 , Legal); 207249259Sdim 208249259Sdim setSchedulingPreference(Sched::RegPressure); 209249259Sdim setPow2DivIsCheap(false); 210249259Sdim setSelectIsExpensive(true); 211249259Sdim setJumpIsExpensive(true); 212249259Sdim 213249259Sdim MaxStoresPerMemcpy = 4096; 214249259Sdim MaxStoresPerMemmove = 4096; 215249259Sdim MaxStoresPerMemset = 4096; 216249259Sdim 217249259Sdim} 218249259Sdim 219249259Sdimbool 220249259SdimAMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 221249259Sdim const CallInst &I, unsigned Intrinsic) const { 222249259Sdim return false; 223249259Sdim} 224249259Sdim 225249259Sdim// The backend supports 32 and 64 bit floating point immediates 226249259Sdimbool 227249259SdimAMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 228249259Sdim if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 229249259Sdim || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { 230249259Sdim return true; 231249259Sdim } else { 232249259Sdim return false; 233249259Sdim } 234249259Sdim} 235249259Sdim 236249259Sdimbool 237249259SdimAMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { 238249259Sdim if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 239249259Sdim || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { 240249259Sdim return false; 241249259Sdim } else { 242249259Sdim return true; 243249259Sdim } 244249259Sdim} 245249259Sdim 246249259Sdim 247249259Sdim// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to 248249259Sdim// be zero. Op is expected to be a target specific node. Used by DAG 249249259Sdim// combiner. 250249259Sdim 251249259Sdimvoid 252249259SdimAMDGPUTargetLowering::computeMaskedBitsForTargetNode( 253249259Sdim const SDValue Op, 254249259Sdim APInt &KnownZero, 255249259Sdim APInt &KnownOne, 256249259Sdim const SelectionDAG &DAG, 257249259Sdim unsigned Depth) const { 258249259Sdim APInt KnownZero2; 259249259Sdim APInt KnownOne2; 260249259Sdim KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything 261249259Sdim switch (Op.getOpcode()) { 262249259Sdim default: break; 263249259Sdim case ISD::SELECT_CC: 264249259Sdim DAG.ComputeMaskedBits( 265249259Sdim Op.getOperand(1), 266249259Sdim KnownZero, 267249259Sdim KnownOne, 268249259Sdim Depth + 1 269249259Sdim ); 270249259Sdim DAG.ComputeMaskedBits( 271249259Sdim Op.getOperand(0), 272249259Sdim KnownZero2, 273249259Sdim KnownOne2 274249259Sdim ); 275249259Sdim assert((KnownZero & KnownOne) == 0 276249259Sdim && "Bits known to be one AND zero?"); 277249259Sdim assert((KnownZero2 & KnownOne2) == 0 278249259Sdim && "Bits known to be one AND zero?"); 279249259Sdim // Only known if known in both the LHS and RHS 280249259Sdim KnownOne &= KnownOne2; 281249259Sdim KnownZero &= KnownZero2; 282249259Sdim break; 283249259Sdim }; 284249259Sdim} 285249259Sdim 286249259Sdim//===----------------------------------------------------------------------===// 287249259Sdim// Other Lowering Hooks 288249259Sdim//===----------------------------------------------------------------------===// 289249259Sdim 290249259SdimSDValue 291249259SdimAMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { 292249259Sdim EVT OVT = Op.getValueType(); 293249259Sdim SDValue DST; 294249259Sdim if (OVT.getScalarType() == MVT::i64) { 295249259Sdim DST = LowerSDIV64(Op, DAG); 296249259Sdim } else if (OVT.getScalarType() == MVT::i32) { 297249259Sdim DST = LowerSDIV32(Op, DAG); 298249259Sdim } else if (OVT.getScalarType() == MVT::i16 299249259Sdim || OVT.getScalarType() == MVT::i8) { 300249259Sdim DST = LowerSDIV24(Op, DAG); 301249259Sdim } else { 302249259Sdim DST = SDValue(Op.getNode(), 0); 303249259Sdim } 304249259Sdim return DST; 305249259Sdim} 306249259Sdim 307249259SdimSDValue 308249259SdimAMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const { 309249259Sdim EVT OVT = Op.getValueType(); 310249259Sdim SDValue DST; 311249259Sdim if (OVT.getScalarType() == MVT::i64) { 312249259Sdim DST = LowerSREM64(Op, DAG); 313249259Sdim } else if (OVT.getScalarType() == MVT::i32) { 314249259Sdim DST = LowerSREM32(Op, DAG); 315249259Sdim } else if (OVT.getScalarType() == MVT::i16) { 316249259Sdim DST = LowerSREM16(Op, DAG); 317249259Sdim } else if (OVT.getScalarType() == MVT::i8) { 318249259Sdim DST = LowerSREM8(Op, DAG); 319249259Sdim } else { 320249259Sdim DST = SDValue(Op.getNode(), 0); 321249259Sdim } 322249259Sdim return DST; 323249259Sdim} 324249259Sdim 325249259SdimSDValue 326249259SdimAMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { 327249259Sdim SDValue Data = Op.getOperand(0); 328249259Sdim VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); 329263509Sdim SDLoc DL(Op); 330249259Sdim EVT DVT = Data.getValueType(); 331249259Sdim EVT BVT = BaseType->getVT(); 332249259Sdim unsigned baseBits = BVT.getScalarType().getSizeInBits(); 333249259Sdim unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; 334249259Sdim unsigned shiftBits = srcBits - baseBits; 335249259Sdim if (srcBits < 32) { 336249259Sdim // If the op is less than 32 bits, then it needs to extend to 32bits 337249259Sdim // so it can properly keep the upper bits valid. 338249259Sdim EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); 339249259Sdim Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); 340249259Sdim shiftBits = 32 - baseBits; 341249259Sdim DVT = IVT; 342249259Sdim } 343249259Sdim SDValue Shift = DAG.getConstant(shiftBits, DVT); 344249259Sdim // Shift left by 'Shift' bits. 345249259Sdim Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); 346249259Sdim // Signed shift Right by 'Shift' bits. 347249259Sdim Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); 348249259Sdim if (srcBits < 32) { 349249259Sdim // Once the sign extension is done, the op needs to be converted to 350249259Sdim // its original type. 351249259Sdim Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); 352249259Sdim } 353249259Sdim return Data; 354249259Sdim} 355249259SdimEVT 356249259SdimAMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const { 357249259Sdim int iSize = (size * numEle); 358249259Sdim int vEle = (iSize >> ((size == 64) ? 6 : 5)); 359249259Sdim if (!vEle) { 360249259Sdim vEle = 1; 361249259Sdim } 362249259Sdim if (size == 64) { 363249259Sdim if (vEle == 1) { 364249259Sdim return EVT(MVT::i64); 365249259Sdim } else { 366249259Sdim return EVT(MVT::getVectorVT(MVT::i64, vEle)); 367249259Sdim } 368249259Sdim } else { 369249259Sdim if (vEle == 1) { 370249259Sdim return EVT(MVT::i32); 371249259Sdim } else { 372249259Sdim return EVT(MVT::getVectorVT(MVT::i32, vEle)); 373249259Sdim } 374249259Sdim } 375249259Sdim} 376249259Sdim 377249259SdimSDValue 378249259SdimAMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 379249259Sdim SDValue Chain = Op.getOperand(0); 380249259Sdim SDValue Cond = Op.getOperand(1); 381249259Sdim SDValue Jump = Op.getOperand(2); 382249259Sdim SDValue Result; 383249259Sdim Result = DAG.getNode( 384249259Sdim AMDGPUISD::BRANCH_COND, 385263509Sdim SDLoc(Op), 386249259Sdim Op.getValueType(), 387249259Sdim Chain, Jump, Cond); 388249259Sdim return Result; 389249259Sdim} 390249259Sdim 391249259SdimSDValue 392249259SdimAMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const { 393263509Sdim SDLoc DL(Op); 394249259Sdim EVT OVT = Op.getValueType(); 395249259Sdim SDValue LHS = Op.getOperand(0); 396249259Sdim SDValue RHS = Op.getOperand(1); 397249259Sdim MVT INTTY; 398249259Sdim MVT FLTTY; 399249259Sdim if (!OVT.isVector()) { 400249259Sdim INTTY = MVT::i32; 401249259Sdim FLTTY = MVT::f32; 402249259Sdim } else if (OVT.getVectorNumElements() == 2) { 403249259Sdim INTTY = MVT::v2i32; 404249259Sdim FLTTY = MVT::v2f32; 405249259Sdim } else if (OVT.getVectorNumElements() == 4) { 406249259Sdim INTTY = MVT::v4i32; 407249259Sdim FLTTY = MVT::v4f32; 408249259Sdim } 409249259Sdim unsigned bitsize = OVT.getScalarType().getSizeInBits(); 410249259Sdim // char|short jq = ia ^ ib; 411249259Sdim SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); 412249259Sdim 413249259Sdim // jq = jq >> (bitsize - 2) 414249259Sdim jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 415249259Sdim 416249259Sdim // jq = jq | 0x1 417249259Sdim jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); 418249259Sdim 419249259Sdim // jq = (int)jq 420249259Sdim jq = DAG.getSExtOrTrunc(jq, DL, INTTY); 421249259Sdim 422249259Sdim // int ia = (int)LHS; 423249259Sdim SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); 424249259Sdim 425249259Sdim // int ib, (int)RHS; 426249259Sdim SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); 427249259Sdim 428249259Sdim // float fa = (float)ia; 429249259Sdim SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 430249259Sdim 431249259Sdim // float fb = (float)ib; 432249259Sdim SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 433249259Sdim 434249259Sdim // float fq = native_divide(fa, fb); 435249259Sdim SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb); 436249259Sdim 437249259Sdim // fq = trunc(fq); 438249259Sdim fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 439249259Sdim 440249259Sdim // float fqneg = -fq; 441249259Sdim SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); 442249259Sdim 443249259Sdim // float fr = mad(fqneg, fb, fa); 444249259Sdim SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY, 445249259Sdim DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa); 446249259Sdim 447249259Sdim // int iq = (int)fq; 448249259Sdim SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 449249259Sdim 450249259Sdim // fr = fabs(fr); 451249259Sdim fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); 452249259Sdim 453249259Sdim // fb = fabs(fb); 454249259Sdim fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); 455249259Sdim 456249259Sdim // int cv = fr >= fb; 457249259Sdim SDValue cv; 458249259Sdim if (INTTY == MVT::i32) { 459249259Sdim cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 460249259Sdim } else { 461249259Sdim cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 462249259Sdim } 463249259Sdim // jq = (cv ? jq : 0); 464249259Sdim jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, 465249259Sdim DAG.getConstant(0, OVT)); 466249259Sdim // dst = iq + jq; 467249259Sdim iq = DAG.getSExtOrTrunc(iq, DL, OVT); 468249259Sdim iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); 469249259Sdim return iq; 470249259Sdim} 471249259Sdim 472249259SdimSDValue 473249259SdimAMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const { 474263509Sdim SDLoc DL(Op); 475249259Sdim EVT OVT = Op.getValueType(); 476249259Sdim SDValue LHS = Op.getOperand(0); 477249259Sdim SDValue RHS = Op.getOperand(1); 478249259Sdim // The LowerSDIV32 function generates equivalent to the following IL. 479249259Sdim // mov r0, LHS 480249259Sdim // mov r1, RHS 481249259Sdim // ilt r10, r0, 0 482249259Sdim // ilt r11, r1, 0 483249259Sdim // iadd r0, r0, r10 484249259Sdim // iadd r1, r1, r11 485249259Sdim // ixor r0, r0, r10 486249259Sdim // ixor r1, r1, r11 487249259Sdim // udiv r0, r0, r1 488249259Sdim // ixor r10, r10, r11 489249259Sdim // iadd r0, r0, r10 490249259Sdim // ixor DST, r0, r10 491249259Sdim 492249259Sdim // mov r0, LHS 493249259Sdim SDValue r0 = LHS; 494249259Sdim 495249259Sdim // mov r1, RHS 496249259Sdim SDValue r1 = RHS; 497249259Sdim 498249259Sdim // ilt r10, r0, 0 499249259Sdim SDValue r10 = DAG.getSelectCC(DL, 500249259Sdim r0, DAG.getConstant(0, OVT), 501249259Sdim DAG.getConstant(-1, MVT::i32), 502249259Sdim DAG.getConstant(0, MVT::i32), 503249259Sdim ISD::SETLT); 504249259Sdim 505249259Sdim // ilt r11, r1, 0 506249259Sdim SDValue r11 = DAG.getSelectCC(DL, 507249259Sdim r1, DAG.getConstant(0, OVT), 508249259Sdim DAG.getConstant(-1, MVT::i32), 509249259Sdim DAG.getConstant(0, MVT::i32), 510249259Sdim ISD::SETLT); 511249259Sdim 512249259Sdim // iadd r0, r0, r10 513249259Sdim r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 514249259Sdim 515249259Sdim // iadd r1, r1, r11 516249259Sdim r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 517249259Sdim 518249259Sdim // ixor r0, r0, r10 519249259Sdim r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 520249259Sdim 521249259Sdim // ixor r1, r1, r11 522249259Sdim r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 523249259Sdim 524249259Sdim // udiv r0, r0, r1 525249259Sdim r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); 526249259Sdim 527249259Sdim // ixor r10, r10, r11 528249259Sdim r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); 529249259Sdim 530249259Sdim // iadd r0, r0, r10 531249259Sdim r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 532249259Sdim 533249259Sdim // ixor DST, r0, r10 534249259Sdim SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 535249259Sdim return DST; 536249259Sdim} 537249259Sdim 538249259SdimSDValue 539249259SdimAMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const { 540249259Sdim return SDValue(Op.getNode(), 0); 541249259Sdim} 542249259Sdim 543249259SdimSDValue 544249259SdimAMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const { 545263509Sdim SDLoc DL(Op); 546249259Sdim EVT OVT = Op.getValueType(); 547249259Sdim MVT INTTY = MVT::i32; 548249259Sdim if (OVT == MVT::v2i8) { 549249259Sdim INTTY = MVT::v2i32; 550249259Sdim } else if (OVT == MVT::v4i8) { 551249259Sdim INTTY = MVT::v4i32; 552249259Sdim } 553249259Sdim SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 554249259Sdim SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 555249259Sdim LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 556249259Sdim LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 557249259Sdim return LHS; 558249259Sdim} 559249259Sdim 560249259SdimSDValue 561249259SdimAMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const { 562263509Sdim SDLoc DL(Op); 563249259Sdim EVT OVT = Op.getValueType(); 564249259Sdim MVT INTTY = MVT::i32; 565249259Sdim if (OVT == MVT::v2i16) { 566249259Sdim INTTY = MVT::v2i32; 567249259Sdim } else if (OVT == MVT::v4i16) { 568249259Sdim INTTY = MVT::v4i32; 569249259Sdim } 570249259Sdim SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 571249259Sdim SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 572249259Sdim LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 573249259Sdim LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 574249259Sdim return LHS; 575249259Sdim} 576249259Sdim 577249259SdimSDValue 578249259SdimAMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const { 579263509Sdim SDLoc DL(Op); 580249259Sdim EVT OVT = Op.getValueType(); 581249259Sdim SDValue LHS = Op.getOperand(0); 582249259Sdim SDValue RHS = Op.getOperand(1); 583249259Sdim // The LowerSREM32 function generates equivalent to the following IL. 584249259Sdim // mov r0, LHS 585249259Sdim // mov r1, RHS 586249259Sdim // ilt r10, r0, 0 587249259Sdim // ilt r11, r1, 0 588249259Sdim // iadd r0, r0, r10 589249259Sdim // iadd r1, r1, r11 590249259Sdim // ixor r0, r0, r10 591249259Sdim // ixor r1, r1, r11 592249259Sdim // udiv r20, r0, r1 593249259Sdim // umul r20, r20, r1 594249259Sdim // sub r0, r0, r20 595249259Sdim // iadd r0, r0, r10 596249259Sdim // ixor DST, r0, r10 597249259Sdim 598249259Sdim // mov r0, LHS 599249259Sdim SDValue r0 = LHS; 600249259Sdim 601249259Sdim // mov r1, RHS 602249259Sdim SDValue r1 = RHS; 603249259Sdim 604249259Sdim // ilt r10, r0, 0 605249259Sdim SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT); 606249259Sdim 607249259Sdim // ilt r11, r1, 0 608249259Sdim SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT); 609249259Sdim 610249259Sdim // iadd r0, r0, r10 611249259Sdim r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 612249259Sdim 613249259Sdim // iadd r1, r1, r11 614249259Sdim r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 615249259Sdim 616249259Sdim // ixor r0, r0, r10 617249259Sdim r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 618249259Sdim 619249259Sdim // ixor r1, r1, r11 620249259Sdim r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 621249259Sdim 622249259Sdim // udiv r20, r0, r1 623249259Sdim SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); 624249259Sdim 625249259Sdim // umul r20, r20, r1 626249259Sdim r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1); 627249259Sdim 628249259Sdim // sub r0, r0, r20 629249259Sdim r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); 630249259Sdim 631249259Sdim // iadd r0, r0, r10 632249259Sdim r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 633249259Sdim 634249259Sdim // ixor DST, r0, r10 635249259Sdim SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 636249259Sdim return DST; 637249259Sdim} 638249259Sdim 639249259SdimSDValue 640249259SdimAMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const { 641249259Sdim return SDValue(Op.getNode(), 0); 642249259Sdim} 643