1284677Sdim//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===// 2284677Sdim// 3284677Sdim// The LLVM Compiler Infrastructure 4284677Sdim// 5284677Sdim// This file is distributed under the University of Illinois Open Source 6284677Sdim// License. See LICENSE.TXT for details. 7284677Sdim// 8284677Sdim//==-----------------------------------------------------------------------===// 9284677Sdim// 10284677Sdim/// \file 11284677Sdim/// \brief Defines an instruction selector for the AMDGPU target. 12284677Sdim// 13284677Sdim//===----------------------------------------------------------------------===// 14296417Sdim 15296417Sdim#include "AMDGPUDiagnosticInfoUnsupported.h" 16284677Sdim#include "AMDGPUInstrInfo.h" 17284677Sdim#include "AMDGPUISelLowering.h" // For AMDGPUISD 18284677Sdim#include "AMDGPURegisterInfo.h" 19284677Sdim#include "AMDGPUSubtarget.h" 20284677Sdim#include "R600InstrInfo.h" 21284677Sdim#include "SIDefines.h" 22284677Sdim#include "SIISelLowering.h" 23284677Sdim#include "SIMachineFunctionInfo.h" 24284677Sdim#include "llvm/CodeGen/FunctionLoweringInfo.h" 25284677Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 26284677Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 27296417Sdim#include "llvm/CodeGen/PseudoSourceValue.h" 28284677Sdim#include "llvm/CodeGen/SelectionDAG.h" 29284677Sdim#include "llvm/CodeGen/SelectionDAGISel.h" 30284677Sdim#include "llvm/IR/Function.h" 31284677Sdim 32284677Sdimusing namespace llvm; 33284677Sdim 34284677Sdim//===----------------------------------------------------------------------===// 35284677Sdim// Instruction Selector Implementation 36284677Sdim//===----------------------------------------------------------------------===// 37284677Sdim 38284677Sdimnamespace { 39284677Sdim/// AMDGPU specific code to select AMDGPU machine instructions for 40284677Sdim/// SelectionDAG operations. 
41284677Sdimclass AMDGPUDAGToDAGISel : public SelectionDAGISel { 42284677Sdim // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can 43284677Sdim // make the right decision when generating code for different targets. 44284677Sdim const AMDGPUSubtarget *Subtarget; 45296417Sdim 46284677Sdimpublic: 47284677Sdim AMDGPUDAGToDAGISel(TargetMachine &TM); 48284677Sdim virtual ~AMDGPUDAGToDAGISel(); 49284677Sdim bool runOnMachineFunction(MachineFunction &MF) override; 50284677Sdim SDNode *Select(SDNode *N) override; 51284677Sdim const char *getPassName() const override; 52296417Sdim void PreprocessISelDAG() override; 53284677Sdim void PostprocessISelDAG() override; 54284677Sdim 55284677Sdimprivate: 56284677Sdim bool isInlineImmediate(SDNode *N) const; 57284677Sdim bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, 58284677Sdim const R600InstrInfo *TII); 59284677Sdim bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 60284677Sdim bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 61284677Sdim 62284677Sdim // Complex pattern selectors 63284677Sdim bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); 64284677Sdim bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); 65284677Sdim bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); 66284677Sdim 67284677Sdim static bool checkType(const Value *ptr, unsigned int addrspace); 68284677Sdim static bool checkPrivateAddress(const MachineMemOperand *Op); 69284677Sdim 70284677Sdim static bool isGlobalStore(const StoreSDNode *N); 71284677Sdim static bool isFlatStore(const StoreSDNode *N); 72284677Sdim static bool isPrivateStore(const StoreSDNode *N); 73284677Sdim static bool isLocalStore(const StoreSDNode *N); 74284677Sdim static bool isRegionStore(const StoreSDNode *N); 75284677Sdim 76284677Sdim bool isCPLoad(const LoadSDNode *N) const; 77284677Sdim bool isConstantLoad(const LoadSDNode *N, int cbID) const; 78284677Sdim bool 
isGlobalLoad(const LoadSDNode *N) const; 79284677Sdim bool isFlatLoad(const LoadSDNode *N) const; 80284677Sdim bool isParamLoad(const LoadSDNode *N) const; 81284677Sdim bool isPrivateLoad(const LoadSDNode *N) const; 82284677Sdim bool isLocalLoad(const LoadSDNode *N) const; 83284677Sdim bool isRegionLoad(const LoadSDNode *N) const; 84284677Sdim 85284677Sdim SDNode *glueCopyToM0(SDNode *N) const; 86284677Sdim 87284677Sdim const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; 88284677Sdim bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); 89284677Sdim bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, 90284677Sdim SDValue& Offset); 91284677Sdim bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); 92284677Sdim bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); 93284677Sdim bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, 94284677Sdim unsigned OffsetBits) const; 95284677Sdim bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; 96284677Sdim bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 97284677Sdim SDValue &Offset1) const; 98296417Sdim bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 99284677Sdim SDValue &SOffset, SDValue &Offset, SDValue &Offen, 100284677Sdim SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, 101284677Sdim SDValue &TFE) const; 102284677Sdim bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 103284677Sdim SDValue &SOffset, SDValue &Offset, SDValue &GLC, 104284677Sdim SDValue &SLC, SDValue &TFE) const; 105284677Sdim bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 106284677Sdim SDValue &VAddr, SDValue &SOffset, SDValue &Offset, 107284677Sdim SDValue &SLC) const; 108284677Sdim bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr, 109284677Sdim SDValue &SOffset, SDValue &ImmOffset) const; 110284677Sdim bool SelectMUBUFOffset(SDValue 
Addr, SDValue &SRsrc, SDValue &SOffset, 111284677Sdim SDValue &Offset, SDValue &GLC, SDValue &SLC, 112284677Sdim SDValue &TFE) const; 113284677Sdim bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 114284677Sdim SDValue &Offset, SDValue &GLC) const; 115296417Sdim bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, 116296417Sdim bool &Imm) const; 117296417Sdim bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, 118296417Sdim bool &Imm) const; 119296417Sdim bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 120296417Sdim bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 121296417Sdim bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 122296417Sdim bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; 123296417Sdim bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; 124296417Sdim bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; 125284677Sdim SDNode *SelectAddrSpaceCast(SDNode *N); 126284677Sdim bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 127286684Sdim bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 128284677Sdim bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, 129284677Sdim SDValue &Clamp, SDValue &Omod) const; 130286684Sdim bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 131286684Sdim SDValue &Clamp, SDValue &Omod) const; 132284677Sdim 133284677Sdim bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, 134284677Sdim SDValue &Omod) const; 135284677Sdim bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, 136284677Sdim SDValue &Clamp, 137284677Sdim SDValue &Omod) const; 138284677Sdim 139284677Sdim SDNode *SelectADD_SUB_I64(SDNode *N); 140284677Sdim SDNode *SelectDIV_SCALE(SDNode *N); 141284677Sdim 142284677Sdim SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val, 143284677Sdim uint32_t Offset, 
uint32_t Width); 144284677Sdim SDNode *SelectS_BFEFromShifts(SDNode *N); 145284677Sdim SDNode *SelectS_BFE(SDNode *N); 146284677Sdim 147284677Sdim // Include the pieces autogenerated from the target description. 148284677Sdim#include "AMDGPUGenDAGISel.inc" 149284677Sdim}; 150284677Sdim} // end anonymous namespace 151284677Sdim 152284677Sdim/// \brief This pass converts a legalized DAG into a AMDGPU-specific 153284677Sdim// DAG, ready for instruction scheduling. 154284677SdimFunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) { 155284677Sdim return new AMDGPUDAGToDAGISel(TM); 156284677Sdim} 157284677Sdim 158284677SdimAMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM) 159284677Sdim : SelectionDAGISel(TM) {} 160284677Sdim 161284677Sdimbool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 162284677Sdim Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget()); 163284677Sdim return SelectionDAGISel::runOnMachineFunction(MF); 164284677Sdim} 165284677Sdim 166284677SdimAMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { 167284677Sdim} 168284677Sdim 169284677Sdimbool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const { 170284677Sdim const SITargetLowering *TL 171284677Sdim = static_cast<const SITargetLowering *>(getTargetLowering()); 172284677Sdim return TL->analyzeImmediate(N) == 0; 173284677Sdim} 174284677Sdim 175284677Sdim/// \brief Determine the register class for \p OpNo 176284677Sdim/// \returns The register class of the virtual register that will be used for 177284677Sdim/// the given operand number \OpNo or NULL if the register class cannot be 178284677Sdim/// determined. 
179284677Sdimconst TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 180284677Sdim unsigned OpNo) const { 181284677Sdim if (!N->isMachineOpcode()) 182284677Sdim return nullptr; 183284677Sdim 184284677Sdim switch (N->getMachineOpcode()) { 185284677Sdim default: { 186284677Sdim const MCInstrDesc &Desc = 187284677Sdim Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 188284677Sdim unsigned OpIdx = Desc.getNumDefs() + OpNo; 189284677Sdim if (OpIdx >= Desc.getNumOperands()) 190284677Sdim return nullptr; 191284677Sdim int RegClass = Desc.OpInfo[OpIdx].RegClass; 192284677Sdim if (RegClass == -1) 193284677Sdim return nullptr; 194284677Sdim 195284677Sdim return Subtarget->getRegisterInfo()->getRegClass(RegClass); 196284677Sdim } 197284677Sdim case AMDGPU::REG_SEQUENCE: { 198284677Sdim unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 199284677Sdim const TargetRegisterClass *SuperRC = 200284677Sdim Subtarget->getRegisterInfo()->getRegClass(RCID); 201284677Sdim 202284677Sdim SDValue SubRegOp = N->getOperand(OpNo + 1); 203284677Sdim unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 204284677Sdim return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 205284677Sdim SubRegIdx); 206284677Sdim } 207284677Sdim } 208284677Sdim} 209284677Sdim 210284677Sdimbool AMDGPUDAGToDAGISel::SelectADDRParam( 211284677Sdim SDValue Addr, SDValue& R1, SDValue& R2) { 212284677Sdim 213284677Sdim if (Addr.getOpcode() == ISD::FrameIndex) { 214284677Sdim if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 215284677Sdim R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); 216284677Sdim R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 217284677Sdim } else { 218284677Sdim R1 = Addr; 219284677Sdim R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 220284677Sdim } 221284677Sdim } else if (Addr.getOpcode() == ISD::ADD) { 222284677Sdim R1 = Addr.getOperand(0); 223284677Sdim R2 = 
Addr.getOperand(1); 224284677Sdim } else { 225284677Sdim R1 = Addr; 226284677Sdim R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 227284677Sdim } 228284677Sdim return true; 229284677Sdim} 230284677Sdim 231284677Sdimbool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) { 232284677Sdim if (Addr.getOpcode() == ISD::TargetExternalSymbol || 233284677Sdim Addr.getOpcode() == ISD::TargetGlobalAddress) { 234284677Sdim return false; 235284677Sdim } 236284677Sdim return SelectADDRParam(Addr, R1, R2); 237284677Sdim} 238284677Sdim 239284677Sdim 240284677Sdimbool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { 241284677Sdim if (Addr.getOpcode() == ISD::TargetExternalSymbol || 242284677Sdim Addr.getOpcode() == ISD::TargetGlobalAddress) { 243284677Sdim return false; 244284677Sdim } 245284677Sdim 246284677Sdim if (Addr.getOpcode() == ISD::FrameIndex) { 247284677Sdim if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 248284677Sdim R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); 249284677Sdim R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 250284677Sdim } else { 251284677Sdim R1 = Addr; 252284677Sdim R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 253284677Sdim } 254284677Sdim } else if (Addr.getOpcode() == ISD::ADD) { 255284677Sdim R1 = Addr.getOperand(0); 256284677Sdim R2 = Addr.getOperand(1); 257284677Sdim } else { 258284677Sdim R1 = Addr; 259284677Sdim R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 260284677Sdim } 261284677Sdim return true; 262284677Sdim} 263284677Sdim 264284677SdimSDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 265284677Sdim if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 266284677Sdim !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(), 267284677Sdim AMDGPUAS::LOCAL_ADDRESS)) 268284677Sdim return N; 269284677Sdim 270284677Sdim const SITargetLowering& Lowering = 271284677Sdim *static_cast<const 
SITargetLowering*>(getTargetLowering()); 272284677Sdim 273284677Sdim // Write max value to m0 before each load operation 274284677Sdim 275284677Sdim SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 276284677Sdim CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 277284677Sdim 278284677Sdim SDValue Glue = M0.getValue(1); 279284677Sdim 280284677Sdim SmallVector <SDValue, 8> Ops; 281284677Sdim for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 282284677Sdim Ops.push_back(N->getOperand(i)); 283284677Sdim } 284284677Sdim Ops.push_back(Glue); 285284677Sdim CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 286284677Sdim 287284677Sdim return N; 288284677Sdim} 289284677Sdim 290296417Sdimstatic unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 291296417Sdim switch (NumVectorElts) { 292296417Sdim case 1: 293296417Sdim return AMDGPU::SReg_32RegClassID; 294296417Sdim case 2: 295296417Sdim return AMDGPU::SReg_64RegClassID; 296296417Sdim case 4: 297296417Sdim return AMDGPU::SReg_128RegClassID; 298296417Sdim case 8: 299296417Sdim return AMDGPU::SReg_256RegClassID; 300296417Sdim case 16: 301296417Sdim return AMDGPU::SReg_512RegClassID; 302296417Sdim } 303296417Sdim 304296417Sdim llvm_unreachable("invalid vector size"); 305296417Sdim} 306296417Sdim 307284677SdimSDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { 308284677Sdim unsigned int Opc = N->getOpcode(); 309284677Sdim if (N->isMachineOpcode()) { 310284677Sdim N->setNodeId(-1); 311284677Sdim return nullptr; // Already selected. 312284677Sdim } 313284677Sdim 314284677Sdim if (isa<AtomicSDNode>(N)) 315284677Sdim N = glueCopyToM0(N); 316284677Sdim 317284677Sdim switch (Opc) { 318284677Sdim default: break; 319284677Sdim // We are selecting i64 ADD here instead of custom lower it during 320284677Sdim // DAG legalization, so we can fold some i64 ADDs used for address 321284677Sdim // calculation into the LOAD and STORE instructions. 
322284677Sdim case ISD::ADD: 323284677Sdim case ISD::SUB: { 324284677Sdim if (N->getValueType(0) != MVT::i64 || 325284677Sdim Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 326284677Sdim break; 327284677Sdim 328284677Sdim return SelectADD_SUB_I64(N); 329284677Sdim } 330284677Sdim case ISD::SCALAR_TO_VECTOR: 331284677Sdim case AMDGPUISD::BUILD_VERTICAL_VECTOR: 332284677Sdim case ISD::BUILD_VECTOR: { 333284677Sdim unsigned RegClassID; 334284677Sdim const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 335284677Sdim EVT VT = N->getValueType(0); 336284677Sdim unsigned NumVectorElts = VT.getVectorNumElements(); 337284677Sdim EVT EltVT = VT.getVectorElementType(); 338284677Sdim assert(EltVT.bitsEq(MVT::i32)); 339284677Sdim if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { 340296417Sdim RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 341284677Sdim } else { 342284677Sdim // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG 343284677Sdim // that adds a 128 bits reg copy when going through TwoAddressInstructions 344284677Sdim // pass. We want to avoid 128 bits copies as much as possible because they 345284677Sdim // can't be bundled by our scheduler. 
346284677Sdim switch(NumVectorElts) { 347284677Sdim case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; 348284677Sdim case 4: 349284677Sdim if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) 350284677Sdim RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; 351284677Sdim else 352284677Sdim RegClassID = AMDGPU::R600_Reg128RegClassID; 353284677Sdim break; 354284677Sdim default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); 355284677Sdim } 356284677Sdim } 357284677Sdim 358284677Sdim SDLoc DL(N); 359284677Sdim SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 360284677Sdim 361284677Sdim if (NumVectorElts == 1) { 362284677Sdim return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, 363284677Sdim N->getOperand(0), RegClass); 364284677Sdim } 365284677Sdim 366284677Sdim assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 367284677Sdim "supported yet"); 368284677Sdim // 16 = Max Num Vector Elements 369284677Sdim // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 370284677Sdim // 1 = Vector Register Class 371284677Sdim SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 372284677Sdim 373284677Sdim RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 374284677Sdim bool IsRegSeq = true; 375284677Sdim unsigned NOps = N->getNumOperands(); 376284677Sdim for (unsigned i = 0; i < NOps; i++) { 377284677Sdim // XXX: Why is this here? 378284677Sdim if (isa<RegisterSDNode>(N->getOperand(i))) { 379284677Sdim IsRegSeq = false; 380284677Sdim break; 381284677Sdim } 382284677Sdim RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 383284677Sdim RegSeqArgs[1 + (2 * i) + 1] = 384284677Sdim CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 385284677Sdim MVT::i32); 386284677Sdim } 387284677Sdim 388284677Sdim if (NOps != NumVectorElts) { 389284677Sdim // Fill in the missing undef elements if this was a scalar_to_vector. 
390284677Sdim assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 391284677Sdim 392284677Sdim MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 393284677Sdim DL, EltVT); 394284677Sdim for (unsigned i = NOps; i < NumVectorElts; ++i) { 395284677Sdim RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 396284677Sdim RegSeqArgs[1 + (2 * i) + 1] = 397284677Sdim CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 398284677Sdim } 399284677Sdim } 400284677Sdim 401284677Sdim if (!IsRegSeq) 402284677Sdim break; 403284677Sdim return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), 404284677Sdim RegSeqArgs); 405284677Sdim } 406284677Sdim case ISD::BUILD_PAIR: { 407284677Sdim SDValue RC, SubReg0, SubReg1; 408284677Sdim if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 409284677Sdim break; 410284677Sdim } 411284677Sdim SDLoc DL(N); 412284677Sdim if (N->getValueType(0) == MVT::i128) { 413284677Sdim RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 414284677Sdim SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 415284677Sdim SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 416284677Sdim } else if (N->getValueType(0) == MVT::i64) { 417284677Sdim RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 418284677Sdim SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 419284677Sdim SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 420284677Sdim } else { 421284677Sdim llvm_unreachable("Unhandled value type for BUILD_PAIR"); 422284677Sdim } 423284677Sdim const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 424284677Sdim N->getOperand(1), SubReg1 }; 425284677Sdim return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, 426284677Sdim DL, N->getValueType(0), Ops); 427284677Sdim } 428284677Sdim 429284677Sdim case ISD::Constant: 430284677Sdim case ISD::ConstantFP: { 431284677Sdim if 
(Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 432284677Sdim N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 433284677Sdim break; 434284677Sdim 435284677Sdim uint64_t Imm; 436284677Sdim if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 437284677Sdim Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 438284677Sdim else { 439284677Sdim ConstantSDNode *C = cast<ConstantSDNode>(N); 440284677Sdim Imm = C->getZExtValue(); 441284677Sdim } 442284677Sdim 443284677Sdim SDLoc DL(N); 444284677Sdim SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 445284677Sdim CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 446284677Sdim MVT::i32)); 447284677Sdim SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 448284677Sdim CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 449284677Sdim const SDValue Ops[] = { 450284677Sdim CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 451284677Sdim SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 452284677Sdim SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 453284677Sdim }; 454284677Sdim 455284677Sdim return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 456284677Sdim N->getValueType(0), Ops); 457284677Sdim } 458296417Sdim case ISD::LOAD: 459284677Sdim case ISD::STORE: { 460284677Sdim N = glueCopyToM0(N); 461284677Sdim break; 462284677Sdim } 463284677Sdim 464284677Sdim case AMDGPUISD::BFE_I32: 465284677Sdim case AMDGPUISD::BFE_U32: { 466284677Sdim if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 467284677Sdim break; 468284677Sdim 469284677Sdim // There is a scalar version available, but unlike the vector version which 470284677Sdim // has a separate operand for the offset and width, the scalar version packs 471284677Sdim // the width and offset into a single operand. 
Try to move to the scalar 472284677Sdim // version if the offsets are constant, so that we can try to keep extended 473284677Sdim // loads of kernel arguments in SGPRs. 474284677Sdim 475284677Sdim // TODO: Technically we could try to pattern match scalar bitshifts of 476284677Sdim // dynamic values, but it's probably not useful. 477284677Sdim ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 478284677Sdim if (!Offset) 479284677Sdim break; 480284677Sdim 481284677Sdim ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 482284677Sdim if (!Width) 483284677Sdim break; 484284677Sdim 485284677Sdim bool Signed = Opc == AMDGPUISD::BFE_I32; 486284677Sdim 487284677Sdim uint32_t OffsetVal = Offset->getZExtValue(); 488284677Sdim uint32_t WidthVal = Width->getZExtValue(); 489284677Sdim 490284677Sdim return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N), 491284677Sdim N->getOperand(0), OffsetVal, WidthVal); 492284677Sdim } 493284677Sdim case AMDGPUISD::DIV_SCALE: { 494284677Sdim return SelectDIV_SCALE(N); 495284677Sdim } 496284677Sdim case ISD::CopyToReg: { 497284677Sdim const SITargetLowering& Lowering = 498284677Sdim *static_cast<const SITargetLowering*>(getTargetLowering()); 499284677Sdim Lowering.legalizeTargetIndependentNode(N, *CurDAG); 500284677Sdim break; 501284677Sdim } 502284677Sdim case ISD::ADDRSPACECAST: 503284677Sdim return SelectAddrSpaceCast(N); 504284677Sdim case ISD::AND: 505284677Sdim case ISD::SRL: 506284677Sdim case ISD::SRA: 507284677Sdim if (N->getValueType(0) != MVT::i32 || 508284677Sdim Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 509284677Sdim break; 510284677Sdim 511284677Sdim return SelectS_BFE(N); 512284677Sdim } 513284677Sdim 514284677Sdim return SelectCode(N); 515284677Sdim} 516284677Sdim 517284677Sdimbool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) { 518284677Sdim assert(AS != 0 && "Use checkPrivateAddress instead."); 519284677Sdim if (!Ptr) 520284677Sdim 
return false; 521284677Sdim 522284677Sdim return Ptr->getType()->getPointerAddressSpace() == AS; 523284677Sdim} 524284677Sdim 525284677Sdimbool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) { 526284677Sdim if (Op->getPseudoValue()) 527284677Sdim return true; 528284677Sdim 529284677Sdim if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType())) 530284677Sdim return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; 531284677Sdim 532284677Sdim return false; 533284677Sdim} 534284677Sdim 535284677Sdimbool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { 536284677Sdim return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); 537284677Sdim} 538284677Sdim 539284677Sdimbool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { 540284677Sdim const Value *MemVal = N->getMemOperand()->getValue(); 541284677Sdim return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) && 542284677Sdim !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) && 543284677Sdim !checkType(MemVal, AMDGPUAS::REGION_ADDRESS)); 544284677Sdim} 545284677Sdim 546284677Sdimbool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) { 547284677Sdim return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS); 548284677Sdim} 549284677Sdim 550284677Sdimbool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) { 551284677Sdim return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS); 552284677Sdim} 553284677Sdim 554284677Sdimbool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) { 555284677Sdim return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS); 556284677Sdim} 557284677Sdim 558284677Sdimbool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const { 559284677Sdim const Value *MemVal = N->getMemOperand()->getValue(); 560284677Sdim if (CbId == -1) 561284677Sdim return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS); 562284677Sdim 563284677Sdim return checkType(MemVal, 
AMDGPUAS::CONSTANT_BUFFER_0 + CbId); 564284677Sdim} 565284677Sdim 566284677Sdimbool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const { 567284677Sdim if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) 568284677Sdim if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 569284677Sdim N->getMemoryVT().bitsLT(MVT::i32)) 570284677Sdim return true; 571284677Sdim 572284677Sdim return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); 573284677Sdim} 574284677Sdim 575284677Sdimbool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const { 576284677Sdim return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS); 577284677Sdim} 578284677Sdim 579284677Sdimbool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const { 580284677Sdim return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS); 581284677Sdim} 582284677Sdim 583284677Sdimbool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const { 584284677Sdim return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS); 585284677Sdim} 586284677Sdim 587284677Sdimbool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const { 588284677Sdim return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS); 589284677Sdim} 590284677Sdim 591284677Sdimbool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const { 592284677Sdim MachineMemOperand *MMO = N->getMemOperand(); 593284677Sdim if (checkPrivateAddress(N->getMemOperand())) { 594284677Sdim if (MMO) { 595284677Sdim const PseudoSourceValue *PSV = MMO->getPseudoValue(); 596296417Sdim if (PSV && PSV->isConstantPool()) { 597284677Sdim return true; 598284677Sdim } 599284677Sdim } 600284677Sdim } 601284677Sdim return false; 602284677Sdim} 603284677Sdim 604284677Sdimbool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const { 605284677Sdim if (checkPrivateAddress(N->getMemOperand())) { 606284677Sdim // Check to make sure we are not a constant pool load or a constant load 
607284677Sdim // that is marked as a private load 608284677Sdim if (isCPLoad(N) || isConstantLoad(N, -1)) { 609284677Sdim return false; 610284677Sdim } 611284677Sdim } 612284677Sdim 613284677Sdim const Value *MemVal = N->getMemOperand()->getValue(); 614284677Sdim if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) && 615284677Sdim !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) && 616284677Sdim !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) && 617284677Sdim !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) && 618284677Sdim !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) && 619284677Sdim !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) && 620284677Sdim !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) { 621284677Sdim return true; 622284677Sdim } 623284677Sdim return false; 624284677Sdim} 625284677Sdim 626284677Sdimconst char *AMDGPUDAGToDAGISel::getPassName() const { 627284677Sdim return "AMDGPU DAG->DAG Pattern Instruction Selection"; 628284677Sdim} 629284677Sdim 630284677Sdim#ifdef DEBUGTMP 631284677Sdim#undef INT64_C 632284677Sdim#endif 633284677Sdim#undef DEBUGTMP 634284677Sdim 635284677Sdim//===----------------------------------------------------------------------===// 636284677Sdim// Complex Patterns 637284677Sdim//===----------------------------------------------------------------------===// 638284677Sdim 639284677Sdimbool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, 640284677Sdim SDValue& IntPtr) { 641284677Sdim if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { 642284677Sdim IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), 643284677Sdim true); 644284677Sdim return true; 645284677Sdim } 646284677Sdim return false; 647284677Sdim} 648284677Sdim 649284677Sdimbool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, 650284677Sdim SDValue& BaseReg, SDValue &Offset) { 651284677Sdim if (!isa<ConstantSDNode>(Addr)) { 652284677Sdim BaseReg = Addr; 653284677Sdim Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); 
654284677Sdim return true; 655284677Sdim } 656284677Sdim return false; 657284677Sdim} 658284677Sdim 659284677Sdimbool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 660284677Sdim SDValue &Offset) { 661284677Sdim ConstantSDNode *IMMOffset; 662284677Sdim 663284677Sdim if (Addr.getOpcode() == ISD::ADD 664284677Sdim && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) 665284677Sdim && isInt<16>(IMMOffset->getZExtValue())) { 666284677Sdim 667284677Sdim Base = Addr.getOperand(0); 668284677Sdim Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 669284677Sdim MVT::i32); 670284677Sdim return true; 671284677Sdim // If the pointer address is constant, we can move it to the offset field. 672284677Sdim } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) 673284677Sdim && isInt<16>(IMMOffset->getZExtValue())) { 674284677Sdim Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), 675284677Sdim SDLoc(CurDAG->getEntryNode()), 676284677Sdim AMDGPU::ZERO, MVT::i32); 677284677Sdim Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 678284677Sdim MVT::i32); 679284677Sdim return true; 680284677Sdim } 681284677Sdim 682284677Sdim // Default case, no offset 683284677Sdim Base = Addr; 684284677Sdim Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 685284677Sdim return true; 686284677Sdim} 687284677Sdim 688284677Sdimbool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 689284677Sdim SDValue &Offset) { 690284677Sdim ConstantSDNode *C; 691284677Sdim SDLoc DL(Addr); 692284677Sdim 693284677Sdim if ((C = dyn_cast<ConstantSDNode>(Addr))) { 694284677Sdim Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 695284677Sdim Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 696284677Sdim } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 697284677Sdim (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 698284677Sdim Base = 
Addr.getOperand(0); 699284677Sdim Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 700284677Sdim } else { 701284677Sdim Base = Addr; 702284677Sdim Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 703284677Sdim } 704284677Sdim 705284677Sdim return true; 706284677Sdim} 707284677Sdim 708284677SdimSDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { 709284677Sdim SDLoc DL(N); 710284677Sdim SDValue LHS = N->getOperand(0); 711284677Sdim SDValue RHS = N->getOperand(1); 712284677Sdim 713284677Sdim bool IsAdd = (N->getOpcode() == ISD::ADD); 714284677Sdim 715284677Sdim SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 716284677Sdim SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 717284677Sdim 718284677Sdim SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 719284677Sdim DL, MVT::i32, LHS, Sub0); 720284677Sdim SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 721284677Sdim DL, MVT::i32, LHS, Sub1); 722284677Sdim 723284677Sdim SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 724284677Sdim DL, MVT::i32, RHS, Sub0); 725284677Sdim SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 726284677Sdim DL, MVT::i32, RHS, Sub1); 727284677Sdim 728284677Sdim SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); 729284677Sdim SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; 730284677Sdim 731284677Sdim 732284677Sdim unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; 733284677Sdim unsigned CarryOpc = IsAdd ? 
AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; 734284677Sdim 735284677Sdim SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs); 736284677Sdim SDValue Carry(AddLo, 1); 737284677Sdim SDNode *AddHi 738284677Sdim = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32, 739284677Sdim SDValue(Hi0, 0), SDValue(Hi1, 0), Carry); 740284677Sdim 741284677Sdim SDValue Args[5] = { 742284677Sdim CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 743284677Sdim SDValue(AddLo,0), 744284677Sdim Sub0, 745284677Sdim SDValue(AddHi,0), 746284677Sdim Sub1, 747284677Sdim }; 748284677Sdim return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); 749284677Sdim} 750284677Sdim 751284677Sdim// We need to handle this here because tablegen doesn't support matching 752284677Sdim// instructions with multiple outputs. 753284677SdimSDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { 754284677Sdim SDLoc SL(N); 755284677Sdim EVT VT = N->getValueType(0); 756284677Sdim 757284677Sdim assert(VT == MVT::f32 || VT == MVT::f64); 758284677Sdim 759284677Sdim unsigned Opc 760284677Sdim = (VT == MVT::f64) ? 
AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; 761284677Sdim 762296417Sdim // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, 763296417Sdim // omod 764284677Sdim SDValue Ops[8]; 765284677Sdim 766284677Sdim SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); 767284677Sdim SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); 768284677Sdim SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); 769284677Sdim return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); 770284677Sdim} 771284677Sdim 772284677Sdimbool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, 773284677Sdim unsigned OffsetBits) const { 774284677Sdim if ((OffsetBits == 16 && !isUInt<16>(Offset)) || 775284677Sdim (OffsetBits == 8 && !isUInt<8>(Offset))) 776284677Sdim return false; 777284677Sdim 778286684Sdim if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || 779286684Sdim Subtarget->unsafeDSOffsetFoldingEnabled()) 780284677Sdim return true; 781284677Sdim 782284677Sdim // On Southern Islands instruction with a negative base value and an offset 783284677Sdim // don't seem to work. 
784284677Sdim return CurDAG->SignBitIsZero(Base); 785284677Sdim} 786284677Sdim 787284677Sdimbool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, 788284677Sdim SDValue &Offset) const { 789284677Sdim if (CurDAG->isBaseWithConstantOffset(Addr)) { 790284677Sdim SDValue N0 = Addr.getOperand(0); 791284677Sdim SDValue N1 = Addr.getOperand(1); 792284677Sdim ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 793284677Sdim if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { 794284677Sdim // (add n0, c0) 795284677Sdim Base = N0; 796284677Sdim Offset = N1; 797284677Sdim return true; 798284677Sdim } 799296417Sdim } else if (Addr.getOpcode() == ISD::SUB) { 800296417Sdim // sub C, x -> add (sub 0, x), C 801296417Sdim if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 802296417Sdim int64_t ByteOffset = C->getSExtValue(); 803296417Sdim if (isUInt<16>(ByteOffset)) { 804296417Sdim SDLoc DL(Addr); 805296417Sdim SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 806284677Sdim 807296417Sdim // XXX - This is kind of hacky. Create a dummy sub node so we can check 808296417Sdim // the known bits in isDSOffsetLegal. We need to emit the selected node 809296417Sdim // here, so this is thrown away. 810296417Sdim SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 811296417Sdim Zero, Addr.getOperand(1)); 812284677Sdim 813296417Sdim if (isDSOffsetLegal(Sub, ByteOffset, 16)) { 814296417Sdim MachineSDNode *MachineSub 815296417Sdim = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 816296417Sdim Zero, Addr.getOperand(1)); 817296417Sdim 818296417Sdim Base = SDValue(MachineSub, 0); 819296417Sdim Offset = Addr.getOperand(0); 820296417Sdim return true; 821296417Sdim } 822296417Sdim } 823296417Sdim } 824296417Sdim } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 825296417Sdim // If we have a constant address, prefer to put the constant into the 826296417Sdim // offset. 
This can save moves to load the constant address since multiple 827296417Sdim // operations can share the zero base address register, and enables merging 828296417Sdim // into read2 / write2 instructions. 829296417Sdim 830296417Sdim SDLoc DL(Addr); 831296417Sdim 832284677Sdim if (isUInt<16>(CAddr->getZExtValue())) { 833284677Sdim SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 834284677Sdim MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 835284677Sdim DL, MVT::i32, Zero); 836284677Sdim Base = SDValue(MovZero, 0); 837284677Sdim Offset = Addr; 838284677Sdim return true; 839284677Sdim } 840284677Sdim } 841284677Sdim 842284677Sdim // default case 843284677Sdim Base = Addr; 844296417Sdim Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); 845284677Sdim return true; 846284677Sdim} 847284677Sdim 848296417Sdim// TODO: If offset is too big, put low 16-bit into offset. 849284677Sdimbool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, 850284677Sdim SDValue &Offset0, 851284677Sdim SDValue &Offset1) const { 852284677Sdim SDLoc DL(Addr); 853284677Sdim 854284677Sdim if (CurDAG->isBaseWithConstantOffset(Addr)) { 855284677Sdim SDValue N0 = Addr.getOperand(0); 856284677Sdim SDValue N1 = Addr.getOperand(1); 857284677Sdim ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 858284677Sdim unsigned DWordOffset0 = C1->getZExtValue() / 4; 859284677Sdim unsigned DWordOffset1 = DWordOffset0 + 1; 860284677Sdim // (add n0, c0) 861284677Sdim if (isDSOffsetLegal(N0, DWordOffset1, 8)) { 862284677Sdim Base = N0; 863284677Sdim Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 864284677Sdim Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 865284677Sdim return true; 866284677Sdim } 867296417Sdim } else if (Addr.getOpcode() == ISD::SUB) { 868296417Sdim // sub C, x -> add (sub 0, x), C 869296417Sdim if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 870296417Sdim unsigned 
DWordOffset0 = C->getZExtValue() / 4; 871296417Sdim unsigned DWordOffset1 = DWordOffset0 + 1; 872284677Sdim 873296417Sdim if (isUInt<8>(DWordOffset0)) { 874296417Sdim SDLoc DL(Addr); 875296417Sdim SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 876296417Sdim 877296417Sdim // XXX - This is kind of hacky. Create a dummy sub node so we can check 878296417Sdim // the known bits in isDSOffsetLegal. We need to emit the selected node 879296417Sdim // here, so this is thrown away. 880296417Sdim SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 881296417Sdim Zero, Addr.getOperand(1)); 882296417Sdim 883296417Sdim if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { 884296417Sdim MachineSDNode *MachineSub 885296417Sdim = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 886296417Sdim Zero, Addr.getOperand(1)); 887296417Sdim 888296417Sdim Base = SDValue(MachineSub, 0); 889296417Sdim Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 890296417Sdim Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 891296417Sdim return true; 892296417Sdim } 893296417Sdim } 894296417Sdim } 895296417Sdim } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 896284677Sdim unsigned DWordOffset0 = CAddr->getZExtValue() / 4; 897284677Sdim unsigned DWordOffset1 = DWordOffset0 + 1; 898284677Sdim assert(4 * DWordOffset0 == CAddr->getZExtValue()); 899284677Sdim 900284677Sdim if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) { 901284677Sdim SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 902284677Sdim MachineSDNode *MovZero 903284677Sdim = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 904284677Sdim DL, MVT::i32, Zero); 905284677Sdim Base = SDValue(MovZero, 0); 906284677Sdim Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 907284677Sdim Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 908284677Sdim return true; 909284677Sdim } 910284677Sdim } 911284677Sdim 912284677Sdim // default case 
913284677Sdim Base = Addr; 914284677Sdim Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8); 915284677Sdim Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8); 916284677Sdim return true; 917284677Sdim} 918284677Sdim 919284677Sdimstatic bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { 920284677Sdim return isUInt<12>(Imm->getZExtValue()); 921284677Sdim} 922284677Sdim 923296417Sdimbool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, 924284677Sdim SDValue &VAddr, SDValue &SOffset, 925284677Sdim SDValue &Offset, SDValue &Offen, 926284677Sdim SDValue &Idxen, SDValue &Addr64, 927284677Sdim SDValue &GLC, SDValue &SLC, 928284677Sdim SDValue &TFE) const { 929296417Sdim // Subtarget prefers to use flat instruction 930296417Sdim if (Subtarget->useFlatForGlobal()) 931296417Sdim return false; 932296417Sdim 933284677Sdim SDLoc DL(Addr); 934284677Sdim 935284677Sdim GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 936284677Sdim SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 937284677Sdim TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); 938284677Sdim 939284677Sdim Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); 940284677Sdim Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); 941284677Sdim Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1); 942284677Sdim SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 943284677Sdim 944284677Sdim if (CurDAG->isBaseWithConstantOffset(Addr)) { 945284677Sdim SDValue N0 = Addr.getOperand(0); 946284677Sdim SDValue N1 = Addr.getOperand(1); 947284677Sdim ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 948284677Sdim 949284677Sdim if (N0.getOpcode() == ISD::ADD) { 950284677Sdim // (add (add N2, N3), C1) -> addr64 951284677Sdim SDValue N2 = N0.getOperand(0); 952284677Sdim SDValue N3 = N0.getOperand(1); 953284677Sdim Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 954284677Sdim Ptr = N2; 955284677Sdim VAddr = N3; 956284677Sdim } else { 957284677Sdim 958284677Sdim // (add N0, C1) -> offset 959284677Sdim VAddr = 
CurDAG->getTargetConstant(0, DL, MVT::i32); 960284677Sdim Ptr = N0; 961284677Sdim } 962284677Sdim 963284677Sdim if (isLegalMUBUFImmOffset(C1)) { 964284677Sdim Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 965296417Sdim return true; 966284677Sdim } else if (isUInt<32>(C1->getZExtValue())) { 967284677Sdim // Illegal offset, store it in soffset. 968284677Sdim Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 969284677Sdim SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 970284677Sdim CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), 971284677Sdim 0); 972296417Sdim return true; 973284677Sdim } 974284677Sdim } 975284677Sdim 976284677Sdim if (Addr.getOpcode() == ISD::ADD) { 977284677Sdim // (add N0, N1) -> addr64 978284677Sdim SDValue N0 = Addr.getOperand(0); 979284677Sdim SDValue N1 = Addr.getOperand(1); 980284677Sdim Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 981284677Sdim Ptr = N0; 982284677Sdim VAddr = N1; 983284677Sdim Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 984296417Sdim return true; 985284677Sdim } 986284677Sdim 987284677Sdim // default case -> offset 988284677Sdim VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 989284677Sdim Ptr = Addr; 990284677Sdim Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 991284677Sdim 992296417Sdim return true; 993284677Sdim} 994284677Sdim 995284677Sdimbool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 996284677Sdim SDValue &VAddr, SDValue &SOffset, 997284677Sdim SDValue &Offset, SDValue &GLC, 998284677Sdim SDValue &SLC, SDValue &TFE) const { 999284677Sdim SDValue Ptr, Offen, Idxen, Addr64; 1000284677Sdim 1001287521Sdim // addr64 bit was removed for volcanic islands. 
1002287521Sdim if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 1003287521Sdim return false; 1004287521Sdim 1005296417Sdim if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1006296417Sdim GLC, SLC, TFE)) 1007296417Sdim return false; 1008284677Sdim 1009284677Sdim ConstantSDNode *C = cast<ConstantSDNode>(Addr64); 1010284677Sdim if (C->getSExtValue()) { 1011284677Sdim SDLoc DL(Addr); 1012284677Sdim 1013284677Sdim const SITargetLowering& Lowering = 1014284677Sdim *static_cast<const SITargetLowering*>(getTargetLowering()); 1015284677Sdim 1016284677Sdim SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); 1017284677Sdim return true; 1018284677Sdim } 1019284677Sdim 1020284677Sdim return false; 1021284677Sdim} 1022284677Sdim 1023284677Sdimbool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1024284677Sdim SDValue &VAddr, SDValue &SOffset, 1025296417Sdim SDValue &Offset, 1026296417Sdim SDValue &SLC) const { 1027284677Sdim SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); 1028284677Sdim SDValue GLC, TFE; 1029284677Sdim 1030284677Sdim return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); 1031284677Sdim} 1032284677Sdim 1033284677Sdimbool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, 1034284677Sdim SDValue &VAddr, SDValue &SOffset, 1035284677Sdim SDValue &ImmOffset) const { 1036284677Sdim 1037284677Sdim SDLoc DL(Addr); 1038284677Sdim MachineFunction &MF = CurDAG->getMachineFunction(); 1039296417Sdim const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 1040284677Sdim 1041296417Sdim Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); 1042296417Sdim SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); 1043284677Sdim 1044284677Sdim // (add n0, c1) 1045284677Sdim if (CurDAG->isBaseWithConstantOffset(Addr)) { 1046287521Sdim SDValue N0 = Addr.getOperand(0); 1047284677Sdim SDValue N1 = Addr.getOperand(1); 
1048287521Sdim // Offsets in vaddr must be positive. 1049287521Sdim if (CurDAG->SignBitIsZero(N0)) { 1050287521Sdim ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1051287521Sdim if (isLegalMUBUFImmOffset(C1)) { 1052287521Sdim VAddr = N0; 1053287521Sdim ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 1054287521Sdim return true; 1055287521Sdim } 1056284677Sdim } 1057284677Sdim } 1058284677Sdim 1059284677Sdim // (node) 1060284677Sdim VAddr = Addr; 1061284677Sdim ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1062284677Sdim return true; 1063284677Sdim} 1064284677Sdim 1065284677Sdimbool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1066284677Sdim SDValue &SOffset, SDValue &Offset, 1067284677Sdim SDValue &GLC, SDValue &SLC, 1068284677Sdim SDValue &TFE) const { 1069284677Sdim SDValue Ptr, VAddr, Offen, Idxen, Addr64; 1070284677Sdim const SIInstrInfo *TII = 1071284677Sdim static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); 1072284677Sdim 1073296417Sdim if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1074296417Sdim GLC, SLC, TFE)) 1075296417Sdim return false; 1076284677Sdim 1077284677Sdim if (!cast<ConstantSDNode>(Offen)->getSExtValue() && 1078284677Sdim !cast<ConstantSDNode>(Idxen)->getSExtValue() && 1079284677Sdim !cast<ConstantSDNode>(Addr64)->getSExtValue()) { 1080284677Sdim uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | 1081284677Sdim APInt::getAllOnesValue(32).getZExtValue(); // Size 1082284677Sdim SDLoc DL(Addr); 1083284677Sdim 1084284677Sdim const SITargetLowering& Lowering = 1085284677Sdim *static_cast<const SITargetLowering*>(getTargetLowering()); 1086284677Sdim 1087284677Sdim SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); 1088284677Sdim return true; 1089284677Sdim } 1090284677Sdim return false; 1091284677Sdim} 1092284677Sdim 1093284677Sdimbool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1094284677Sdim SDValue &Soffset, SDValue 
&Offset, 1095284677Sdim SDValue &GLC) const { 1096284677Sdim SDValue SLC, TFE; 1097284677Sdim 1098284677Sdim return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1099284677Sdim} 1100284677Sdim 1101296417Sdim/// 1102296417Sdim/// \param EncodedOffset This is the immediate value that will be encoded 1103296417Sdim/// directly into the instruction. On SI/CI the \p EncodedOffset 1104296417Sdim/// will be in units of dwords and on VI+ it will be units of bytes. 1105296417Sdimstatic bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST, 1106296417Sdim int64_t EncodedOffset) { 1107296417Sdim return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1108296417Sdim isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); 1109296417Sdim} 1110296417Sdim 1111296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, 1112296417Sdim SDValue &Offset, bool &Imm) const { 1113296417Sdim 1114296417Sdim // FIXME: Handle non-constant offsets. 1115296417Sdim ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); 1116296417Sdim if (!C) 1117296417Sdim return false; 1118296417Sdim 1119296417Sdim SDLoc SL(ByteOffsetNode); 1120296417Sdim AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); 1121296417Sdim int64_t ByteOffset = C->getSExtValue(); 1122296417Sdim int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1123296417Sdim ByteOffset >> 2 : ByteOffset; 1124296417Sdim 1125296417Sdim if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) { 1126296417Sdim Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1127296417Sdim Imm = true; 1128296417Sdim return true; 1129296417Sdim } 1130296417Sdim 1131296417Sdim if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) 1132296417Sdim return false; 1133296417Sdim 1134296417Sdim if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { 1135296417Sdim // 32-bit Immediates are supported on Sea Islands. 
1136296417Sdim Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1137296417Sdim } else { 1138296417Sdim SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); 1139296417Sdim Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, 1140296417Sdim C32Bit), 0); 1141296417Sdim } 1142296417Sdim Imm = false; 1143296417Sdim return true; 1144296417Sdim} 1145296417Sdim 1146296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, 1147296417Sdim SDValue &Offset, bool &Imm) const { 1148296417Sdim 1149296417Sdim SDLoc SL(Addr); 1150296417Sdim if (CurDAG->isBaseWithConstantOffset(Addr)) { 1151296417Sdim SDValue N0 = Addr.getOperand(0); 1152296417Sdim SDValue N1 = Addr.getOperand(1); 1153296417Sdim 1154296417Sdim if (SelectSMRDOffset(N1, Offset, Imm)) { 1155296417Sdim SBase = N0; 1156296417Sdim return true; 1157296417Sdim } 1158296417Sdim } 1159296417Sdim SBase = Addr; 1160296417Sdim Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); 1161296417Sdim Imm = true; 1162296417Sdim return true; 1163296417Sdim} 1164296417Sdim 1165296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, 1166296417Sdim SDValue &Offset) const { 1167296417Sdim bool Imm; 1168296417Sdim return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; 1169296417Sdim} 1170296417Sdim 1171296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, 1172296417Sdim SDValue &Offset) const { 1173296417Sdim 1174296417Sdim if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1175296417Sdim return false; 1176296417Sdim 1177296417Sdim bool Imm; 1178296417Sdim if (!SelectSMRD(Addr, SBase, Offset, Imm)) 1179296417Sdim return false; 1180296417Sdim 1181296417Sdim return !Imm && isa<ConstantSDNode>(Offset); 1182296417Sdim} 1183296417Sdim 1184296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, 1185296417Sdim SDValue &Offset) const { 1186296417Sdim bool Imm; 1187296417Sdim return SelectSMRD(Addr, 
SBase, Offset, Imm) && !Imm && 1188296417Sdim !isa<ConstantSDNode>(Offset); 1189296417Sdim} 1190296417Sdim 1191296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, 1192296417Sdim SDValue &Offset) const { 1193296417Sdim bool Imm; 1194296417Sdim return SelectSMRDOffset(Addr, Offset, Imm) && Imm; 1195296417Sdim} 1196296417Sdim 1197296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, 1198296417Sdim SDValue &Offset) const { 1199296417Sdim if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1200296417Sdim return false; 1201296417Sdim 1202296417Sdim bool Imm; 1203296417Sdim if (!SelectSMRDOffset(Addr, Offset, Imm)) 1204296417Sdim return false; 1205296417Sdim 1206296417Sdim return !Imm && isa<ConstantSDNode>(Offset); 1207296417Sdim} 1208296417Sdim 1209296417Sdimbool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, 1210296417Sdim SDValue &Offset) const { 1211296417Sdim bool Imm; 1212296417Sdim return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && 1213296417Sdim !isa<ConstantSDNode>(Offset); 1214296417Sdim} 1215296417Sdim 1216284677Sdim// FIXME: This is incorrect and only enough to be able to compile. 
1217284677SdimSDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 1218284677Sdim AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N); 1219284677Sdim SDLoc DL(N); 1220284677Sdim 1221296417Sdim const MachineFunction &MF = CurDAG->getMachineFunction(); 1222296417Sdim DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(), 1223296417Sdim "addrspacecast not implemented"); 1224296417Sdim CurDAG->getContext()->diagnose(NotImplemented); 1225296417Sdim 1226284677Sdim assert(Subtarget->hasFlatAddressSpace() && 1227284677Sdim "addrspacecast only supported with flat address space!"); 1228284677Sdim 1229284677Sdim assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS || 1230284677Sdim ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) && 1231284677Sdim "Can only cast to / from flat address space!"); 1232284677Sdim 1233284677Sdim // The flat instructions read the address as the index of the VGPR holding the 1234284677Sdim // address, so casting should just be reinterpreting the base VGPR, so just 1235284677Sdim // insert trunc / bitcast / zext. 
1236284677Sdim 1237284677Sdim SDValue Src = ASC->getOperand(0); 1238284677Sdim EVT DestVT = ASC->getValueType(0); 1239284677Sdim EVT SrcVT = Src.getValueType(); 1240284677Sdim 1241284677Sdim unsigned SrcSize = SrcVT.getSizeInBits(); 1242284677Sdim unsigned DestSize = DestVT.getSizeInBits(); 1243284677Sdim 1244284677Sdim if (SrcSize > DestSize) { 1245284677Sdim assert(SrcSize == 64 && DestSize == 32); 1246284677Sdim return CurDAG->getMachineNode( 1247284677Sdim TargetOpcode::EXTRACT_SUBREG, 1248284677Sdim DL, 1249284677Sdim DestVT, 1250284677Sdim Src, 1251284677Sdim CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32)); 1252284677Sdim } 1253284677Sdim 1254284677Sdim if (DestSize > SrcSize) { 1255284677Sdim assert(SrcSize == 32 && DestSize == 64); 1256284677Sdim 1257284677Sdim // FIXME: This is probably wrong, we should never be defining 1258284677Sdim // a register class with both VGPRs and SGPRs 1259284677Sdim SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL, 1260284677Sdim MVT::i32); 1261284677Sdim 1262284677Sdim const SDValue Ops[] = { 1263284677Sdim RC, 1264284677Sdim Src, 1265284677Sdim CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 1266284677Sdim SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1267284677Sdim CurDAG->getConstant(0, DL, MVT::i32)), 0), 1268284677Sdim CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 1269284677Sdim }; 1270284677Sdim 1271284677Sdim return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, 1272284677Sdim DL, N->getValueType(0), Ops); 1273284677Sdim } 1274284677Sdim 1275284677Sdim assert(SrcSize == 64 && DestSize == 64); 1276284677Sdim return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode(); 1277284677Sdim} 1278284677Sdim 1279284677SdimSDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val, 1280284677Sdim uint32_t Offset, uint32_t Width) { 1281284677Sdim // Transformation function, pack the offset and width of a BFE into 1282284677Sdim // the format expected 
by the S_BFE_I32 / S_BFE_U32. In the second 1283284677Sdim // source, bits [5:0] contain the offset and bits [22:16] the width. 1284284677Sdim uint32_t PackedVal = Offset | (Width << 16); 1285284677Sdim SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); 1286284677Sdim 1287284677Sdim return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); 1288284677Sdim} 1289284677Sdim 1290284677SdimSDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { 1291284677Sdim // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) 1292284677Sdim // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) 1293284677Sdim // Predicate: 0 < b <= c < 32 1294284677Sdim 1295284677Sdim const SDValue &Shl = N->getOperand(0); 1296284677Sdim ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); 1297284677Sdim ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1298284677Sdim 1299284677Sdim if (B && C) { 1300284677Sdim uint32_t BVal = B->getZExtValue(); 1301284677Sdim uint32_t CVal = C->getZExtValue(); 1302284677Sdim 1303284677Sdim if (0 < BVal && BVal <= CVal && CVal < 32) { 1304284677Sdim bool Signed = N->getOpcode() == ISD::SRA; 1305284677Sdim unsigned Opcode = Signed ? 
AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1306284677Sdim 1307284677Sdim return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), 1308284677Sdim CVal - BVal, 32 - CVal); 1309284677Sdim } 1310284677Sdim } 1311284677Sdim return SelectCode(N); 1312284677Sdim} 1313284677Sdim 1314284677SdimSDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { 1315284677Sdim switch (N->getOpcode()) { 1316284677Sdim case ISD::AND: 1317284677Sdim if (N->getOperand(0).getOpcode() == ISD::SRL) { 1318284677Sdim // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" 1319284677Sdim // Predicate: isMask(mask) 1320284677Sdim const SDValue &Srl = N->getOperand(0); 1321284677Sdim ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); 1322284677Sdim ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1323284677Sdim 1324284677Sdim if (Shift && Mask) { 1325284677Sdim uint32_t ShiftVal = Shift->getZExtValue(); 1326284677Sdim uint32_t MaskVal = Mask->getZExtValue(); 1327284677Sdim 1328284677Sdim if (isMask_32(MaskVal)) { 1329284677Sdim uint32_t WidthVal = countPopulation(MaskVal); 1330284677Sdim 1331284677Sdim return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0), 1332284677Sdim ShiftVal, WidthVal); 1333284677Sdim } 1334284677Sdim } 1335284677Sdim } 1336284677Sdim break; 1337284677Sdim case ISD::SRL: 1338284677Sdim if (N->getOperand(0).getOpcode() == ISD::AND) { 1339284677Sdim // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" 1340284677Sdim // Predicate: isMask(mask >> b) 1341284677Sdim const SDValue &And = N->getOperand(0); 1342284677Sdim ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1343284677Sdim ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); 1344284677Sdim 1345284677Sdim if (Shift && Mask) { 1346284677Sdim uint32_t ShiftVal = Shift->getZExtValue(); 1347284677Sdim uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; 1348284677Sdim 1349284677Sdim if (isMask_32(MaskVal)) { 1350284677Sdim uint32_t WidthVal = 
          // NOTE(review): this chunk begins mid-statement; the head of the
          // enclosing bitfield-extract selector (and of this very line) lies
          // outside the visible region of the file.
          countPopulation(MaskVal);

          // Fold the (shift (and x, mask)) pattern into a single unsigned
          // 32-bit bitfield extract with the computed offset/width operands.
          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      // A shift of a shift can also be matched as a bitfield extract.
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      // (sra (shl x, a), b) -> signed bitfield extract.
      return SelectS_BFEFromShifts(N);
    break;
  }

  // No BFE pattern matched; fall back to the TableGen-generated matcher.
  return SelectCode(N);
}

/// Select a VOP3 source operand together with its source-modifier bits.
///
/// Peels an outer ISD::FNEG and/or ISD::FABS off \p In, recording them as the
/// NEG / ABS source modifiers, and returns the stripped value in \p Src and
/// the modifier mask (as an i32 target constant) in \p SrcMods.
///
/// Note the order of the checks: FNEG is tested before FABS, so for
/// fneg(fabs(x)) both wrappers are stripped, but for fabs(fneg(x)) only the
/// outer FABS is stripped and the inner FNEG is left in place.
///
/// \returns true unconditionally (every operand is selectable this way).
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {

  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

/// Like SelectVOP3Mods, but only succeeds when no source modifiers are
/// present, i.e. \p SrcMods selects to the constant 0.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

/// Select a VOP3 operand with source modifiers plus the clamp and output
/// modifier (omod) operands. Clamp and omod are currently always emitted as
/// zero (see FIXME below).
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

/// Like SelectVOP3Mods0, but only succeeds when source modifiers, clamp and
/// omod all select to zero.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
         cast<ConstantSDNode>(Clamp)->isNullValue() &&
         cast<ConstantSDNode>(Omod)->isNullValue();
}

/// Select a VOP3 operand with source modifiers and an omod operand (no clamp
/// operand). Omod is currently always emitted as zero (see FIXME below).
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

/// Select a VOP3 operand with source modifiers, emitting both the clamp and
/// omod operands as the constant zero.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

/// Pre-selection DAG rewrite: turn every non-extending i64 load into a v2i32
/// load followed by a bitcast back to i64, and every non-truncating i64 store
/// into a bitcast to v2i32 feeding the (updated) store. This keeps the
/// TableGen patterns simple, since they then only need to match v2i32 memory
/// operations.
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  bool Modified = false;

  // XXX - Other targets seem to be able to do this without a worklist.
  // Collect the nodes first so the replacements below don't mutate the
  // node list while we are still walking it.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      // Only plain (non-extending) i64 loads are rewritten.
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patterns, we replace all i64 loads with
      // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
      // during DAG legalization; however, some places in the DAG legalizer
      // (e.g. ExpandUnalignedLoad) assume i64 loads stay i64 when the type
      // is legal, so doing this promotion that early can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      // Only plain (non-truncating) i64 stores are rewritten.
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    // Same chain, pointer and memory operand -- only the value type changes.
    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    // Rewire the chain (result 1) and the loaded value (result 0).
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    // Keep the store node itself; only swap the stored value for its v2i32
    // bitcast. Chain, pointer and offset operands are unchanged.
    const SDValue StoreOps[] = {
      ST->getChain(),
      NewValue,
      ST->getBasePtr(),
      ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}

/// Post-selection peephole: repeatedly run
/// AMDGPUTargetLowering::PostISelFolding over every selected MachineSDNode,
/// replacing nodes it folds, until a full pass makes no further changes.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        // Folding produced a different node; redirect all users to it and
        // rescan, since the replacement may enable further folds.
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    // Drop nodes orphaned by the replacements before the next iteration.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}