//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v4f32, Expand);

  setOperationAction(ISD::ADD, MVT::v4i32, Expand);
  setOperationAction(ISD::AND, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  setOperationAction(ISD::MUL, MVT::v2i32, Expand);
  setOperationAction(ISD::MUL, MVT::v4i32, Expand);
  setOperationAction(ISD::OR, MVT::v4i32, Expand);
  setOperationAction(ISD::OR, MVT::v2i32, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::SHL, MVT::v4i32, Expand);
  setOperationAction(ISD::SHL, MVT::v2i32, Expand);
  setOperationAction(ISD::SRL, MVT::v4i32, Expand);
  setOperationAction(ISD::SRL, MVT::v2i32, Expand);
  setOperationAction(ISD::SRA, MVT::v4i32, Expand);
  setOperationAction(ISD::SRA, MVT::v2i32, Expand);
  setOperationAction(ISD::SUB, MVT::v4i32, Expand);
  setOperationAction(ISD::SUB, MVT::v2i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
  setOperationAction(ISD::UREM, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::XOR, MVT::v4i32, Expand);
  setOperationAction(ISD::XOR, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
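  // Note: expanding f32 FSUB below typically turns (a - b) into an FADD of a
  // negated operand; the resulting FNEG is then folded into a MOV source
  // modifier via the FNEG_R600 pseudo handled in EmitInstrWithCustomInserter().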

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);

  setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO_INT)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
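    // Re-emit the export with the resolved CF instruction id and the
    // end-of-program bit. The early 'return BB' above is the only path that
    // keeps the original pseudo; every other path erases it at the bottom of
    // this function.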
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));

      return SDValue(interp, slot % 2);
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM does not seem to replace the chain value inside
    // CustomWidenLowerNode, so do it explicitly here.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      Op.getDebugLoc(),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {

  MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());

  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
  assert(FIN);

  unsigned FrameIndex = FIN->getIndex();
  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
}

// rotl(x, s) is emitted as BITALIGN(x, x, 32 - s), i.e. a rotate right of x
// by (32 - s) bits, which is the same rotation.
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type.
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1, 0, cc_any
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0, f32, f32, cc_any
  // select_cc i32, 0, i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False.
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                     Cond, Zero,
                                     True, False,
                                     DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
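  // i.e. first materialize the comparison result as a hardware boolean:
  //   Cond = (select_cc LHS, RHS, HWTrue, HWFalse, CC)
  // and then select on that boolean being different from HWFalse:
  //   result = (select_cc Cond, HWFalse, True, False, setne)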
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(ISD::SELECT_CC,
                     Op.getDebugLoc(),
                     Op.getValueType(),
                     Op.getOperand(0),
                     DAG.getConstant(0, MVT::i32),
                     Op.getOperand(1),
                     Op.getOperand(2),
                     DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}

void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert pointer from byte address to dword address.
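    // The SRL by 2 divides the byte address by four; wrapping the result in a
    // DWORDADDR node records that the address is already in dwords, so the
    // conversion is not applied a second time (see the opcode check above).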
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncated and indexed stores not supported yet");
    } else {
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in store");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}

// Returns 512 + (kc_bank << 12) for constant-buffer address spaces, or -1 if
// the address space is not a constant buffer.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  DebugLoc DL = Op.getDebugLoc();
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want the Const position encoded with the following formula:
        //   (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is the Ptr value computed by LLVM using an alignment
        // of 16. Thus we add ((512 + (kc_bank << 12)) * 4 + chan * 4) here
        // and then divide by 4 at the ISel step.
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
    } else {
      // A non-constant pointer can't be folded; keep it as a v4i32
      // CONST_ADDRESS load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
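    // Scalarize the private-memory load: emit one REGISTER_LOAD per element
    // at the channel/pointer computed by getStackAddress(), pad any remaining
    // lanes with undef, and rebuild a 4-wide vector.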
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
      DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
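    // ArgSizeInBits drives both the offset increment (ArgBytes) and, for
    // arguments narrower than the ABI type, the memory VT of the extending
    // parameter load built below.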
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(UndefValue::get(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
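  // (The SET*_DX10 variants produce an integer all-ones/zero result instead
  //  of 1.0f/0.0f, which is exactly the -1/0 select_cc built below.)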
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, MVT::i32), // True
                       DAG.getConstant(0, MVT::i32),  // False
                       SelectCC.getOperand(4)); // CC

    break;
  }
  // An extract_vector_elt of a build_vector generated by custom lowering
  // also needs to be combined here with a custom rule.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
  }

  case ISD::SELECT_CC: {
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    //   selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    //   selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      return DAG.getSelectCC(N->getDebugLoc(),
                             LHS.getOperand(0),
                             LHS.getOperand(1),
                             LHS.getOperand(2),
                             LHS.getOperand(3),
                             LHSCC);
    }
    }
  }
  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;
    SDValue NewBldVec[4] = {
      DAG.getUNDEF(MVT::f32),
      DAG.getUNDEF(MVT::f32),
      DAG.getUNDEF(MVT::f32),
      DAG.getUNDEF(MVT::f32)
    };
    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7)  // SWZ_W
    };
    for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
      if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
        if (C->isZero()) {
          NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
        } else if (C->isExactlyValue(1.0)) {
          NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_1
        } else {
          NewBldVec[i] = Arg.getOperand(i);
        }
      } else {
        NewBldVec[i] = Arg.getOperand(i);
      }
    }
    DebugLoc DL = N->getDebugLoc();
    NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
  }
  }
  return SDValue();
}