// R600ISelLowering.cpp — FreeBSD svn annotated dump at revision 344779.
1284677Sdim//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// 2284677Sdim// 3284677Sdim// The LLVM Compiler Infrastructure 4284677Sdim// 5284677Sdim// This file is distributed under the University of Illinois Open Source 6284677Sdim// License. See LICENSE.TXT for details. 7284677Sdim// 8284677Sdim//===----------------------------------------------------------------------===// 9284677Sdim// 10284677Sdim/// \file 11341825Sdim/// Custom DAG lowering for R600 12284677Sdim// 13284677Sdim//===----------------------------------------------------------------------===// 14284677Sdim 15284677Sdim#include "R600ISelLowering.h" 16284677Sdim#include "AMDGPUFrameLowering.h" 17284677Sdim#include "AMDGPUSubtarget.h" 18284677Sdim#include "R600Defines.h" 19314564Sdim#include "R600FrameLowering.h" 20284677Sdim#include "R600InstrInfo.h" 21284677Sdim#include "R600MachineFunctionInfo.h" 22341825Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 23314564Sdim#include "Utils/AMDGPUBaseInfo.h" 24314564Sdim#include "llvm/ADT/APFloat.h" 25314564Sdim#include "llvm/ADT/APInt.h" 26314564Sdim#include "llvm/ADT/ArrayRef.h" 27314564Sdim#include "llvm/ADT/DenseMap.h" 28314564Sdim#include "llvm/ADT/SmallVector.h" 29284677Sdim#include "llvm/CodeGen/CallingConvLower.h" 30314564Sdim#include "llvm/CodeGen/DAGCombine.h" 31314564Sdim#include "llvm/CodeGen/ISDOpcodes.h" 32314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 33314564Sdim#include "llvm/CodeGen/MachineFunction.h" 34314564Sdim#include "llvm/CodeGen/MachineInstr.h" 35284677Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 36314564Sdim#include "llvm/CodeGen/MachineMemOperand.h" 37284677Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 38284677Sdim#include "llvm/CodeGen/SelectionDAG.h" 39314564Sdim#include "llvm/IR/Constants.h" 40314564Sdim#include "llvm/IR/DerivedTypes.h" 41314564Sdim#include "llvm/Support/Casting.h" 42314564Sdim#include "llvm/Support/Compiler.h" 43314564Sdim#include "llvm/Support/ErrorHandling.h" 
44341825Sdim#include "llvm/Support/MachineValueType.h" 45314564Sdim#include <cassert> 46314564Sdim#include <cstdint> 47314564Sdim#include <iterator> 48314564Sdim#include <utility> 49314564Sdim#include <vector> 50284677Sdim 51284677Sdimusing namespace llvm; 52284677Sdim 53341825Sdim#include "R600GenCallingConv.inc" 54341825Sdim 55309124SdimR600TargetLowering::R600TargetLowering(const TargetMachine &TM, 56309124Sdim const R600Subtarget &STI) 57341825Sdim : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) { 58341825Sdim addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass); 59341825Sdim addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass); 60341825Sdim addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass); 61341825Sdim addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass); 62341825Sdim addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass); 63341825Sdim addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass); 64284677Sdim 65341825Sdim computeRegisterProperties(Subtarget->getRegisterInfo()); 66284677Sdim 67309124Sdim // Legalize loads and stores to the private address space. 68309124Sdim setOperationAction(ISD::LOAD, MVT::i32, Custom); 69309124Sdim setOperationAction(ISD::LOAD, MVT::v2i32, Custom); 70309124Sdim setOperationAction(ISD::LOAD, MVT::v4i32, Custom); 71309124Sdim 72309124Sdim // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address 73309124Sdim // spaces, so it is custom lowered to handle those where it isn't. 
74309124Sdim for (MVT VT : MVT::integer_valuetypes()) { 75309124Sdim setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 76309124Sdim setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom); 77309124Sdim setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom); 78309124Sdim 79309124Sdim setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 80309124Sdim setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom); 81309124Sdim setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom); 82309124Sdim 83309124Sdim setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); 84309124Sdim setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom); 85309124Sdim setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom); 86309124Sdim } 87309124Sdim 88309124Sdim // Workaround for LegalizeDAG asserting on expansion of i1 vector loads. 89309124Sdim setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand); 90309124Sdim setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand); 91309124Sdim setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand); 92309124Sdim 93309124Sdim setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand); 94309124Sdim setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand); 95309124Sdim setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand); 96309124Sdim 97309124Sdim setOperationAction(ISD::STORE, MVT::i8, Custom); 98309124Sdim setOperationAction(ISD::STORE, MVT::i32, Custom); 99309124Sdim setOperationAction(ISD::STORE, MVT::v2i32, Custom); 100309124Sdim setOperationAction(ISD::STORE, MVT::v4i32, Custom); 101309124Sdim 102309124Sdim setTruncStoreAction(MVT::i32, MVT::i8, Custom); 103309124Sdim setTruncStoreAction(MVT::i32, MVT::i16, Custom); 104314564Sdim // We need to include these since trunc STORES to PRIVATE need 105314564Sdim // special handling to accommodate RMW 106314564Sdim setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); 107314564Sdim setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom); 108314564Sdim setTruncStoreAction(MVT::v8i32, 
MVT::v8i16, Custom); 109314564Sdim setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom); 110314564Sdim setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom); 111314564Sdim setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); 112314564Sdim setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); 113314564Sdim setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom); 114314564Sdim setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom); 115314564Sdim setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom); 116309124Sdim 117309124Sdim // Workaround for LegalizeDAG asserting on expansion of i1 vector stores. 118309124Sdim setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand); 119309124Sdim setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand); 120309124Sdim 121284677Sdim // Set condition code actions 122284677Sdim setCondCodeAction(ISD::SETO, MVT::f32, Expand); 123284677Sdim setCondCodeAction(ISD::SETUO, MVT::f32, Expand); 124284677Sdim setCondCodeAction(ISD::SETLT, MVT::f32, Expand); 125284677Sdim setCondCodeAction(ISD::SETLE, MVT::f32, Expand); 126284677Sdim setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); 127284677Sdim setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); 128284677Sdim setCondCodeAction(ISD::SETONE, MVT::f32, Expand); 129284677Sdim setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); 130284677Sdim setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); 131284677Sdim setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 132284677Sdim setCondCodeAction(ISD::SETULT, MVT::f32, Expand); 133284677Sdim setCondCodeAction(ISD::SETULE, MVT::f32, Expand); 134284677Sdim 135284677Sdim setCondCodeAction(ISD::SETLE, MVT::i32, Expand); 136284677Sdim setCondCodeAction(ISD::SETLT, MVT::i32, Expand); 137284677Sdim setCondCodeAction(ISD::SETULE, MVT::i32, Expand); 138284677Sdim setCondCodeAction(ISD::SETULT, MVT::i32, Expand); 139284677Sdim 140284677Sdim setOperationAction(ISD::FCOS, MVT::f32, Custom); 141284677Sdim setOperationAction(ISD::FSIN, MVT::f32, Custom); 142284677Sdim 143284677Sdim 
setOperationAction(ISD::SETCC, MVT::v4i32, Expand); 144284677Sdim setOperationAction(ISD::SETCC, MVT::v2i32, Expand); 145284677Sdim 146284677Sdim setOperationAction(ISD::BR_CC, MVT::i32, Expand); 147284677Sdim setOperationAction(ISD::BR_CC, MVT::f32, Expand); 148284677Sdim setOperationAction(ISD::BRCOND, MVT::Other, Custom); 149284677Sdim 150284677Sdim setOperationAction(ISD::FSUB, MVT::f32, Expand); 151284677Sdim 152341825Sdim setOperationAction(ISD::FCEIL, MVT::f64, Custom); 153341825Sdim setOperationAction(ISD::FTRUNC, MVT::f64, Custom); 154341825Sdim setOperationAction(ISD::FRINT, MVT::f64, Custom); 155341825Sdim setOperationAction(ISD::FFLOOR, MVT::f64, Custom); 156341825Sdim 157284677Sdim setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 158284677Sdim setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 159284677Sdim 160284677Sdim setOperationAction(ISD::SETCC, MVT::i32, Expand); 161284677Sdim setOperationAction(ISD::SETCC, MVT::f32, Expand); 162284677Sdim setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); 163309124Sdim setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom); 164284677Sdim setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 165284677Sdim setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 166284677Sdim 167284677Sdim setOperationAction(ISD::SELECT, MVT::i32, Expand); 168284677Sdim setOperationAction(ISD::SELECT, MVT::f32, Expand); 169284677Sdim setOperationAction(ISD::SELECT, MVT::v2i32, Expand); 170284677Sdim setOperationAction(ISD::SELECT, MVT::v4i32, Expand); 171284677Sdim 172284677Sdim // ADD, SUB overflow. 173284677Sdim // TODO: turn these into Legal? 
174284677Sdim if (Subtarget->hasCARRY()) 175284677Sdim setOperationAction(ISD::UADDO, MVT::i32, Custom); 176284677Sdim 177284677Sdim if (Subtarget->hasBORROW()) 178284677Sdim setOperationAction(ISD::USUBO, MVT::i32, Custom); 179284677Sdim 180284677Sdim // Expand sign extension of vectors 181284677Sdim if (!Subtarget->hasBFE()) 182284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 183284677Sdim 184284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand); 185284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand); 186284677Sdim 187284677Sdim if (!Subtarget->hasBFE()) 188284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 189284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand); 190284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand); 191284677Sdim 192284677Sdim if (!Subtarget->hasBFE()) 193284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 194284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand); 195284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand); 196284677Sdim 197284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); 198284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand); 199284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand); 200284677Sdim 201284677Sdim setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); 202284677Sdim 203284677Sdim setOperationAction(ISD::FrameIndex, MVT::i32, Custom); 204284677Sdim 205284677Sdim setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom); 206284677Sdim setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom); 207284677Sdim setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); 208284677Sdim setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 209284677Sdim 210284677Sdim setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom); 211284677Sdim 
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom); 212284677Sdim setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 213284677Sdim setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 214284677Sdim 215284677Sdim // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32 216284677Sdim // to be Legal/Custom in order to avoid library calls. 217284677Sdim setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 218284677Sdim setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 219284677Sdim setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 220284677Sdim 221327952Sdim if (!Subtarget->hasFMA()) { 222327952Sdim setOperationAction(ISD::FMA, MVT::f32, Expand); 223327952Sdim setOperationAction(ISD::FMA, MVT::f64, Expand); 224327952Sdim } 225327952Sdim 226341825Sdim // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we 227341825Sdim // need it for R600. 228341825Sdim if (!Subtarget->hasFP32Denormals()) 229341825Sdim setOperationAction(ISD::FMAD, MVT::f32, Legal); 230341825Sdim 231341825Sdim if (!Subtarget->hasBFI()) { 232341825Sdim // fcopysign can be done in a single instruction with BFI. 233341825Sdim setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 234341825Sdim setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 235341825Sdim } 236341825Sdim 237341825Sdim if (!Subtarget->hasBCNT(32)) 238341825Sdim setOperationAction(ISD::CTPOP, MVT::i32, Expand); 239341825Sdim 240341825Sdim if (!Subtarget->hasBCNT(64)) 241341825Sdim setOperationAction(ISD::CTPOP, MVT::i64, Expand); 242341825Sdim 243341825Sdim if (Subtarget->hasFFBH()) 244341825Sdim setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); 245341825Sdim 246341825Sdim if (Subtarget->hasFFBL()) 247341825Sdim setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); 248341825Sdim 249341825Sdim // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we 250341825Sdim // need it for R600. 
251341825Sdim if (Subtarget->hasBFE()) 252341825Sdim setHasExtractBitsInsn(true); 253341825Sdim 254284677Sdim setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 255284677Sdim 256284677Sdim const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; 257284677Sdim for (MVT VT : ScalarIntVTs) { 258284677Sdim setOperationAction(ISD::ADDC, VT, Expand); 259284677Sdim setOperationAction(ISD::SUBC, VT, Expand); 260284677Sdim setOperationAction(ISD::ADDE, VT, Expand); 261284677Sdim setOperationAction(ISD::SUBE, VT, Expand); 262284677Sdim } 263284677Sdim 264321369Sdim // LLVM will expand these to atomic_cmp_swap(0) 265321369Sdim // and atomic_swap, respectively. 266321369Sdim setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); 267321369Sdim setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); 268321369Sdim 269321369Sdim // We need to custom lower some of the intrinsics 270321369Sdim setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 271321369Sdim setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 272321369Sdim 273284677Sdim setSchedulingPreference(Sched::Source); 274309124Sdim 275309124Sdim setTargetDAGCombine(ISD::FP_ROUND); 276309124Sdim setTargetDAGCombine(ISD::FP_TO_SINT); 277309124Sdim setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); 278309124Sdim setTargetDAGCombine(ISD::SELECT_CC); 279309124Sdim setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); 280314564Sdim setTargetDAGCombine(ISD::LOAD); 281284677Sdim} 282284677Sdim 283296417Sdimstatic inline bool isEOP(MachineBasicBlock::iterator I) { 284314564Sdim if (std::next(I) == I->getParent()->end()) 285314564Sdim return false; 286341825Sdim return std::next(I)->getOpcode() == R600::RETURN; 287296417Sdim} 288296417Sdim 289309124SdimMachineBasicBlock * 290309124SdimR600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 291309124Sdim MachineBasicBlock *BB) const { 292314564Sdim MachineFunction *MF = BB->getParent(); 293284677Sdim MachineRegisterInfo &MRI = MF->getRegInfo(); 294309124Sdim 
MachineBasicBlock::iterator I = MI; 295341825Sdim const R600InstrInfo *TII = Subtarget->getInstrInfo(); 296284677Sdim 297309124Sdim switch (MI.getOpcode()) { 298284677Sdim default: 299284677Sdim // Replace LDS_*_RET instruction that don't have any uses with the 300284677Sdim // equivalent LDS_*_NORET instruction. 301309124Sdim if (TII->isLDSRetInstr(MI.getOpcode())) { 302341825Sdim int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst); 303284677Sdim assert(DstIdx != -1); 304284677Sdim MachineInstrBuilder NewMI; 305284677Sdim // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add 306284677Sdim // LDS_1A2D support and remove this special case. 307309124Sdim if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) || 308341825Sdim MI.getOpcode() == R600::LDS_CMPST_RET) 309284677Sdim return BB; 310284677Sdim 311284677Sdim NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), 312341825Sdim TII->get(R600::getLDSNoRetOp(MI.getOpcode()))); 313309124Sdim for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) { 314321369Sdim NewMI.add(MI.getOperand(i)); 315284677Sdim } 316284677Sdim } else { 317284677Sdim return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 318284677Sdim } 319284677Sdim break; 320284677Sdim 321341825Sdim case R600::FABS_R600: { 322309124Sdim MachineInstr *NewMI = TII->buildDefaultInstruction( 323341825Sdim *BB, I, R600::MOV, MI.getOperand(0).getReg(), 324309124Sdim MI.getOperand(1).getReg()); 325309124Sdim TII->addFlag(*NewMI, 0, MO_FLAG_ABS); 326284677Sdim break; 327284677Sdim } 328284677Sdim 329341825Sdim case R600::FNEG_R600: { 330309124Sdim MachineInstr *NewMI = TII->buildDefaultInstruction( 331341825Sdim *BB, I, R600::MOV, MI.getOperand(0).getReg(), 332309124Sdim MI.getOperand(1).getReg()); 333309124Sdim TII->addFlag(*NewMI, 0, MO_FLAG_NEG); 334284677Sdim break; 335284677Sdim } 336284677Sdim 337341825Sdim case R600::MASK_WRITE: { 338309124Sdim unsigned maskedRegister = MI.getOperand(0).getReg(); 339284677Sdim 
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); 340284677Sdim MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); 341309124Sdim TII->addFlag(*defInstr, 0, MO_FLAG_MASK); 342284677Sdim break; 343284677Sdim } 344284677Sdim 345341825Sdim case R600::MOV_IMM_F32: 346309124Sdim TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1) 347309124Sdim .getFPImm() 348309124Sdim ->getValueAPF() 349309124Sdim .bitcastToAPInt() 350309124Sdim .getZExtValue()); 351284677Sdim break; 352314564Sdim 353341825Sdim case R600::MOV_IMM_I32: 354309124Sdim TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), 355309124Sdim MI.getOperand(1).getImm()); 356284677Sdim break; 357314564Sdim 358341825Sdim case R600::MOV_IMM_GLOBAL_ADDR: { 359309124Sdim //TODO: Perhaps combine this instruction with the next if possible 360309124Sdim auto MIB = TII->buildDefaultInstruction( 361341825Sdim *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X); 362341825Sdim int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal); 363309124Sdim //TODO: Ugh this is rather ugly 364309124Sdim MIB->getOperand(Idx) = MI.getOperand(1); 365309124Sdim break; 366309124Sdim } 367314564Sdim 368341825Sdim case R600::CONST_COPY: { 369309124Sdim MachineInstr *NewMI = TII->buildDefaultInstruction( 370341825Sdim *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST); 371341825Sdim TII->setImmOperand(*NewMI, R600::OpName::src0_sel, 372309124Sdim MI.getOperand(1).getImm()); 373284677Sdim break; 374284677Sdim } 375284677Sdim 376341825Sdim case R600::RAT_WRITE_CACHELESS_32_eg: 377341825Sdim case R600::RAT_WRITE_CACHELESS_64_eg: 378341825Sdim case R600::RAT_WRITE_CACHELESS_128_eg: 379309124Sdim BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) 380321369Sdim .add(MI.getOperand(0)) 381321369Sdim .add(MI.getOperand(1)) 382309124Sdim .addImm(isEOP(I)); // Set End of program bit 383284677Sdim break; 384314564Sdim 385341825Sdim case R600::RAT_STORE_TYPED_eg: 386309124Sdim 
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) 387321369Sdim .add(MI.getOperand(0)) 388321369Sdim .add(MI.getOperand(1)) 389321369Sdim .add(MI.getOperand(2)) 390309124Sdim .addImm(isEOP(I)); // Set End of program bit 391296417Sdim break; 392284677Sdim 393341825Sdim case R600::BRANCH: 394341825Sdim BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP)) 395321369Sdim .add(MI.getOperand(0)); 396309124Sdim break; 397284677Sdim 398341825Sdim case R600::BRANCH_COND_f32: { 399284677Sdim MachineInstr *NewMI = 400341825Sdim BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), 401341825Sdim R600::PREDICATE_BIT) 402321369Sdim .add(MI.getOperand(1)) 403341825Sdim .addImm(R600::PRED_SETNE) 404309124Sdim .addImm(0); // Flags 405309124Sdim TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); 406341825Sdim BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) 407321369Sdim .add(MI.getOperand(0)) 408341825Sdim .addReg(R600::PREDICATE_BIT, RegState::Kill); 409284677Sdim break; 410284677Sdim } 411284677Sdim 412341825Sdim case R600::BRANCH_COND_i32: { 413284677Sdim MachineInstr *NewMI = 414341825Sdim BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), 415341825Sdim R600::PREDICATE_BIT) 416321369Sdim .add(MI.getOperand(1)) 417341825Sdim .addImm(R600::PRED_SETNE_INT) 418284677Sdim .addImm(0); // Flags 419309124Sdim TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); 420341825Sdim BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) 421321369Sdim .add(MI.getOperand(0)) 422341825Sdim .addReg(R600::PREDICATE_BIT, RegState::Kill); 423284677Sdim break; 424284677Sdim } 425284677Sdim 426341825Sdim case R600::EG_ExportSwz: 427341825Sdim case R600::R600_ExportSwz: { 428284677Sdim // Instruction is left unmodified if its not the last one of its type 429284677Sdim bool isLastInstructionOfItsType = true; 430309124Sdim unsigned InstExportType = MI.getOperand(1).getImm(); 431284677Sdim for (MachineBasicBlock::iterator NextExportInst = std::next(I), 432284677Sdim 
EndBlock = BB->end(); NextExportInst != EndBlock; 433284677Sdim NextExportInst = std::next(NextExportInst)) { 434341825Sdim if (NextExportInst->getOpcode() == R600::EG_ExportSwz || 435341825Sdim NextExportInst->getOpcode() == R600::R600_ExportSwz) { 436284677Sdim unsigned CurrentInstExportType = NextExportInst->getOperand(1) 437284677Sdim .getImm(); 438284677Sdim if (CurrentInstExportType == InstExportType) { 439284677Sdim isLastInstructionOfItsType = false; 440284677Sdim break; 441284677Sdim } 442284677Sdim } 443284677Sdim } 444296417Sdim bool EOP = isEOP(I); 445284677Sdim if (!EOP && !isLastInstructionOfItsType) 446284677Sdim return BB; 447341825Sdim unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40; 448309124Sdim BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) 449321369Sdim .add(MI.getOperand(0)) 450321369Sdim .add(MI.getOperand(1)) 451321369Sdim .add(MI.getOperand(2)) 452321369Sdim .add(MI.getOperand(3)) 453321369Sdim .add(MI.getOperand(4)) 454321369Sdim .add(MI.getOperand(5)) 455321369Sdim .add(MI.getOperand(6)) 456309124Sdim .addImm(CfInst) 457309124Sdim .addImm(EOP); 458284677Sdim break; 459284677Sdim } 460341825Sdim case R600::RETURN: { 461284677Sdim return BB; 462284677Sdim } 463284677Sdim } 464284677Sdim 465309124Sdim MI.eraseFromParent(); 466284677Sdim return BB; 467284677Sdim} 468284677Sdim 469284677Sdim//===----------------------------------------------------------------------===// 470284677Sdim// Custom DAG Lowering Operations 471284677Sdim//===----------------------------------------------------------------------===// 472284677Sdim 473284677SdimSDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 474284677Sdim MachineFunction &MF = DAG.getMachineFunction(); 475284677Sdim R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 476284677Sdim switch (Op.getOpcode()) { 477284677Sdim default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 478284677Sdim case 
ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 479284677Sdim case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 480284677Sdim case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); 481284677Sdim case ISD::SRA_PARTS: 482284677Sdim case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); 483284677Sdim case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY); 484284677Sdim case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW); 485284677Sdim case ISD::FCOS: 486284677Sdim case ISD::FSIN: return LowerTrig(Op, DAG); 487284677Sdim case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 488284677Sdim case ISD::STORE: return LowerSTORE(Op, DAG); 489284677Sdim case ISD::LOAD: { 490284677Sdim SDValue Result = LowerLOAD(Op, DAG); 491284677Sdim assert((!Result.getNode() || 492284677Sdim Result.getNode()->getNumValues() == 2) && 493284677Sdim "Load should return a value and a chain"); 494284677Sdim return Result; 495284677Sdim } 496284677Sdim 497284677Sdim case ISD::BRCOND: return LowerBRCOND(Op, DAG); 498284677Sdim case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); 499309124Sdim case ISD::FrameIndex: return lowerFrameIndex(Op, DAG); 500284677Sdim case ISD::INTRINSIC_VOID: { 501284677Sdim SDValue Chain = Op.getOperand(0); 502284677Sdim unsigned IntrinsicID = 503284677Sdim cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 504284677Sdim switch (IntrinsicID) { 505341825Sdim case Intrinsic::r600_store_swizzle: { 506284677Sdim SDLoc DL(Op); 507284677Sdim const SDValue Args[8] = { 508284677Sdim Chain, 509284677Sdim Op.getOperand(2), // Export Value 510284677Sdim Op.getOperand(3), // ArrayBase 511284677Sdim Op.getOperand(4), // Type 512284677Sdim DAG.getConstant(0, DL, MVT::i32), // SWZ_X 513284677Sdim DAG.getConstant(1, DL, MVT::i32), // SWZ_Y 514284677Sdim DAG.getConstant(2, DL, MVT::i32), // SWZ_Z 515284677Sdim DAG.getConstant(3, DL, MVT::i32) // SWZ_W 516284677Sdim }; 517314564Sdim return 
DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args); 518284677Sdim } 519284677Sdim 520284677Sdim // default for switch(IntrinsicID) 521284677Sdim default: break; 522284677Sdim } 523284677Sdim // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) 524284677Sdim break; 525284677Sdim } 526284677Sdim case ISD::INTRINSIC_WO_CHAIN: { 527284677Sdim unsigned IntrinsicID = 528284677Sdim cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 529284677Sdim EVT VT = Op.getValueType(); 530284677Sdim SDLoc DL(Op); 531321369Sdim switch (IntrinsicID) { 532341825Sdim case Intrinsic::r600_tex: 533341825Sdim case Intrinsic::r600_texc: { 534284677Sdim unsigned TextureOp; 535284677Sdim switch (IntrinsicID) { 536341825Sdim case Intrinsic::r600_tex: 537284677Sdim TextureOp = 0; 538284677Sdim break; 539341825Sdim case Intrinsic::r600_texc: 540284677Sdim TextureOp = 1; 541284677Sdim break; 542284677Sdim default: 543314564Sdim llvm_unreachable("unhandled texture operation"); 544284677Sdim } 545284677Sdim 546284677Sdim SDValue TexArgs[19] = { 547284677Sdim DAG.getConstant(TextureOp, DL, MVT::i32), 548284677Sdim Op.getOperand(1), 549284677Sdim DAG.getConstant(0, DL, MVT::i32), 550284677Sdim DAG.getConstant(1, DL, MVT::i32), 551284677Sdim DAG.getConstant(2, DL, MVT::i32), 552284677Sdim DAG.getConstant(3, DL, MVT::i32), 553284677Sdim Op.getOperand(2), 554284677Sdim Op.getOperand(3), 555284677Sdim Op.getOperand(4), 556284677Sdim DAG.getConstant(0, DL, MVT::i32), 557284677Sdim DAG.getConstant(1, DL, MVT::i32), 558284677Sdim DAG.getConstant(2, DL, MVT::i32), 559284677Sdim DAG.getConstant(3, DL, MVT::i32), 560284677Sdim Op.getOperand(5), 561284677Sdim Op.getOperand(6), 562284677Sdim Op.getOperand(7), 563284677Sdim Op.getOperand(8), 564284677Sdim Op.getOperand(9), 565284677Sdim Op.getOperand(10) 566284677Sdim }; 567284677Sdim return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs); 568284677Sdim } 569341825Sdim case Intrinsic::r600_dot4: { 570284677Sdim 
SDValue Args[8] = { 571284677Sdim DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), 572284677Sdim DAG.getConstant(0, DL, MVT::i32)), 573284677Sdim DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), 574284677Sdim DAG.getConstant(0, DL, MVT::i32)), 575284677Sdim DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), 576284677Sdim DAG.getConstant(1, DL, MVT::i32)), 577284677Sdim DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), 578284677Sdim DAG.getConstant(1, DL, MVT::i32)), 579284677Sdim DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), 580284677Sdim DAG.getConstant(2, DL, MVT::i32)), 581284677Sdim DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), 582284677Sdim DAG.getConstant(2, DL, MVT::i32)), 583284677Sdim DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), 584284677Sdim DAG.getConstant(3, DL, MVT::i32)), 585284677Sdim DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), 586284677Sdim DAG.getConstant(3, DL, MVT::i32)) 587284677Sdim }; 588284677Sdim return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args); 589284677Sdim } 590284677Sdim 591309124Sdim case Intrinsic::r600_implicitarg_ptr: { 592344779Sdim MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS); 593341825Sdim uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT); 594309124Sdim return DAG.getConstant(ByteOffset, DL, PtrVT); 595309124Sdim } 596284677Sdim case Intrinsic::r600_read_ngroups_x: 597284677Sdim return LowerImplicitParameter(DAG, VT, DL, 0); 598284677Sdim case Intrinsic::r600_read_ngroups_y: 599284677Sdim return LowerImplicitParameter(DAG, VT, DL, 1); 600284677Sdim case Intrinsic::r600_read_ngroups_z: 601284677Sdim return LowerImplicitParameter(DAG, VT, DL, 2); 602284677Sdim case Intrinsic::r600_read_global_size_x: 603284677Sdim return LowerImplicitParameter(DAG, VT, DL, 3); 604284677Sdim case 
Intrinsic::r600_read_global_size_y: 605284677Sdim return LowerImplicitParameter(DAG, VT, DL, 4); 606284677Sdim case Intrinsic::r600_read_global_size_z: 607284677Sdim return LowerImplicitParameter(DAG, VT, DL, 5); 608284677Sdim case Intrinsic::r600_read_local_size_x: 609284677Sdim return LowerImplicitParameter(DAG, VT, DL, 6); 610284677Sdim case Intrinsic::r600_read_local_size_y: 611284677Sdim return LowerImplicitParameter(DAG, VT, DL, 7); 612284677Sdim case Intrinsic::r600_read_local_size_z: 613284677Sdim return LowerImplicitParameter(DAG, VT, DL, 8); 614284677Sdim 615284677Sdim case Intrinsic::r600_read_tgid_x: 616341825Sdim return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 617341825Sdim R600::T1_X, VT); 618284677Sdim case Intrinsic::r600_read_tgid_y: 619341825Sdim return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 620341825Sdim R600::T1_Y, VT); 621284677Sdim case Intrinsic::r600_read_tgid_z: 622341825Sdim return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 623341825Sdim R600::T1_Z, VT); 624284677Sdim case Intrinsic::r600_read_tidig_x: 625341825Sdim return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 626341825Sdim R600::T0_X, VT); 627284677Sdim case Intrinsic::r600_read_tidig_y: 628341825Sdim return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 629341825Sdim R600::T0_Y, VT); 630284677Sdim case Intrinsic::r600_read_tidig_z: 631341825Sdim return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, 632341825Sdim R600::T0_Z, VT); 633284677Sdim 634309124Sdim case Intrinsic::r600_recipsqrt_ieee: 635309124Sdim return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); 636309124Sdim 637309124Sdim case Intrinsic::r600_recipsqrt_clamped: 638309124Sdim return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1)); 639321369Sdim default: 640321369Sdim return Op; 641284677Sdim } 642309124Sdim 643284677Sdim // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) 644284677Sdim break; 
645284677Sdim } 646284677Sdim } // end switch(Op.getOpcode()) 647284677Sdim return SDValue(); 648284677Sdim} 649284677Sdim 650284677Sdimvoid R600TargetLowering::ReplaceNodeResults(SDNode *N, 651284677Sdim SmallVectorImpl<SDValue> &Results, 652284677Sdim SelectionDAG &DAG) const { 653284677Sdim switch (N->getOpcode()) { 654284677Sdim default: 655284677Sdim AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG); 656284677Sdim return; 657284677Sdim case ISD::FP_TO_UINT: 658284677Sdim if (N->getValueType(0) == MVT::i1) { 659309124Sdim Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG)); 660284677Sdim return; 661284677Sdim } 662314564Sdim // Since we don't care about out of bounds values we can use FP_TO_SINT for 663314564Sdim // uints too. The DAGLegalizer code for uint considers some extra cases 664314564Sdim // which are not necessary here. 665314564Sdim LLVM_FALLTHROUGH; 666284677Sdim case ISD::FP_TO_SINT: { 667309124Sdim if (N->getValueType(0) == MVT::i1) { 668309124Sdim Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG)); 669309124Sdim return; 670309124Sdim } 671309124Sdim 672284677Sdim SDValue Result; 673284677Sdim if (expandFP_TO_SINT(N, Result, DAG)) 674284677Sdim Results.push_back(Result); 675284677Sdim return; 676284677Sdim } 677284677Sdim case ISD::SDIVREM: { 678284677Sdim SDValue Op = SDValue(N, 1); 679284677Sdim SDValue RES = LowerSDIVREM(Op, DAG); 680284677Sdim Results.push_back(RES); 681284677Sdim Results.push_back(RES.getValue(1)); 682284677Sdim break; 683284677Sdim } 684284677Sdim case ISD::UDIVREM: { 685284677Sdim SDValue Op = SDValue(N, 0); 686284677Sdim LowerUDIVREM64(Op, DAG, Results); 687284677Sdim break; 688284677Sdim } 689284677Sdim } 690284677Sdim} 691284677Sdim 692284677SdimSDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG, 693284677Sdim SDValue Vector) const { 694284677Sdim SDLoc DL(Vector); 695284677Sdim EVT VecVT = Vector.getValueType(); 696284677Sdim EVT EltVT = VecVT.getVectorElementType(); 697284677Sdim 
SmallVector<SDValue, 8> Args; 698284677Sdim 699314564Sdim for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) { 700286684Sdim Args.push_back(DAG.getNode( 701286684Sdim ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector, 702286684Sdim DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout())))); 703284677Sdim } 704284677Sdim 705284677Sdim return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args); 706284677Sdim} 707284677Sdim 708284677SdimSDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 709284677Sdim SelectionDAG &DAG) const { 710284677Sdim SDLoc DL(Op); 711284677Sdim SDValue Vector = Op.getOperand(0); 712284677Sdim SDValue Index = Op.getOperand(1); 713284677Sdim 714284677Sdim if (isa<ConstantSDNode>(Index) || 715284677Sdim Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) 716284677Sdim return Op; 717284677Sdim 718284677Sdim Vector = vectorToVerticalVector(DAG, Vector); 719284677Sdim return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), 720284677Sdim Vector, Index); 721284677Sdim} 722284677Sdim 723284677SdimSDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, 724284677Sdim SelectionDAG &DAG) const { 725284677Sdim SDLoc DL(Op); 726284677Sdim SDValue Vector = Op.getOperand(0); 727284677Sdim SDValue Value = Op.getOperand(1); 728284677Sdim SDValue Index = Op.getOperand(2); 729284677Sdim 730284677Sdim if (isa<ConstantSDNode>(Index) || 731284677Sdim Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) 732284677Sdim return Op; 733284677Sdim 734284677Sdim Vector = vectorToVerticalVector(DAG, Vector); 735284677Sdim SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), 736284677Sdim Vector, Value, Index); 737284677Sdim return vectorToVerticalVector(DAG, Insert); 738284677Sdim} 739284677Sdim 740309124SdimSDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, 741309124Sdim SDValue Op, 742309124Sdim SelectionDAG &DAG) const { 743309124Sdim GlobalAddressSDNode *GSD = 
/// Lower a GlobalAddress node. Only globals in the constant address space get
/// the R600-specific CONST_DATA_PTR wrapping; everything else is deferred to
/// the common AMDGPU lowering.
SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  // Wrap the target global address so it is materialized as a constant
  // data pointer.
  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

/// Lower FSIN/FCOS to the hardware SIN/COS nodes, rescaling the argument
/// into the range the hardware accepts.
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  // 0.15915494309 is an approximation of 1/(2*Pi): scale the argument from
  // radians into revolutions before taking the fractional part.
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  // Rescale the [-0.5, 0.5] revolution value back by Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

/// Lower SHL_PARTS: a 64-bit shift-left expressed on two 32-bit halves
/// (Lo, Hi) with a variable shift amount, using select_cc to distinguish
/// shifts smaller and larger than the word width.
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits of Lo that spill into Hi when Shift < Width.
  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  // Shift < Width: normal double-word shift.
  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  // Shift >= Width: Lo shifts entirely into Hi, Lo becomes zero.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
/// Lower SRA_PARTS/SRL_PARTS: a 64-bit right shift expressed on two 32-bit
/// halves (Lo, Hi) with a variable shift amount. SRA is distinguished from
/// SRL only by which opcode shifts Hi and by the big-shift Hi fill value.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits of Hi that spill into Lo when Shift < Width.
  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  // Shift < Width: normal double-word shift.
  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // Shift >= Width: Hi shifts entirely into Lo; Hi becomes the sign fill
  // (arithmetic) or zero (logical).
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

/// Lower UADDO/USUBO to the main arithmetic op (mainop) plus the carry/borrow
/// op (ovf), merging both results. The overflow bit is sign-extended from i1
/// to match the hardware's all-ones "true".
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}
873284677Sdim OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF, 874284677Sdim DAG.getValueType(MVT::i1)); 875284677Sdim 876284677Sdim SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi); 877284677Sdim 878284677Sdim return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF); 879284677Sdim} 880284677Sdim 881309124SdimSDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const { 882284677Sdim SDLoc DL(Op); 883284677Sdim return DAG.getNode( 884284677Sdim ISD::SETCC, 885284677Sdim DL, 886284677Sdim MVT::i1, 887309124Sdim Op, DAG.getConstantFP(1.0f, DL, MVT::f32), 888309124Sdim DAG.getCondCode(ISD::SETEQ)); 889284677Sdim} 890284677Sdim 891309124SdimSDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { 892309124Sdim SDLoc DL(Op); 893309124Sdim return DAG.getNode( 894309124Sdim ISD::SETCC, 895309124Sdim DL, 896309124Sdim MVT::i1, 897309124Sdim Op, DAG.getConstantFP(-1.0f, DL, MVT::f32), 898309124Sdim DAG.getCondCode(ISD::SETEQ)); 899309124Sdim} 900309124Sdim 901284677SdimSDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, 902309124Sdim const SDLoc &DL, 903284677Sdim unsigned DwordOffset) const { 904284677Sdim unsigned ByteOffset = DwordOffset * 4; 905284677Sdim PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), 906344779Sdim AMDGPUAS::PARAM_I_ADDRESS); 907284677Sdim 908284677Sdim // We shouldn't be using an offset wider than 16-bits for implicit parameters. 
/// Load an implicit kernel parameter from PARAM_I space at the given dword
/// offset. The address is a small constant offset wrapped in a null-based
/// MachinePointerInfo for that address space.
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}

/// True if Op is an integer or floating-point constant zero.
bool R600TargetLowering::isZero(SDValue Op) const {
  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
    return CstFP->isZero();
  } else {
    return false;
  }
}

/// True if Op is the hardware "true" value: 1.0 for fp, all-ones for int.
bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

/// True if Op is the hardware "false" value: 0.0 for fp, zero for int.
bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

/// Lower SELECT_CC, trying in order: a legacy fmin/fmax combine, a native
/// SET* form (hardware true/false results), a native CND* form (compare
/// against zero), and finally a two-step expansion via an intermediate
/// hardware-boolean SELECT_CC.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1,  0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1,  0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    // Operands are reversed: invert the condition (or invert-and-swap) so
    // the hardware true value ends up in the True slot.
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0,   f32, f32, cc_supported
  // select_cc i32, 0,   i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // "not equal" variants are handled by inverting the condition and
      // swapping the select arms.
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we get here it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // Shift amount: bytes-per-slot is 4 * StackWidth, i.e. 4/8/16 bytes for
  // widths 1/2/4, giving shifts of 2/3/4.
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}
/// Given a vector-element index and the stack width, compute which register
/// channel (sub-register) the element occupies and whether the register
/// pointer must be advanced by one.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    // One element per register: always channel 0, bump the pointer past
    // the first element.
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    // Two elements per register: alternate channels, advance after the
    // first pair.
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    // Four elements per register: each element gets its own channel.
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

/// Lower a sub-dword (i8/i16) store to private memory as a read-modify-write
/// of the containing dword: load the dword, mask out the target bits, OR in
/// the shifted value, and store the dword back.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}
/// Lower a STORE node, dispatching on address space and memory type:
/// vectors to LOCAL/PRIVATE are scalarized, misaligned stores are expanded,
/// truncating GLOBAL stores become MSKOR, sub-dword PRIVATE stores go through
/// the RMW path, and dword-aligned stores are tagged with DWORDADDR.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
        StoreNode->isTruncatingStore()) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  // Byte address shifted down to a dword address, used by both the MSKOR
  // and the DWORDADDR paths below.
  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (StoreNode->isTruncatingStore()) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}
AMDGPUAS::CONSTANT_BUFFER_6: 1357284677Sdim return 512 + 4096 * 6; 1358327952Sdim case AMDGPUAS::CONSTANT_BUFFER_7: 1359284677Sdim return 512 + 4096 * 7; 1360327952Sdim case AMDGPUAS::CONSTANT_BUFFER_8: 1361284677Sdim return 512 + 4096 * 8; 1362327952Sdim case AMDGPUAS::CONSTANT_BUFFER_9: 1363284677Sdim return 512 + 4096 * 9; 1364327952Sdim case AMDGPUAS::CONSTANT_BUFFER_10: 1365284677Sdim return 512 + 4096 * 10; 1366327952Sdim case AMDGPUAS::CONSTANT_BUFFER_11: 1367284677Sdim return 512 + 4096 * 11; 1368327952Sdim case AMDGPUAS::CONSTANT_BUFFER_12: 1369284677Sdim return 512 + 4096 * 12; 1370327952Sdim case AMDGPUAS::CONSTANT_BUFFER_13: 1371284677Sdim return 512 + 4096 * 13; 1372327952Sdim case AMDGPUAS::CONSTANT_BUFFER_14: 1373284677Sdim return 512 + 4096 * 14; 1374327952Sdim case AMDGPUAS::CONSTANT_BUFFER_15: 1375284677Sdim return 512 + 4096 * 15; 1376284677Sdim default: 1377284677Sdim return -1; 1378284677Sdim } 1379284677Sdim} 1380284677Sdim 1381309124SdimSDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op, 1382309124Sdim SelectionDAG &DAG) const { 1383284677Sdim SDLoc DL(Op); 1384309124Sdim LoadSDNode *Load = cast<LoadSDNode>(Op); 1385309124Sdim ISD::LoadExtType ExtType = Load->getExtensionType(); 1386309124Sdim EVT MemVT = Load->getMemoryVT(); 1387314564Sdim assert(Load->getAlignment() >= MemVT.getStoreSize()); 1388284677Sdim 1389314564Sdim SDValue BasePtr = Load->getBasePtr(); 1390314564Sdim SDValue Chain = Load->getChain(); 1391314564Sdim SDValue Offset = Load->getOffset(); 1392284677Sdim 1393314564Sdim SDValue LoadPtr = BasePtr; 1394314564Sdim if (!Offset.isUndef()) { 1395314564Sdim LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset); 1396314564Sdim } 1397284677Sdim 1398314564Sdim // Get dword location 1399314564Sdim // NOTE: this should be eliminated by the future SHR ptr, 2 1400314564Sdim SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr, 1401314564Sdim DAG.getConstant(0xfffffffc, DL, MVT::i32)); 1402314564Sdim 1403314564Sdim // 
Load dword 1404314564Sdim // TODO: can we be smarter about machine pointer info? 1405327952Sdim MachinePointerInfo PtrInfo(UndefValue::get( 1406344779Sdim Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS))); 1407327952Sdim SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo); 1408314564Sdim 1409309124Sdim // Get offset within the register. 1410309124Sdim SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, 1411314564Sdim LoadPtr, DAG.getConstant(0x3, DL, MVT::i32)); 1412309124Sdim 1413309124Sdim // Bit offset of target byte (byteIdx * 8). 1414309124Sdim SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, 1415309124Sdim DAG.getConstant(3, DL, MVT::i32)); 1416309124Sdim 1417309124Sdim // Shift to the right. 1418314564Sdim SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt); 1419309124Sdim 1420309124Sdim // Eliminate the upper bits by setting them to ... 1421309124Sdim EVT MemEltVT = MemVT.getScalarType(); 1422309124Sdim 1423314564Sdim if (ExtType == ISD::SEXTLOAD) { // ... ones. 1424309124Sdim SDValue MemEltVTNode = DAG.getValueType(MemEltVT); 1425314564Sdim Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode); 1426314564Sdim } else { // ... or zeros. 
1427314564Sdim Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT); 1428284677Sdim } 1429284677Sdim 1430309124Sdim SDValue Ops[] = { 1431314564Sdim Ret, 1432314564Sdim Read.getValue(1) // This should be our output chain 1433309124Sdim }; 1434309124Sdim 1435309124Sdim return DAG.getMergeValues(Ops, DL); 1436309124Sdim} 1437309124Sdim 1438309124SdimSDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1439309124Sdim LoadSDNode *LoadNode = cast<LoadSDNode>(Op); 1440309124Sdim unsigned AS = LoadNode->getAddressSpace(); 1441309124Sdim EVT MemVT = LoadNode->getMemoryVT(); 1442309124Sdim ISD::LoadExtType ExtType = LoadNode->getExtensionType(); 1443309124Sdim 1444344779Sdim if (AS == AMDGPUAS::PRIVATE_ADDRESS && 1445309124Sdim ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) { 1446309124Sdim return lowerPrivateExtLoad(Op, DAG); 1447309124Sdim } 1448309124Sdim 1449309124Sdim SDLoc DL(Op); 1450309124Sdim EVT VT = Op.getValueType(); 1451309124Sdim SDValue Chain = LoadNode->getChain(); 1452309124Sdim SDValue Ptr = LoadNode->getBasePtr(); 1453309124Sdim 1454344779Sdim if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || 1455344779Sdim LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && 1456314564Sdim VT.isVector()) { 1457314564Sdim return scalarizeVectorLoad(LoadNode, DAG); 1458284677Sdim } 1459284677Sdim 1460341825Sdim // This is still used for explicit load from addrspace(8) 1461327952Sdim int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); 1462284677Sdim if (ConstantBlock > -1 && 1463284677Sdim ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) || 1464284677Sdim (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) { 1465284677Sdim SDValue Result; 1466341825Sdim if (isa<Constant>(LoadNode->getMemOperand()->getValue()) || 1467284677Sdim isa<ConstantSDNode>(Ptr)) { 1468341825Sdim return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG); 1469284677Sdim } else { 1470341825Sdim //TODO: Does this even work? 
1471284677Sdim // non-constant ptr can't be folded, keeps it as a v4f32 load 1472284677Sdim Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, 1473284677Sdim DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, 1474284677Sdim DAG.getConstant(4, DL, MVT::i32)), 1475284677Sdim DAG.getConstant(LoadNode->getAddressSpace() - 1476344779Sdim AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32) 1477284677Sdim ); 1478284677Sdim } 1479284677Sdim 1480284677Sdim if (!VT.isVector()) { 1481284677Sdim Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, 1482284677Sdim DAG.getConstant(0, DL, MVT::i32)); 1483284677Sdim } 1484284677Sdim 1485284677Sdim SDValue MergedValues[2] = { 1486284677Sdim Result, 1487284677Sdim Chain 1488284677Sdim }; 1489284677Sdim return DAG.getMergeValues(MergedValues, DL); 1490284677Sdim } 1491284677Sdim 1492284677Sdim // For most operations returning SDValue() will result in the node being 1493284677Sdim // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we 1494284677Sdim // need to manually expand loads that may be legal in some address spaces and 1495284677Sdim // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for 1496284677Sdim // compute shaders, since the data is sign extended when it is uploaded to the 1497284677Sdim // buffer. However SEXT loads from other address spaces are not supported, so 1498284677Sdim // we need to expand them here. 
1499284677Sdim if (LoadNode->getExtensionType() == ISD::SEXTLOAD) { 1500284677Sdim EVT MemVT = LoadNode->getMemoryVT(); 1501284677Sdim assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8)); 1502309124Sdim SDValue NewLoad = DAG.getExtLoad( 1503309124Sdim ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT, 1504309124Sdim LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags()); 1505284677Sdim SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad, 1506284677Sdim DAG.getValueType(MemVT)); 1507284677Sdim 1508284677Sdim SDValue MergedValues[2] = { Res, Chain }; 1509284677Sdim return DAG.getMergeValues(MergedValues, DL); 1510284677Sdim } 1511284677Sdim 1512344779Sdim if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { 1513284677Sdim return SDValue(); 1514284677Sdim } 1515284677Sdim 1516314564Sdim // DWORDADDR ISD marks already shifted address 1517314564Sdim if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) { 1518314564Sdim assert(VT == MVT::i32); 1519314564Sdim Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32)); 1520314564Sdim Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr); 1521314564Sdim return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand()); 1522284677Sdim } 1523314564Sdim return SDValue(); 1524284677Sdim} 1525284677Sdim 1526284677SdimSDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 1527284677Sdim SDValue Chain = Op.getOperand(0); 1528284677Sdim SDValue Cond = Op.getOperand(1); 1529284677Sdim SDValue Jump = Op.getOperand(2); 1530284677Sdim 1531284677Sdim return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(), 1532284677Sdim Chain, Jump, Cond); 1533284677Sdim} 1534284677Sdim 1535309124SdimSDValue R600TargetLowering::lowerFrameIndex(SDValue Op, 1536309124Sdim SelectionDAG &DAG) const { 1537309124Sdim MachineFunction &MF = DAG.getMachineFunction(); 1538341825Sdim const R600FrameLowering *TFL = 
Subtarget->getFrameLowering(); 1539309124Sdim 1540309124Sdim FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op); 1541309124Sdim 1542309124Sdim unsigned FrameIndex = FIN->getIndex(); 1543309124Sdim unsigned IgnoredFrameReg; 1544309124Sdim unsigned Offset = 1545309124Sdim TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); 1546309124Sdim return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op), 1547309124Sdim Op.getValueType()); 1548309124Sdim} 1549309124Sdim 1550341825SdimCCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC, 1551341825Sdim bool IsVarArg) const { 1552341825Sdim switch (CC) { 1553341825Sdim case CallingConv::AMDGPU_KERNEL: 1554341825Sdim case CallingConv::SPIR_KERNEL: 1555341825Sdim case CallingConv::C: 1556341825Sdim case CallingConv::Fast: 1557341825Sdim case CallingConv::Cold: 1558341825Sdim llvm_unreachable("kernels should not be handled here"); 1559341825Sdim case CallingConv::AMDGPU_VS: 1560341825Sdim case CallingConv::AMDGPU_GS: 1561341825Sdim case CallingConv::AMDGPU_PS: 1562341825Sdim case CallingConv::AMDGPU_CS: 1563341825Sdim case CallingConv::AMDGPU_HS: 1564341825Sdim case CallingConv::AMDGPU_ES: 1565341825Sdim case CallingConv::AMDGPU_LS: 1566341825Sdim return CC_R600; 1567341825Sdim default: 1568341825Sdim report_fatal_error("Unsupported calling convention."); 1569341825Sdim } 1570341825Sdim} 1571341825Sdim 1572284677Sdim/// XXX Only kernel functions are supported, so we can assume for now that 1573284677Sdim/// every function is a kernel function, but in the future we should use 1574284677Sdim/// separate calling conventions for kernel and non-kernel functions. 
1575284677SdimSDValue R600TargetLowering::LowerFormalArguments( 1576309124Sdim SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 1577309124Sdim const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 1578309124Sdim SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 1579284677Sdim SmallVector<CCValAssign, 16> ArgLocs; 1580284677Sdim CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 1581284677Sdim *DAG.getContext()); 1582284677Sdim MachineFunction &MF = DAG.getMachineFunction(); 1583284677Sdim SmallVector<ISD::InputArg, 8> LocalIns; 1584284677Sdim 1585314564Sdim if (AMDGPU::isShader(CallConv)) { 1586321369Sdim CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); 1587314564Sdim } else { 1588314564Sdim analyzeFormalArgumentsCompute(CCInfo, Ins); 1589314564Sdim } 1590284677Sdim 1591284677Sdim for (unsigned i = 0, e = Ins.size(); i < e; ++i) { 1592284677Sdim CCValAssign &VA = ArgLocs[i]; 1593284677Sdim const ISD::InputArg &In = Ins[i]; 1594284677Sdim EVT VT = In.VT; 1595284677Sdim EVT MemVT = VA.getLocVT(); 1596284677Sdim if (!VT.isVector() && MemVT.isVector()) { 1597284677Sdim // Get load source type if scalarized. 1598284677Sdim MemVT = MemVT.getVectorElementType(); 1599284677Sdim } 1600284677Sdim 1601309124Sdim if (AMDGPU::isShader(CallConv)) { 1602341825Sdim unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass); 1603284677Sdim SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT); 1604284677Sdim InVals.push_back(Register); 1605284677Sdim continue; 1606284677Sdim } 1607284677Sdim 1608284677Sdim PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), 1609344779Sdim AMDGPUAS::PARAM_I_ADDRESS); 1610284677Sdim 1611284677Sdim // i64 isn't a legal type, so the register type used ends up as i32, which 1612284677Sdim // isn't expected here. It attempts to create this sextload, but it ends up 1613284677Sdim // being invalid. 
Somehow this seems to work with i64 arguments, but breaks 1614284677Sdim // for <1 x i64>. 1615284677Sdim 1616284677Sdim // The first 36 bytes of the input buffer contains information about 1617284677Sdim // thread group and global sizes. 1618284677Sdim ISD::LoadExtType Ext = ISD::NON_EXTLOAD; 1619284677Sdim if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) { 1620284677Sdim // FIXME: This should really check the extload type, but the handling of 1621284677Sdim // extload vector parameters seems to be broken. 1622284677Sdim 1623284677Sdim // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; 1624284677Sdim Ext = ISD::SEXTLOAD; 1625284677Sdim } 1626284677Sdim 1627284677Sdim // Compute the offset from the value. 1628284677Sdim // XXX - I think PartOffset should give you this, but it seems to give the 1629284677Sdim // size of the register which isn't useful. 1630284677Sdim 1631284677Sdim unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset(); 1632284677Sdim unsigned PartOffset = VA.getLocMemOffset(); 1633341825Sdim unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset); 1634284677Sdim 1635284677Sdim MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); 1636309124Sdim SDValue Arg = DAG.getLoad( 1637309124Sdim ISD::UNINDEXED, Ext, VT, DL, Chain, 1638341825Sdim DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), 1639341825Sdim PtrInfo, 1640341825Sdim MemVT, Alignment, MachineMemOperand::MONonTemporal | 1641314564Sdim MachineMemOperand::MODereferenceable | 1642314564Sdim MachineMemOperand::MOInvariant); 1643284677Sdim 1644284677Sdim InVals.push_back(Arg); 1645284677Sdim } 1646284677Sdim return Chain; 1647284677Sdim} 1648284677Sdim 1649286684SdimEVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 1650286684Sdim EVT VT) const { 1651284677Sdim if (!VT.isVector()) 1652284677Sdim return MVT::i32; 1653284677Sdim return VT.changeVectorElementTypeToInteger(); 1654284677Sdim} 
1655284677Sdim 1656321369Sdimbool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, 1657321369Sdim const SelectionDAG &DAG) const { 1658321369Sdim // Local and Private addresses do not handle vectors. Limit to i32 1659344779Sdim if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) { 1660321369Sdim return (MemVT.getSizeInBits() <= 32); 1661321369Sdim } 1662321369Sdim return true; 1663321369Sdim} 1664321369Sdim 1665309124Sdimbool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 1666309124Sdim unsigned AddrSpace, 1667309124Sdim unsigned Align, 1668309124Sdim bool *IsFast) const { 1669309124Sdim if (IsFast) 1670309124Sdim *IsFast = false; 1671309124Sdim 1672309124Sdim if (!VT.isSimple() || VT == MVT::Other) 1673309124Sdim return false; 1674309124Sdim 1675309124Sdim if (VT.bitsLT(MVT::i32)) 1676309124Sdim return false; 1677309124Sdim 1678309124Sdim // TODO: This is a rough estimate. 1679309124Sdim if (IsFast) 1680309124Sdim *IsFast = true; 1681309124Sdim 1682309124Sdim return VT.bitsGT(MVT::i32) && Align % 4 == 0; 1683309124Sdim} 1684309124Sdim 1685284677Sdimstatic SDValue CompactSwizzlableVector( 1686284677Sdim SelectionDAG &DAG, SDValue VectorEntry, 1687284677Sdim DenseMap<unsigned, unsigned> &RemapSwizzle) { 1688284677Sdim assert(RemapSwizzle.empty()); 1689284677Sdim 1690344779Sdim SDLoc DL(VectorEntry); 1691344779Sdim EVT EltTy = VectorEntry.getValueType().getVectorElementType(); 1692344779Sdim 1693344779Sdim SDValue NewBldVec[4]; 1694344779Sdim for (unsigned i = 0; i < 4; i++) 1695344779Sdim NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry, 1696344779Sdim DAG.getIntPtrConstant(i, DL)); 1697344779Sdim 1698284677Sdim for (unsigned i = 0; i < 4; i++) { 1699309124Sdim if (NewBldVec[i].isUndef()) 1700284677Sdim // We mask write here to teach later passes that the ith element of this 1701284677Sdim // vector is undef. 
Thus we can use it to reduce 128 bits reg usage, 1702284677Sdim // break false dependencies and additionnaly make assembly easier to read. 1703284677Sdim RemapSwizzle[i] = 7; // SEL_MASK_WRITE 1704284677Sdim if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) { 1705284677Sdim if (C->isZero()) { 1706284677Sdim RemapSwizzle[i] = 4; // SEL_0 1707284677Sdim NewBldVec[i] = DAG.getUNDEF(MVT::f32); 1708284677Sdim } else if (C->isExactlyValue(1.0)) { 1709284677Sdim RemapSwizzle[i] = 5; // SEL_1 1710284677Sdim NewBldVec[i] = DAG.getUNDEF(MVT::f32); 1711284677Sdim } 1712284677Sdim } 1713284677Sdim 1714309124Sdim if (NewBldVec[i].isUndef()) 1715284677Sdim continue; 1716284677Sdim for (unsigned j = 0; j < i; j++) { 1717284677Sdim if (NewBldVec[i] == NewBldVec[j]) { 1718284677Sdim NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType()); 1719284677Sdim RemapSwizzle[i] = j; 1720284677Sdim break; 1721284677Sdim } 1722284677Sdim } 1723284677Sdim } 1724284677Sdim 1725309124Sdim return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry), 1726309124Sdim NewBldVec); 1727284677Sdim} 1728284677Sdim 1729284677Sdimstatic SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, 1730284677Sdim DenseMap<unsigned, unsigned> &RemapSwizzle) { 1731284677Sdim assert(RemapSwizzle.empty()); 1732344779Sdim 1733344779Sdim SDLoc DL(VectorEntry); 1734344779Sdim EVT EltTy = VectorEntry.getValueType().getVectorElementType(); 1735344779Sdim 1736344779Sdim SDValue NewBldVec[4]; 1737344779Sdim bool isUnmovable[4] = {false, false, false, false}; 1738344779Sdim for (unsigned i = 0; i < 4; i++) 1739344779Sdim NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry, 1740344779Sdim DAG.getIntPtrConstant(i, DL)); 1741344779Sdim 1742284677Sdim for (unsigned i = 0; i < 4; i++) { 1743284677Sdim RemapSwizzle[i] = i; 1744284677Sdim if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 1745284677Sdim unsigned Idx = 
dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) 1746284677Sdim ->getZExtValue(); 1747284677Sdim if (i == Idx) 1748284677Sdim isUnmovable[Idx] = true; 1749284677Sdim } 1750284677Sdim } 1751284677Sdim 1752284677Sdim for (unsigned i = 0; i < 4; i++) { 1753284677Sdim if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 1754284677Sdim unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) 1755284677Sdim ->getZExtValue(); 1756284677Sdim if (isUnmovable[Idx]) 1757284677Sdim continue; 1758284677Sdim // Swap i and Idx 1759284677Sdim std::swap(NewBldVec[Idx], NewBldVec[i]); 1760284677Sdim std::swap(RemapSwizzle[i], RemapSwizzle[Idx]); 1761284677Sdim break; 1762284677Sdim } 1763284677Sdim } 1764284677Sdim 1765309124Sdim return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry), 1766309124Sdim NewBldVec); 1767284677Sdim} 1768284677Sdim 1769309124SdimSDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4], 1770309124Sdim SelectionDAG &DAG, 1771309124Sdim const SDLoc &DL) const { 1772284677Sdim // Old -> New swizzle values 1773284677Sdim DenseMap<unsigned, unsigned> SwizzleRemap; 1774284677Sdim 1775284677Sdim BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); 1776284677Sdim for (unsigned i = 0; i < 4; i++) { 1777284677Sdim unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); 1778284677Sdim if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) 1779284677Sdim Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); 1780284677Sdim } 1781284677Sdim 1782284677Sdim SwizzleRemap.clear(); 1783284677Sdim BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); 1784284677Sdim for (unsigned i = 0; i < 4; i++) { 1785284677Sdim unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); 1786284677Sdim if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) 1787284677Sdim Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); 1788284677Sdim } 1789284677Sdim 1790284677Sdim return BuildVector; 
1791284677Sdim} 1792284677Sdim 1793341825SdimSDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block, 1794341825Sdim SelectionDAG &DAG) const { 1795341825Sdim SDLoc DL(LoadNode); 1796341825Sdim EVT VT = LoadNode->getValueType(0); 1797341825Sdim SDValue Chain = LoadNode->getChain(); 1798341825Sdim SDValue Ptr = LoadNode->getBasePtr(); 1799341825Sdim assert (isa<ConstantSDNode>(Ptr)); 1800341825Sdim 1801341825Sdim //TODO: Support smaller loads 1802341825Sdim if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode)) 1803341825Sdim return SDValue(); 1804341825Sdim 1805341825Sdim if (LoadNode->getAlignment() < 4) 1806341825Sdim return SDValue(); 1807341825Sdim 1808341825Sdim int ConstantBlock = ConstantAddressBlock(Block); 1809341825Sdim 1810341825Sdim SDValue Slots[4]; 1811341825Sdim for (unsigned i = 0; i < 4; i++) { 1812341825Sdim // We want Const position encoded with the following formula : 1813341825Sdim // (((512 + (kc_bank << 12) + const_index) << 2) + chan) 1814341825Sdim // const_index is Ptr computed by llvm using an alignment of 16. 
1815341825Sdim // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and 1816341825Sdim // then div by 4 at the ISel step 1817341825Sdim SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 1818341825Sdim DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32)); 1819341825Sdim Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); 1820341825Sdim } 1821341825Sdim EVT NewVT = MVT::v4i32; 1822341825Sdim unsigned NumElements = 4; 1823341825Sdim if (VT.isVector()) { 1824341825Sdim NewVT = VT; 1825341825Sdim NumElements = VT.getVectorNumElements(); 1826341825Sdim } 1827341825Sdim SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements)); 1828341825Sdim if (!VT.isVector()) { 1829341825Sdim Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, 1830341825Sdim DAG.getConstant(0, DL, MVT::i32)); 1831341825Sdim } 1832341825Sdim SDValue MergedValues[2] = { 1833341825Sdim Result, 1834341825Sdim Chain 1835341825Sdim }; 1836341825Sdim return DAG.getMergeValues(MergedValues, DL); 1837341825Sdim} 1838341825Sdim 1839284677Sdim//===----------------------------------------------------------------------===// 1840284677Sdim// Custom DAG Optimizations 1841284677Sdim//===----------------------------------------------------------------------===// 1842284677Sdim 1843284677SdimSDValue R600TargetLowering::PerformDAGCombine(SDNode *N, 1844284677Sdim DAGCombinerInfo &DCI) const { 1845284677Sdim SelectionDAG &DAG = DCI.DAG; 1846314564Sdim SDLoc DL(N); 1847284677Sdim 1848284677Sdim switch (N->getOpcode()) { 1849284677Sdim // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) 1850284677Sdim case ISD::FP_ROUND: { 1851284677Sdim SDValue Arg = N->getOperand(0); 1852284677Sdim if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { 1853314564Sdim return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0), 1854284677Sdim Arg.getOperand(0)); 1855284677Sdim } 1856284677Sdim break; 1857284677Sdim } 
1858284677Sdim 1859284677Sdim // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) -> 1860284677Sdim // (i32 select_cc f32, f32, -1, 0 cc) 1861284677Sdim // 1862284677Sdim // Mesa's GLSL frontend generates the above pattern a lot and we can lower 1863284677Sdim // this to one of the SET*_DX10 instructions. 1864284677Sdim case ISD::FP_TO_SINT: { 1865284677Sdim SDValue FNeg = N->getOperand(0); 1866284677Sdim if (FNeg.getOpcode() != ISD::FNEG) { 1867284677Sdim return SDValue(); 1868284677Sdim } 1869284677Sdim SDValue SelectCC = FNeg.getOperand(0); 1870284677Sdim if (SelectCC.getOpcode() != ISD::SELECT_CC || 1871284677Sdim SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS 1872284677Sdim SelectCC.getOperand(2).getValueType() != MVT::f32 || // True 1873284677Sdim !isHWTrueValue(SelectCC.getOperand(2)) || 1874284677Sdim !isHWFalseValue(SelectCC.getOperand(3))) { 1875284677Sdim return SDValue(); 1876284677Sdim } 1877284677Sdim 1878314564Sdim return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0), 1879284677Sdim SelectCC.getOperand(0), // LHS 1880284677Sdim SelectCC.getOperand(1), // RHS 1881314564Sdim DAG.getConstant(-1, DL, MVT::i32), // True 1882314564Sdim DAG.getConstant(0, DL, MVT::i32), // False 1883284677Sdim SelectCC.getOperand(4)); // CC 1884284677Sdim 1885284677Sdim break; 1886284677Sdim } 1887284677Sdim 1888284677Sdim // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx 1889284677Sdim // => build_vector elt0, ... , NewEltIdx, ... , eltN 1890284677Sdim case ISD::INSERT_VECTOR_ELT: { 1891284677Sdim SDValue InVec = N->getOperand(0); 1892284677Sdim SDValue InVal = N->getOperand(1); 1893284677Sdim SDValue EltNo = N->getOperand(2); 1894284677Sdim 1895284677Sdim // If the inserted element is an UNDEF, just use the input vector. 
1896309124Sdim if (InVal.isUndef()) 1897284677Sdim return InVec; 1898284677Sdim 1899284677Sdim EVT VT = InVec.getValueType(); 1900284677Sdim 1901284677Sdim // If we can't generate a legal BUILD_VECTOR, exit 1902284677Sdim if (!isOperationLegal(ISD::BUILD_VECTOR, VT)) 1903284677Sdim return SDValue(); 1904284677Sdim 1905284677Sdim // Check that we know which element is being inserted 1906284677Sdim if (!isa<ConstantSDNode>(EltNo)) 1907284677Sdim return SDValue(); 1908284677Sdim unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 1909284677Sdim 1910284677Sdim // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially 1911284677Sdim // be converted to a BUILD_VECTOR). Fill in the Ops vector with the 1912284677Sdim // vector elements. 1913284677Sdim SmallVector<SDValue, 8> Ops; 1914284677Sdim if (InVec.getOpcode() == ISD::BUILD_VECTOR) { 1915284677Sdim Ops.append(InVec.getNode()->op_begin(), 1916284677Sdim InVec.getNode()->op_end()); 1917309124Sdim } else if (InVec.isUndef()) { 1918284677Sdim unsigned NElts = VT.getVectorNumElements(); 1919284677Sdim Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); 1920284677Sdim } else { 1921284677Sdim return SDValue(); 1922284677Sdim } 1923284677Sdim 1924284677Sdim // Insert the element 1925284677Sdim if (Elt < Ops.size()) { 1926284677Sdim // All the operands of BUILD_VECTOR must have the same type; 1927284677Sdim // we enforce that here. 1928284677Sdim EVT OpVT = Ops[0].getValueType(); 1929284677Sdim if (InVal.getValueType() != OpVT) 1930284677Sdim InVal = OpVT.bitsGT(InVal.getValueType()) ? 
1931314564Sdim DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) : 1932314564Sdim DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal); 1933284677Sdim Ops[Elt] = InVal; 1934284677Sdim } 1935284677Sdim 1936284677Sdim // Return the new vector 1937314564Sdim return DAG.getBuildVector(VT, DL, Ops); 1938284677Sdim } 1939284677Sdim 1940284677Sdim // Extract_vec (Build_vector) generated by custom lowering 1941284677Sdim // also needs to be customly combined 1942284677Sdim case ISD::EXTRACT_VECTOR_ELT: { 1943284677Sdim SDValue Arg = N->getOperand(0); 1944284677Sdim if (Arg.getOpcode() == ISD::BUILD_VECTOR) { 1945284677Sdim if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 1946284677Sdim unsigned Element = Const->getZExtValue(); 1947284677Sdim return Arg->getOperand(Element); 1948284677Sdim } 1949284677Sdim } 1950284677Sdim if (Arg.getOpcode() == ISD::BITCAST && 1951314564Sdim Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && 1952314564Sdim (Arg.getOperand(0).getValueType().getVectorNumElements() == 1953314564Sdim Arg.getValueType().getVectorNumElements())) { 1954284677Sdim if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 1955284677Sdim unsigned Element = Const->getZExtValue(); 1956314564Sdim return DAG.getNode(ISD::BITCAST, DL, N->getVTList(), 1957314564Sdim Arg->getOperand(0).getOperand(Element)); 1958284677Sdim } 1959284677Sdim } 1960296417Sdim break; 1961284677Sdim } 1962284677Sdim 1963284677Sdim case ISD::SELECT_CC: { 1964284677Sdim // Try common optimizations 1965309124Sdim if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI)) 1966284677Sdim return Ret; 1967284677Sdim 1968284677Sdim // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq -> 1969284677Sdim // selectcc x, y, a, b, inv(cc) 1970284677Sdim // 1971284677Sdim // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne -> 1972284677Sdim // selectcc x, y, a, b, cc 1973284677Sdim SDValue LHS = N->getOperand(0); 1974284677Sdim if (LHS.getOpcode() != 
ISD::SELECT_CC) { 1975284677Sdim return SDValue(); 1976284677Sdim } 1977284677Sdim 1978284677Sdim SDValue RHS = N->getOperand(1); 1979284677Sdim SDValue True = N->getOperand(2); 1980284677Sdim SDValue False = N->getOperand(3); 1981284677Sdim ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 1982284677Sdim 1983284677Sdim if (LHS.getOperand(2).getNode() != True.getNode() || 1984284677Sdim LHS.getOperand(3).getNode() != False.getNode() || 1985284677Sdim RHS.getNode() != False.getNode()) { 1986284677Sdim return SDValue(); 1987284677Sdim } 1988284677Sdim 1989284677Sdim switch (NCC) { 1990284677Sdim default: return SDValue(); 1991284677Sdim case ISD::SETNE: return LHS; 1992284677Sdim case ISD::SETEQ: { 1993284677Sdim ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get(); 1994284677Sdim LHSCC = ISD::getSetCCInverse(LHSCC, 1995284677Sdim LHS.getOperand(0).getValueType().isInteger()); 1996284677Sdim if (DCI.isBeforeLegalizeOps() || 1997284677Sdim isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType())) 1998314564Sdim return DAG.getSelectCC(DL, 1999284677Sdim LHS.getOperand(0), 2000284677Sdim LHS.getOperand(1), 2001284677Sdim LHS.getOperand(2), 2002284677Sdim LHS.getOperand(3), 2003284677Sdim LHSCC); 2004284677Sdim break; 2005284677Sdim } 2006284677Sdim } 2007284677Sdim return SDValue(); 2008284677Sdim } 2009284677Sdim 2010314564Sdim case AMDGPUISD::R600_EXPORT: { 2011284677Sdim SDValue Arg = N->getOperand(1); 2012284677Sdim if (Arg.getOpcode() != ISD::BUILD_VECTOR) 2013284677Sdim break; 2014284677Sdim 2015284677Sdim SDValue NewArgs[8] = { 2016284677Sdim N->getOperand(0), // Chain 2017284677Sdim SDValue(), 2018284677Sdim N->getOperand(2), // ArrayBase 2019284677Sdim N->getOperand(3), // Type 2020284677Sdim N->getOperand(4), // SWZ_X 2021284677Sdim N->getOperand(5), // SWZ_Y 2022284677Sdim N->getOperand(6), // SWZ_Z 2023284677Sdim N->getOperand(7) // SWZ_W 2024284677Sdim }; 2025284677Sdim NewArgs[1] = OptimizeSwizzle(N->getOperand(1), 
&NewArgs[4], DAG, DL); 2026314564Sdim return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs); 2027284677Sdim } 2028284677Sdim case AMDGPUISD::TEXTURE_FETCH: { 2029284677Sdim SDValue Arg = N->getOperand(1); 2030284677Sdim if (Arg.getOpcode() != ISD::BUILD_VECTOR) 2031284677Sdim break; 2032284677Sdim 2033284677Sdim SDValue NewArgs[19] = { 2034284677Sdim N->getOperand(0), 2035284677Sdim N->getOperand(1), 2036284677Sdim N->getOperand(2), 2037284677Sdim N->getOperand(3), 2038284677Sdim N->getOperand(4), 2039284677Sdim N->getOperand(5), 2040284677Sdim N->getOperand(6), 2041284677Sdim N->getOperand(7), 2042284677Sdim N->getOperand(8), 2043284677Sdim N->getOperand(9), 2044284677Sdim N->getOperand(10), 2045284677Sdim N->getOperand(11), 2046284677Sdim N->getOperand(12), 2047284677Sdim N->getOperand(13), 2048284677Sdim N->getOperand(14), 2049284677Sdim N->getOperand(15), 2050284677Sdim N->getOperand(16), 2051284677Sdim N->getOperand(17), 2052284677Sdim N->getOperand(18), 2053284677Sdim }; 2054284677Sdim NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL); 2055284677Sdim return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs); 2056284677Sdim } 2057341825Sdim 2058341825Sdim case ISD::LOAD: { 2059341825Sdim LoadSDNode *LoadNode = cast<LoadSDNode>(N); 2060341825Sdim SDValue Ptr = LoadNode->getBasePtr(); 2061341825Sdim if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS && 2062341825Sdim isa<ConstantSDNode>(Ptr)) 2063341825Sdim return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG); 2064341825Sdim break; 2065341825Sdim } 2066341825Sdim 2067314564Sdim default: break; 2068284677Sdim } 2069284677Sdim 2070284677Sdim return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); 2071284677Sdim} 2072284677Sdim 2073309124Sdimbool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx, 2074309124Sdim SDValue &Src, SDValue &Neg, SDValue &Abs, 2075309124Sdim SDValue &Sel, SDValue &Imm, 2076309124Sdim SelectionDAG 
&DAG) const { 2077341825Sdim const R600InstrInfo *TII = Subtarget->getInstrInfo(); 2078284677Sdim if (!Src.isMachineOpcode()) 2079284677Sdim return false; 2080309124Sdim 2081284677Sdim switch (Src.getMachineOpcode()) { 2082341825Sdim case R600::FNEG_R600: 2083284677Sdim if (!Neg.getNode()) 2084284677Sdim return false; 2085284677Sdim Src = Src.getOperand(0); 2086284677Sdim Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); 2087284677Sdim return true; 2088341825Sdim case R600::FABS_R600: 2089284677Sdim if (!Abs.getNode()) 2090284677Sdim return false; 2091284677Sdim Src = Src.getOperand(0); 2092284677Sdim Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); 2093284677Sdim return true; 2094341825Sdim case R600::CONST_COPY: { 2095284677Sdim unsigned Opcode = ParentNode->getMachineOpcode(); 2096341825Sdim bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; 2097284677Sdim 2098284677Sdim if (!Sel.getNode()) 2099284677Sdim return false; 2100284677Sdim 2101284677Sdim SDValue CstOffset = Src.getOperand(0); 2102284677Sdim if (ParentNode->getValueType(0).isVector()) 2103284677Sdim return false; 2104284677Sdim 2105284677Sdim // Gather constants values 2106284677Sdim int SrcIndices[] = { 2107341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0), 2108341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1), 2109341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src2), 2110341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_X), 2111341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_Y), 2112341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_Z), 2113341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_W), 2114341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_X), 2115341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_Y), 2116341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_Z), 2117341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_W) 2118284677Sdim }; 2119284677Sdim std::vector<unsigned> Consts; 
2120284677Sdim for (int OtherSrcIdx : SrcIndices) { 2121284677Sdim int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); 2122284677Sdim if (OtherSrcIdx < 0 || OtherSelIdx < 0) 2123284677Sdim continue; 2124284677Sdim if (HasDst) { 2125284677Sdim OtherSrcIdx--; 2126284677Sdim OtherSelIdx--; 2127284677Sdim } 2128284677Sdim if (RegisterSDNode *Reg = 2129284677Sdim dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) { 2130341825Sdim if (Reg->getReg() == R600::ALU_CONST) { 2131284677Sdim ConstantSDNode *Cst 2132284677Sdim = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx)); 2133284677Sdim Consts.push_back(Cst->getZExtValue()); 2134284677Sdim } 2135284677Sdim } 2136284677Sdim } 2137284677Sdim 2138284677Sdim ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset); 2139284677Sdim Consts.push_back(Cst->getZExtValue()); 2140284677Sdim if (!TII->fitsConstReadLimitations(Consts)) { 2141284677Sdim return false; 2142284677Sdim } 2143284677Sdim 2144284677Sdim Sel = CstOffset; 2145341825Sdim Src = DAG.getRegister(R600::ALU_CONST, MVT::f32); 2146284677Sdim return true; 2147284677Sdim } 2148341825Sdim case R600::MOV_IMM_GLOBAL_ADDR: 2149309124Sdim // Check if the Imm slot is used. Taken from below. 
2150309124Sdim if (cast<ConstantSDNode>(Imm)->getZExtValue()) 2151309124Sdim return false; 2152309124Sdim Imm = Src.getOperand(0); 2153341825Sdim Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32); 2154309124Sdim return true; 2155341825Sdim case R600::MOV_IMM_I32: 2156341825Sdim case R600::MOV_IMM_F32: { 2157341825Sdim unsigned ImmReg = R600::ALU_LITERAL_X; 2158284677Sdim uint64_t ImmValue = 0; 2159284677Sdim 2160341825Sdim if (Src.getMachineOpcode() == R600::MOV_IMM_F32) { 2161284677Sdim ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0)); 2162284677Sdim float FloatValue = FPC->getValueAPF().convertToFloat(); 2163284677Sdim if (FloatValue == 0.0) { 2164341825Sdim ImmReg = R600::ZERO; 2165284677Sdim } else if (FloatValue == 0.5) { 2166341825Sdim ImmReg = R600::HALF; 2167284677Sdim } else if (FloatValue == 1.0) { 2168341825Sdim ImmReg = R600::ONE; 2169284677Sdim } else { 2170284677Sdim ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); 2171284677Sdim } 2172284677Sdim } else { 2173284677Sdim ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0)); 2174284677Sdim uint64_t Value = C->getZExtValue(); 2175284677Sdim if (Value == 0) { 2176341825Sdim ImmReg = R600::ZERO; 2177284677Sdim } else if (Value == 1) { 2178341825Sdim ImmReg = R600::ONE_INT; 2179284677Sdim } else { 2180284677Sdim ImmValue = Value; 2181284677Sdim } 2182284677Sdim } 2183284677Sdim 2184284677Sdim // Check that we aren't already using an immediate. 2185284677Sdim // XXX: It's possible for an instruction to have more than one 2186284677Sdim // immediate operand, but this is not supported yet. 
2187341825Sdim if (ImmReg == R600::ALU_LITERAL_X) { 2188284677Sdim if (!Imm.getNode()) 2189284677Sdim return false; 2190284677Sdim ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm); 2191284677Sdim assert(C); 2192284677Sdim if (C->getZExtValue()) 2193284677Sdim return false; 2194284677Sdim Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32); 2195284677Sdim } 2196284677Sdim Src = DAG.getRegister(ImmReg, MVT::i32); 2197284677Sdim return true; 2198284677Sdim } 2199284677Sdim default: 2200284677Sdim return false; 2201284677Sdim } 2202284677Sdim} 2203284677Sdim 2204341825Sdim/// Fold the instructions after selecting them 2205284677SdimSDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, 2206284677Sdim SelectionDAG &DAG) const { 2207341825Sdim const R600InstrInfo *TII = Subtarget->getInstrInfo(); 2208284677Sdim if (!Node->isMachineOpcode()) 2209284677Sdim return Node; 2210309124Sdim 2211284677Sdim unsigned Opcode = Node->getMachineOpcode(); 2212284677Sdim SDValue FakeOp; 2213284677Sdim 2214284677Sdim std::vector<SDValue> Ops(Node->op_begin(), Node->op_end()); 2215284677Sdim 2216341825Sdim if (Opcode == R600::DOT_4) { 2217284677Sdim int OperandIdx[] = { 2218341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_X), 2219341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_Y), 2220341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_Z), 2221341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_W), 2222341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_X), 2223341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_Y), 2224341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_Z), 2225341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_W) 2226284677Sdim }; 2227284677Sdim int NegIdx[] = { 2228341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X), 2229341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y), 2230341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z), 2231341825Sdim 
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W), 2232341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X), 2233341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y), 2234341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z), 2235341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W) 2236284677Sdim }; 2237284677Sdim int AbsIdx[] = { 2238341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X), 2239341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y), 2240341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z), 2241341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W), 2242341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X), 2243341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y), 2244341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z), 2245341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W) 2246284677Sdim }; 2247284677Sdim for (unsigned i = 0; i < 8; i++) { 2248284677Sdim if (OperandIdx[i] < 0) 2249284677Sdim return Node; 2250284677Sdim SDValue &Src = Ops[OperandIdx[i] - 1]; 2251284677Sdim SDValue &Neg = Ops[NegIdx[i] - 1]; 2252284677Sdim SDValue &Abs = Ops[AbsIdx[i] - 1]; 2253341825Sdim bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; 2254284677Sdim int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); 2255284677Sdim if (HasDst) 2256284677Sdim SelIdx--; 2257284677Sdim SDValue &Sel = (SelIdx > -1) ? 
Ops[SelIdx] : FakeOp; 2258284677Sdim if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG)) 2259284677Sdim return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); 2260284677Sdim } 2261341825Sdim } else if (Opcode == R600::REG_SEQUENCE) { 2262284677Sdim for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) { 2263284677Sdim SDValue &Src = Ops[i]; 2264284677Sdim if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) 2265284677Sdim return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); 2266284677Sdim } 2267284677Sdim } else { 2268284677Sdim if (!TII->hasInstrModifiers(Opcode)) 2269284677Sdim return Node; 2270284677Sdim int OperandIdx[] = { 2271341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0), 2272341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1), 2273341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src2) 2274284677Sdim }; 2275284677Sdim int NegIdx[] = { 2276341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_neg), 2277341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_neg), 2278341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src2_neg) 2279284677Sdim }; 2280284677Sdim int AbsIdx[] = { 2281341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_abs), 2282341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_abs), 2283284677Sdim -1 2284284677Sdim }; 2285284677Sdim for (unsigned i = 0; i < 3; i++) { 2286284677Sdim if (OperandIdx[i] < 0) 2287284677Sdim return Node; 2288284677Sdim SDValue &Src = Ops[OperandIdx[i] - 1]; 2289284677Sdim SDValue &Neg = Ops[NegIdx[i] - 1]; 2290284677Sdim SDValue FakeAbs; 2291284677Sdim SDValue &Abs = (AbsIdx[i] > -1) ? 
Ops[AbsIdx[i] - 1] : FakeAbs; 2292341825Sdim bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; 2293284677Sdim int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); 2294341825Sdim int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal); 2295284677Sdim if (HasDst) { 2296284677Sdim SelIdx--; 2297284677Sdim ImmIdx--; 2298284677Sdim } 2299284677Sdim SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp; 2300284677Sdim SDValue &Imm = Ops[ImmIdx]; 2301284677Sdim if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG)) 2302284677Sdim return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); 2303284677Sdim } 2304284677Sdim } 2305284677Sdim 2306284677Sdim return Node; 2307284677Sdim} 2308