1284677Sdim//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// 2284677Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6284677Sdim// 7284677Sdim//===----------------------------------------------------------------------===// 8284677Sdim// 9284677Sdim/// \file 10341825Sdim/// Custom DAG lowering for R600 11284677Sdim// 12284677Sdim//===----------------------------------------------------------------------===// 13284677Sdim 14284677Sdim#include "R600ISelLowering.h" 15284677Sdim#include "AMDGPUFrameLowering.h" 16284677Sdim#include "AMDGPUSubtarget.h" 17284677Sdim#include "R600Defines.h" 18314564Sdim#include "R600FrameLowering.h" 19284677Sdim#include "R600InstrInfo.h" 20284677Sdim#include "R600MachineFunctionInfo.h" 21341825Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 22314564Sdim#include "Utils/AMDGPUBaseInfo.h" 23314564Sdim#include "llvm/ADT/APFloat.h" 24314564Sdim#include "llvm/ADT/APInt.h" 25314564Sdim#include "llvm/ADT/ArrayRef.h" 26314564Sdim#include "llvm/ADT/DenseMap.h" 27314564Sdim#include "llvm/ADT/SmallVector.h" 28284677Sdim#include "llvm/CodeGen/CallingConvLower.h" 29314564Sdim#include "llvm/CodeGen/DAGCombine.h" 30314564Sdim#include "llvm/CodeGen/ISDOpcodes.h" 31314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 32314564Sdim#include "llvm/CodeGen/MachineFunction.h" 33314564Sdim#include "llvm/CodeGen/MachineInstr.h" 34284677Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 35314564Sdim#include "llvm/CodeGen/MachineMemOperand.h" 36284677Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 37284677Sdim#include "llvm/CodeGen/SelectionDAG.h" 38314564Sdim#include "llvm/IR/Constants.h" 39314564Sdim#include "llvm/IR/DerivedTypes.h" 40360784Sdim#include "llvm/IR/IntrinsicsR600.h" 41314564Sdim#include "llvm/Support/Casting.h" 
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#include "R600GenCallingConv.inc"

// Constructor: declares which value types live in which R600 register
// classes and marks every ISD operation as Legal / Custom / Expand / Promote
// so the DAG legalizer knows what this target supports natively.
R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

  // Hardware compares produce all-ones for "true", so booleans are 0/-1.
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  // Trig input needs range reduction first; see LowerTrig below.
  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::FCEIL, MVT::f64, Custom);
  setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
  setOperationAction(ISD::FRINT, MVT::f64, Custom);
  setOperationAction(ISD::FFLOOR, MVT::f64, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Expand == Expand ? Custom : Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  if (!Subtarget->hasFMA()) {
    setOperationAction(ISD::FMA, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Expand);
  }

  // FIXME: May need no denormals check
  setOperationAction(ISD::FMAD, MVT::f32, Legal);

  if (!Subtarget->hasBFI()) {
    // fcopysign can be done in a single instruction with BFI.
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  }

  if (!Subtarget->hasBCNT(32))
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);

  if (!Subtarget->hasBCNT(64))
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  if (Subtarget->hasFFBH())
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);

  if (Subtarget->hasFFBL())
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (Subtarget->hasBFE())
    setHasExtractBitsInsn(true);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
  setTargetDAGCombine(ISD::LOAD);
}

// Returns true when the instruction after \p I is a RETURN, i.e. \p I is the
// final real instruction of the program and should set the End-Of-Program bit.
static inline bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == R600::RETURN;
}

MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == R600::LDS_CMPST_RET)
        return BB;

      // Copy every operand except the (unused) destination onto the NORET
      // form of the instruction.
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  // FABS/FNEG become a plain MOV with the ABS/NEG source-modifier flag set.
  case R600::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case R600::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  // MASK_WRITE flags the defining instruction of the masked register rather
  // than emitting anything itself.
  case R600::MASK_WRITE: {
    Register maskedRegister = MI.getOperand(0).getReg();
    assert(Register::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case R600::MOV_IMM_F32:
    // Materialize the FP constant via its raw bit pattern.
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
        .getFPImm()
        ->getValueAPF()
        .bitcastToAPInt()
        .getZExtValue());
    break;

  case R600::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case R600::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
    //TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

  case R600::CONST_COPY: {
    // A read from the constant buffer: MOV from ALU_CONST with the selector
    // encoded as an immediate on src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case R600::RAT_WRITE_CACHELESS_32_eg:
  case R600::RAT_WRITE_CACHELESS_64_eg:
  case R600::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
    break;

  // Conditional branches lower to a PRED_X that sets PREDICATE_BIT followed
  // by a JUMP_COND that consumes (kills) it.
  case R600::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::EG_ExportSwz:
  case R600::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    // Scan forward for a later export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // CF instruction encoding differs between Evergreen (84) and R600 (40).
    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case R600::RETURN: {
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return
      LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      // Forward the export with an identity swizzle (X,Y,Z,W).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case Intrinsic::r600_tex:
    case Intrinsic::r600_texc: {
      // TextureOp 0 = plain fetch, 1 = fetch with comparison (texc).
      unsigned TextureOp;
      switch (IntrinsicID) {
      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case Intrinsic::r600_dot4: {
      // Interleave the lanes of the two v4f32 operands as
      // (a.x, b.x, a.y, b.y, ...) for the DOT4 node.
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    // Dispatch metadata is laid out as consecutive implicit parameters:
    // ngroups.{x,y,z} = 0-2, global_size.{x,y,z} = 3-5,
    // local_size.{x,y,z} = 6-8.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Workgroup ids live in T1, workitem ids in T0.
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

// Legalize the results of nodes whose result types are illegal; falls back to
// the AMDGPU implementation for anything not handled here.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    LLVM_FALLTHROUGH;
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

// Rebuilds \p Vector as a BUILD_VERTICAL_VECTOR of its extracted elements,
// the form required for dynamic (non-constant) per-element indexing.
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for
(unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

/// Custom lowering for EXTRACT_VECTOR_ELT.  Constant indices (and vectors
/// already in vertical form) need no work; a dynamic index forces the source
/// vector into vertical form first.
SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

/// Custom lowering for INSERT_VECTOR_ELT.  Mirrors LowerEXTRACT_VECTOR_ELT:
/// a dynamic index requires the vector in vertical form, and the inserted
/// result is converted back to vertical form afterwards.
SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

/// Custom lowering for global addresses.  Only globals in the constant
/// address space are handled here (wrapped in CONST_DATA_PTR); anything else
/// is deferred to the AMDGPU base class.
SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if
(GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

/// Lower FSIN/FCOS to the hardware SIN_HW/COS_HW nodes, normalizing the
/// argument into the input range the hardware generation expects.
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  // 0.15915494309 is 1/(2*pi): scale the argument into whole turns, bias by
  // 0.5 and take the fractional part.
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  // Remove the bias again before handing the value to the hardware node.
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(numbers::pif, DL, MVT::f32));
}

/// Lower SHL_PARTS: a 64-bit shift-left expressed on two 32-bit halves
/// (Lo, Hi) with a variable shift amount.
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits of Lo that overflow into the Hi half for a small (< 32) shift,
  // computed in two steps (see comment above) to survive Shift == 0.
  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  // Shift >= 32: Lo shifts entirely into Hi and Lo becomes zero.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  // Pick the small-shift or big-shift result based on Shift < Width.
  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

/// Lower SRL_PARTS/SRA_PARTS: a 64-bit right shift (logical or arithmetic,
/// chosen by the opcode) expressed on two 32-bit halves.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  // Bits of Hi that flow down into the Lo half for a small (< 32) shift,
  // computed in two steps (see comment above) to survive Shift == 0.
  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  // Shift >= 32: Hi shifts entirely into Lo; Hi becomes sign bits for SRA,
  // zero for SRL.
  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

/// Shared lowering for unsigned add/sub with overflow: \p mainop computes the
/// result, \p ovf computes the carry/borrow, and both are returned as merged
/// values (result, overflow).
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  // Sign-extend the i1 overflow bit so the flag uses the all-ones "true"
  // convention (ZeroOrNegativeOneBooleanContent).
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

/// Lower FP_TO_UINT for an i1 result: the only unsigned input that converts
/// to true is exactly 1.0, so emit a SETCC against 1.0f.
SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

/// Lower FP_TO_SINT for an i1 result: signed i1 true is -1, so emit a SETCC
/// against -1.0f.
SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

/// Emit a load of an implicit kernel parameter from the PARAM_I address
/// space, addressed by \p DwordOffset (in 32-bit words from the start of the
/// parameter area).
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                         AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  // The parameter lives at a constant byte offset; use a null pointer of the
  // PARAM_I address space for the MachinePointerInfo.
  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}

/// Return true if \p Op is a constant integer zero or FP +/-0.0.
bool R600TargetLowering::isZero(SDValue Op) const {
  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
    return CstFP->isZero();
  } else {
    return false;
  }
}

/// Return true if \p Op is the hardware "true" value: exactly 1.0 for FP,
/// all-ones for integers.
bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

/// Return true if \p Op is the hardware "false" value: 0.0 for FP, zero for
/// integers.
bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

/// Custom lowering for SELECT_CC: try to massage the operands into a form
/// matched by the hardware SET* or CND* instructions, and fall back to a
/// two-step SELECT_CC expansion otherwise.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // f32 selects may instead be a legacy min/max pattern; try that first.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      // Inverting the condition lets us swap True/False into place.
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      // Otherwise try the inverted condition with swapped compare operands.
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the conditon and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // "Not equal" conditions are handled by inverting the condition and
      // swapping the select arms.
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // Shift amount: log2 of the number of bytes covered per register index.
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}

/// Compute the sub-register channel and pointer increment used to address
/// element \p ElemIdx of a stack slot that is \p StackWidth elements wide.
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

/// Lower a private-address-space store narrower than 32 bits by turning it
/// into a read-modify-write of the containing dword: load the dword, clear
/// the target bits with a shifted mask, OR in the shifted value, store back.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  // DUMMY_CHAIN marks a store that is part of an expanded vector store (see
  // LowerSTORE); in that case chain through its operand instead.
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}

/// Custom lowering for STORE nodes, dispatching on address space: global
/// stores may become MSKOR/DWORDADDR forms, vector and sub-dword stores are
/// scalarized or expanded, and private stores are tagged for pattern
/// matching.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  const bool TruncatingStore = StoreNode->isTruncatingStore();

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS ==
AMDGPUAS::PRIVATE_ADDRESS ||
       TruncatingStore) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  // Expand stores the target cannot perform at this alignment.
  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(
          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  // Byte address shifted down to a dword index.
  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (TruncatingStore) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isIndexed()) {
        llvm_unreachable("Indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  // Sub-dword private stores need the read-modify-write lowering.
  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

// return (512 + (kc_bank << 12)
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    // Not a constant-buffer address space.
    return -1;
  }
}

/// Lower a sub-dword extending load from the private address space: load the
/// containing dword, shift the addressed bits down, and sign- or zero-extend
/// them according to the load's extension type.
SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}

/// Custom lowering for LOAD nodes: private sub-dword extending loads, vector
/// loads from local/private memory, and constant-buffer loads.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  // Vector loads from LOCAL/PRIVATE are split into scalar loads.
  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    SDValue Ops[2];
    std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
    return DAG.getMergeValues(Ops, DL);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
      //TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
              DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
1503284677Sdim if (LoadNode->getExtensionType() == ISD::SEXTLOAD) { 1504284677Sdim assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8)); 1505309124Sdim SDValue NewLoad = DAG.getExtLoad( 1506309124Sdim ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT, 1507309124Sdim LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags()); 1508284677Sdim SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad, 1509284677Sdim DAG.getValueType(MemVT)); 1510284677Sdim 1511284677Sdim SDValue MergedValues[2] = { Res, Chain }; 1512284677Sdim return DAG.getMergeValues(MergedValues, DL); 1513284677Sdim } 1514284677Sdim 1515344779Sdim if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { 1516284677Sdim return SDValue(); 1517284677Sdim } 1518284677Sdim 1519314564Sdim // DWORDADDR ISD marks already shifted address 1520314564Sdim if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) { 1521314564Sdim assert(VT == MVT::i32); 1522314564Sdim Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32)); 1523314564Sdim Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr); 1524314564Sdim return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand()); 1525284677Sdim } 1526314564Sdim return SDValue(); 1527284677Sdim} 1528284677Sdim 1529284677SdimSDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 1530284677Sdim SDValue Chain = Op.getOperand(0); 1531284677Sdim SDValue Cond = Op.getOperand(1); 1532284677Sdim SDValue Jump = Op.getOperand(2); 1533284677Sdim 1534284677Sdim return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(), 1535284677Sdim Chain, Jump, Cond); 1536284677Sdim} 1537284677Sdim 1538309124SdimSDValue R600TargetLowering::lowerFrameIndex(SDValue Op, 1539309124Sdim SelectionDAG &DAG) const { 1540309124Sdim MachineFunction &MF = DAG.getMachineFunction(); 1541341825Sdim const R600FrameLowering *TFL = Subtarget->getFrameLowering(); 1542309124Sdim 1543309124Sdim 
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op); 1544309124Sdim 1545309124Sdim unsigned FrameIndex = FIN->getIndex(); 1546309124Sdim unsigned IgnoredFrameReg; 1547309124Sdim unsigned Offset = 1548309124Sdim TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); 1549309124Sdim return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op), 1550309124Sdim Op.getValueType()); 1551309124Sdim} 1552309124Sdim 1553341825SdimCCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC, 1554341825Sdim bool IsVarArg) const { 1555341825Sdim switch (CC) { 1556341825Sdim case CallingConv::AMDGPU_KERNEL: 1557341825Sdim case CallingConv::SPIR_KERNEL: 1558341825Sdim case CallingConv::C: 1559341825Sdim case CallingConv::Fast: 1560341825Sdim case CallingConv::Cold: 1561341825Sdim llvm_unreachable("kernels should not be handled here"); 1562341825Sdim case CallingConv::AMDGPU_VS: 1563341825Sdim case CallingConv::AMDGPU_GS: 1564341825Sdim case CallingConv::AMDGPU_PS: 1565341825Sdim case CallingConv::AMDGPU_CS: 1566341825Sdim case CallingConv::AMDGPU_HS: 1567341825Sdim case CallingConv::AMDGPU_ES: 1568341825Sdim case CallingConv::AMDGPU_LS: 1569341825Sdim return CC_R600; 1570341825Sdim default: 1571341825Sdim report_fatal_error("Unsupported calling convention."); 1572341825Sdim } 1573341825Sdim} 1574341825Sdim 1575284677Sdim/// XXX Only kernel functions are supported, so we can assume for now that 1576284677Sdim/// every function is a kernel function, but in the future we should use 1577284677Sdim/// separate calling conventions for kernel and non-kernel functions. 
1578284677SdimSDValue R600TargetLowering::LowerFormalArguments( 1579309124Sdim SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 1580309124Sdim const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 1581309124Sdim SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 1582284677Sdim SmallVector<CCValAssign, 16> ArgLocs; 1583284677Sdim CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 1584284677Sdim *DAG.getContext()); 1585284677Sdim MachineFunction &MF = DAG.getMachineFunction(); 1586284677Sdim SmallVector<ISD::InputArg, 8> LocalIns; 1587284677Sdim 1588314564Sdim if (AMDGPU::isShader(CallConv)) { 1589321369Sdim CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); 1590314564Sdim } else { 1591314564Sdim analyzeFormalArgumentsCompute(CCInfo, Ins); 1592314564Sdim } 1593284677Sdim 1594284677Sdim for (unsigned i = 0, e = Ins.size(); i < e; ++i) { 1595284677Sdim CCValAssign &VA = ArgLocs[i]; 1596284677Sdim const ISD::InputArg &In = Ins[i]; 1597284677Sdim EVT VT = In.VT; 1598284677Sdim EVT MemVT = VA.getLocVT(); 1599284677Sdim if (!VT.isVector() && MemVT.isVector()) { 1600284677Sdim // Get load source type if scalarized. 1601284677Sdim MemVT = MemVT.getVectorElementType(); 1602284677Sdim } 1603284677Sdim 1604309124Sdim if (AMDGPU::isShader(CallConv)) { 1605341825Sdim unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass); 1606284677Sdim SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT); 1607284677Sdim InVals.push_back(Register); 1608284677Sdim continue; 1609284677Sdim } 1610284677Sdim 1611284677Sdim // i64 isn't a legal type, so the register type used ends up as i32, which 1612284677Sdim // isn't expected here. It attempts to create this sextload, but it ends up 1613284677Sdim // being invalid. Somehow this seems to work with i64 arguments, but breaks 1614284677Sdim // for <1 x i64>. 
1615284677Sdim 1616284677Sdim // The first 36 bytes of the input buffer contains information about 1617284677Sdim // thread group and global sizes. 1618284677Sdim ISD::LoadExtType Ext = ISD::NON_EXTLOAD; 1619284677Sdim if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) { 1620284677Sdim // FIXME: This should really check the extload type, but the handling of 1621284677Sdim // extload vector parameters seems to be broken. 1622284677Sdim 1623284677Sdim // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; 1624284677Sdim Ext = ISD::SEXTLOAD; 1625284677Sdim } 1626284677Sdim 1627284677Sdim // Compute the offset from the value. 1628284677Sdim // XXX - I think PartOffset should give you this, but it seems to give the 1629284677Sdim // size of the register which isn't useful. 1630284677Sdim 1631284677Sdim unsigned PartOffset = VA.getLocMemOffset(); 1632341825Sdim unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset); 1633284677Sdim 1634360784Sdim MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS); 1635309124Sdim SDValue Arg = DAG.getLoad( 1636309124Sdim ISD::UNINDEXED, Ext, VT, DL, Chain, 1637341825Sdim DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), 1638341825Sdim PtrInfo, 1639341825Sdim MemVT, Alignment, MachineMemOperand::MONonTemporal | 1640314564Sdim MachineMemOperand::MODereferenceable | 1641314564Sdim MachineMemOperand::MOInvariant); 1642284677Sdim 1643284677Sdim InVals.push_back(Arg); 1644284677Sdim } 1645284677Sdim return Chain; 1646284677Sdim} 1647284677Sdim 1648286684SdimEVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 1649286684Sdim EVT VT) const { 1650284677Sdim if (!VT.isVector()) 1651284677Sdim return MVT::i32; 1652284677Sdim return VT.changeVectorElementTypeToInteger(); 1653284677Sdim} 1654284677Sdim 1655321369Sdimbool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, 1656321369Sdim const SelectionDAG &DAG) const { 1657321369Sdim // Local and Private addresses do not 
handle vectors. Limit to i32 1658344779Sdim if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) { 1659321369Sdim return (MemVT.getSizeInBits() <= 32); 1660321369Sdim } 1661321369Sdim return true; 1662321369Sdim} 1663321369Sdim 1664353358Sdimbool R600TargetLowering::allowsMisalignedMemoryAccesses( 1665353358Sdim EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, 1666353358Sdim bool *IsFast) const { 1667309124Sdim if (IsFast) 1668309124Sdim *IsFast = false; 1669309124Sdim 1670309124Sdim if (!VT.isSimple() || VT == MVT::Other) 1671309124Sdim return false; 1672309124Sdim 1673309124Sdim if (VT.bitsLT(MVT::i32)) 1674309124Sdim return false; 1675309124Sdim 1676309124Sdim // TODO: This is a rough estimate. 1677309124Sdim if (IsFast) 1678309124Sdim *IsFast = true; 1679309124Sdim 1680309124Sdim return VT.bitsGT(MVT::i32) && Align % 4 == 0; 1681309124Sdim} 1682309124Sdim 1683284677Sdimstatic SDValue CompactSwizzlableVector( 1684284677Sdim SelectionDAG &DAG, SDValue VectorEntry, 1685284677Sdim DenseMap<unsigned, unsigned> &RemapSwizzle) { 1686284677Sdim assert(RemapSwizzle.empty()); 1687284677Sdim 1688344779Sdim SDLoc DL(VectorEntry); 1689344779Sdim EVT EltTy = VectorEntry.getValueType().getVectorElementType(); 1690344779Sdim 1691344779Sdim SDValue NewBldVec[4]; 1692344779Sdim for (unsigned i = 0; i < 4; i++) 1693344779Sdim NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry, 1694344779Sdim DAG.getIntPtrConstant(i, DL)); 1695344779Sdim 1696284677Sdim for (unsigned i = 0; i < 4; i++) { 1697309124Sdim if (NewBldVec[i].isUndef()) 1698284677Sdim // We mask write here to teach later passes that the ith element of this 1699284677Sdim // vector is undef. Thus we can use it to reduce 128 bits reg usage, 1700284677Sdim // break false dependencies and additionnaly make assembly easier to read. 
1701284677Sdim RemapSwizzle[i] = 7; // SEL_MASK_WRITE 1702284677Sdim if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) { 1703284677Sdim if (C->isZero()) { 1704284677Sdim RemapSwizzle[i] = 4; // SEL_0 1705284677Sdim NewBldVec[i] = DAG.getUNDEF(MVT::f32); 1706284677Sdim } else if (C->isExactlyValue(1.0)) { 1707284677Sdim RemapSwizzle[i] = 5; // SEL_1 1708284677Sdim NewBldVec[i] = DAG.getUNDEF(MVT::f32); 1709284677Sdim } 1710284677Sdim } 1711284677Sdim 1712309124Sdim if (NewBldVec[i].isUndef()) 1713284677Sdim continue; 1714360784Sdim 1715284677Sdim for (unsigned j = 0; j < i; j++) { 1716284677Sdim if (NewBldVec[i] == NewBldVec[j]) { 1717284677Sdim NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType()); 1718284677Sdim RemapSwizzle[i] = j; 1719284677Sdim break; 1720284677Sdim } 1721284677Sdim } 1722284677Sdim } 1723284677Sdim 1724309124Sdim return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry), 1725309124Sdim NewBldVec); 1726284677Sdim} 1727284677Sdim 1728284677Sdimstatic SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, 1729284677Sdim DenseMap<unsigned, unsigned> &RemapSwizzle) { 1730284677Sdim assert(RemapSwizzle.empty()); 1731344779Sdim 1732344779Sdim SDLoc DL(VectorEntry); 1733344779Sdim EVT EltTy = VectorEntry.getValueType().getVectorElementType(); 1734344779Sdim 1735344779Sdim SDValue NewBldVec[4]; 1736344779Sdim bool isUnmovable[4] = {false, false, false, false}; 1737344779Sdim for (unsigned i = 0; i < 4; i++) 1738344779Sdim NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry, 1739344779Sdim DAG.getIntPtrConstant(i, DL)); 1740344779Sdim 1741284677Sdim for (unsigned i = 0; i < 4; i++) { 1742284677Sdim RemapSwizzle[i] = i; 1743284677Sdim if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 1744284677Sdim unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) 1745284677Sdim ->getZExtValue(); 1746284677Sdim if (i == Idx) 1747284677Sdim isUnmovable[Idx] = true; 
1748284677Sdim } 1749284677Sdim } 1750284677Sdim 1751284677Sdim for (unsigned i = 0; i < 4; i++) { 1752284677Sdim if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 1753284677Sdim unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) 1754284677Sdim ->getZExtValue(); 1755284677Sdim if (isUnmovable[Idx]) 1756284677Sdim continue; 1757284677Sdim // Swap i and Idx 1758284677Sdim std::swap(NewBldVec[Idx], NewBldVec[i]); 1759284677Sdim std::swap(RemapSwizzle[i], RemapSwizzle[Idx]); 1760284677Sdim break; 1761284677Sdim } 1762284677Sdim } 1763284677Sdim 1764309124Sdim return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry), 1765309124Sdim NewBldVec); 1766284677Sdim} 1767284677Sdim 1768309124SdimSDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4], 1769309124Sdim SelectionDAG &DAG, 1770309124Sdim const SDLoc &DL) const { 1771284677Sdim // Old -> New swizzle values 1772284677Sdim DenseMap<unsigned, unsigned> SwizzleRemap; 1773284677Sdim 1774284677Sdim BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); 1775284677Sdim for (unsigned i = 0; i < 4; i++) { 1776284677Sdim unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); 1777284677Sdim if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) 1778284677Sdim Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); 1779284677Sdim } 1780284677Sdim 1781284677Sdim SwizzleRemap.clear(); 1782284677Sdim BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); 1783284677Sdim for (unsigned i = 0; i < 4; i++) { 1784284677Sdim unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); 1785284677Sdim if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) 1786284677Sdim Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); 1787284677Sdim } 1788284677Sdim 1789284677Sdim return BuildVector; 1790284677Sdim} 1791284677Sdim 1792341825SdimSDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block, 1793341825Sdim SelectionDAG &DAG) const 
{ 1794341825Sdim SDLoc DL(LoadNode); 1795341825Sdim EVT VT = LoadNode->getValueType(0); 1796341825Sdim SDValue Chain = LoadNode->getChain(); 1797341825Sdim SDValue Ptr = LoadNode->getBasePtr(); 1798341825Sdim assert (isa<ConstantSDNode>(Ptr)); 1799341825Sdim 1800341825Sdim //TODO: Support smaller loads 1801341825Sdim if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode)) 1802341825Sdim return SDValue(); 1803341825Sdim 1804341825Sdim if (LoadNode->getAlignment() < 4) 1805341825Sdim return SDValue(); 1806341825Sdim 1807341825Sdim int ConstantBlock = ConstantAddressBlock(Block); 1808341825Sdim 1809341825Sdim SDValue Slots[4]; 1810341825Sdim for (unsigned i = 0; i < 4; i++) { 1811341825Sdim // We want Const position encoded with the following formula : 1812341825Sdim // (((512 + (kc_bank << 12) + const_index) << 2) + chan) 1813341825Sdim // const_index is Ptr computed by llvm using an alignment of 16. 1814341825Sdim // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and 1815341825Sdim // then div by 4 at the ISel step 1816341825Sdim SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 1817341825Sdim DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32)); 1818341825Sdim Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); 1819341825Sdim } 1820341825Sdim EVT NewVT = MVT::v4i32; 1821341825Sdim unsigned NumElements = 4; 1822341825Sdim if (VT.isVector()) { 1823341825Sdim NewVT = VT; 1824341825Sdim NumElements = VT.getVectorNumElements(); 1825341825Sdim } 1826341825Sdim SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements)); 1827341825Sdim if (!VT.isVector()) { 1828341825Sdim Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, 1829341825Sdim DAG.getConstant(0, DL, MVT::i32)); 1830341825Sdim } 1831341825Sdim SDValue MergedValues[2] = { 1832341825Sdim Result, 1833341825Sdim Chain 1834341825Sdim }; 1835341825Sdim return DAG.getMergeValues(MergedValues, DL); 
1836341825Sdim} 1837341825Sdim 1838284677Sdim//===----------------------------------------------------------------------===// 1839284677Sdim// Custom DAG Optimizations 1840284677Sdim//===----------------------------------------------------------------------===// 1841284677Sdim 1842284677SdimSDValue R600TargetLowering::PerformDAGCombine(SDNode *N, 1843284677Sdim DAGCombinerInfo &DCI) const { 1844284677Sdim SelectionDAG &DAG = DCI.DAG; 1845314564Sdim SDLoc DL(N); 1846284677Sdim 1847284677Sdim switch (N->getOpcode()) { 1848284677Sdim // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) 1849284677Sdim case ISD::FP_ROUND: { 1850284677Sdim SDValue Arg = N->getOperand(0); 1851284677Sdim if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { 1852314564Sdim return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0), 1853284677Sdim Arg.getOperand(0)); 1854284677Sdim } 1855284677Sdim break; 1856284677Sdim } 1857284677Sdim 1858284677Sdim // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) -> 1859284677Sdim // (i32 select_cc f32, f32, -1, 0 cc) 1860284677Sdim // 1861284677Sdim // Mesa's GLSL frontend generates the above pattern a lot and we can lower 1862284677Sdim // this to one of the SET*_DX10 instructions. 
1863284677Sdim case ISD::FP_TO_SINT: { 1864284677Sdim SDValue FNeg = N->getOperand(0); 1865284677Sdim if (FNeg.getOpcode() != ISD::FNEG) { 1866284677Sdim return SDValue(); 1867284677Sdim } 1868284677Sdim SDValue SelectCC = FNeg.getOperand(0); 1869284677Sdim if (SelectCC.getOpcode() != ISD::SELECT_CC || 1870284677Sdim SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS 1871284677Sdim SelectCC.getOperand(2).getValueType() != MVT::f32 || // True 1872284677Sdim !isHWTrueValue(SelectCC.getOperand(2)) || 1873284677Sdim !isHWFalseValue(SelectCC.getOperand(3))) { 1874284677Sdim return SDValue(); 1875284677Sdim } 1876284677Sdim 1877314564Sdim return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0), 1878284677Sdim SelectCC.getOperand(0), // LHS 1879284677Sdim SelectCC.getOperand(1), // RHS 1880314564Sdim DAG.getConstant(-1, DL, MVT::i32), // True 1881314564Sdim DAG.getConstant(0, DL, MVT::i32), // False 1882284677Sdim SelectCC.getOperand(4)); // CC 1883284677Sdim } 1884284677Sdim 1885284677Sdim // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx 1886284677Sdim // => build_vector elt0, ... , NewEltIdx, ... , eltN 1887284677Sdim case ISD::INSERT_VECTOR_ELT: { 1888284677Sdim SDValue InVec = N->getOperand(0); 1889284677Sdim SDValue InVal = N->getOperand(1); 1890284677Sdim SDValue EltNo = N->getOperand(2); 1891284677Sdim 1892284677Sdim // If the inserted element is an UNDEF, just use the input vector. 
1893309124Sdim if (InVal.isUndef()) 1894284677Sdim return InVec; 1895284677Sdim 1896284677Sdim EVT VT = InVec.getValueType(); 1897284677Sdim 1898284677Sdim // If we can't generate a legal BUILD_VECTOR, exit 1899284677Sdim if (!isOperationLegal(ISD::BUILD_VECTOR, VT)) 1900284677Sdim return SDValue(); 1901284677Sdim 1902284677Sdim // Check that we know which element is being inserted 1903284677Sdim if (!isa<ConstantSDNode>(EltNo)) 1904284677Sdim return SDValue(); 1905284677Sdim unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 1906284677Sdim 1907284677Sdim // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially 1908284677Sdim // be converted to a BUILD_VECTOR). Fill in the Ops vector with the 1909284677Sdim // vector elements. 1910284677Sdim SmallVector<SDValue, 8> Ops; 1911284677Sdim if (InVec.getOpcode() == ISD::BUILD_VECTOR) { 1912284677Sdim Ops.append(InVec.getNode()->op_begin(), 1913284677Sdim InVec.getNode()->op_end()); 1914309124Sdim } else if (InVec.isUndef()) { 1915284677Sdim unsigned NElts = VT.getVectorNumElements(); 1916284677Sdim Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); 1917284677Sdim } else { 1918284677Sdim return SDValue(); 1919284677Sdim } 1920284677Sdim 1921284677Sdim // Insert the element 1922284677Sdim if (Elt < Ops.size()) { 1923284677Sdim // All the operands of BUILD_VECTOR must have the same type; 1924284677Sdim // we enforce that here. 1925284677Sdim EVT OpVT = Ops[0].getValueType(); 1926284677Sdim if (InVal.getValueType() != OpVT) 1927284677Sdim InVal = OpVT.bitsGT(InVal.getValueType()) ? 
1928314564Sdim DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) : 1929314564Sdim DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal); 1930284677Sdim Ops[Elt] = InVal; 1931284677Sdim } 1932284677Sdim 1933284677Sdim // Return the new vector 1934314564Sdim return DAG.getBuildVector(VT, DL, Ops); 1935284677Sdim } 1936284677Sdim 1937284677Sdim // Extract_vec (Build_vector) generated by custom lowering 1938284677Sdim // also needs to be customly combined 1939284677Sdim case ISD::EXTRACT_VECTOR_ELT: { 1940284677Sdim SDValue Arg = N->getOperand(0); 1941284677Sdim if (Arg.getOpcode() == ISD::BUILD_VECTOR) { 1942284677Sdim if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 1943284677Sdim unsigned Element = Const->getZExtValue(); 1944284677Sdim return Arg->getOperand(Element); 1945284677Sdim } 1946284677Sdim } 1947284677Sdim if (Arg.getOpcode() == ISD::BITCAST && 1948314564Sdim Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && 1949314564Sdim (Arg.getOperand(0).getValueType().getVectorNumElements() == 1950314564Sdim Arg.getValueType().getVectorNumElements())) { 1951284677Sdim if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 1952284677Sdim unsigned Element = Const->getZExtValue(); 1953314564Sdim return DAG.getNode(ISD::BITCAST, DL, N->getVTList(), 1954314564Sdim Arg->getOperand(0).getOperand(Element)); 1955284677Sdim } 1956284677Sdim } 1957296417Sdim break; 1958284677Sdim } 1959284677Sdim 1960284677Sdim case ISD::SELECT_CC: { 1961284677Sdim // Try common optimizations 1962309124Sdim if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI)) 1963284677Sdim return Ret; 1964284677Sdim 1965284677Sdim // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq -> 1966284677Sdim // selectcc x, y, a, b, inv(cc) 1967284677Sdim // 1968284677Sdim // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne -> 1969284677Sdim // selectcc x, y, a, b, cc 1970284677Sdim SDValue LHS = N->getOperand(0); 1971284677Sdim if (LHS.getOpcode() != 
ISD::SELECT_CC) { 1972284677Sdim return SDValue(); 1973284677Sdim } 1974284677Sdim 1975284677Sdim SDValue RHS = N->getOperand(1); 1976284677Sdim SDValue True = N->getOperand(2); 1977284677Sdim SDValue False = N->getOperand(3); 1978284677Sdim ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 1979284677Sdim 1980284677Sdim if (LHS.getOperand(2).getNode() != True.getNode() || 1981284677Sdim LHS.getOperand(3).getNode() != False.getNode() || 1982284677Sdim RHS.getNode() != False.getNode()) { 1983284677Sdim return SDValue(); 1984284677Sdim } 1985284677Sdim 1986284677Sdim switch (NCC) { 1987284677Sdim default: return SDValue(); 1988284677Sdim case ISD::SETNE: return LHS; 1989284677Sdim case ISD::SETEQ: { 1990284677Sdim ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get(); 1991360784Sdim LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType()); 1992284677Sdim if (DCI.isBeforeLegalizeOps() || 1993284677Sdim isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType())) 1994314564Sdim return DAG.getSelectCC(DL, 1995284677Sdim LHS.getOperand(0), 1996284677Sdim LHS.getOperand(1), 1997284677Sdim LHS.getOperand(2), 1998284677Sdim LHS.getOperand(3), 1999284677Sdim LHSCC); 2000284677Sdim break; 2001284677Sdim } 2002284677Sdim } 2003284677Sdim return SDValue(); 2004284677Sdim } 2005284677Sdim 2006314564Sdim case AMDGPUISD::R600_EXPORT: { 2007284677Sdim SDValue Arg = N->getOperand(1); 2008284677Sdim if (Arg.getOpcode() != ISD::BUILD_VECTOR) 2009284677Sdim break; 2010284677Sdim 2011284677Sdim SDValue NewArgs[8] = { 2012284677Sdim N->getOperand(0), // Chain 2013284677Sdim SDValue(), 2014284677Sdim N->getOperand(2), // ArrayBase 2015284677Sdim N->getOperand(3), // Type 2016284677Sdim N->getOperand(4), // SWZ_X 2017284677Sdim N->getOperand(5), // SWZ_Y 2018284677Sdim N->getOperand(6), // SWZ_Z 2019284677Sdim N->getOperand(7) // SWZ_W 2020284677Sdim }; 2021284677Sdim NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL); 
2022314564Sdim return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs); 2023284677Sdim } 2024284677Sdim case AMDGPUISD::TEXTURE_FETCH: { 2025284677Sdim SDValue Arg = N->getOperand(1); 2026284677Sdim if (Arg.getOpcode() != ISD::BUILD_VECTOR) 2027284677Sdim break; 2028284677Sdim 2029284677Sdim SDValue NewArgs[19] = { 2030284677Sdim N->getOperand(0), 2031284677Sdim N->getOperand(1), 2032284677Sdim N->getOperand(2), 2033284677Sdim N->getOperand(3), 2034284677Sdim N->getOperand(4), 2035284677Sdim N->getOperand(5), 2036284677Sdim N->getOperand(6), 2037284677Sdim N->getOperand(7), 2038284677Sdim N->getOperand(8), 2039284677Sdim N->getOperand(9), 2040284677Sdim N->getOperand(10), 2041284677Sdim N->getOperand(11), 2042284677Sdim N->getOperand(12), 2043284677Sdim N->getOperand(13), 2044284677Sdim N->getOperand(14), 2045284677Sdim N->getOperand(15), 2046284677Sdim N->getOperand(16), 2047284677Sdim N->getOperand(17), 2048284677Sdim N->getOperand(18), 2049284677Sdim }; 2050284677Sdim NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL); 2051284677Sdim return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs); 2052284677Sdim } 2053341825Sdim 2054341825Sdim case ISD::LOAD: { 2055341825Sdim LoadSDNode *LoadNode = cast<LoadSDNode>(N); 2056341825Sdim SDValue Ptr = LoadNode->getBasePtr(); 2057341825Sdim if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS && 2058341825Sdim isa<ConstantSDNode>(Ptr)) 2059341825Sdim return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG); 2060341825Sdim break; 2061341825Sdim } 2062341825Sdim 2063314564Sdim default: break; 2064284677Sdim } 2065284677Sdim 2066284677Sdim return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); 2067284677Sdim} 2068284677Sdim 2069309124Sdimbool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx, 2070309124Sdim SDValue &Src, SDValue &Neg, SDValue &Abs, 2071309124Sdim SDValue &Sel, SDValue &Imm, 2072309124Sdim SelectionDAG &DAG) const { 
2073341825Sdim const R600InstrInfo *TII = Subtarget->getInstrInfo(); 2074284677Sdim if (!Src.isMachineOpcode()) 2075284677Sdim return false; 2076309124Sdim 2077284677Sdim switch (Src.getMachineOpcode()) { 2078341825Sdim case R600::FNEG_R600: 2079284677Sdim if (!Neg.getNode()) 2080284677Sdim return false; 2081284677Sdim Src = Src.getOperand(0); 2082284677Sdim Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); 2083284677Sdim return true; 2084341825Sdim case R600::FABS_R600: 2085284677Sdim if (!Abs.getNode()) 2086284677Sdim return false; 2087284677Sdim Src = Src.getOperand(0); 2088284677Sdim Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); 2089284677Sdim return true; 2090341825Sdim case R600::CONST_COPY: { 2091284677Sdim unsigned Opcode = ParentNode->getMachineOpcode(); 2092341825Sdim bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; 2093284677Sdim 2094284677Sdim if (!Sel.getNode()) 2095284677Sdim return false; 2096284677Sdim 2097284677Sdim SDValue CstOffset = Src.getOperand(0); 2098284677Sdim if (ParentNode->getValueType(0).isVector()) 2099284677Sdim return false; 2100284677Sdim 2101284677Sdim // Gather constants values 2102284677Sdim int SrcIndices[] = { 2103341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0), 2104341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1), 2105341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src2), 2106341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_X), 2107341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_Y), 2108341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_Z), 2109341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src0_W), 2110341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_X), 2111341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_Y), 2112341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_Z), 2113341825Sdim TII->getOperandIdx(Opcode, R600::OpName::src1_W) 2114284677Sdim }; 2115284677Sdim std::vector<unsigned> Consts; 2116284677Sdim for 
(int OtherSrcIdx : SrcIndices) { 2117284677Sdim int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); 2118284677Sdim if (OtherSrcIdx < 0 || OtherSelIdx < 0) 2119284677Sdim continue; 2120284677Sdim if (HasDst) { 2121284677Sdim OtherSrcIdx--; 2122284677Sdim OtherSelIdx--; 2123284677Sdim } 2124284677Sdim if (RegisterSDNode *Reg = 2125284677Sdim dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) { 2126341825Sdim if (Reg->getReg() == R600::ALU_CONST) { 2127284677Sdim ConstantSDNode *Cst 2128284677Sdim = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx)); 2129284677Sdim Consts.push_back(Cst->getZExtValue()); 2130284677Sdim } 2131284677Sdim } 2132284677Sdim } 2133284677Sdim 2134284677Sdim ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset); 2135284677Sdim Consts.push_back(Cst->getZExtValue()); 2136284677Sdim if (!TII->fitsConstReadLimitations(Consts)) { 2137284677Sdim return false; 2138284677Sdim } 2139284677Sdim 2140284677Sdim Sel = CstOffset; 2141341825Sdim Src = DAG.getRegister(R600::ALU_CONST, MVT::f32); 2142284677Sdim return true; 2143284677Sdim } 2144341825Sdim case R600::MOV_IMM_GLOBAL_ADDR: 2145309124Sdim // Check if the Imm slot is used. Taken from below. 
2146309124Sdim if (cast<ConstantSDNode>(Imm)->getZExtValue()) 2147309124Sdim return false; 2148309124Sdim Imm = Src.getOperand(0); 2149341825Sdim Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32); 2150309124Sdim return true; 2151341825Sdim case R600::MOV_IMM_I32: 2152341825Sdim case R600::MOV_IMM_F32: { 2153341825Sdim unsigned ImmReg = R600::ALU_LITERAL_X; 2154284677Sdim uint64_t ImmValue = 0; 2155284677Sdim 2156341825Sdim if (Src.getMachineOpcode() == R600::MOV_IMM_F32) { 2157284677Sdim ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0)); 2158284677Sdim float FloatValue = FPC->getValueAPF().convertToFloat(); 2159284677Sdim if (FloatValue == 0.0) { 2160341825Sdim ImmReg = R600::ZERO; 2161284677Sdim } else if (FloatValue == 0.5) { 2162341825Sdim ImmReg = R600::HALF; 2163284677Sdim } else if (FloatValue == 1.0) { 2164341825Sdim ImmReg = R600::ONE; 2165284677Sdim } else { 2166284677Sdim ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); 2167284677Sdim } 2168284677Sdim } else { 2169284677Sdim ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0)); 2170284677Sdim uint64_t Value = C->getZExtValue(); 2171284677Sdim if (Value == 0) { 2172341825Sdim ImmReg = R600::ZERO; 2173284677Sdim } else if (Value == 1) { 2174341825Sdim ImmReg = R600::ONE_INT; 2175284677Sdim } else { 2176284677Sdim ImmValue = Value; 2177284677Sdim } 2178284677Sdim } 2179284677Sdim 2180284677Sdim // Check that we aren't already using an immediate. 2181284677Sdim // XXX: It's possible for an instruction to have more than one 2182284677Sdim // immediate operand, but this is not supported yet. 
    // -----------------------------------------------------------------------
    // Tail of FoldOperand() — the function's head lies above this chunk.
    // At this point ImmReg is either an inline constant register (ZERO, ONE,
    // HALF, ONE_INT, ...) or ALU_LITERAL_X, in which case the value must be
    // placed into the instruction's shared literal slot.
    // -----------------------------------------------------------------------

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == R600::ALU_LITERAL_X) {
      // Bail out if the instruction has no literal slot, or if the slot
      // already holds a non-zero immediate (only one literal is folded).
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
      assert(C);
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
    }
    // Redirect the source operand to read from the chosen constant register.
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}

/// Fold the instructions after selecting them.
///
/// Walks the source operands of a freshly selected machine node and lets
/// FoldOperand() absorb FNEG/FABS wrappers, constant-buffer reads and
/// immediates into the node's neg/abs/sel/literal modifier operands.
///
/// \param Node the machine node produced by instruction selection.
/// \param DAG  the SelectionDAG used to materialize replacement operands.
/// \returns a newly built machine node if any operand was folded, otherwise
///          the unchanged \p Node.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII = Subtarget->getInstrInfo();
  if (!Node->isMachineOpcode())
    return Node;

  unsigned Opcode = Node->getMachineOpcode();
  // Null SDValue handed to FoldOperand() for modifier slots a given shape
  // of instruction does not have (e.g. DOT_4 has no literal operand).
  SDValue FakeOp;

  // Mutable copy of the operand list; FoldOperand() edits entries in place
  // through the references taken below.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == R600::DOT_4) {
    // DOT_4 carries one src0/src1 pair per channel (X, Y, Z, W), each with
    // its own neg and abs modifier operand.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // NOTE(review): the unconditional "- 1" presumably compensates for
      // the dst def, which getOperandIdx counts but the SDNode operand
      // list omits — confirm against R600InstrInfo::getOperandIdx.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // On the first successful fold, rebuild the node with the updated
      // operand list and hand it back to the caller.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == R600::REG_SEQUENCE) {
    // NOTE(review): iterating i = 1, 3, 5, ... presumably visits the value
    // operands of the (value, subreg-index) pairs while skipping the
    // leading register-class operand — confirm against the REG_SEQUENCE
    // node layout.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else {
    // Generic ALU instruction: up to three sources, each with a neg
    // modifier; only src0/src1 have an abs modifier.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0),
      TII->getOperandIdx(Opcode, R600::OpName::src1),
      TII->getOperandIdx(Opcode, R600::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
      -1   // src2 has no abs modifier.
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // NOTE(review): unlike SelIdx, ImmIdx is not range-checked before
      // indexing — presumably hasInstrModifiers() guarantees a literal
      // operand exists; verify.
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}