1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// \brief This is the parent TargetLowering class for hardware code gen 12/// targets. 13// 14//===----------------------------------------------------------------------===// 15 16#include "AMDGPUISelLowering.h" 17#include "AMDGPU.h" 18#include "AMDGPUFrameLowering.h" 19#include "AMDGPURegisterInfo.h" 20#include "AMDGPUSubtarget.h" 21#include "AMDILIntrinsicInfo.h" 22#include "R600MachineFunctionInfo.h" 23#include "SIMachineFunctionInfo.h" 24#include "llvm/CodeGen/CallingConvLower.h" 25#include "llvm/CodeGen/MachineFunction.h" 26#include "llvm/CodeGen/MachineRegisterInfo.h" 27#include "llvm/CodeGen/SelectionDAG.h" 28#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 29#include "llvm/IR/DataLayout.h" 30 31using namespace llvm; 32static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, 33 CCValAssign::LocInfo LocInfo, 34 ISD::ArgFlagsTy ArgFlags, CCState &State) { 35 unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign()); 36 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); 37 38 return true; 39} 40 41#include "AMDGPUGenCallingConv.inc" 42 43AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : 44 TargetLowering(TM, new TargetLoweringObjectFileELF()) { 45 46 // Initialize target lowering borrowed from AMDIL 47 InitAMDILLowering(); 48 49 // We need to custom lower some of the intrinsics 50 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 51 52 // Library functions. These default to Expand, but we have instructions 53 // for them. 54 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 55 setOperationAction(ISD::FEXP2, MVT::f32, Legal); 56 setOperationAction(ISD::FPOW, MVT::f32, Legal); 57 setOperationAction(ISD::FLOG2, MVT::f32, Legal); 58 setOperationAction(ISD::FABS, MVT::f32, Legal); 59 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 60 setOperationAction(ISD::FRINT, MVT::f32, Legal); 61 setOperationAction(ISD::FROUND, MVT::f32, Legal); 62 63 // The hardware supports ROTR, but not ROTL 64 setOperationAction(ISD::ROTL, MVT::i32, Expand); 65 66 // Lower floating point store/load to integer store/load to reduce the number 67 // of patterns in tablegen. 68 setOperationAction(ISD::STORE, MVT::f32, Promote); 69 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); 70 71 setOperationAction(ISD::STORE, MVT::v2f32, Promote); 72 AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); 73 74 setOperationAction(ISD::STORE, MVT::v4f32, Promote); 75 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); 76 77 setOperationAction(ISD::STORE, MVT::v8f32, Promote); 78 AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32); 79 80 setOperationAction(ISD::STORE, MVT::v16f32, Promote); 81 AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32); 82 83 setOperationAction(ISD::STORE, MVT::f64, Promote); 84 AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); 85 86 // Custom lowering of vector stores is required for local address space 87 // stores. 88 setOperationAction(ISD::STORE, MVT::v4i32, Custom); 89 // XXX: Native v2i32 local address space stores are possible, but not 90 // currently implemented. 91 setOperationAction(ISD::STORE, MVT::v2i32, Custom); 92 93 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); 94 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); 95 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); 96 // XXX: This can be change to Custom, once ExpandVectorStores can 97 // handle 64-bit stores. 98 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); 99 100 setOperationAction(ISD::LOAD, MVT::f32, Promote); 101 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); 102 103 setOperationAction(ISD::LOAD, MVT::v2f32, Promote); 104 AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); 105 106 setOperationAction(ISD::LOAD, MVT::v4f32, Promote); 107 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); 108 109 setOperationAction(ISD::LOAD, MVT::v8f32, Promote); 110 AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32); 111 112 setOperationAction(ISD::LOAD, MVT::v16f32, Promote); 113 AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32); 114 115 setOperationAction(ISD::LOAD, MVT::f64, Promote); 116 AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); 117 118 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); 119 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); 120 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom); 121 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom); 122 123 setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand); 124 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand); 125 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand); 126 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand); 127 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand); 128 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand); 129 setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand); 130 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand); 131 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand); 132 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand); 133 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand); 134 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand); 135 136 setOperationAction(ISD::BR_CC, MVT::i1, Expand); 137 138 setOperationAction(ISD::FNEG, MVT::v2f32, Expand); 139 setOperationAction(ISD::FNEG, MVT::v4f32, Expand); 140 141 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 142 143 setOperationAction(ISD::MUL, MVT::i64, Expand); 144 145 setOperationAction(ISD::UDIV, MVT::i32, Expand); 146 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 147 setOperationAction(ISD::UREM, MVT::i32, Expand); 148 setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); 149 setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); 150 151 static const MVT::SimpleValueType IntTypes[] = { 152 MVT::v2i32, MVT::v4i32 153 }; 154 const size_t NumIntTypes = array_lengthof(IntTypes); 155 156 for (unsigned int x = 0; x < NumIntTypes; ++x) { 157 MVT::SimpleValueType VT = IntTypes[x]; 158 //Expand the following operations for the current type by default 159 setOperationAction(ISD::ADD, VT, Expand); 160 setOperationAction(ISD::AND, VT, Expand); 161 setOperationAction(ISD::FP_TO_SINT, VT, Expand); 162 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 163 setOperationAction(ISD::MUL, VT, Expand); 164 setOperationAction(ISD::OR, VT, Expand); 165 setOperationAction(ISD::SHL, VT, Expand); 166 setOperationAction(ISD::SINT_TO_FP, VT, Expand); 167 setOperationAction(ISD::SRL, VT, Expand); 168 setOperationAction(ISD::SRA, VT, Expand); 169 setOperationAction(ISD::SUB, VT, Expand); 170 setOperationAction(ISD::UDIV, VT, Expand); 171 setOperationAction(ISD::UINT_TO_FP, VT, Expand); 172 setOperationAction(ISD::UREM, VT, Expand); 173 setOperationAction(ISD::VSELECT, VT, Expand); 174 setOperationAction(ISD::XOR, VT, Expand); 175 } 176 177 static const MVT::SimpleValueType FloatTypes[] = { 178 MVT::v2f32, MVT::v4f32 179 }; 180 const size_t NumFloatTypes = array_lengthof(FloatTypes); 181 182 for (unsigned int x = 0; x < NumFloatTypes; ++x) { 183 MVT::SimpleValueType VT = FloatTypes[x]; 184 setOperationAction(ISD::FABS, VT, Expand); 185 setOperationAction(ISD::FADD, VT, Expand); 186 setOperationAction(ISD::FDIV, VT, Expand); 187 setOperationAction(ISD::FFLOOR, VT, Expand); 188 setOperationAction(ISD::FMUL, VT, Expand); 189 setOperationAction(ISD::FRINT, VT, Expand); 190 setOperationAction(ISD::FSQRT, VT, Expand); 191 setOperationAction(ISD::FSUB, VT, Expand); 192 } 193} 194 195//===----------------------------------------------------------------------===// 196// Target Information 197//===----------------------------------------------------------------------===// 198 199MVT AMDGPUTargetLowering::getVectorIdxTy() const { 200 return MVT::i32; 201} 202 203bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, 204 EVT CastTy) const { 205 if (LoadTy.getSizeInBits() != CastTy.getSizeInBits()) 206 return true; 207 208 unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits(); 209 unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits(); 210 211 return ((LScalarSize <= CastScalarSize) || 212 (CastScalarSize >= 32) || 213 (LScalarSize < 32)); 214} 215 216//===---------------------------------------------------------------------===// 217// Target Properties 218//===---------------------------------------------------------------------===// 219 220bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { 221 assert(VT.isFloatingPoint()); 222 return VT == MVT::f32; 223} 224 225bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { 226 assert(VT.isFloatingPoint()); 227 return VT == MVT::f32; 228} 229 230//===---------------------------------------------------------------------===// 231// TargetLowering Callbacks 232//===---------------------------------------------------------------------===// 233 234void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, 235 const SmallVectorImpl<ISD::InputArg> &Ins) const { 236 237 State.AnalyzeFormalArguments(Ins, CC_AMDGPU); 238} 239 240SDValue AMDGPUTargetLowering::LowerReturn( 241 SDValue Chain, 242 CallingConv::ID CallConv, 243 bool isVarArg, 244 const SmallVectorImpl<ISD::OutputArg> &Outs, 245 const SmallVectorImpl<SDValue> &OutVals, 246 SDLoc DL, SelectionDAG &DAG) const { 247 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 248} 249 250//===---------------------------------------------------------------------===// 251// Target specific lowering 252//===---------------------------------------------------------------------===// 253 254SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 255 const { 256 switch (Op.getOpcode()) { 257 default: 258 Op.getNode()->dump(); 259 assert(0 && "Custom lowering code for this" 260 "instruction is not implemented yet!"); 261 break; 262 // AMDIL DAG lowering 263 case ISD::SDIV: return LowerSDIV(Op, DAG); 264 case ISD::SREM: return LowerSREM(Op, DAG); 265 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 266 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 267 // AMDGPU DAG lowering 268 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 269 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); 270 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); 271 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 272 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 273 case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); 274 } 275 return Op; 276} 277 278SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 279 SDValue Op, 280 SelectionDAG &DAG) const { 281 282 const DataLayout *TD = getTargetMachine().getDataLayout(); 283 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 284 285 assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS); 286 // XXX: What does the value of G->getOffset() mean? 287 assert(G->getOffset() == 0 && 288 "Do not know what to do with an non-zero offset"); 289 290 const GlobalValue *GV = G->getGlobal(); 291 292 unsigned Offset; 293 if (MFI->LocalMemoryObjects.count(GV) == 0) { 294 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); 295 Offset = MFI->LDSSize; 296 MFI->LocalMemoryObjects[GV] = Offset; 297 // XXX: Account for alignment? 298 MFI->LDSSize += Size; 299 } else { 300 Offset = MFI->LocalMemoryObjects[GV]; 301 } 302 303 return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace())); 304} 305 306void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, 307 SmallVectorImpl<SDValue> &Args, 308 unsigned Start, 309 unsigned Count) const { 310 EVT VT = Op.getValueType(); 311 for (unsigned i = Start, e = Start + Count; i != e; ++i) { 312 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 313 VT.getVectorElementType(), 314 Op, DAG.getConstant(i, MVT::i32))); 315 } 316} 317 318SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, 319 SelectionDAG &DAG) const { 320 SmallVector<SDValue, 8> Args; 321 SDValue A = Op.getOperand(0); 322 SDValue B = Op.getOperand(1); 323 324 ExtractVectorElements(A, DAG, Args, 0, 325 A.getValueType().getVectorNumElements()); 326 ExtractVectorElements(B, DAG, Args, 0, 327 B.getValueType().getVectorNumElements()); 328 329 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 330 &Args[0], Args.size()); 331} 332 333SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 334 SelectionDAG &DAG) const { 335 336 SmallVector<SDValue, 8> Args; 337 EVT VT = Op.getValueType(); 338 unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 339 ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, 340 VT.getVectorNumElements()); 341 342 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 343 &Args[0], Args.size()); 344} 345 346SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, 347 SelectionDAG &DAG) const { 348 349 MachineFunction &MF = DAG.getMachineFunction(); 350 const AMDGPUFrameLowering *TFL = 351 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering()); 352 353 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op); 354 assert(FIN); 355 356 unsigned FrameIndex = FIN->getIndex(); 357 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); 358 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), 359 Op.getValueType()); 360} 361 362SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 363 SelectionDAG &DAG) const { 364 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 365 SDLoc DL(Op); 366 EVT VT = Op.getValueType(); 367 368 switch (IntrinsicID) { 369 default: return Op; 370 case AMDGPUIntrinsic::AMDIL_abs: 371 return LowerIntrinsicIABS(Op, DAG); 372 case AMDGPUIntrinsic::AMDIL_exp: 373 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 374 case AMDGPUIntrinsic::AMDGPU_lrp: 375 return LowerIntrinsicLRP(Op, DAG); 376 case AMDGPUIntrinsic::AMDIL_fraction: 377 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 378 case AMDGPUIntrinsic::AMDIL_max: 379 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 380 Op.getOperand(2)); 381 case AMDGPUIntrinsic::AMDGPU_imax: 382 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 383 Op.getOperand(2)); 384 case AMDGPUIntrinsic::AMDGPU_umax: 385 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 386 Op.getOperand(2)); 387 case AMDGPUIntrinsic::AMDIL_min: 388 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 389 Op.getOperand(2)); 390 case AMDGPUIntrinsic::AMDGPU_imin: 391 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 392 Op.getOperand(2)); 393 case AMDGPUIntrinsic::AMDGPU_umin: 394 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 395 Op.getOperand(2)); 396 case AMDGPUIntrinsic::AMDIL_round_nearest: 397 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 398 } 399} 400 401///IABS(a) = SMAX(sub(0, a), a) 402SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 403 SelectionDAG &DAG) const { 404 405 SDLoc DL(Op); 406 EVT VT = Op.getValueType(); 407 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 408 Op.getOperand(1)); 409 410 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); 411} 412 413/// Linear Interpolation 414/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) 415SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, 416 SelectionDAG &DAG) const { 417 SDLoc DL(Op); 418 EVT VT = Op.getValueType(); 419 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, 420 DAG.getConstantFP(1.0f, MVT::f32), 421 Op.getOperand(1)); 422 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, 423 Op.getOperand(3)); 424 return DAG.getNode(ISD::FADD, DL, VT, 425 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), 426 OneSubAC); 427} 428 429/// \brief Generate Min/Max node 430SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, 431 SelectionDAG &DAG) const { 432 SDLoc DL(Op); 433 EVT VT = Op.getValueType(); 434 435 SDValue LHS = Op.getOperand(0); 436 SDValue RHS = Op.getOperand(1); 437 SDValue True = Op.getOperand(2); 438 SDValue False = Op.getOperand(3); 439 SDValue CC = Op.getOperand(4); 440 441 if (VT != MVT::f32 || 442 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { 443 return SDValue(); 444 } 445 446 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 447 switch (CCOpcode) { 448 case ISD::SETOEQ: 449 case ISD::SETONE: 450 case ISD::SETUNE: 451 case ISD::SETNE: 452 case ISD::SETUEQ: 453 case ISD::SETEQ: 454 case ISD::SETFALSE: 455 case ISD::SETFALSE2: 456 case ISD::SETTRUE: 457 case ISD::SETTRUE2: 458 case ISD::SETUO: 459 case ISD::SETO: 460 assert(0 && "Operation should already be optimised !"); 461 case ISD::SETULE: 462 case ISD::SETULT: 463 case ISD::SETOLE: 464 case ISD::SETOLT: 465 case ISD::SETLE: 466 case ISD::SETLT: { 467 if (LHS == True) 468 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 469 else 470 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 471 } 472 case ISD::SETGT: 473 case ISD::SETGE: 474 case ISD::SETUGE: 475 case ISD::SETOGE: 476 case ISD::SETUGT: 477 case ISD::SETOGT: { 478 if (LHS == True) 479 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 480 else 481 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 482 } 483 case ISD::SETCC_INVALID: 484 assert(0 && "Invalid setcc condcode !"); 485 } 486 return Op; 487} 488 489SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op, 490 SelectionDAG &DAG) const { 491 LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); 492 EVT MemEltVT = Load->getMemoryVT().getVectorElementType(); 493 EVT EltVT = Op.getValueType().getVectorElementType(); 494 EVT PtrVT = Load->getBasePtr().getValueType(); 495 unsigned NumElts = Load->getMemoryVT().getVectorNumElements(); 496 SmallVector<SDValue, 8> Loads; 497 SDLoc SL(Op); 498 499 for (unsigned i = 0, e = NumElts; i != e; ++i) { 500 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(), 501 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT)); 502 Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT, 503 Load->getChain(), Ptr, 504 MachinePointerInfo(Load->getMemOperand()->getValue()), 505 MemEltVT, Load->isVolatile(), Load->isNonTemporal(), 506 Load->getAlignment())); 507 } 508 return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0], 509 Loads.size()); 510} 511 512SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op, 513 SelectionDAG &DAG) const { 514 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op); 515 EVT MemVT = Store->getMemoryVT(); 516 unsigned MemBits = MemVT.getSizeInBits(); 517 518 // Byte stores are really expensive, so if possible, try to pack 519 // 32-bit vector truncatating store into an i32 store. 520 // XXX: We could also handle optimize other vector bitwidths 521 if (!MemVT.isVector() || MemBits > 32) { 522 return SDValue(); 523 } 524 525 SDLoc DL(Op); 526 const SDValue &Value = Store->getValue(); 527 EVT VT = Value.getValueType(); 528 const SDValue &Ptr = Store->getBasePtr(); 529 EVT MemEltVT = MemVT.getVectorElementType(); 530 unsigned MemEltBits = MemEltVT.getSizeInBits(); 531 unsigned MemNumElements = MemVT.getVectorNumElements(); 532 EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); 533 SDValue Mask; 534 switch(MemEltBits) { 535 case 8: 536 Mask = DAG.getConstant(0xFF, PackedVT); 537 break; 538 case 16: 539 Mask = DAG.getConstant(0xFFFF, PackedVT); 540 break; 541 default: 542 llvm_unreachable("Cannot lower this vector store"); 543 } 544 SDValue PackedValue; 545 for (unsigned i = 0; i < MemNumElements; ++i) { 546 EVT ElemVT = VT.getVectorElementType(); 547 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, 548 DAG.getConstant(i, MVT::i32)); 549 Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); 550 Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); 551 SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); 552 Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); 553 if (i == 0) { 554 PackedValue = Elt; 555 } else { 556 PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); 557 } 558 } 559 return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, 560 MachinePointerInfo(Store->getMemOperand()->getValue()), 561 Store->isVolatile(), Store->isNonTemporal(), 562 Store->getAlignment()); 563} 564 565SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, 566 SelectionDAG &DAG) const { 567 StoreSDNode *Store = cast<StoreSDNode>(Op); 568 EVT MemEltVT = Store->getMemoryVT().getVectorElementType(); 569 EVT EltVT = Store->getValue().getValueType().getVectorElementType(); 570 EVT PtrVT = Store->getBasePtr().getValueType(); 571 unsigned NumElts = Store->getMemoryVT().getVectorNumElements(); 572 SDLoc SL(Op); 573 574 SmallVector<SDValue, 8> Chains; 575 576 for (unsigned i = 0, e = NumElts; i != e; ++i) { 577 SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, 578 Store->getValue(), DAG.getConstant(i, MVT::i32)); 579 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, 580 Store->getBasePtr(), 581 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), 582 PtrVT)); 583 Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr, 584 MachinePointerInfo(Store->getMemOperand()->getValue()), 585 MemEltVT, Store->isVolatile(), Store->isNonTemporal(), 586 Store->getAlignment())); 587 } 588 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts); 589} 590 591SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 592 SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG); 593 if (Result.getNode()) { 594 return Result; 595 } 596 597 StoreSDNode *Store = cast<StoreSDNode>(Op); 598 if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || 599 Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && 600 Store->getValue().getValueType().isVector()) { 601 return SplitVectorStore(Op, DAG); 602 } 603 return SDValue(); 604} 605 606SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 607 SelectionDAG &DAG) const { 608 SDLoc DL(Op); 609 EVT VT = Op.getValueType(); 610 611 SDValue Num = Op.getOperand(0); 612 SDValue Den = Op.getOperand(1); 613 614 SmallVector<SDValue, 8> Results; 615 616 // RCP = URECIP(Den) = 2^32 / Den + e 617 // e is rounding error. 618 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); 619 620 // RCP_LO = umulo(RCP, Den) */ 621 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); 622 623 // RCP_HI = mulhu (RCP, Den) */ 624 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); 625 626 // NEG_RCP_LO = -RCP_LO 627 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 628 RCP_LO); 629 630 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) 631 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 632 NEG_RCP_LO, RCP_LO, 633 ISD::SETEQ); 634 // Calculate the rounding error from the URECIP instruction 635 // E = mulhu(ABS_RCP_LO, RCP) 636 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); 637 638 // RCP_A_E = RCP + E 639 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); 640 641 // RCP_S_E = RCP - E 642 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); 643 644 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) 645 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 646 RCP_A_E, RCP_S_E, 647 ISD::SETEQ); 648 // Quotient = mulhu(Tmp0, Num) 649 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); 650 651 // Num_S_Remainder = Quotient * Den 652 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); 653 654 // Remainder = Num - Num_S_Remainder 655 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); 656 657 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) 658 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, 659 DAG.getConstant(-1, VT), 660 DAG.getConstant(0, VT), 661 ISD::SETUGE); 662 // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0) 663 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num, 664 Num_S_Remainder, 665 DAG.getConstant(-1, VT), 666 DAG.getConstant(0, VT), 667 ISD::SETUGE); 668 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 669 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 670 Remainder_GE_Zero); 671 672 // Calculate Division result: 673 674 // Quotient_A_One = Quotient + 1 675 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 676 DAG.getConstant(1, VT)); 677 678 // Quotient_S_One = Quotient - 1 679 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 680 DAG.getConstant(1, VT)); 681 682 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) 683 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 684 Quotient, Quotient_A_One, ISD::SETEQ); 685 686 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 687 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 688 Quotient_S_One, Div, ISD::SETEQ); 689 690 // Calculate Rem result: 691 692 // Remainder_S_Den = Remainder - Den 693 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 694 695 // Remainder_A_Den = Remainder + Den 696 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 697 698 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 699 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 700 Remainder, Remainder_S_Den, ISD::SETEQ); 701 702 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) 703 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 704 Remainder_A_Den, Rem, ISD::SETEQ); 705 SDValue Ops[2]; 706 Ops[0] = Div; 707 Ops[1] = Rem; 708 return DAG.getMergeValues(Ops, 2, DL); 709} 710 711SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, 712 SelectionDAG &DAG) const { 713 SDValue S0 = Op.getOperand(0); 714 SDLoc DL(Op); 715 if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64) 716 return SDValue(); 717 718 // f32 uint_to_fp i64 719 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, 720 DAG.getConstant(0, MVT::i32)); 721 SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo); 722 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, 723 DAG.getConstant(1, MVT::i32)); 724 SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi); 725 FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi, 726 DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32 727 return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); 728 729} 730 731//===----------------------------------------------------------------------===// 732// Helper functions 733//===----------------------------------------------------------------------===// 734 735void AMDGPUTargetLowering::getOriginalFunctionArgs( 736 SelectionDAG &DAG, 737 const Function *F, 738 const SmallVectorImpl<ISD::InputArg> &Ins, 739 SmallVectorImpl<ISD::InputArg> &OrigIns) const { 740 741 for (unsigned i = 0, e = Ins.size(); i < e; ++i) { 742 if (Ins[i].ArgVT == Ins[i].VT) { 743 OrigIns.push_back(Ins[i]); 744 continue; 745 } 746 747 EVT VT; 748 if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) { 749 // Vector has been split into scalars. 750 VT = Ins[i].ArgVT.getVectorElementType(); 751 } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() && 752 Ins[i].ArgVT.getVectorElementType() != 753 Ins[i].VT.getVectorElementType()) { 754 // Vector elements have been promoted 755 VT = Ins[i].ArgVT; 756 } else { 757 // Vector has been spilt into smaller vectors. 758 VT = Ins[i].VT; 759 } 760 761 ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used, 762 Ins[i].OrigArgIndex, Ins[i].PartOffset); 763 OrigIns.push_back(Arg); 764 } 765} 766 767bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { 768 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 769 return CFP->isExactlyValue(1.0); 770 } 771 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 772 return C->isAllOnesValue(); 773 } 774 return false; 775} 776 777bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { 778 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 779 return CFP->getValueAPF().isZero(); 780 } 781 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 782 return C->isNullValue(); 783 } 784 return false; 785} 786 787SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 788 const TargetRegisterClass *RC, 789 unsigned Reg, EVT VT) const { 790 MachineFunction &MF = DAG.getMachineFunction(); 791 MachineRegisterInfo &MRI = MF.getRegInfo(); 792 unsigned VirtualRegister; 793 if (!MRI.isLiveIn(Reg)) { 794 VirtualRegister = MRI.createVirtualRegister(RC); 795 MRI.addLiveIn(Reg, VirtualRegister); 796 } else { 797 VirtualRegister = MRI.getLiveInVirtReg(Reg); 798 } 799 return DAG.getRegister(VirtualRegister, VT); 800} 801 802#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 803 804const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 805 switch (Opcode) { 806 default: return 0; 807 // AMDIL DAG nodes 808 NODE_NAME_CASE(CALL); 809 NODE_NAME_CASE(UMUL); 810 NODE_NAME_CASE(DIV_INF); 811 NODE_NAME_CASE(RET_FLAG); 812 NODE_NAME_CASE(BRANCH_COND); 813 814 // AMDGPU DAG nodes 815 NODE_NAME_CASE(DWORDADDR) 816 NODE_NAME_CASE(FRACT) 817 NODE_NAME_CASE(FMAX) 818 NODE_NAME_CASE(SMAX) 819 NODE_NAME_CASE(UMAX) 820 NODE_NAME_CASE(FMIN) 821 NODE_NAME_CASE(SMIN) 822 NODE_NAME_CASE(UMIN) 823 NODE_NAME_CASE(URECIP) 824 NODE_NAME_CASE(EXPORT) 825 NODE_NAME_CASE(CONST_ADDRESS) 826 NODE_NAME_CASE(REGISTER_LOAD) 827 NODE_NAME_CASE(REGISTER_STORE) 828 NODE_NAME_CASE(LOAD_CONSTANT) 829 NODE_NAME_CASE(LOAD_INPUT) 830 NODE_NAME_CASE(SAMPLE) 831 NODE_NAME_CASE(SAMPLEB) 832 NODE_NAME_CASE(SAMPLED) 833 NODE_NAME_CASE(SAMPLEL) 834 NODE_NAME_CASE(STORE_MSKOR) 835 NODE_NAME_CASE(TBUFFER_STORE_FORMAT) 836 } 837} 838