//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>

namespace llvm {

class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

namespace AMDGPU {

/// One entry of the buffer-format lookup table (see the
/// getGcnBufferFormatInfo query functions below).
struct GcnBufferFormatInfo {
  unsigned Format;        // Packed buffer format value.
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // NUM_FORMAT field value.
  unsigned DataFormat;    // DATA_FORMAT field value.
};

// Pull in tablegen-generated declarations for the MIMG searchable tables.
#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns True if given subtarget \p STI supports code object version 3,
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
///
/// Convenience overload of the three-flag version above: use it when XNACK
/// usage should follow the subtarget's own setting rather than be passed
/// explicitly.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
151unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, 152 Optional<bool> EnableWavefrontSize32 = None); 153 154/// \returns VGPR encoding granularity for given subtarget \p STI. 155/// 156/// For subtargets which support it, \p EnableWavefrontSize32 should match 157/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. 158unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, 159 Optional<bool> EnableWavefrontSize32 = None); 160 161/// \returns Total number of VGPRs for given subtarget \p STI. 162unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI); 163 164/// \returns Addressable number of VGPRs for given subtarget \p STI. 165unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI); 166 167/// \returns Minimum number of VGPRs that meets given number of waves per 168/// execution unit requirement for given subtarget \p STI. 169unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); 170 171/// \returns Maximum number of VGPRs that meets given number of waves per 172/// execution unit requirement for given subtarget \p STI. 173unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); 174 175/// \returns Number of VGPR blocks needed for given subtarget \p STI when 176/// \p NumVGPRs are used. 177/// 178/// For subtargets which support it, \p EnableWavefrontSize32 should match the 179/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. 
180unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, 181 Optional<bool> EnableWavefrontSize32 = None); 182 183} // end namespace IsaInfo 184 185LLVM_READONLY 186int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); 187 188LLVM_READONLY 189int getSOPPWithRelaxation(uint16_t Opcode); 190 191struct MIMGBaseOpcodeInfo { 192 MIMGBaseOpcode BaseOpcode; 193 bool Store; 194 bool Atomic; 195 bool AtomicX2; 196 bool Sampler; 197 bool Gather4; 198 199 uint8_t NumExtraArgs; 200 bool Gradients; 201 bool G16; 202 bool Coordinates; 203 bool LodOrClampOrMip; 204 bool HasD16; 205}; 206 207LLVM_READONLY 208const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); 209 210struct MIMGDimInfo { 211 MIMGDim Dim; 212 uint8_t NumCoords; 213 uint8_t NumGradients; 214 bool DA; 215 uint8_t Encoding; 216 const char *AsmSuffix; 217}; 218 219LLVM_READONLY 220const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum); 221 222LLVM_READONLY 223const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc); 224 225LLVM_READONLY 226const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix); 227 228struct MIMGLZMappingInfo { 229 MIMGBaseOpcode L; 230 MIMGBaseOpcode LZ; 231}; 232 233struct MIMGMIPMappingInfo { 234 MIMGBaseOpcode MIP; 235 MIMGBaseOpcode NONMIP; 236}; 237 238struct MIMGG16MappingInfo { 239 MIMGBaseOpcode G; 240 MIMGBaseOpcode G16; 241}; 242 243LLVM_READONLY 244const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); 245 246LLVM_READONLY 247const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP); 248 249LLVM_READONLY 250const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G); 251 252LLVM_READONLY 253int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, 254 unsigned VDataDwords, unsigned VAddrDwords); 255 256LLVM_READONLY 257int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels); 258 259struct MIMGInfo { 260 uint16_t Opcode; 261 uint16_t BaseOpcode; 262 uint8_t MIMGEncoding; 263 uint8_t VDataDwords; 264 uint8_t VAddrDwords; 
265}; 266 267LLVM_READONLY 268const MIMGInfo *getMIMGInfo(unsigned Opc); 269 270LLVM_READONLY 271int getMTBUFBaseOpcode(unsigned Opc); 272 273LLVM_READONLY 274int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements); 275 276LLVM_READONLY 277int getMTBUFElements(unsigned Opc); 278 279LLVM_READONLY 280bool getMTBUFHasVAddr(unsigned Opc); 281 282LLVM_READONLY 283bool getMTBUFHasSrsrc(unsigned Opc); 284 285LLVM_READONLY 286bool getMTBUFHasSoffset(unsigned Opc); 287 288LLVM_READONLY 289int getMUBUFBaseOpcode(unsigned Opc); 290 291LLVM_READONLY 292int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements); 293 294LLVM_READONLY 295int getMUBUFElements(unsigned Opc); 296 297LLVM_READONLY 298bool getMUBUFHasVAddr(unsigned Opc); 299 300LLVM_READONLY 301bool getMUBUFHasSrsrc(unsigned Opc); 302 303LLVM_READONLY 304bool getMUBUFHasSoffset(unsigned Opc); 305 306LLVM_READONLY 307bool getSMEMIsBuffer(unsigned Opc); 308 309LLVM_READONLY 310const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 311 uint8_t NumComponents, 312 uint8_t NumFormat, 313 const MCSubtargetInfo &STI); 314LLVM_READONLY 315const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 316 const MCSubtargetInfo &STI); 317 318LLVM_READONLY 319int getMCOpcode(uint16_t Opcode, unsigned Gen); 320 321void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 322 const MCSubtargetInfo *STI); 323 324amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( 325 const MCSubtargetInfo *STI); 326 327bool isGroupSegment(const GlobalValue *GV); 328bool isGlobalSegment(const GlobalValue *GV); 329bool isReadOnlySegment(const GlobalValue *GV); 330 331/// \returns True if constants should be emitted to .text section for given 332/// target triple \p TT, false otherwise. 333bool shouldEmitConstantsToTextSection(const Triple &TT); 334 335/// \returns Integer value requested using \p F's \p Name attribute. 336/// 337/// \returns \p Default if attribute is not present. 
338/// 339/// \returns \p Default and emits error if requested value cannot be converted 340/// to integer. 341int getIntegerAttribute(const Function &F, StringRef Name, int Default); 342 343/// \returns A pair of integer values requested using \p F's \p Name attribute 344/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired 345/// is false). 346/// 347/// \returns \p Default if attribute is not present. 348/// 349/// \returns \p Default and emits error if one of the requested values cannot be 350/// converted to integer, or \p OnlyFirstRequired is false and "second" value is 351/// not present. 352std::pair<int, int> getIntegerPairAttribute(const Function &F, 353 StringRef Name, 354 std::pair<int, int> Default, 355 bool OnlyFirstRequired = false); 356 357/// Represents the counter values to wait for in an s_waitcnt instruction. 358/// 359/// Large values (including the maximum possible integer) can be used to 360/// represent "don't care" waits. 361struct Waitcnt { 362 unsigned VmCnt = ~0u; 363 unsigned ExpCnt = ~0u; 364 unsigned LgkmCnt = ~0u; 365 unsigned VsCnt = ~0u; 366 367 Waitcnt() {} 368 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) 369 : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {} 370 371 static Waitcnt allZero(const IsaVersion &Version) { 372 return Waitcnt(0, 0, 0, Version.Major >= 10 ? 
0 : ~0u); 373 } 374 static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); } 375 376 bool hasWait() const { 377 return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u; 378 } 379 380 bool dominates(const Waitcnt &Other) const { 381 return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt && 382 LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt; 383 } 384 385 Waitcnt combined(const Waitcnt &Other) const { 386 return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt), 387 std::min(LgkmCnt, Other.LgkmCnt), 388 std::min(VsCnt, Other.VsCnt)); 389 } 390}; 391 392/// \returns Vmcnt bit mask for given isa \p Version. 393unsigned getVmcntBitMask(const IsaVersion &Version); 394 395/// \returns Expcnt bit mask for given isa \p Version. 396unsigned getExpcntBitMask(const IsaVersion &Version); 397 398/// \returns Lgkmcnt bit mask for given isa \p Version. 399unsigned getLgkmcntBitMask(const IsaVersion &Version); 400 401/// \returns Waitcnt bit mask for given isa \p Version. 402unsigned getWaitcntBitMask(const IsaVersion &Version); 403 404/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. 405unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); 406 407/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. 408unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); 409 410/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. 411unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); 412 413/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa 414/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and 415/// \p Lgkmcnt respectively. 
416/// 417/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: 418/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only) 419/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only) 420/// \p Expcnt = \p Waitcnt[6:4] 421/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10 only) 422/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10+ only) 423void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, 424 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); 425 426Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded); 427 428/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. 429unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, 430 unsigned Vmcnt); 431 432/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. 433unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, 434 unsigned Expcnt); 435 436/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. 437unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, 438 unsigned Lgkmcnt); 439 440/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa 441/// \p Version. 442/// 443/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: 444/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only) 445/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only) 446/// Waitcnt[6:4] = \p Expcnt 447/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10 only) 448/// Waitcnt[13:8] = \p Lgkmcnt (gfx10+ only) 449/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only) 450/// 451/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given 452/// isa \p Version. 
453unsigned encodeWaitcnt(const IsaVersion &Version, 454 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); 455 456unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded); 457 458namespace Hwreg { 459 460LLVM_READONLY 461int64_t getHwregId(const StringRef Name); 462 463LLVM_READNONE 464bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI); 465 466LLVM_READNONE 467bool isValidHwreg(int64_t Id); 468 469LLVM_READNONE 470bool isValidHwregOffset(int64_t Offset); 471 472LLVM_READNONE 473bool isValidHwregWidth(int64_t Width); 474 475LLVM_READNONE 476uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width); 477 478LLVM_READNONE 479StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI); 480 481void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width); 482 483} // namespace Hwreg 484 485namespace SendMsg { 486 487LLVM_READONLY 488int64_t getMsgId(const StringRef Name); 489 490LLVM_READONLY 491int64_t getMsgOpId(int64_t MsgId, const StringRef Name); 492 493LLVM_READNONE 494StringRef getMsgName(int64_t MsgId); 495 496LLVM_READNONE 497StringRef getMsgOpName(int64_t MsgId, int64_t OpId); 498 499LLVM_READNONE 500bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true); 501 502LLVM_READNONE 503bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true); 504 505LLVM_READNONE 506bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true); 507 508LLVM_READNONE 509bool msgRequiresOp(int64_t MsgId); 510 511LLVM_READNONE 512bool msgSupportsStream(int64_t MsgId, int64_t OpId); 513 514void decodeMsg(unsigned Val, 515 uint16_t &MsgId, 516 uint16_t &OpId, 517 uint16_t &StreamId); 518 519LLVM_READNONE 520uint64_t encodeMsg(uint64_t MsgId, 521 uint64_t OpId, 522 uint64_t StreamId); 523 524} // namespace SendMsg 525 526 527unsigned getInitialPSInputAddr(const Function &F); 528 529LLVM_READNONE 530bool isShader(CallingConv::ID CC); 531 532LLVM_READNONE 533bool 
isCompute(CallingConv::ID CC); 534 535LLVM_READNONE 536bool isEntryFunctionCC(CallingConv::ID CC); 537 538// FIXME: Remove this when calling conventions cleaned up 539LLVM_READNONE 540inline bool isKernel(CallingConv::ID CC) { 541 switch (CC) { 542 case CallingConv::AMDGPU_KERNEL: 543 case CallingConv::SPIR_KERNEL: 544 return true; 545 default: 546 return false; 547 } 548} 549 550bool hasXNACK(const MCSubtargetInfo &STI); 551bool hasSRAMECC(const MCSubtargetInfo &STI); 552bool hasMIMG_R128(const MCSubtargetInfo &STI); 553bool hasGFX10A16(const MCSubtargetInfo &STI); 554bool hasG16(const MCSubtargetInfo &STI); 555bool hasPackedD16(const MCSubtargetInfo &STI); 556 557bool isSI(const MCSubtargetInfo &STI); 558bool isCI(const MCSubtargetInfo &STI); 559bool isVI(const MCSubtargetInfo &STI); 560bool isGFX9(const MCSubtargetInfo &STI); 561bool isGFX10(const MCSubtargetInfo &STI); 562bool isGCN3Encoding(const MCSubtargetInfo &STI); 563bool isGFX10_BEncoding(const MCSubtargetInfo &STI); 564bool hasGFX10_3Insts(const MCSubtargetInfo &STI); 565 566/// Is Reg - scalar register 567bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); 568 569/// Is there any intersection between registers 570bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI); 571 572/// If \p Reg is a pseudo reg, return the correct hardware register given 573/// \p STI otherwise return \p Reg. 574unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); 575 576/// Convert hardware register \p Reg to a pseudo register 577LLVM_READNONE 578unsigned mc2PseudoReg(unsigned Reg); 579 580/// Can this operand also contain immediate values? 581bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); 582 583/// Is this floating-point operand? 584bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); 585 586/// Does this opearnd support only inlinable literals? 
587bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); 588 589/// Get the size in bits of a register from the register class \p RC. 590unsigned getRegBitWidth(unsigned RCID); 591 592/// Get the size in bits of a register from the register class \p RC. 593unsigned getRegBitWidth(const MCRegisterClass &RC); 594 595/// Get size of register operand 596unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 597 unsigned OpNo); 598 599LLVM_READNONE 600inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { 601 switch (OpInfo.OperandType) { 602 case AMDGPU::OPERAND_REG_IMM_INT32: 603 case AMDGPU::OPERAND_REG_IMM_FP32: 604 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 605 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 606 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 607 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 608 return 4; 609 610 case AMDGPU::OPERAND_REG_IMM_INT64: 611 case AMDGPU::OPERAND_REG_IMM_FP64: 612 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 613 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 614 return 8; 615 616 case AMDGPU::OPERAND_REG_IMM_INT16: 617 case AMDGPU::OPERAND_REG_IMM_FP16: 618 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 619 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 620 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 621 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 622 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 623 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 624 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 625 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 626 case AMDGPU::OPERAND_REG_IMM_V2INT16: 627 case AMDGPU::OPERAND_REG_IMM_V2FP16: 628 return 2; 629 630 default: 631 llvm_unreachable("unhandled operand type"); 632 } 633} 634 635LLVM_READNONE 636inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) { 637 return getOperandSize(Desc.OpInfo[OpNo]); 638} 639 640/// Is this literal inlinable, and not one of the values intended for floating 641/// point values. 
642LLVM_READNONE 643inline bool isInlinableIntLiteral(int64_t Literal) { 644 return Literal >= -16 && Literal <= 64; 645} 646 647/// Is this literal inlinable 648LLVM_READNONE 649bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi); 650 651LLVM_READNONE 652bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi); 653 654LLVM_READNONE 655bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi); 656 657LLVM_READNONE 658bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); 659 660LLVM_READNONE 661bool isInlinableIntLiteralV216(int32_t Literal); 662 663LLVM_READNONE 664bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi); 665 666bool isArgPassedInSGPR(const Argument *Arg); 667 668LLVM_READONLY 669bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, 670 int64_t EncodedOffset); 671 672LLVM_READONLY 673bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, 674 int64_t EncodedOffset, 675 bool IsBuffer); 676 677/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate 678/// offsets. 679uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); 680 681/// \returns The encoding that will be used for \p ByteOffset in the 682/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10 683/// S_LOAD instructions have a signed offset, on other subtargets it is 684/// unsigned. S_BUFFER has an unsigned offset for all subtargets. 685Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, 686 int64_t ByteOffset, bool IsBuffer); 687 688/// \return The encoding that can be used for a 32-bit literal offset in an SMRD 689/// instruction. This is only useful on CI.s 690Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, 691 int64_t ByteOffset); 692 693/// \returns true if this offset is small enough to fit in the SMRD 694/// offset field. \p ByteOffset should be the offset in bytes and 695/// not the encoded offset. 
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

// Splits a MUBUF immediate \p Imm into an SOffset and an ImmOffset component.
// NOTE(review): exact splitting rules live in the .cpp; presumably returns
// false when the value cannot be split — confirm against the definition.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  bool FP32InputDenormals : 1;
  bool FP32OutputDenormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16InputDenormals : 1;
  bool FP64FP16OutputDenormals : 1;

  // Defaults: IEEE on, DX10 clamping on, and no denormal flushing anywhere.
  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32InputDenormals(true),
    FP32OutputDenormals(true),
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    // IEEE mode defaults to on only for compute calling conventions; all
    // other fields keep the constructor defaults.
    const bool IsCompute = AMDGPU::isCompute(CC);

    SIModeRegisterDefaults Mode;
    Mode.IEEE = IsCompute;
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }

  /// \returns True if both input and output f32 denormals are preserved.
  bool allFP32Denormals() const {
    return FP32InputDenormals && FP32OutputDenormals;
  }

  /// \returns True if both input and output f64/f16 denormals are preserved.
  bool allFP64FP16Denormals() const {
    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for the
  /// FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    // Order matters: check the both-preserved case first, then each one-sided
    // case, falling through to flush-everything.
    if (FP32InputDenormals && FP32OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP32InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP32OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for the
  /// FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP64FP16InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// \returns True when a single mode flag is inline-compatible: either both
  /// sides agree, or the flag is enabled in the callee while disabled in the
  /// caller.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
  // be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    // IEEE and DX10Clamp must match exactly between caller and callee.
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals enabled into denormals flushed functions.
    return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
           oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
           oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
           oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
  }
};

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H