//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>

namespace llvm {

class Argument;
class AMDGPUSubtarget;
class FeatureBitset;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCContext;
class MCRegisterClass;
class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
class MachineMemOperand;
class Triple;

namespace AMDGPU {

/// One entry of the searchable gcn buffer-format table, relating a combined
/// format value to its component layout and the split data/numeric fields.
struct GcnBufferFormatInfo {
  unsigned Format;        // Combined buffer-format value.
  unsigned BitsPerComp;   // Bit width of a single component.
  unsigned NumComponents; // Number of components per element.
  unsigned NumFormat;     // Numeric-format field.
  unsigned DataFormat;    // Data-format field.
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns True if given subtarget \p STI supports code object version 3,
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
193unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, 194 Optional<bool> EnableWavefrontSize32 = None); 195 196} // end namespace IsaInfo 197 198LLVM_READONLY 199int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); 200 201LLVM_READONLY 202int getSOPPWithRelaxation(uint16_t Opcode); 203 204struct MIMGBaseOpcodeInfo { 205 MIMGBaseOpcode BaseOpcode; 206 bool Store; 207 bool Atomic; 208 bool AtomicX2; 209 bool Sampler; 210 bool Gather4; 211 212 uint8_t NumExtraArgs; 213 bool Gradients; 214 bool Coordinates; 215 bool LodOrClampOrMip; 216 bool HasD16; 217}; 218 219LLVM_READONLY 220const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); 221 222struct MIMGDimInfo { 223 MIMGDim Dim; 224 uint8_t NumCoords; 225 uint8_t NumGradients; 226 bool DA; 227 uint8_t Encoding; 228 const char *AsmSuffix; 229}; 230 231LLVM_READONLY 232const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum); 233 234LLVM_READONLY 235const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc); 236 237LLVM_READONLY 238const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix); 239 240struct MIMGLZMappingInfo { 241 MIMGBaseOpcode L; 242 MIMGBaseOpcode LZ; 243}; 244 245struct MIMGMIPMappingInfo { 246 MIMGBaseOpcode MIP; 247 MIMGBaseOpcode NONMIP; 248}; 249 250LLVM_READONLY 251const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); 252 253LLVM_READONLY 254const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L); 255 256LLVM_READONLY 257int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, 258 unsigned VDataDwords, unsigned VAddrDwords); 259 260LLVM_READONLY 261int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels); 262 263struct MIMGInfo { 264 uint16_t Opcode; 265 uint16_t BaseOpcode; 266 uint8_t MIMGEncoding; 267 uint8_t VDataDwords; 268 uint8_t VAddrDwords; 269}; 270 271LLVM_READONLY 272const MIMGInfo *getMIMGInfo(unsigned Opc); 273 274LLVM_READONLY 275int getMTBUFBaseOpcode(unsigned Opc); 276 277LLVM_READONLY 278int getMTBUFOpcode(unsigned 
BaseOpc, unsigned Elements); 279 280LLVM_READONLY 281int getMTBUFElements(unsigned Opc); 282 283LLVM_READONLY 284bool getMTBUFHasVAddr(unsigned Opc); 285 286LLVM_READONLY 287bool getMTBUFHasSrsrc(unsigned Opc); 288 289LLVM_READONLY 290bool getMTBUFHasSoffset(unsigned Opc); 291 292LLVM_READONLY 293int getMUBUFBaseOpcode(unsigned Opc); 294 295LLVM_READONLY 296int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements); 297 298LLVM_READONLY 299int getMUBUFElements(unsigned Opc); 300 301LLVM_READONLY 302bool getMUBUFHasVAddr(unsigned Opc); 303 304LLVM_READONLY 305bool getMUBUFHasSrsrc(unsigned Opc); 306 307LLVM_READONLY 308bool getMUBUFHasSoffset(unsigned Opc); 309 310LLVM_READONLY 311const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 312 uint8_t NumComponents, 313 uint8_t NumFormat, 314 const MCSubtargetInfo &STI); 315LLVM_READONLY 316const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 317 const MCSubtargetInfo &STI); 318 319LLVM_READONLY 320int getMCOpcode(uint16_t Opcode, unsigned Gen); 321 322void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 323 const MCSubtargetInfo *STI); 324 325amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( 326 const MCSubtargetInfo *STI); 327 328bool isGroupSegment(const GlobalValue *GV); 329bool isGlobalSegment(const GlobalValue *GV); 330bool isReadOnlySegment(const GlobalValue *GV); 331 332/// \returns True if constants should be emitted to .text section for given 333/// target triple \p TT, false otherwise. 334bool shouldEmitConstantsToTextSection(const Triple &TT); 335 336/// \returns Integer value requested using \p F's \p Name attribute. 337/// 338/// \returns \p Default if attribute is not present. 339/// 340/// \returns \p Default and emits error if requested value cannot be converted 341/// to integer. 
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;   // ~0u means "don't care" for all four counters.
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns a Waitcnt that waits all counters down to zero. VsCnt is only
  /// waited on for gfx10+ (Version.Major >= 10); older ISAs leave it as
  /// "don't care".
  static Waitcnt allZero(const IsaVersion &Version) {
    return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
  }
  /// \returns a Waitcnt that waits Vm/Exp/Lgkm down to zero but leaves VsCnt
  /// as "don't care".
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  /// \returns true if at least one counter holds a real wait value rather
  /// than the ~0u "don't care" sentinel.
  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  /// \returns true if every wait in this object is at least as strict as the
  /// corresponding wait in \p Other (a smaller count is a stricter wait).
  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  /// \returns the component-wise minimum (i.e. strictest combination) of this
  /// and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

/// Convenience overload: decodes \p Encoded into a Waitcnt struct for given
/// isa \p Version.
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

/// Convenience overload: encodes all counters of \p Decoded for given isa
/// \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

// Helpers for the s_sethwreg/s_gethwreg hardware-register operand.
namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

// Helpers for the s_sendmsg message operand.
namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

/// Splits encoded message \p Val into its \p MsgId, \p OpId and \p StreamId
/// fields.
void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

/// Packs \p MsgId, \p OpId and \p StreamId into a single encoded message
/// value (inverse of decodeMsg).
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg

unsigned getInitialPSInputAddr(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions cleaned up
/// \returns true for the AMDGPU and SPIR kernel calling conventions.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);

/// Is \p Reg a scalar register?
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// Is there any intersection between registers \p Reg0 and \p Reg1?
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get size of register operand
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

/// \returns the size in bytes (4, 8 or 2) of the literal slot implied by
/// \p OpInfo's operand type.
LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  // 32-bit int/fp source operands.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return 4;

  // 64-bit int/fp source operands.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return 8;

  // 16-bit and packed 2x16-bit source operands.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

/// Convenience overload: size in bytes of operand \p OpNo of \p Desc.
LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

/// \returns The encoding that will be used for \p ByteOffset in the SMRD
/// offset field.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align = 4);

/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  ///
  /// TODO: Split into separate input and output fields if necessary like the
  /// control bits really provide?
  bool FP32Denormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16Denormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32Denormals(true),
    FP64FP16Denormals(true) {}

  // FIXME: Should not depend on the subtarget
  SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);

  /// \returns the mode defaults implied by calling convention \p CC alone:
  /// IEEE is on only for compute, DX10Clamp is always on.
  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    const bool IsCompute = AMDGPU::isCompute(CC);

    SIModeRegisterDefaults Mode;
    Mode.DX10Clamp = true;
    Mode.IEEE = IsCompute;
    Mode.FP32Denormals = false; // FIXME: Should be on by default.
    Mode.FP64FP16Denormals = true;
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32Denormals == Other.FP32Denormals &&
           FP64FP16Denormals == Other.FP64FP16Denormals;
  }

  /// \returns true if a flag is one-way compatible: the caller and callee
  /// agree, or it is enabled in the caller while disabled in the callee.
  /// (The original comment stated the direction backwards; the code checks
  /// \p CallerMode && !\p CalleeMode.)
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (CallerMode && !CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
  // be able to override.
  /// \returns true if \p CalleeMode may be inlined into this (caller) mode:
  /// DX10Clamp and IEEE must match exactly; each denormal flag must match or
  /// be enabled in the caller while disabled (flushed) in the callee.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Denormal flags need only be one-way compatible (see oneWayCompatible).
    return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
           oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
  }
};

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H