//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Command-line override for the default AMDHSA code object version.  A module
// flag or an asm directive still takes priority when present (see
// getAMDHSACodeObjectVersion below).
static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
/// NOTE(review): assumes Width < 32 and Shift + Width <= 32, otherwise the
/// 32-bit shift overflows — all visible callers pass small field widths, but
/// confirm for any new caller.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  // Field mask for the target bit range (inlined from getBitMask()).
  const unsigned FieldMask = ((1 << Width) - 1) << Shift;
  // Clear the field in Dst, then merge in the shifted Src bits.
  return (Dst & ~FieldMask) | ((Src << Shift) & FieldMask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  const unsigned FieldMask = ((1 << Width) - 1) << Shift;
  return (Src & FieldMask) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 10;
  return 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 6;
  return 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 0;
  return 4;
}

/// \returns Expcnt bit width (the same on every generation).
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 4;
  return 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 4;
}

/// \returns Vmcnt bit shift (higher bits; the same on every generation).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits); only gfx9/gfx10 have the extra
/// high field.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  if (VersionMajor == 9 || VersionMajor == 10)
    return 2;
  return 0;
}

/// \returns Loadcnt bit width (gfx12+ only).
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}

/// \returns Samplecnt bit width (gfx12+ only).
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}

/// \returns Bvhcnt bit width (gfx12+ only).
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 3;
  return 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  // A width of 0 indicates the field is absent for this generation.
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

// Fixed (version-independent) field geometry used below.

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

/// \returns True if \p STI is AMDHSA.
161bool isHsaAbi(const MCSubtargetInfo &STI) { 162 return STI.getTargetTriple().getOS() == Triple::AMDHSA; 163} 164 165unsigned getAMDHSACodeObjectVersion(const Module &M) { 166 if (auto Ver = mdconst::extract_or_null<ConstantInt>( 167 M.getModuleFlag("amdgpu_code_object_version"))) { 168 return (unsigned)Ver->getZExtValue() / 100; 169 } 170 171 return getDefaultAMDHSACodeObjectVersion(); 172} 173 174unsigned getDefaultAMDHSACodeObjectVersion() { 175 return DefaultAMDHSACodeObjectVersion; 176} 177 178uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) { 179 if (T.getOS() != Triple::AMDHSA) 180 return 0; 181 182 switch (CodeObjectVersion) { 183 case 4: 184 return ELF::ELFABIVERSION_AMDGPU_HSA_V4; 185 case 5: 186 return ELF::ELFABIVERSION_AMDGPU_HSA_V5; 187 default: 188 report_fatal_error("Unsupported AMDHSA Code Object Version " + 189 Twine(CodeObjectVersion)); 190 } 191} 192 193unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { 194 switch (CodeObjectVersion) { 195 case AMDHSA_COV4: 196 return 48; 197 case AMDHSA_COV5: 198 default: 199 return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET; 200 } 201} 202 203 204// FIXME: All such magic numbers about the ABI should be in a 205// central TD file. 
/// \returns Position of the hostcall pointer in the implicit kernel
/// arguments for the given code object version.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

/// \returns Position of the default-queue pointer in the implicit kernel
/// arguments for the given code object version.
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

/// \returns Position of the completion-action pointer in the implicit kernel
/// arguments for the given code object version.
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

/// \returns The MIMG opcode matching the given base opcode, encoding and
/// dword counts, or -1 if there is no such instruction.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

/// \returns Base opcode info for MIMG opcode \p Opc, or nullptr if \p Opc is
/// not a MIMG instruction.
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

/// \returns The same MIMG instruction as \p Opc but with \p NewChannels data
/// dwords, or -1 if no such variant exists.
/// NOTE(review): \p Opc must be a valid MIMG opcode — OrigInfo is dereferenced
/// without a null check.
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

/// \returns Number of address words needed by a MIMG instruction with the
/// given base opcode, dimension and A16/G16 configuration.
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  // With A16, two 16-bit components pack into one address word.
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}

// Row types for the searchable tables generated from tablegen below.

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

// Simple lookups into the generated MTBUF table.  Each returns a neutral
// value (-1, 0 or false) when \p Opc is not found in the table.

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

// Same pattern for the generated MUBUF table.

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

/// \returns True if \p Opc appears in either the VOPC64 DPP or DPP8 table.
bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

/// \returns The encoding family to use for VOPD lookups on \p ST; VOPD only
/// exists on GFX11/GFX12.
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

/// \returns Whether \p Opc can form the X and/or Y half of a VOPD pair.
CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  else
    return {false, false};
}

/// \returns The VOPD component opcode for \p Opc, or ~0u if \p Opc has none.
unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

// A VOPD instruction is identified by the presence of the src0X operand.
bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

/// \returns The 3-address form of WMMA opcode \p Opc, or ~0u if there is no
/// mapping.
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

/// \returns The 2-address form of WMMA opcode \p Opc, or ~0u if there is no
/// mapping.
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

/// \returns The VOPD opcode pairing components \p OpX and \p OpY for
/// \p EncodingFamily, or -1 if the pair is invalid.
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
  return Info ? Info->Opcode : -1;
}

/// \returns The base VOP opcodes of the X and Y components of VOPD opcode
/// \p VOPDOpcode.  \p VOPDOpcode must be a valid VOPD instruction.
std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

// Derive per-component operand properties from the component's MCInstrDesc.
ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  // Only SRC2 may be tied (to the destination, for accumulation).
  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;

  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  // Scan the operands (starting at SRC1) for a mandatory KIMM32 literal.
  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

// \returns the index of the first operand that violates the VOPD register
// bank constraints between the X and Y components, or an empty optional if
// the pairing is valid.  When \p SkipSrc is set, only destinations are
// checked.
std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  const unsigned CompOprNum =
      SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
    unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
    // A conflict exists only when both components actually use a VGPR for
    // this operand and the VGPRs fall into the same bank.
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        ((OpXRegs[CompOprIdx] & BanksMasks) ==
         (OpYRegs[CompOprIdx] & BanksMasks)))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component.  If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand.  The callback must return 0
// if the operand is not a register or not a VGPR.
667InstInfo::RegIndices InstInfo::getRegIndices( 668 unsigned CompIdx, 669 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { 670 assert(CompIdx < COMPONENTS_NUM); 671 672 const auto &Comp = CompInfo[CompIdx]; 673 InstInfo::RegIndices RegIndices; 674 675 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands()); 676 677 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) { 678 unsigned CompSrcIdx = CompOprIdx - DST_NUM; 679 RegIndices[CompOprIdx] = 680 Comp.hasRegSrcOperand(CompSrcIdx) 681 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx)) 682 : 0; 683 } 684 return RegIndices; 685} 686 687} // namespace VOPD 688 689VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) { 690 return VOPD::InstInfo(OpX, OpY); 691} 692 693VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode, 694 const MCInstrInfo *InstrInfo) { 695 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode); 696 const auto &OpXDesc = InstrInfo->get(OpX); 697 const auto &OpYDesc = InstrInfo->get(OpY); 698 VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X); 699 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo); 700 return VOPD::InstInfo(OpXInfo, OpYInfo); 701} 702 703namespace IsaInfo { 704 705AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) 706 : STI(STI), XnackSetting(TargetIDSetting::Any), 707 SramEccSetting(TargetIDSetting::Any) { 708 if (!STI.getFeatureBits().test(FeatureSupportsXNACK)) 709 XnackSetting = TargetIDSetting::Unsupported; 710 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC)) 711 SramEccSetting = TargetIDSetting::Unsupported; 712} 713 714void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) { 715 // Check if xnack or sramecc is explicitly enabled or disabled. In the 716 // absence of the target features we assume we must generate code that can run 717 // in any environment. 
718 SubtargetFeatures Features(FS); 719 std::optional<bool> XnackRequested; 720 std::optional<bool> SramEccRequested; 721 722 for (const std::string &Feature : Features.getFeatures()) { 723 if (Feature == "+xnack") 724 XnackRequested = true; 725 else if (Feature == "-xnack") 726 XnackRequested = false; 727 else if (Feature == "+sramecc") 728 SramEccRequested = true; 729 else if (Feature == "-sramecc") 730 SramEccRequested = false; 731 } 732 733 bool XnackSupported = isXnackSupported(); 734 bool SramEccSupported = isSramEccSupported(); 735 736 if (XnackRequested) { 737 if (XnackSupported) { 738 XnackSetting = 739 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off; 740 } else { 741 // If a specific xnack setting was requested and this GPU does not support 742 // xnack emit a warning. Setting will remain set to "Unsupported". 743 if (*XnackRequested) { 744 errs() << "warning: xnack 'On' was requested for a processor that does " 745 "not support it!\n"; 746 } else { 747 errs() << "warning: xnack 'Off' was requested for a processor that " 748 "does not support it!\n"; 749 } 750 } 751 } 752 753 if (SramEccRequested) { 754 if (SramEccSupported) { 755 SramEccSetting = 756 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off; 757 } else { 758 // If a specific sramecc setting was requested and this GPU does not 759 // support sramecc emit a warning. Setting will remain set to 760 // "Unsupported". 
761 if (*SramEccRequested) { 762 errs() << "warning: sramecc 'On' was requested for a processor that " 763 "does not support it!\n"; 764 } else { 765 errs() << "warning: sramecc 'Off' was requested for a processor that " 766 "does not support it!\n"; 767 } 768 } 769 } 770} 771 772static TargetIDSetting 773getTargetIDSettingFromFeatureString(StringRef FeatureString) { 774 if (FeatureString.ends_with("-")) 775 return TargetIDSetting::Off; 776 if (FeatureString.ends_with("+")) 777 return TargetIDSetting::On; 778 779 llvm_unreachable("Malformed feature string"); 780} 781 782void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { 783 SmallVector<StringRef, 3> TargetIDSplit; 784 TargetID.split(TargetIDSplit, ':'); 785 786 for (const auto &FeatureString : TargetIDSplit) { 787 if (FeatureString.starts_with("xnack")) 788 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); 789 if (FeatureString.starts_with("sramecc")) 790 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); 791 } 792} 793 794std::string AMDGPUTargetID::toString() const { 795 std::string StringRep; 796 raw_string_ostream StreamRep(StringRep); 797 798 auto TargetTriple = STI.getTargetTriple(); 799 auto Version = getIsaVersion(STI.getCPU()); 800 801 StreamRep << TargetTriple.getArchName() << '-' 802 << TargetTriple.getVendorName() << '-' 803 << TargetTriple.getOSName() << '-' 804 << TargetTriple.getEnvironmentName() << '-'; 805 806 std::string Processor; 807 // TODO: Following else statement is present here because we used various 808 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803'). 809 // Remove once all aliases are removed from GCNProcessors.td. 810 if (Version.Major >= 9) 811 Processor = STI.getCPU().str(); 812 else 813 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) + 814 Twine(Version.Stepping)) 815 .str(); 816 817 std::string Features; 818 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { 819 // sramecc. 
820 if (getSramEccSetting() == TargetIDSetting::Off) 821 Features += ":sramecc-"; 822 else if (getSramEccSetting() == TargetIDSetting::On) 823 Features += ":sramecc+"; 824 // xnack. 825 if (getXnackSetting() == TargetIDSetting::Off) 826 Features += ":xnack-"; 827 else if (getXnackSetting() == TargetIDSetting::On) 828 Features += ":xnack+"; 829 } 830 831 StreamRep << Processor << Features; 832 833 StreamRep.flush(); 834 return StringRep; 835} 836 837unsigned getWavefrontSize(const MCSubtargetInfo *STI) { 838 if (STI->getFeatureBits().test(FeatureWavefrontSize16)) 839 return 16; 840 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) 841 return 32; 842 843 return 64; 844} 845 846unsigned getLocalMemorySize(const MCSubtargetInfo *STI) { 847 unsigned BytesPerCU = 0; 848 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) 849 BytesPerCU = 32768; 850 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) 851 BytesPerCU = 65536; 852 853 // "Per CU" really means "per whatever functional block the waves of a 854 // workgroup must share". So the effective local memory size is doubled in 855 // WGP mode on gfx10. 856 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode)) 857 BytesPerCU *= 2; 858 859 return BytesPerCU; 860} 861 862unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) { 863 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) 864 return 32768; 865 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) 866 return 65536; 867 return 0; 868} 869 870unsigned getEUsPerCU(const MCSubtargetInfo *STI) { 871 // "Per CU" really means "per whatever functional block the waves of a 872 // workgroup must share". For gfx10 in CU mode this is the CU, which contains 873 // two SIMDs. 874 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode)) 875 return 2; 876 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains 877 // two CUs, so a total of four SIMDs. 
878 return 4; 879} 880 881unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, 882 unsigned FlatWorkGroupSize) { 883 assert(FlatWorkGroupSize != 0); 884 if (STI->getTargetTriple().getArch() != Triple::amdgcn) 885 return 8; 886 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI); 887 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); 888 if (N == 1) { 889 // Single-wave workgroups don't consume barrier resources. 890 return MaxWaves; 891 } 892 893 unsigned MaxBarriers = 16; 894 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode)) 895 MaxBarriers = 32; 896 897 return std::min(MaxWaves / N, MaxBarriers); 898} 899 900unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { 901 return 1; 902} 903 904unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { 905 // FIXME: Need to take scratch memory into account. 906 if (isGFX90A(*STI)) 907 return 8; 908 if (!isGFX10Plus(*STI)) 909 return 10; 910 return hasGFX10_3Insts(*STI) ? 16 : 20; 911} 912 913unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, 914 unsigned FlatWorkGroupSize) { 915 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize), 916 getEUsPerCU(STI)); 917} 918 919unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { 920 return 1; 921} 922 923unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { 924 // Some subtargets allow encoding 2048, but this isn't tested or supported. 
  return 1024;
}

/// \returns the number of waves needed to cover \p FlatWorkGroupSize
/// work-items at the subtarget's wavefront size.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

/// \returns the SGPR allocation granule: on gfx10+ SGPRs are no longer
/// allocated per-wave, so the granule is the full addressable range.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

/// \returns the granule used when encoding the SGPR count in program
/// resource registers.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

/// \returns the total number of physical SGPRs on the chip.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

/// \returns the number of SGPRs a single wave may address.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

/// \returns the minimum SGPR count that still restricts occupancy to
/// \p WavesPerEU (0 when no SGPR count would restrict occupancy further).
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  // One more SGPR than the largest count that allows WavesPerEU + 1 waves.
  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

/// \returns the maximum SGPR count usable while sustaining \p WavesPerEU
/// waves; \p Addressable limits the result to what a wave can address.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

/// \returns the number of extra SGPRs implicitly reserved for VCC, flat
/// scratch, and XNACK on this subtarget generation.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  // gfx10+ reserves only VCC; flat scratch / XNACK no longer consume SGPRs.
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

/// Convenience overload: derive XNACK usage from the subtarget features.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

/// \returns the granulated (encoded) SGPR block count for \p NumSGPRs.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}

/// \returns the VGPR allocation granule; depends on wave size and on
/// subtarget register-file configuration.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  // Explicit override wins; otherwise consult the subtarget feature.
  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

/// \returns the granule used when encoding the VGPR count in program
/// resource registers.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

/// \returns the total number of physical VGPRs available to one EU/SIMD.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

/// \returns the number of VGPRs a single wave may address.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return 256;
}

/// \returns the occupancy (waves per EU) achievable when each wave uses
/// \p NumVGPRs registers, clamped to [1, max waves].
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs) {
  unsigned MaxWaves = getMaxWavesPerEU(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
}

/// \returns the minimum VGPR count that still restricts occupancy to
/// \p WavesPerEU (0 when no VGPR count would restrict occupancy further).
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  // If the same budget already applies at max occupancy, nothing restricts.
  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  // WavesPerEU may be unreachable with all addressable VGPRs in use; retry
  // with the lowest occupancy that the addressable range permits.
  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  // One more VGPR than the largest count that allows WavesPerEU + 1 waves.
  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

/// \returns the maximum VGPR count usable while sustaining \p WavesPerEU.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

/// \returns the granulated (encoded) VGPR block count for \p NumVGPRs.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          std::optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}

} // end namespace IsaInfo

/// Fill \p Header with the default amd_kernel_code_t values for \p STI.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // log2 of the wavefront size: 2^6 = 64 (may be lowered to 32 below).
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5; // log2(32)
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
      S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
      S_00B848_MEM_ORDERED(1);
  }
}

/// \returns a zero-initialized AMDHSA kernel descriptor with the default
/// compute_pgm_rsrc / kernel_code_properties bits set for \p STI.
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  if (Version.Major >= 12) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
  } else {
    // DX10_CLAMP / IEEE_MODE only exist through gfx11.
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
  }
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI)) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
  }
  return KD;
}

/// \returns true if \p GV lives in LDS (group/local address space).
bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

/// \returns true if \p GV lives in the global address space.
bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

/// \returns true if \p GV lives in a constant (read-only) address space.
bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

/// Parse a "first[,second]" integer pair from string attribute \p Name.
/// Emits an error into the function's context and returns \p Default on
/// malformed input; the second value is optional if \p OnlyFirstRequired.
std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, unsigned> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

// The get*BitMask functions below derive each counter's all-ones mask from
// its per-ISA-version bit width (width 0 yields mask 0, i.e. unsupported).

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getLoadcntBitMask(const IsaVersion &Version) {
  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
}

unsigned getSamplecntBitMask(const IsaVersion &Version) {
  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
}

unsigned getBvhcntBitMask(const IsaVersion &Version) {
  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getDscntBitMask(const IsaVersion &Version) {
  return (1 << getDscntBitWidth(Version.Major)) - 1;
}

unsigned getKmcntBitMask(const IsaVersion &Version) {
  return (1 << getKmcntBitWidth(Version.Major)) - 1;
}

unsigned getStorecntBitMask(const IsaVersion &Version) {
  return (1 << getStorecntBitWidth(Version.Major)) - 1;
}

/// \returns the mask of all fields of a legacy (pre-gfx12) waitcnt encoding.
unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

/// Extract vmcnt, re-joining the split lo/hi fields (gfx9/10 only have hi).
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

/// Decode \p Waitcnt into its three component counters (out-parameters).
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

/// Decode \p Encoded into a Waitcnt struct (legacy fields map onto the
/// unified LoadCnt/ExpCnt/DsCnt members).
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

/// Pack \p Vmcnt into \p Waitcnt, splitting it across the lo/hi fields.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

/// Encode the three counters into a waitcnt immediate. Unused bits start as
/// all-ones (the "no wait" value) via getWaitcntBitMask.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
}

/// \returns the combined field mask for the gfx12 split counters:
/// dscnt plus either storecnt or loadcnt (they share a bit position).
static unsigned getCombinedCountBitMask(const IsaVersion &Version,
                                        bool IsStore) {
  unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
                              getDscntBitWidth(Version.Major));
  if (IsStore) {
    unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                   getStorecntBitWidth(Version.Major));
    return Dscnt | Storecnt;
  } else {
    unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                  getLoadcntBitWidth(Version.Major));
    return Dscnt | Loadcnt;
  }
}

Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
  Waitcnt Decoded;
  Decoded.LoadCnt =
      unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getLoadcntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
  Waitcnt Decoded;
  Decoded.StoreCnt =
      unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getStorecntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
                              unsigned Loadcnt) {
  return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getLoadcntBitWidth(Version.Major));
}

static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
                               unsigned Storecnt) {
  return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getStorecntBitWidth(Version.Major));
}

static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
                            unsigned Dscnt) {
  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
                  getDscntBitWidth(Version.Major));
}

static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
                                   unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, false);
  Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
}

static unsigned encodeStorecntDscnt(const IsaVersion &Version,
                                    unsigned Storecnt, unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, true);
  Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeStorecntDscnt(const IsaVersion &Version,
                             const Waitcnt &Decoded) {
  return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
}

//===----------------------------------------------------------------------===//
// Custom Operands.
//
// A table of custom operands shall describe "primary" operand names
// first followed by aliases if any. It is not required but recommended
// to arrange operands so that operand encoding match operand position
// in the table. This will make disassembly a bit more efficient.
// Unused slots in the table shall have an empty name.
1459// 1460//===----------------------------------------------------------------------===// 1461 1462template <class T> 1463static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize, 1464 T Context) { 1465 return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() && 1466 (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context)); 1467} 1468 1469template <class T> 1470static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test, 1471 const CustomOperand<T> OpInfo[], int OpInfoSize, 1472 T Context) { 1473 int InvalidIdx = OPR_ID_UNKNOWN; 1474 for (int Idx = 0; Idx < OpInfoSize; ++Idx) { 1475 if (Test(OpInfo[Idx])) { 1476 if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context)) 1477 return Idx; 1478 InvalidIdx = OPR_ID_UNSUPPORTED; 1479 } 1480 } 1481 return InvalidIdx; 1482} 1483 1484template <class T> 1485static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[], 1486 int OpInfoSize, T Context) { 1487 auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; }; 1488 return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context); 1489} 1490 1491template <class T> 1492static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize, 1493 T Context, bool QuickCheck = true) { 1494 auto Test = [=](const CustomOperand<T> &Op) { 1495 return Op.Encoding == Id && !Op.Name.empty(); 1496 }; 1497 // This is an optimization that should work in most cases. 1498 // As a side effect, it may cause selection of an alias 1499 // instead of a primary operand name in case of sparse tables. 
1500 if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) && 1501 OpInfo[Id].Encoding == Id) { 1502 return Id; 1503 } 1504 return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context); 1505} 1506 1507//===----------------------------------------------------------------------===// 1508// Custom Operand Values 1509//===----------------------------------------------------------------------===// 1510 1511static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, 1512 int Size, 1513 const MCSubtargetInfo &STI) { 1514 unsigned Enc = 0; 1515 for (int Idx = 0; Idx < Size; ++Idx) { 1516 const auto &Op = Opr[Idx]; 1517 if (Op.isSupported(STI)) 1518 Enc |= Op.encode(Op.Default); 1519 } 1520 return Enc; 1521} 1522 1523static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, 1524 int Size, unsigned Code, 1525 bool &HasNonDefaultVal, 1526 const MCSubtargetInfo &STI) { 1527 unsigned UsedOprMask = 0; 1528 HasNonDefaultVal = false; 1529 for (int Idx = 0; Idx < Size; ++Idx) { 1530 const auto &Op = Opr[Idx]; 1531 if (!Op.isSupported(STI)) 1532 continue; 1533 UsedOprMask |= Op.getMask(); 1534 unsigned Val = Op.decode(Code); 1535 if (!Op.isValid(Val)) 1536 return false; 1537 HasNonDefaultVal |= (Val != Op.Default); 1538 } 1539 return (Code & ~UsedOprMask) == 0; 1540} 1541 1542static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, 1543 unsigned Code, int &Idx, StringRef &Name, 1544 unsigned &Val, bool &IsDefault, 1545 const MCSubtargetInfo &STI) { 1546 while (Idx < Size) { 1547 const auto &Op = Opr[Idx++]; 1548 if (Op.isSupported(STI)) { 1549 Name = Op.Name; 1550 Val = Op.decode(Code); 1551 IsDefault = (Val == Op.Default); 1552 return true; 1553 } 1554 } 1555 1556 return false; 1557} 1558 1559static int encodeCustomOperandVal(const CustomOperandVal &Op, 1560 int64_t InputVal) { 1561 if (InputVal < 0 || InputVal > Op.Max) 1562 return OPR_VAL_INVALID; 1563 return Op.encode(InputVal); 1564} 1565 1566static int 
encodeCustomOperand(const CustomOperandVal *Opr, int Size, 1567 const StringRef Name, int64_t InputVal, 1568 unsigned &UsedOprMask, 1569 const MCSubtargetInfo &STI) { 1570 int InvalidId = OPR_ID_UNKNOWN; 1571 for (int Idx = 0; Idx < Size; ++Idx) { 1572 const auto &Op = Opr[Idx]; 1573 if (Op.Name == Name) { 1574 if (!Op.isSupported(STI)) { 1575 InvalidId = OPR_ID_UNSUPPORTED; 1576 continue; 1577 } 1578 auto OprMask = Op.getMask(); 1579 if (OprMask & UsedOprMask) 1580 return OPR_ID_DUPLICATE; 1581 UsedOprMask |= OprMask; 1582 return encodeCustomOperandVal(Op, InputVal); 1583 } 1584 } 1585 return InvalidId; 1586} 1587 1588//===----------------------------------------------------------------------===// 1589// DepCtr 1590//===----------------------------------------------------------------------===// 1591 1592namespace DepCtr { 1593 1594int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) { 1595 static int Default = -1; 1596 if (Default == -1) 1597 Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI); 1598 return Default; 1599} 1600 1601bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, 1602 const MCSubtargetInfo &STI) { 1603 return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code, 1604 HasNonDefaultVal, STI); 1605} 1606 1607bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, 1608 bool &IsDefault, const MCSubtargetInfo &STI) { 1609 return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val, 1610 IsDefault, STI); 1611} 1612 1613int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, 1614 const MCSubtargetInfo &STI) { 1615 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask, 1616 STI); 1617} 1618 1619unsigned decodeFieldVmVsrc(unsigned Encoded) { 1620 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); 1621} 1622 1623unsigned decodeFieldVaVdst(unsigned Encoded) { 1624 return unpackBits(Encoded, getVaVdstBitShift(), 
getVaVdstBitWidth()); 1625} 1626 1627unsigned decodeFieldSaSdst(unsigned Encoded) { 1628 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); 1629} 1630 1631unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { 1632 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); 1633} 1634 1635unsigned encodeFieldVmVsrc(unsigned VmVsrc) { 1636 return encodeFieldVmVsrc(0xffff, VmVsrc); 1637} 1638 1639unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) { 1640 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); 1641} 1642 1643unsigned encodeFieldVaVdst(unsigned VaVdst) { 1644 return encodeFieldVaVdst(0xffff, VaVdst); 1645} 1646 1647unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) { 1648 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); 1649} 1650 1651unsigned encodeFieldSaSdst(unsigned SaSdst) { 1652 return encodeFieldSaSdst(0xffff, SaSdst); 1653} 1654 1655} // namespace DepCtr 1656 1657//===----------------------------------------------------------------------===// 1658// hwreg 1659//===----------------------------------------------------------------------===// 1660 1661namespace Hwreg { 1662 1663int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) { 1664 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI); 1665 return (Idx < 0) ? 
Idx : Opr[Idx].Encoding; 1666} 1667 1668bool isValidHwreg(int64_t Id) { 1669 return 0 <= Id && isUInt<ID_WIDTH_>(Id); 1670} 1671 1672bool isValidHwregOffset(int64_t Offset) { 1673 return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset); 1674} 1675 1676bool isValidHwregWidth(int64_t Width) { 1677 return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1); 1678} 1679 1680uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) { 1681 return (Id << ID_SHIFT_) | 1682 (Offset << OFFSET_SHIFT_) | 1683 ((Width - 1) << WIDTH_M1_SHIFT_); 1684} 1685 1686StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) { 1687 int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI); 1688 return (Idx < 0) ? "" : Opr[Idx].Name; 1689} 1690 1691void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) { 1692 Id = (Val & ID_MASK_) >> ID_SHIFT_; 1693 Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_; 1694 Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1; 1695} 1696 1697} // namespace Hwreg 1698 1699//===----------------------------------------------------------------------===// 1700// exp tgt 1701//===----------------------------------------------------------------------===// 1702 1703namespace Exp { 1704 1705struct ExpTgt { 1706 StringLiteral Name; 1707 unsigned Tgt; 1708 unsigned MaxIndex; 1709}; 1710 1711static constexpr ExpTgt ExpTgtInfo[] = { 1712 {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, 1713 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, 1714 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, 1715 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, 1716 {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, 1717 {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX}, 1718 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, 1719}; 1720 1721bool getTgtName(unsigned Id, StringRef &Name, int &Index) { 1722 for (const ExpTgt &Val : ExpTgtInfo) { 1723 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) { 1724 Index = (Val.MaxIndex == 0) ? 
-1 : (Id - Val.Tgt); 1725 Name = Val.Name; 1726 return true; 1727 } 1728 } 1729 return false; 1730} 1731 1732unsigned getTgtId(const StringRef Name) { 1733 1734 for (const ExpTgt &Val : ExpTgtInfo) { 1735 if (Val.MaxIndex == 0 && Name == Val.Name) 1736 return Val.Tgt; 1737 1738 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) { 1739 StringRef Suffix = Name.drop_front(Val.Name.size()); 1740 1741 unsigned Id; 1742 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex) 1743 return ET_INVALID; 1744 1745 // Disable leading zeroes 1746 if (Suffix.size() > 1 && Suffix[0] == '0') 1747 return ET_INVALID; 1748 1749 return Val.Tgt + Id; 1750 } 1751 } 1752 return ET_INVALID; 1753} 1754 1755bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { 1756 switch (Id) { 1757 case ET_NULL: 1758 return !isGFX11Plus(STI); 1759 case ET_POS4: 1760 case ET_PRIM: 1761 return isGFX10Plus(STI); 1762 case ET_DUAL_SRC_BLEND0: 1763 case ET_DUAL_SRC_BLEND1: 1764 return isGFX11Plus(STI); 1765 default: 1766 if (Id >= ET_PARAM0 && Id <= ET_PARAM31) 1767 return !isGFX11Plus(STI); 1768 return true; 1769 } 1770} 1771 1772} // namespace Exp 1773 1774//===----------------------------------------------------------------------===// 1775// MTBUF Format 1776//===----------------------------------------------------------------------===// 1777 1778namespace MTBUFFormat { 1779 1780int64_t getDfmt(const StringRef Name) { 1781 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) { 1782 if (Name == DfmtSymbolic[Id]) 1783 return Id; 1784 } 1785 return DFMT_UNDEF; 1786} 1787 1788StringRef getDfmtName(unsigned Id) { 1789 assert(Id <= DFMT_MAX); 1790 return DfmtSymbolic[Id]; 1791} 1792 1793static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) { 1794 if (isSI(STI) || isCI(STI)) 1795 return NfmtSymbolicSICI; 1796 if (isVI(STI) || isGFX9(STI)) 1797 return NfmtSymbolicVI; 1798 return NfmtSymbolicGFX10; 1799} 1800 1801int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) { 1802 auto 
lookupTable = getNfmtLookupTable(STI); 1803 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) { 1804 if (Name == lookupTable[Id]) 1805 return Id; 1806 } 1807 return NFMT_UNDEF; 1808} 1809 1810StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) { 1811 assert(Id <= NFMT_MAX); 1812 return getNfmtLookupTable(STI)[Id]; 1813} 1814 1815bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) { 1816 unsigned Dfmt; 1817 unsigned Nfmt; 1818 decodeDfmtNfmt(Id, Dfmt, Nfmt); 1819 return isValidNfmt(Nfmt, STI); 1820} 1821 1822bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) { 1823 return !getNfmtName(Id, STI).empty(); 1824} 1825 1826int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { 1827 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); 1828} 1829 1830void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { 1831 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; 1832 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; 1833} 1834 1835int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { 1836 if (isGFX11Plus(STI)) { 1837 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 1838 if (Name == UfmtSymbolicGFX11[Id]) 1839 return Id; 1840 } 1841 } else { 1842 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 1843 if (Name == UfmtSymbolicGFX10[Id]) 1844 return Id; 1845 } 1846 } 1847 return UFMT_UNDEF; 1848} 1849 1850StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { 1851 if(isValidUnifiedFormat(Id, STI)) 1852 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id]; 1853 return ""; 1854} 1855 1856bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) { 1857 return isGFX10(STI) ? 
Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST; 1858} 1859 1860int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, 1861 const MCSubtargetInfo &STI) { 1862 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); 1863 if (isGFX11Plus(STI)) { 1864 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 1865 if (Fmt == DfmtNfmt2UFmtGFX11[Id]) 1866 return Id; 1867 } 1868 } else { 1869 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 1870 if (Fmt == DfmtNfmt2UFmtGFX10[Id]) 1871 return Id; 1872 } 1873 } 1874 return UFMT_UNDEF; 1875} 1876 1877bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) { 1878 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX); 1879} 1880 1881unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { 1882 if (isGFX10Plus(STI)) 1883 return UFMT_DEFAULT; 1884 return DFMT_NFMT_DEFAULT; 1885} 1886 1887} // namespace MTBUFFormat 1888 1889//===----------------------------------------------------------------------===// 1890// SendMsg 1891//===----------------------------------------------------------------------===// 1892 1893namespace SendMsg { 1894 1895static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { 1896 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; 1897} 1898 1899int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) { 1900 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI); 1901 return (Idx < 0) ? Idx : Msg[Idx].Encoding; 1902} 1903 1904bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) { 1905 return (MsgId & ~(getMsgIdMask(STI))) == 0; 1906} 1907 1908StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) { 1909 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI); 1910 return (Idx < 0) ? "" : Msg[Idx].Name; 1911} 1912 1913int64_t getMsgOpId(int64_t MsgId, const StringRef Name) { 1914 const char* const *S = (MsgId == ID_SYSMSG) ? 
OpSysSymbolic : OpGsSymbolic;
  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
  // LAST_ is an exclusive bound, matching the range checks in isValidMsgOp.
  for (int i = F; i < L; ++i) {
    if (Name == S[i]) {
      return i;
    }
  }
  return OP_UNKNOWN_;
}

/// \returns true if \p OpId is a legal operation for message \p MsgId.
/// In non-strict mode only the field width is checked. In strict mode,
/// SYSMSG and (pre-GFX11) GS/GS_DONE accept their respective op ranges —
/// GS additionally excludes NOP — and every other message requires OP_NONE_.
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict) {
  assert(isValidMsgId(MsgId, STI));

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  if (MsgId == ID_SYSMSG)
    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
      return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
    case ID_GS_DONE_PreGFX11:
      return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
    }
  }
  return OpId == OP_NONE_;
}

/// \returns the symbolic name of \p OpId for a message that takes an op.
StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  assert(msgRequiresOp(MsgId, STI));
  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
}

/// \returns true if \p StreamId is legal for the given message/op pair.
/// Non-strict mode only checks the field width; strict mode requires a real
/// stream id for pre-GFX11 GS messages (except GS_DONE NOP, which takes
/// STREAM_ID_NONE_) and STREAM_ID_NONE_ everywhere else.
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict) {
  assert(isValidMsgOp(MsgId, OpId, STI, Strict));

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
      return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
    case ID_GS_DONE_PreGFX11:
      return (OpId == OP_GS_NOP) ?
             (StreamId == STREAM_ID_NONE_) :
             (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
    }
  }
  return StreamId == STREAM_ID_NONE_;
}

/// \returns true if message \p MsgId must carry an operation field
/// (SYSMSG always; GS/GS_DONE only before GFX11).
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
  return MsgId == ID_SYSMSG ||
      (!isGFX11Plus(STI) &&
       (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
}

/// \returns true if the message/op pair carries a stream id
/// (pre-GFX11 GS/GS_DONE, excluding NOP).
bool msgSupportsStream(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  return !isGFX11Plus(STI) &&
      (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
      OpId != OP_GS_NOP;
}

/// Unpacks an s_sendmsg immediate into its fields. On GFX11+ the op and
/// stream fields no longer exist and are returned as 0.
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI) {
  MsgId = Val & getMsgIdMask(STI);
  if (isGFX11Plus(STI)) {
    OpId = 0;
    StreamId = 0;
  } else {
    OpId = (Val & OP_MASK_) >> OP_SHIFT_;
    StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
  }
}

/// Packs message/op/stream fields into an s_sendmsg immediate.
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
}

} // namespace SendMsg

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

/// \returns the "InitialPSInputAddr" function attribute as an integer, 0 if
/// absent.
unsigned getInitialPSInputAddr(const Function &F) {
  return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
}

/// \returns whether the function exports color, from the
/// "amdgpu-color-export" attribute; for AMDGPU_PS the default is 1.
bool getHasColorExport(const Function &F) {
  // As a safe default always respond as if PS has color exports.
  return F.getFnAttributeAsParsedInteger(
      "amdgpu-color-export",
      F.getCallingConv() == CallingConv::AMDGPU_PS ?
1 : 0) != 0;
}

/// \returns the "amdgpu-depth-export" attribute as a boolean (default 0).
bool getHasDepthExport(const Function &F) {
  return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
}

/// \returns true if \p cc is one of the AMDGPU shader calling conventions.
bool isShader(CallingConv::ID cc) {
  switch(cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

/// \returns true for shader conventions plus AMDGPU_Gfx.
bool isGraphics(CallingConv::ID cc) {
  return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
}

/// \returns true for any non-graphics convention; AMDGPU_CS counts as both
/// a shader and compute, hence the explicit extra check.
bool isCompute(CallingConv::ID cc) {
  return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
}

/// \returns true if \p CC is a hardware entry point (kernels and the
/// fixed-function shader stages).
bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

/// \returns true for entry functions plus AMDGPU_Gfx and the chain
/// conventions.
bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_Gfx:
    return true;
  default:
    return isEntryFunctionCC(CC) || isChainCC(CC);
  }
}

/// \returns true for the CS chain calling conventions.
bool isChainCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    return true;
  default:
    return false;
  }
}

/// \returns true if \p Func is a module entry function (see above); despite
/// the name this is broader than just kernels.
bool isKernelCC(const Function *Func) {
  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
}

/// \returns true if the subtarget supports XNACK replay.
bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureXNACK);
}

/// \returns true if the subtarget has SRAM ECC.
bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSRAMECC);
}

/// \returns true if the MIMG r128 modifier is available; it is mutually
/// exclusive with the r128-a16 feature.
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
}

/// \returns true if 16-bit image addresses (a16) are supported.
bool hasA16(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureA16);
}

/// \returns true if 16-bit image gradients (g16) are supported.
bool hasG16(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureG16);
}

/// \returns true if D16 memory operands are packed; SI/CI and targets with
/// unpacked D16 VMEM do not pack.
bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
         !isSI(STI);
}

/// \returns true if the subtarget has GDS.
bool hasGDS(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGDS);
}

/// \returns the maximum number of addresses a MIMG NSA instruction may take,
/// by ISA version: GFX10.0: 5, GFX10.3/GFX11: 13/5, GFX12+: 4 with sampler
/// else 5; 0 means NSA is unsupported.
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
  auto Version = getIsaVersion(STI.getCPU());
  if (Version.Major == 10)
    return Version.Minor >= 3 ? 13 : 5;
  if (Version.Major == 11)
    return 5;
  if (Version.Major >= 12)
    return HasSampler ? 4 : 5;
  return 0;
}

/// \returns the maximum number of user SGPRs (fixed at 16 here).
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }

bool isSI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSeaIslands);
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX9);
}

bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10(STI);
}

bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
}

bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9(STI) || isGFX10(STI);
}

bool isGFX8Plus(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9Plus(STI);
}
2161 2162bool isGFX9Plus(const MCSubtargetInfo &STI) { 2163 return isGFX9(STI) || isGFX10Plus(STI); 2164} 2165 2166bool isGFX10(const MCSubtargetInfo &STI) { 2167 return STI.hasFeature(AMDGPU::FeatureGFX10); 2168} 2169 2170bool isGFX10_GFX11(const MCSubtargetInfo &STI) { 2171 return isGFX10(STI) || isGFX11(STI); 2172} 2173 2174bool isGFX10Plus(const MCSubtargetInfo &STI) { 2175 return isGFX10(STI) || isGFX11Plus(STI); 2176} 2177 2178bool isGFX11(const MCSubtargetInfo &STI) { 2179 return STI.hasFeature(AMDGPU::FeatureGFX11); 2180} 2181 2182bool isGFX11Plus(const MCSubtargetInfo &STI) { 2183 return isGFX11(STI) || isGFX12Plus(STI); 2184} 2185 2186bool isGFX12(const MCSubtargetInfo &STI) { 2187 return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; 2188} 2189 2190bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); } 2191 2192bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); } 2193 2194bool isNotGFX11Plus(const MCSubtargetInfo &STI) { 2195 return !isGFX11Plus(STI); 2196} 2197 2198bool isNotGFX10Plus(const MCSubtargetInfo &STI) { 2199 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); 2200} 2201 2202bool isGFX10Before1030(const MCSubtargetInfo &STI) { 2203 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); 2204} 2205 2206bool isGCN3Encoding(const MCSubtargetInfo &STI) { 2207 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding); 2208} 2209 2210bool isGFX10_AEncoding(const MCSubtargetInfo &STI) { 2211 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding); 2212} 2213 2214bool isGFX10_BEncoding(const MCSubtargetInfo &STI) { 2215 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding); 2216} 2217 2218bool hasGFX10_3Insts(const MCSubtargetInfo &STI) { 2219 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts); 2220} 2221 2222bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) { 2223 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI); 2224} 2225 2226bool isGFX90A(const MCSubtargetInfo &STI) { 2227 return 
STI.hasFeature(AMDGPU::FeatureGFX90AInsts); 2228} 2229 2230bool isGFX940(const MCSubtargetInfo &STI) { 2231 return STI.hasFeature(AMDGPU::FeatureGFX940Insts); 2232} 2233 2234bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { 2235 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch); 2236} 2237 2238bool hasMAIInsts(const MCSubtargetInfo &STI) { 2239 return STI.hasFeature(AMDGPU::FeatureMAIInsts); 2240} 2241 2242bool hasVOPD(const MCSubtargetInfo &STI) { 2243 return STI.hasFeature(AMDGPU::FeatureVOPD); 2244} 2245 2246bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) { 2247 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR); 2248} 2249 2250unsigned hasKernargPreload(const MCSubtargetInfo &STI) { 2251 return STI.hasFeature(AMDGPU::FeatureKernargPreload); 2252} 2253 2254int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, 2255 int32_t ArgNumVGPR) { 2256 if (has90AInsts && ArgNumAGPR) 2257 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR; 2258 return std::max(ArgNumVGPR, ArgNumAGPR); 2259} 2260 2261bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { 2262 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); 2263 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0); 2264 return SGPRClass.contains(FirstSubReg != 0 ? 
FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

/// \returns true if \p Reg's hardware encoding has the IS_HI bit set
/// (a 16-bit high-half register).
bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
  return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
}

// Shared switch body mapping registers whose MC encoding differs between
// hardware generations (flat scratch, trap temps, M0, null). The CASE_*
// macros are defined twice below: first pseudo->MC (used by getMCReg), then
// redefined MC->pseudo (used by mc2PseudoReg).
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9PLUS(TTMP0) \
  CASE_VI_GFX9PLUS(TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2) \
  CASE_VI_GFX9PLUS(TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4) \
  CASE_VI_GFX9PLUS(TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6) \
  CASE_VI_GFX9PLUS(TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8) \
  CASE_VI_GFX9PLUS(TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10) \
  CASE_VI_GFX9PLUS(TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12) \
  CASE_VI_GFX9PLUS(TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14) \
  CASE_VI_GFX9PLUS(TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_GFXPRE11_GFX11PLUS(M0) \
  CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
  CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
  }

// Pseudo -> MC register, picking the per-generation variant.
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
  case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;

/// \returns the MC register for pseudo register \p Reg on subtarget \p STI.
/// R600 has no generation-specific aliases, so the register passes through.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO

// MC -> pseudo register: both generation variants fold back to the pseudo.
#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)

/// \returns the pseudo register for MC register \p Reg (inverse of getMCReg).
unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

/// \returns true if \p Reg is one of the pseudo registers that encode as
/// inline constant values (apertures, POPS wave id, vccz/execz/scc, null).
bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE_LO:
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT_LO:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE_LO:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  case AMDGPU::SGPR_NULL:
    return true;
  default:
    return false;
  }
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO
#undef MAP_REG2REG

/// \returns true if operand \p OpNo of \p Desc is in the SI source-operand
/// type range.
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType =
Desc.operands()[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

/// \returns true if operand \p OpNo is a KImm (32-bit literal) operand.
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
         OpType <= AMDGPU::OPERAND_KIMM_LAST;
}

/// \returns true if operand \p OpNo is a floating-point source operand
/// (any of the FP immediate/inline operand types below).
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return true;
  default:
    return false;
  }
}

/// \returns true if operand \p OpNo only accepts inline constants
/// (INLINE_C or INLINE_AC operand-type ranges).
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
         (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
          OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
/// \returns the width in bits of register class \p RCID. Kept as an explicit
/// table (rather than MCRegisterClass::getSize) per the note above.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

/// \returns the width in bits of register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

/// \returns the size in bytes of register operand \p OpNo of \p Desc.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.operands()[OpNo].RegClass;
  return getRegBitWidth(RCID) / 8;
}

/// \returns true if the 64-bit value \p Literal can be encoded as an inline
/// constant: a small integer, one of the fixed double-precision constants,
/// or (when \p HasInv2Pi) the bit pattern of 1/(2*pi).
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
         (Val == llvm::bit_cast<uint64_t>(1.0)) ||
         (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
         (Val == llvm::bit_cast<uint64_t>(0.5)) ||
         (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
         (Val == llvm::bit_cast<uint64_t>(2.0)) ||
         (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
         (Val ==
llvm::bit_cast<uint64_t>(4.0)) ||
         (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // double 1/(2*pi)
}

/// \returns true if the 32-bit value \p Literal can be encoded as an inline
/// constant (small integer, fixed single-precision constant, or 1/(2*pi)
/// when \p HasInv2Pi).
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
         (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
         (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi); // float 1/(2*pi)
}

/// \returns true if the 16-bit value \p Literal is an inline constant.
/// Without the inv-2pi feature nothing is accepted (targets without it have
/// no 16-bit inline constants on this path).
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (isInlinableIntLiteral(Literal))
    return true;

  // Half-precision bit patterns of the fixed FP inline constants.
  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

/// \returns the inline-constant encoding (128..248) of \p Literal for packed
/// 16-bit operands, or nullopt if it is not inlinable. \p IsFloat selects
/// FP16 bit patterns vs FP32 bit patterns for the float constants.
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  // Unfortunately, the Instruction Set Architecture Reference Guide is
  // misleading about how the inline operands work for (packed) 16-bit
  // instructions. In a nutshell, the actual HW behavior is:
  //
  //  - integer encodings (-16 .. 64) are always produced as sign-extended
  //    32-bit values
  //  - float encodings are produced as:
  //    - for F16 instructions: corresponding half-precision float values in
  //      the LSBs, 0 in the MSBs
  //    - for UI16 instructions: corresponding single-precision float value
  int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;

  if (Signed >= -16 && Signed <= -1)
    return 192 + std::abs(Signed);

  if (IsFloat) {
    // clang-format off
    switch (Literal) {
    case 0x3800: return 240; // 0.5
    case 0xB800: return 241; // -0.5
    case 0x3C00: return 242; // 1.0
    case 0xBC00: return 243; // -1.0
    case 0x4000: return 244; // 2.0
    case 0xC000: return 245; // -2.0
    case 0x4400: return 246; // 4.0
    case 0xC400: return 247; // -4.0
    case 0x3118: return 248; // 1.0 / (2.0 * pi)
    default: break;
    }
    // clang-format on
  } else {
    // clang-format off
    switch (Literal) {
    case 0x3F000000: return 240; // 0.5
    case 0xBF000000: return 241; // -0.5
    case 0x3F800000: return 242; // 1.0
    case 0xBF800000: return 243; // -1.0
    case 0x40000000: return 244; // 2.0
    case 0xC0000000: return 245; // -2.0
    case 0x40800000: return 246; // 4.0
    case 0xC0800000: return 247; // -4.0
    case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
    default: break;
    }
    // clang-format on
  }

  return {};
}

// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
  return getInlineEncodingV216(false, Literal);
}

// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
// or nullopt.
2702std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) { 2703 return getInlineEncodingV216(true, Literal); 2704} 2705 2706// Whether the given literal can be inlined for a V_PK_* instruction. 2707bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { 2708 switch (OpType) { 2709 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2710 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2711 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2712 return getInlineEncodingV216(false, Literal).has_value(); 2713 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2714 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2715 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2716 return getInlineEncodingV216(true, Literal).has_value(); 2717 default: 2718 llvm_unreachable("bad packed operand type"); 2719 } 2720} 2721 2722// Whether the given literal can be inlined for a V_PK_*_IU16 instruction. 2723bool isInlinableLiteralV2I16(uint32_t Literal) { 2724 return getInlineEncodingV2I16(Literal).has_value(); 2725} 2726 2727// Whether the given literal can be inlined for a V_PK_*_F16 instruction. 2728bool isInlinableLiteralV2F16(uint32_t Literal) { 2729 return getInlineEncodingV2F16(Literal).has_value(); 2730} 2731 2732bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { 2733 if (IsFP64) 2734 return !(Val & 0xffffffffu); 2735 2736 return isUInt<32>(Val) || isInt<32>(Val); 2737} 2738 2739bool isArgPassedInSGPR(const Argument *A) { 2740 const Function *F = A->getParent(); 2741 2742 // Arguments to compute shaders are never a source of divergence. 
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    // Kernel arguments are uniform by definition.
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return A->hasAttribute(Attribute::InReg) ||
           A->hasAttribute(Attribute::ByVal);
  default:
    // TODO: treat i1 as divergent?
    return A->hasAttribute(Attribute::InReg);
  }
}

/// Call-site variant of the above, checking the parameter attributes of
/// \p CB's argument \p ArgNo instead of a Function argument.
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = CB->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
           CB->paramHasAttr(ArgNo, Attribute::ByVal);
  default:
    return CB->paramHasAttr(ArgNo, Attribute::InReg);
  }
}

/// \returns true if SMEM offsets are in bytes (GCN3+ and GFX10+) rather
/// than dwords.
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

/// \returns true if SMRD immediate offsets are signed (GFX9+).
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

/// \returns true if \p EncodedOffset fits the unsigned SMRD offset field:
/// 23 bits on GFX12+, 20 bits with byte offsets, 8 bits (dwords) otherwise.
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  if (isGFX12Plus(ST))
    return isUInt<23>(EncodedOffset);

  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

/// \returns true if \p EncodedOffset is a legal signed SMRD offset:
/// 24 bits on GFX12+, otherwise 21 bits on targets with signed offsets —
/// and never for buffer accesses (\p IsBuffer).
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer) {
  if (isGFX12Plus(ST))
    return isInt<24>(EncodedOffset);

  return !IsBuffer &&
         hasSMRDSignedImmOffset(ST) &&
         isInt<21>(EncodedOffset);
}

/// \returns true if \p ByteOffset is a multiple of 4 (dword aligned).
static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

/// Converts \p ByteOffset into the units the SMRD offset field uses:
/// bytes on byte-offset targets, dwords otherwise (must be dword aligned).
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}

/// \returns the encodable SMRD immediate for \p ByteOffset, or nullopt if
/// it cannot be encoded on this subtarget.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer) {
  if (isGFX12Plus(ST)) // 24 bit signed offsets
    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;

  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ?
std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  }

  // Dword-offset targets can only encode dword-aligned byte offsets.
  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}

/// \returns the 32-bit literal SMRD offset for \p ByteOffset, which only
/// exists on CI, or nullopt.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
                                   : std::nullopt;
}

/// \returns the width of the FLAT instruction offset field:
/// 12 bits on GFX10, 24 on GFX12+, 13 elsewhere.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
  if (AMDGPU::isGFX10(ST))
    return 12;

  if (AMDGPU::isGFX12(ST))
    return 24;
  return 13;
}

namespace {

// Keys for the TableGen-generated searchable tables included below.
struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

/// \returns true if intrinsic \p IntrID is listed as a source of divergence.
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

/// \returns true if intrinsic \p IntrID is listed as always uniform.
bool isIntrinsicAlwaysUniform(unsigned IntrID) {
  return lookupAlwaysUniform(IntrID);
}

/// \returns the buffer-format table entry matching the component layout,
/// selecting the generation-specific table; nullptr if no entry matches.
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI)
             ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
                                            NumFormat)
             : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
                                                       NumComponents, NumFormat)
                            : getGfx9BufferFormatInfo(BitsPerComp,
                                                      NumComponents, NumFormat);
}

/// \returns the buffer-format table entry for encoded format \p Format.
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
                          : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
                                         : getGfx9BufferFormatInfo(Format);
}

/// \returns true if any of vdst/src0/src1/src2 uses a 64-bit VGPR class.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
  for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
                       OpName::src2 }) {
    int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
    if (Idx == -1)
      continue;

    if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
        OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
      return true;
  }

  return false;
}

/// \returns true if \p OpDesc is treated as a double-precision ALU DPP op
/// (currently: has any 64-bit VGPR operand).
bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
  return hasAny64BitVGPROperands(OpDesc);
}

} // namespace AMDGPU

/// Pretty-prints a TargetIDSetting for diagnostics.
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm