1321369Sdim//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===// 2285163Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6285163Sdim// 7285163Sdim//===----------------------------------------------------------------------===// 8321369Sdim 9285163Sdim#include "AMDGPUBaseInfo.h" 10360784Sdim#include "AMDGPU.h" 11360784Sdim#include "AMDGPUAsmUtils.h" 12341825Sdim#include "AMDGPUTargetTransformInfo.h" 13314564Sdim#include "SIDefines.h" 14321369Sdim#include "llvm/ADT/StringRef.h" 15321369Sdim#include "llvm/ADT/Triple.h" 16321369Sdim#include "llvm/BinaryFormat/ELF.h" 17321369Sdim#include "llvm/CodeGen/MachineMemOperand.h" 18321369Sdim#include "llvm/IR/Attributes.h" 19321369Sdim#include "llvm/IR/Constants.h" 20296417Sdim#include "llvm/IR/Function.h" 21296417Sdim#include "llvm/IR/GlobalValue.h" 22321369Sdim#include "llvm/IR/Instruction.h" 23360784Sdim#include "llvm/IR/IntrinsicsAMDGPU.h" 24360784Sdim#include "llvm/IR/IntrinsicsR600.h" 25321369Sdim#include "llvm/IR/LLVMContext.h" 26321369Sdim#include "llvm/IR/Module.h" 27296417Sdim#include "llvm/MC/MCContext.h" 28321369Sdim#include "llvm/MC/MCInstrDesc.h" 29327952Sdim#include "llvm/MC/MCInstrInfo.h" 30314564Sdim#include "llvm/MC/MCRegisterInfo.h" 31296417Sdim#include "llvm/MC/MCSectionELF.h" 32296417Sdim#include "llvm/MC/MCSubtargetInfo.h" 33285163Sdim#include "llvm/MC/SubtargetFeature.h" 34321369Sdim#include "llvm/Support/Casting.h" 35321369Sdim#include "llvm/Support/ErrorHandling.h" 36321369Sdim#include "llvm/Support/MathExtras.h" 37321369Sdim#include <algorithm> 38321369Sdim#include <cassert> 39321369Sdim#include <cstdint> 40321369Sdim#include <cstring> 41321369Sdim#include <utility> 42285163Sdim 43321369Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 44285163Sdim 45314564Sdim#define GET_INSTRINFO_NAMED_OPS 46327952Sdim#define GET_INSTRMAP_INFO 47314564Sdim#include "AMDGPUGenInstrInfo.inc" 48327952Sdim#undef GET_INSTRMAP_INFO 49314564Sdim#undef GET_INSTRINFO_NAMED_OPS 50314564Sdim 51314564Sdimnamespace { 52314564Sdim 53314564Sdim/// \returns Bit mask for given bit \p Shift and bit \p Width. 54314564Sdimunsigned getBitMask(unsigned Shift, unsigned Width) { 55314564Sdim return ((1 << Width) - 1) << Shift; 56314564Sdim} 57314564Sdim 58341825Sdim/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width. 59314564Sdim/// 60314564Sdim/// \returns Packed \p Dst. 61314564Sdimunsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { 62314564Sdim Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width); 63314564Sdim Dst |= (Src << Shift) & getBitMask(Shift, Width); 64314564Sdim return Dst; 65314564Sdim} 66314564Sdim 67341825Sdim/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width. 68314564Sdim/// 69314564Sdim/// \returns Unpacked bits. 70314564Sdimunsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { 71314564Sdim return (Src & getBitMask(Shift, Width)) >> Shift; 72314564Sdim} 73314564Sdim 74321369Sdim/// \returns Vmcnt bit shift (lower bits). 75321369Sdimunsigned getVmcntBitShiftLo() { return 0; } 76314564Sdim 77321369Sdim/// \returns Vmcnt bit width (lower bits). 78321369Sdimunsigned getVmcntBitWidthLo() { return 4; } 79314564Sdim 80314564Sdim/// \returns Expcnt bit shift. 81314564Sdimunsigned getExpcntBitShift() { return 4; } 82314564Sdim 83314564Sdim/// \returns Expcnt bit width. 84314564Sdimunsigned getExpcntBitWidth() { return 3; } 85314564Sdim 86314564Sdim/// \returns Lgkmcnt bit shift. 87314564Sdimunsigned getLgkmcntBitShift() { return 8; } 88314564Sdim 89314564Sdim/// \returns Lgkmcnt bit width. 90353358Sdimunsigned getLgkmcntBitWidth(unsigned VersionMajor) { 91353358Sdim return (VersionMajor >= 10) ? 6 : 4; 92353358Sdim} 93314564Sdim 94321369Sdim/// \returns Vmcnt bit shift (higher bits). 95321369Sdimunsigned getVmcntBitShiftHi() { return 14; } 96314564Sdim 97321369Sdim/// \returns Vmcnt bit width (higher bits). 98321369Sdimunsigned getVmcntBitWidthHi() { return 2; } 99321369Sdim 100321369Sdim} // end namespace anonymous 101321369Sdim 102285163Sdimnamespace llvm { 103321369Sdim 104285163Sdimnamespace AMDGPU { 105285163Sdim 106341825Sdim#define GET_MIMGBaseOpcodesTable_IMPL 107341825Sdim#define GET_MIMGDimInfoTable_IMPL 108341825Sdim#define GET_MIMGInfoTable_IMPL 109341825Sdim#define GET_MIMGLZMappingTable_IMPL 110353358Sdim#define GET_MIMGMIPMappingTable_IMPL 111341825Sdim#include "AMDGPUGenSearchableTables.inc" 112327952Sdim 113341825Sdimint getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, 114341825Sdim unsigned VDataDwords, unsigned VAddrDwords) { 115341825Sdim const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, 116341825Sdim VDataDwords, VAddrDwords); 117341825Sdim return Info ? Info->Opcode : -1; 118327952Sdim} 119327952Sdim 120353358Sdimconst MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) { 121353358Sdim const MIMGInfo *Info = getMIMGInfo(Opc); 122353358Sdim return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr; 123353358Sdim} 124353358Sdim 125341825Sdimint getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) { 126341825Sdim const MIMGInfo *OrigInfo = getMIMGInfo(Opc); 127341825Sdim const MIMGInfo *NewInfo = 128341825Sdim getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding, 129341825Sdim NewChannels, OrigInfo->VAddrDwords); 130341825Sdim return NewInfo ? NewInfo->Opcode : -1; 131327952Sdim} 132327952Sdim 133344779Sdimstruct MUBUFInfo { 134344779Sdim uint16_t Opcode; 135344779Sdim uint16_t BaseOpcode; 136360784Sdim uint8_t elements; 137344779Sdim bool has_vaddr; 138344779Sdim bool has_srsrc; 139344779Sdim bool has_soffset; 140344779Sdim}; 141344779Sdim 142360784Sdimstruct MTBUFInfo { 143360784Sdim uint16_t Opcode; 144360784Sdim uint16_t BaseOpcode; 145360784Sdim uint8_t elements; 146360784Sdim bool has_vaddr; 147360784Sdim bool has_srsrc; 148360784Sdim bool has_soffset; 149360784Sdim}; 150360784Sdim 151360784Sdim#define GET_MTBUFInfoTable_DECL 152360784Sdim#define GET_MTBUFInfoTable_IMPL 153344779Sdim#define GET_MUBUFInfoTable_DECL 154344779Sdim#define GET_MUBUFInfoTable_IMPL 155344779Sdim#include "AMDGPUGenSearchableTables.inc" 156344779Sdim 157360784Sdimint getMTBUFBaseOpcode(unsigned Opc) { 158360784Sdim const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc); 159360784Sdim return Info ? Info->BaseOpcode : -1; 160360784Sdim} 161360784Sdim 162360784Sdimint getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) { 163360784Sdim const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); 164360784Sdim return Info ? Info->Opcode : -1; 165360784Sdim} 166360784Sdim 167360784Sdimint getMTBUFElements(unsigned Opc) { 168360784Sdim const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 169360784Sdim return Info ? Info->elements : 0; 170360784Sdim} 171360784Sdim 172360784Sdimbool getMTBUFHasVAddr(unsigned Opc) { 173360784Sdim const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 174360784Sdim return Info ? Info->has_vaddr : false; 175360784Sdim} 176360784Sdim 177360784Sdimbool getMTBUFHasSrsrc(unsigned Opc) { 178360784Sdim const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 179360784Sdim return Info ? Info->has_srsrc : false; 180360784Sdim} 181360784Sdim 182360784Sdimbool getMTBUFHasSoffset(unsigned Opc) { 183360784Sdim const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 184360784Sdim return Info ? Info->has_soffset : false; 185360784Sdim} 186360784Sdim 187344779Sdimint getMUBUFBaseOpcode(unsigned Opc) { 188344779Sdim const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc); 189344779Sdim return Info ? Info->BaseOpcode : -1; 190344779Sdim} 191344779Sdim 192360784Sdimint getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) { 193360784Sdim const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); 194344779Sdim return Info ? Info->Opcode : -1; 195344779Sdim} 196344779Sdim 197360784Sdimint getMUBUFElements(unsigned Opc) { 198344779Sdim const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 199360784Sdim return Info ? Info->elements : 0; 200344779Sdim} 201344779Sdim 202344779Sdimbool getMUBUFHasVAddr(unsigned Opc) { 203344779Sdim const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 204344779Sdim return Info ? Info->has_vaddr : false; 205344779Sdim} 206344779Sdim 207344779Sdimbool getMUBUFHasSrsrc(unsigned Opc) { 208344779Sdim const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 209344779Sdim return Info ? Info->has_srsrc : false; 210344779Sdim} 211344779Sdim 212344779Sdimbool getMUBUFHasSoffset(unsigned Opc) { 213344779Sdim const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 214344779Sdim return Info ? Info->has_soffset : false; 215344779Sdim} 216344779Sdim 217327952Sdim// Wrapper for Tablegen'd function. enum Subtarget is not defined in any 218327952Sdim// header files, so we need to wrap it in a function that takes unsigned 219327952Sdim// instead. 220327952Sdimint getMCOpcode(uint16_t Opcode, unsigned Gen) { 221327952Sdim return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); 222327952Sdim} 223327952Sdim 224321369Sdimnamespace IsaInfo { 225321369Sdim 226327952Sdimvoid streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { 227327952Sdim auto TargetTriple = STI->getTargetTriple(); 228344779Sdim auto Version = getIsaVersion(STI->getCPU()); 229327952Sdim 230327952Sdim Stream << TargetTriple.getArchName() << '-' 231327952Sdim << TargetTriple.getVendorName() << '-' 232327952Sdim << TargetTriple.getOSName() << '-' 233327952Sdim << TargetTriple.getEnvironmentName() << '-' 234327952Sdim << "gfx" 235344779Sdim << Version.Major 236344779Sdim << Version.Minor 237344779Sdim << Version.Stepping; 238341825Sdim 239341825Sdim if (hasXNACK(*STI)) 240341825Sdim Stream << "+xnack"; 241344779Sdim if (hasSRAMECC(*STI)) 242344779Sdim Stream << "+sram-ecc"; 243341825Sdim 244327952Sdim Stream.flush(); 245327952Sdim} 246327952Sdim 247341825Sdimbool hasCodeObjectV3(const MCSubtargetInfo *STI) { 248344779Sdim return STI->getTargetTriple().getOS() == Triple::AMDHSA && 249344779Sdim STI->getFeatureBits().test(FeatureCodeObjectV3); 250327952Sdim} 251327952Sdim 252344779Sdimunsigned getWavefrontSize(const MCSubtargetInfo *STI) { 253344779Sdim if (STI->getFeatureBits().test(FeatureWavefrontSize16)) 254321369Sdim return 16; 255344779Sdim if (STI->getFeatureBits().test(FeatureWavefrontSize32)) 256321369Sdim return 32; 257321369Sdim 258321369Sdim return 64; 259321369Sdim} 260321369Sdim 261344779Sdimunsigned getLocalMemorySize(const MCSubtargetInfo *STI) { 262344779Sdim if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) 263321369Sdim return 32768; 264344779Sdim if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) 265321369Sdim return 65536; 266321369Sdim 267321369Sdim return 0; 268321369Sdim} 269321369Sdim 270344779Sdimunsigned getEUsPerCU(const MCSubtargetInfo *STI) { 271321369Sdim return 4; 272321369Sdim} 273321369Sdim 274344779Sdimunsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, 275321369Sdim unsigned FlatWorkGroupSize) { 276353358Sdim assert(FlatWorkGroupSize != 0); 277353358Sdim if (STI->getTargetTriple().getArch() != Triple::amdgcn) 278321369Sdim return 8; 279344779Sdim unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); 280321369Sdim if (N == 1) 281321369Sdim return 40; 282321369Sdim N = 40 / N; 283321369Sdim return std::min(N, 16u); 284321369Sdim} 285321369Sdim 286344779Sdimunsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) { 287360784Sdim return getMaxWavesPerEU(STI) * getEUsPerCU(STI); 288321369Sdim} 289321369Sdim 290344779Sdimunsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, 291321369Sdim unsigned FlatWorkGroupSize) { 292344779Sdim return getWavesPerWorkGroup(STI, FlatWorkGroupSize); 293321369Sdim} 294321369Sdim 295344779Sdimunsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { 296321369Sdim return 1; 297321369Sdim} 298321369Sdim 299360784Sdimunsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { 300321369Sdim // FIXME: Need to take scratch memory into account. 301360784Sdim if (!isGFX10(*STI)) 302360784Sdim return 10; 303360784Sdim return 20; 304321369Sdim} 305321369Sdim 306344779Sdimunsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, 307321369Sdim unsigned FlatWorkGroupSize) { 308344779Sdim return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize), 309344779Sdim getEUsPerCU(STI)) / getEUsPerCU(STI); 310321369Sdim} 311321369Sdim 312344779Sdimunsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { 313321369Sdim return 1; 314321369Sdim} 315321369Sdim 316344779Sdimunsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { 317360784Sdim // Some subtargets allow encoding 2048, but this isn't tested or supported. 318360784Sdim return 1024; 319321369Sdim} 320321369Sdim 321344779Sdimunsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, 322321369Sdim unsigned FlatWorkGroupSize) { 323344779Sdim return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) / 324344779Sdim getWavefrontSize(STI); 325321369Sdim} 326321369Sdim 327344779Sdimunsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) { 328344779Sdim IsaVersion Version = getIsaVersion(STI->getCPU()); 329353358Sdim if (Version.Major >= 10) 330353358Sdim return getAddressableNumSGPRs(STI); 331321369Sdim if (Version.Major >= 8) 332321369Sdim return 16; 333321369Sdim return 8; 334321369Sdim} 335321369Sdim 336344779Sdimunsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { 337321369Sdim return 8; 338321369Sdim} 339321369Sdim 340344779Sdimunsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) { 341344779Sdim IsaVersion Version = getIsaVersion(STI->getCPU()); 342321369Sdim if (Version.Major >= 8) 343321369Sdim return 800; 344321369Sdim return 512; 345321369Sdim} 346321369Sdim 347344779Sdimunsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) { 348344779Sdim if (STI->getFeatureBits().test(FeatureSGPRInitBug)) 349321369Sdim return FIXED_NUM_SGPRS_FOR_INIT_BUG; 350321369Sdim 351344779Sdim IsaVersion Version = getIsaVersion(STI->getCPU()); 352353358Sdim if (Version.Major >= 10) 353353358Sdim return 106; 354321369Sdim if (Version.Major >= 8) 355321369Sdim return 102; 356321369Sdim return 104; 357321369Sdim} 358321369Sdim 359344779Sdimunsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 360321369Sdim assert(WavesPerEU != 0); 361321369Sdim 362353358Sdim IsaVersion Version = getIsaVersion(STI->getCPU()); 363353358Sdim if (Version.Major >= 10) 364353358Sdim return 0; 365353358Sdim 366360784Sdim if (WavesPerEU >= getMaxWavesPerEU(STI)) 367321369Sdim return 0; 368341825Sdim 369344779Sdim unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); 370344779Sdim if (STI->getFeatureBits().test(FeatureTrapHandler)) 371341825Sdim MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS); 372344779Sdim MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1; 373344779Sdim return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI)); 374321369Sdim} 375321369Sdim 376344779Sdimunsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, 377321369Sdim bool Addressable) { 378321369Sdim assert(WavesPerEU != 0); 379321369Sdim 380353358Sdim unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI); 381344779Sdim IsaVersion Version = getIsaVersion(STI->getCPU()); 382353358Sdim if (Version.Major >= 10) 383353358Sdim return Addressable ? AddressableNumSGPRs : 108; 384321369Sdim if (Version.Major >= 8 && !Addressable) 385321369Sdim AddressableNumSGPRs = 112; 386344779Sdim unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU; 387344779Sdim if (STI->getFeatureBits().test(FeatureTrapHandler)) 388341825Sdim MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS); 389344779Sdim MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI)); 390321369Sdim return std::min(MaxNumSGPRs, AddressableNumSGPRs); 391321369Sdim} 392321369Sdim 393344779Sdimunsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 394341825Sdim bool FlatScrUsed, bool XNACKUsed) { 395341825Sdim unsigned ExtraSGPRs = 0; 396341825Sdim if (VCCUsed) 397341825Sdim ExtraSGPRs = 2; 398341825Sdim 399344779Sdim IsaVersion Version = getIsaVersion(STI->getCPU()); 400353358Sdim if (Version.Major >= 10) 401353358Sdim return ExtraSGPRs; 402353358Sdim 403341825Sdim if (Version.Major < 8) { 404341825Sdim if (FlatScrUsed) 405341825Sdim ExtraSGPRs = 4; 406341825Sdim } else { 407341825Sdim if (XNACKUsed) 408341825Sdim ExtraSGPRs = 4; 409341825Sdim 410341825Sdim if (FlatScrUsed) 411341825Sdim ExtraSGPRs = 6; 412341825Sdim } 413341825Sdim 414341825Sdim return ExtraSGPRs; 415341825Sdim} 416341825Sdim 417344779Sdimunsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 418341825Sdim bool FlatScrUsed) { 419344779Sdim return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed, 420344779Sdim STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); 421341825Sdim} 422341825Sdim 423344779Sdimunsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { 424344779Sdim NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); 425341825Sdim // SGPRBlocks is actual number of SGPR blocks minus 1. 426344779Sdim return NumSGPRs / getSGPREncodingGranule(STI) - 1; 427341825Sdim} 428341825Sdim 429353358Sdimunsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, 430353358Sdim Optional<bool> EnableWavefrontSize32) { 431353358Sdim bool IsWave32 = EnableWavefrontSize32 ? 432353358Sdim *EnableWavefrontSize32 : 433353358Sdim STI->getFeatureBits().test(FeatureWavefrontSize32); 434353358Sdim return IsWave32 ? 8 : 4; 435321369Sdim} 436321369Sdim 437353358Sdimunsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, 438353358Sdim Optional<bool> EnableWavefrontSize32) { 439353358Sdim return getVGPRAllocGranule(STI, EnableWavefrontSize32); 440321369Sdim} 441321369Sdim 442344779Sdimunsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { 443360784Sdim if (!isGFX10(*STI)) 444360784Sdim return 256; 445360784Sdim return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512; 446321369Sdim} 447321369Sdim 448344779Sdimunsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { 449360784Sdim return 256; 450321369Sdim} 451321369Sdim 452344779Sdimunsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 453321369Sdim assert(WavesPerEU != 0); 454321369Sdim 455360784Sdim if (WavesPerEU >= getMaxWavesPerEU(STI)) 456321369Sdim return 0; 457321369Sdim unsigned MinNumVGPRs = 458344779Sdim alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1), 459344779Sdim getVGPRAllocGranule(STI)) + 1; 460344779Sdim return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI)); 461321369Sdim} 462321369Sdim 463344779Sdimunsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 464321369Sdim assert(WavesPerEU != 0); 465321369Sdim 466344779Sdim unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU, 467344779Sdim getVGPRAllocGranule(STI)); 468344779Sdim unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI); 469321369Sdim return std::min(MaxNumVGPRs, AddressableNumVGPRs); 470321369Sdim} 471321369Sdim 472353358Sdimunsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, 473353358Sdim Optional<bool> EnableWavefrontSize32) { 474353358Sdim NumVGPRs = alignTo(std::max(1u, NumVGPRs), 475353358Sdim getVGPREncodingGranule(STI, EnableWavefrontSize32)); 476341825Sdim // VGPRBlocks is actual number of VGPR blocks minus 1. 477353358Sdim return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1; 478341825Sdim} 479341825Sdim 480321369Sdim} // end namespace IsaInfo 481321369Sdim 482285163Sdimvoid initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 483344779Sdim const MCSubtargetInfo *STI) { 484344779Sdim IsaVersion Version = getIsaVersion(STI->getCPU()); 485285163Sdim 486285163Sdim memset(&Header, 0, sizeof(Header)); 487285163Sdim 488285163Sdim Header.amd_kernel_code_version_major = 1; 489341825Sdim Header.amd_kernel_code_version_minor = 2; 490285163Sdim Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU 491344779Sdim Header.amd_machine_version_major = Version.Major; 492344779Sdim Header.amd_machine_version_minor = Version.Minor; 493344779Sdim Header.amd_machine_version_stepping = Version.Stepping; 494285163Sdim Header.kernel_code_entry_byte_offset = sizeof(Header); 495285163Sdim Header.wavefront_size = 6; 496321369Sdim 497321369Sdim // If the code object does not support indirect functions, then the value must 498321369Sdim // be 0xffffffff. 499321369Sdim Header.call_convention = -1; 500321369Sdim 501285163Sdim // These alignment values are specified in powers of two, so alignment = 502285163Sdim // 2^n. The minimum alignment is 2^4 = 16. 503285163Sdim Header.kernarg_segment_alignment = 4; 504285163Sdim Header.group_segment_alignment = 4; 505285163Sdim Header.private_segment_alignment = 4; 506353358Sdim 507353358Sdim if (Version.Major >= 10) { 508353358Sdim if (STI->getFeatureBits().test(FeatureWavefrontSize32)) { 509353358Sdim Header.wavefront_size = 5; 510353358Sdim Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; 511353358Sdim } 512353358Sdim Header.compute_pgm_resource_registers |= 513353358Sdim S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) | 514353358Sdim S_00B848_MEM_ORDERED(1); 515353358Sdim } 516285163Sdim} 517285163Sdim 518353358Sdimamdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( 519353358Sdim const MCSubtargetInfo *STI) { 520353358Sdim IsaVersion Version = getIsaVersion(STI->getCPU()); 521353358Sdim 522341825Sdim amdhsa::kernel_descriptor_t KD; 523341825Sdim memset(&KD, 0, sizeof(KD)); 524353358Sdim 525341825Sdim AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 526341825Sdim amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, 527341825Sdim amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE); 528341825Sdim AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 529341825Sdim amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1); 530341825Sdim AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 531341825Sdim amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1); 532341825Sdim AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, 533341825Sdim amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1); 534353358Sdim if (Version.Major >= 10) { 535353358Sdim AMDHSA_BITS_SET(KD.kernel_code_properties, 536353358Sdim amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 537353358Sdim STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0); 538353358Sdim AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 539353358Sdim amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE, 540353358Sdim STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1); 541353358Sdim AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 542353358Sdim amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1); 543353358Sdim } 544341825Sdim return KD; 545341825Sdim} 546341825Sdim 547327952Sdimbool isGroupSegment(const GlobalValue *GV) { 548360784Sdim return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; 549296417Sdim} 550296417Sdim 551327952Sdimbool isGlobalSegment(const GlobalValue *GV) { 552360784Sdim return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; 553296417Sdim} 554296417Sdim 555327952Sdimbool isReadOnlySegment(const GlobalValue *GV) { 556360784Sdim unsigned AS = GV->getAddressSpace(); 557360784Sdim return AS == AMDGPUAS::CONSTANT_ADDRESS || 558360784Sdim AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT; 559296417Sdim} 560296417Sdim 561314564Sdimbool shouldEmitConstantsToTextSection(const Triple &TT) { 562360784Sdim return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600; 563314564Sdim} 564314564Sdim 565309124Sdimint getIntegerAttribute(const Function &F, StringRef Name, int Default) { 566296417Sdim Attribute A = F.getFnAttribute(Name); 567309124Sdim int Result = Default; 568296417Sdim 569296417Sdim if (A.isStringAttribute()) { 570296417Sdim StringRef Str = A.getValueAsString(); 571296417Sdim if (Str.getAsInteger(0, Result)) { 572296417Sdim LLVMContext &Ctx = F.getContext(); 573309124Sdim Ctx.emitError("can't parse integer attribute " + Name); 574296417Sdim } 575296417Sdim } 576309124Sdim 577296417Sdim return Result; 578296417Sdim} 579296417Sdim 580314564Sdimstd::pair<int, int> getIntegerPairAttribute(const Function &F, 581314564Sdim StringRef Name, 582314564Sdim std::pair<int, int> Default, 583314564Sdim bool OnlyFirstRequired) { 584314564Sdim Attribute A = F.getFnAttribute(Name); 585314564Sdim if (!A.isStringAttribute()) 586314564Sdim return Default; 587314564Sdim 588314564Sdim LLVMContext &Ctx = F.getContext(); 589314564Sdim std::pair<int, int> Ints = Default; 590314564Sdim std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(','); 591314564Sdim if (Strs.first.trim().getAsInteger(0, Ints.first)) { 592314564Sdim Ctx.emitError("can't parse first integer attribute " + Name); 593314564Sdim return Default; 594314564Sdim } 595314564Sdim if (Strs.second.trim().getAsInteger(0, Ints.second)) { 596321369Sdim if (!OnlyFirstRequired || !Strs.second.trim().empty()) { 597314564Sdim Ctx.emitError("can't parse second integer attribute " + Name); 598314564Sdim return Default; 599314564Sdim } 600314564Sdim } 601314564Sdim 602314564Sdim return Ints; 603296417Sdim} 604296417Sdim 605344779Sdimunsigned getVmcntBitMask(const IsaVersion &Version) { 606321369Sdim unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1; 607321369Sdim if (Version.Major < 9) 608321369Sdim return VmcntLo; 609314564Sdim 610321369Sdim unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo(); 611321369Sdim return VmcntLo | VmcntHi; 612314564Sdim} 613314564Sdim 614344779Sdimunsigned getExpcntBitMask(const IsaVersion &Version) { 615314564Sdim return (1 << getExpcntBitWidth()) - 1; 616314564Sdim} 617314564Sdim 618344779Sdimunsigned getLgkmcntBitMask(const IsaVersion &Version) { 619353358Sdim return (1 << getLgkmcntBitWidth(Version.Major)) - 1; 620314564Sdim} 621314564Sdim 622344779Sdimunsigned getWaitcntBitMask(const IsaVersion &Version) { 623321369Sdim unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo()); 624321369Sdim unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); 625353358Sdim unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), 626353358Sdim getLgkmcntBitWidth(Version.Major)); 627321369Sdim unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt; 628321369Sdim if (Version.Major < 9) 629321369Sdim return Waitcnt; 630321369Sdim 631321369Sdim unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi()); 632321369Sdim return Waitcnt | VmcntHi; 633314564Sdim} 634314564Sdim 635344779Sdimunsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { 636321369Sdim unsigned VmcntLo = 637321369Sdim unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); 638321369Sdim if (Version.Major < 9) 639321369Sdim return VmcntLo; 640321369Sdim 641321369Sdim unsigned VmcntHi = 642321369Sdim unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); 643321369Sdim VmcntHi <<= getVmcntBitWidthLo(); 644321369Sdim return VmcntLo | VmcntHi; 645321369Sdim} 646321369Sdim 647344779Sdimunsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { 648314564Sdim return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); 649314564Sdim} 650314564Sdim 651344779Sdimunsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { 652353358Sdim return unpackBits(Waitcnt, getLgkmcntBitShift(), 653353358Sdim getLgkmcntBitWidth(Version.Major)); 654314564Sdim} 655314564Sdim 656344779Sdimvoid decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, 657314564Sdim unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { 658314564Sdim Vmcnt = decodeVmcnt(Version, Waitcnt); 659314564Sdim Expcnt = decodeExpcnt(Version, Waitcnt); 660314564Sdim Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); 661314564Sdim} 662314564Sdim 663344779SdimWaitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) { 664344779Sdim Waitcnt Decoded; 665344779Sdim Decoded.VmCnt = decodeVmcnt(Version, Encoded); 666344779Sdim Decoded.ExpCnt = decodeExpcnt(Version, Encoded); 667344779Sdim Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded); 668344779Sdim return Decoded; 669344779Sdim} 670344779Sdim 671344779Sdimunsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, 672321369Sdim unsigned Vmcnt) { 673321369Sdim Waitcnt = 674321369Sdim packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); 675321369Sdim if (Version.Major < 9) 676321369Sdim return Waitcnt; 677321369Sdim 678321369Sdim Vmcnt >>= getVmcntBitWidthLo(); 679321369Sdim return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); 680314564Sdim} 681314564Sdim 682344779Sdimunsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, 683321369Sdim unsigned Expcnt) { 684314564Sdim return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); 685314564Sdim} 686314564Sdim 687344779Sdimunsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, 688321369Sdim unsigned Lgkmcnt) { 689353358Sdim return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), 690353358Sdim getLgkmcntBitWidth(Version.Major)); 691314564Sdim} 692314564Sdim 693344779Sdimunsigned encodeWaitcnt(const IsaVersion &Version, 694314564Sdim unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { 695314564Sdim unsigned Waitcnt = getWaitcntBitMask(Version); 696314564Sdim Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); 697314564Sdim Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); 698314564Sdim Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); 699314564Sdim return Waitcnt; 700314564Sdim} 701314564Sdim 702344779Sdimunsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) { 703344779Sdim return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt); 704344779Sdim} 705344779Sdim 706353358Sdim//===----------------------------------------------------------------------===// 707353358Sdim// hwreg 708353358Sdim//===----------------------------------------------------------------------===// 709353358Sdim 710353358Sdimnamespace Hwreg { 711353358Sdim 712353358Sdimint64_t getHwregId(const StringRef Name) { 713353358Sdim for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) { 714353358Sdim if (IdSymbolic[Id] && Name == IdSymbolic[Id]) 715353358Sdim return Id; 716353358Sdim } 717353358Sdim return ID_UNKNOWN_; 718353358Sdim} 719353358Sdim 720353358Sdimstatic unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) { 721353358Sdim if (isSI(STI) || isCI(STI) || isVI(STI)) 722353358Sdim return ID_SYMBOLIC_FIRST_GFX9_; 723353358Sdim else if (isGFX9(STI)) 724353358Sdim return ID_SYMBOLIC_FIRST_GFX10_; 725353358Sdim else 726353358Sdim return ID_SYMBOLIC_LAST_; 727353358Sdim} 728353358Sdim 729353358Sdimbool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) { 730353358Sdim return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) && 731353358Sdim IdSymbolic[Id]; 732353358Sdim} 733353358Sdim 734353358Sdimbool isValidHwreg(int64_t Id) { 735353358Sdim return 0 <= Id && isUInt<ID_WIDTH_>(Id); 736353358Sdim} 737353358Sdim 738353358Sdimbool isValidHwregOffset(int64_t Offset) { 739353358Sdim return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset); 740353358Sdim} 741353358Sdim 742353358Sdimbool isValidHwregWidth(int64_t Width) { 743353358Sdim return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1); 744353358Sdim} 745353358Sdim 746353358Sdimuint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) { 747353358Sdim return (Id << ID_SHIFT_) | 748353358Sdim (Offset << OFFSET_SHIFT_) | 749353358Sdim ((Width - 1) << WIDTH_M1_SHIFT_); 750353358Sdim} 751353358Sdim 752353358SdimStringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) { 753353358Sdim return isValidHwreg(Id, STI) ? IdSymbolic[Id] : ""; 754353358Sdim} 755353358Sdim 756353358Sdimvoid decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) { 757353358Sdim Id = (Val & ID_MASK_) >> ID_SHIFT_; 758353358Sdim Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_; 759353358Sdim Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1; 760353358Sdim} 761353358Sdim 762353358Sdim} // namespace Hwreg 763353358Sdim 764353358Sdim//===----------------------------------------------------------------------===// 765353358Sdim// SendMsg 766353358Sdim//===----------------------------------------------------------------------===// 767353358Sdim 768353358Sdimnamespace SendMsg { 769353358Sdim 770353358Sdimint64_t getMsgId(const StringRef Name) { 771353358Sdim for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 772353358Sdim if (IdSymbolic[i] && Name == IdSymbolic[i]) 773353358Sdim return i; 774353358Sdim } 775353358Sdim return ID_UNKNOWN_; 776353358Sdim} 777353358Sdim 778353358Sdimstatic bool isValidMsgId(int64_t MsgId) { 779353358Sdim return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId]; 780353358Sdim} 781353358Sdim 782353358Sdimbool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) { 783353358Sdim if (Strict) { 784353358Sdim if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL) 785353358Sdim return isGFX9(STI) || isGFX10(STI); 786353358Sdim else 787353358Sdim return isValidMsgId(MsgId); 788353358Sdim } else { 789353358Sdim return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId); 790353358Sdim } 791353358Sdim} 792353358Sdim 793353358SdimStringRef getMsgName(int64_t MsgId) { 794353358Sdim return isValidMsgId(MsgId)? IdSymbolic[MsgId] : ""; 795353358Sdim} 796353358Sdim 797353358Sdimint64_t getMsgOpId(int64_t MsgId, const StringRef Name) { 798353358Sdim const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 799353358Sdim const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 800353358Sdim const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_; 801353358Sdim for (int i = F; i < L; ++i) { 802353358Sdim if (Name == S[i]) { 803353358Sdim return i; 804353358Sdim } 805353358Sdim } 806353358Sdim return OP_UNKNOWN_; 807353358Sdim} 808353358Sdim 809353358Sdimbool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) { 810353358Sdim 811353358Sdim if (!Strict) 812353358Sdim return 0 <= OpId && isUInt<OP_WIDTH_>(OpId); 813353358Sdim 814353358Sdim switch(MsgId) 815353358Sdim { 816353358Sdim case ID_GS: 817353358Sdim return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP; 818353358Sdim case ID_GS_DONE: 819353358Sdim return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_; 820353358Sdim case ID_SYSMSG: 821353358Sdim return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_; 822353358Sdim default: 823353358Sdim return OpId == OP_NONE_; 824353358Sdim } 825353358Sdim} 826353358Sdim 827353358SdimStringRef getMsgOpName(int64_t MsgId, int64_t OpId) { 828353358Sdim assert(msgRequiresOp(MsgId)); 829353358Sdim return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId]; 830353358Sdim} 831353358Sdim 832353358Sdimbool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) { 833353358Sdim 834353358Sdim if (!Strict) 835353358Sdim return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId); 836353358Sdim 837353358Sdim switch(MsgId) 838353358Sdim { 839353358Sdim case ID_GS: 840353358Sdim return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; 841353358Sdim case ID_GS_DONE: 842353358Sdim return (OpId == OP_GS_NOP)? 843353358Sdim (StreamId == STREAM_ID_NONE_) : 844353358Sdim (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); 845353358Sdim default: 846353358Sdim return StreamId == STREAM_ID_NONE_; 847353358Sdim } 848353358Sdim} 849353358Sdim 850353358Sdimbool msgRequiresOp(int64_t MsgId) { 851353358Sdim return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG; 852353358Sdim} 853353358Sdim 854353358Sdimbool msgSupportsStream(int64_t MsgId, int64_t OpId) { 855353358Sdim return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP; 856353358Sdim} 857353358Sdim 858353358Sdimvoid decodeMsg(unsigned Val, 859353358Sdim uint16_t &MsgId, 860353358Sdim uint16_t &OpId, 861353358Sdim uint16_t &StreamId) { 862353358Sdim MsgId = Val & ID_MASK_; 863353358Sdim OpId = (Val & OP_MASK_) >> OP_SHIFT_; 864353358Sdim StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; 865353358Sdim} 866353358Sdim 867353358Sdimuint64_t encodeMsg(uint64_t MsgId, 868353358Sdim uint64_t OpId, 869353358Sdim uint64_t StreamId) { 870353358Sdim return (MsgId << ID_SHIFT_) | 871353358Sdim (OpId << OP_SHIFT_) | 872353358Sdim (StreamId << STREAM_ID_SHIFT_); 873353358Sdim} 874353358Sdim 875353358Sdim} // namespace SendMsg 876353358Sdim 877353358Sdim//===----------------------------------------------------------------------===// 878353358Sdim// 879353358Sdim//===----------------------------------------------------------------------===// 880353358Sdim 881296417Sdimunsigned getInitialPSInputAddr(const Function &F) { 882296417Sdim return getIntegerAttribute(F, "InitialPSInputAddr", 0); 883296417Sdim} 884296417Sdim 885309124Sdimbool isShader(CallingConv::ID cc) { 886309124Sdim switch(cc) { 887309124Sdim case CallingConv::AMDGPU_VS: 888327952Sdim case CallingConv::AMDGPU_LS: 889321369Sdim case CallingConv::AMDGPU_HS: 890327952Sdim case CallingConv::AMDGPU_ES: 891309124Sdim case CallingConv::AMDGPU_GS: 892309124Sdim case CallingConv::AMDGPU_PS: 893309124Sdim case CallingConv::AMDGPU_CS: 894309124Sdim return true; 895309124Sdim default: 896309124Sdim return false; 897309124Sdim } 898309124Sdim} 899309124Sdim 900309124Sdimbool isCompute(CallingConv::ID cc) { 901309124Sdim return !isShader(cc) || cc == CallingConv::AMDGPU_CS; 902309124Sdim} 903309124Sdim 904321369Sdimbool isEntryFunctionCC(CallingConv::ID CC) { 905321369Sdim switch (CC) { 906321369Sdim case CallingConv::AMDGPU_KERNEL: 907321369Sdim case CallingConv::SPIR_KERNEL: 908321369Sdim case CallingConv::AMDGPU_VS: 909321369Sdim case CallingConv::AMDGPU_GS: 910321369Sdim case CallingConv::AMDGPU_PS: 911321369Sdim case CallingConv::AMDGPU_CS: 912327952Sdim case CallingConv::AMDGPU_ES: 913321369Sdim case CallingConv::AMDGPU_HS: 914327952Sdim case CallingConv::AMDGPU_LS: 915321369Sdim return true; 916321369Sdim default: 917321369Sdim return false; 918321369Sdim } 919321369Sdim} 920321369Sdim 921341825Sdimbool hasXNACK(const MCSubtargetInfo &STI) { 922341825Sdim return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; 923341825Sdim} 924341825Sdim 925344779Sdimbool hasSRAMECC(const MCSubtargetInfo &STI) { 926344779Sdim return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC]; 927344779Sdim} 928344779Sdim 929341825Sdimbool hasMIMG_R128(const MCSubtargetInfo &STI) { 930341825Sdim return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128]; 931341825Sdim} 932341825Sdim 933341825Sdimbool hasPackedD16(const MCSubtargetInfo &STI) { 934341825Sdim return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]; 935341825Sdim} 936341825Sdim 937296417Sdimbool isSI(const MCSubtargetInfo &STI) { 938296417Sdim return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands]; 939296417Sdim} 940296417Sdim 941296417Sdimbool isCI(const MCSubtargetInfo &STI) { 942296417Sdim return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands]; 943296417Sdim} 944296417Sdim 945296417Sdimbool isVI(const MCSubtargetInfo &STI) { 946296417Sdim return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; 947296417Sdim} 948296417Sdim 949321369Sdimbool isGFX9(const MCSubtargetInfo &STI) { 950321369Sdim return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; 951321369Sdim} 952321369Sdim 953353358Sdimbool isGFX10(const MCSubtargetInfo &STI) { 954353358Sdim return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; 955353358Sdim} 956353358Sdim 957327952Sdimbool isGCN3Encoding(const MCSubtargetInfo &STI) { 958327952Sdim return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding]; 959327952Sdim} 960327952Sdim 961321369Sdimbool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { 962321369Sdim const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); 963321369Sdim const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); 964321369Sdim return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || 965321369Sdim Reg == AMDGPU::SCC; 966321369Sdim} 967321369Sdim 968321369Sdimbool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { 969321369Sdim for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) { 970321369Sdim if (*R == Reg1) return true; 971321369Sdim } 972321369Sdim return false; 973321369Sdim} 974321369Sdim 975327952Sdim#define MAP_REG2REG \ 976327952Sdim using namespace AMDGPU; \ 977327952Sdim switch(Reg) { \ 978327952Sdim default: return Reg; \ 979327952Sdim CASE_CI_VI(FLAT_SCR) \ 980327952Sdim CASE_CI_VI(FLAT_SCR_LO) \ 981327952Sdim CASE_CI_VI(FLAT_SCR_HI) \ 982353358Sdim CASE_VI_GFX9_GFX10(TTMP0) \ 983353358Sdim CASE_VI_GFX9_GFX10(TTMP1) \ 984353358Sdim CASE_VI_GFX9_GFX10(TTMP2) \ 985353358Sdim CASE_VI_GFX9_GFX10(TTMP3) \ 986353358Sdim CASE_VI_GFX9_GFX10(TTMP4) \ 987353358Sdim CASE_VI_GFX9_GFX10(TTMP5) \ 988353358Sdim CASE_VI_GFX9_GFX10(TTMP6) \ 989353358Sdim CASE_VI_GFX9_GFX10(TTMP7) \ 990353358Sdim CASE_VI_GFX9_GFX10(TTMP8) \ 991353358Sdim CASE_VI_GFX9_GFX10(TTMP9) \ 992353358Sdim CASE_VI_GFX9_GFX10(TTMP10) \ 993353358Sdim CASE_VI_GFX9_GFX10(TTMP11) \ 994353358Sdim CASE_VI_GFX9_GFX10(TTMP12) \ 995353358Sdim CASE_VI_GFX9_GFX10(TTMP13) \ 996353358Sdim CASE_VI_GFX9_GFX10(TTMP14) \ 997353358Sdim CASE_VI_GFX9_GFX10(TTMP15) \ 998353358Sdim CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \ 999353358Sdim CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \ 1000353358Sdim CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \ 1001353358Sdim CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \ 1002353358Sdim CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \ 1003353358Sdim CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \ 1004353358Sdim CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \ 1005353358Sdim CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \ 1006353358Sdim CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \ 1007353358Sdim CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \ 1008353358Sdim CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \ 1009353358Sdim CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \ 1010353358Sdim CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ 1011353358Sdim CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ 1012353358Sdim CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 1013353358Sdim CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 1014327952Sdim } 1015296417Sdim 1016327952Sdim#define CASE_CI_VI(node) \ 1017327952Sdim assert(!isSI(STI)); \ 1018327952Sdim case node: return isCI(STI) ? node##_ci : node##_vi; 1019296417Sdim 1020353358Sdim#define CASE_VI_GFX9_GFX10(node) \ 1021353358Sdim case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi; 1022296417Sdim 1023327952Sdimunsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { 1024341825Sdim if (STI.getTargetTriple().getArch() == Triple::r600) 1025341825Sdim return Reg; 1026327952Sdim MAP_REG2REG 1027296417Sdim} 1028296417Sdim 1029327952Sdim#undef CASE_CI_VI 1030353358Sdim#undef CASE_VI_GFX9_GFX10 1031321369Sdim 1032327952Sdim#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; 1033353358Sdim#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node; 1034321369Sdim 1035327952Sdimunsigned mc2PseudoReg(unsigned Reg) { 1036327952Sdim MAP_REG2REG 1037321369Sdim} 1038321369Sdim 1039327952Sdim#undef CASE_CI_VI 1040353358Sdim#undef CASE_VI_GFX9_GFX10 1041327952Sdim#undef MAP_REG2REG 1042327952Sdim 1043314564Sdimbool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { 1044321369Sdim assert(OpNo < Desc.NumOperands); 1045314564Sdim unsigned OpType = Desc.OpInfo[OpNo].OperandType; 1046314564Sdim return OpType >= AMDGPU::OPERAND_SRC_FIRST && 1047314564Sdim OpType <= AMDGPU::OPERAND_SRC_LAST; 1048314564Sdim} 1049314564Sdim 1050314564Sdimbool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { 1051321369Sdim assert(OpNo < Desc.NumOperands); 1052314564Sdim unsigned OpType = Desc.OpInfo[OpNo].OperandType; 1053314564Sdim switch (OpType) { 1054314564Sdim case AMDGPU::OPERAND_REG_IMM_FP32: 1055314564Sdim case AMDGPU::OPERAND_REG_IMM_FP64: 1056314564Sdim case AMDGPU::OPERAND_REG_IMM_FP16: 1057353358Sdim case AMDGPU::OPERAND_REG_IMM_V2FP16: 1058353358Sdim case AMDGPU::OPERAND_REG_IMM_V2INT16: 1059314564Sdim case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1060314564Sdim case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1061314564Sdim case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1062321369Sdim case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1063353358Sdim case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1064353358Sdim case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1065353358Sdim case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1066353358Sdim case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1067353358Sdim case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1068314564Sdim return true; 1069314564Sdim default: 1070314564Sdim return false; 1071314564Sdim } 1072314564Sdim} 1073314564Sdim 1074314564Sdimbool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { 1075321369Sdim assert(OpNo < Desc.NumOperands); 1076314564Sdim unsigned OpType = Desc.OpInfo[OpNo].OperandType; 1077314564Sdim return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && 1078314564Sdim OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST; 1079314564Sdim} 1080314564Sdim 1081314564Sdim// Avoid using MCRegisterClass::getSize, since that function will go away 1082314564Sdim// (move from MC* level to Target* level). Return size in bits. 1083314564Sdimunsigned getRegBitWidth(unsigned RCID) { 1084314564Sdim switch (RCID) { 1085314564Sdim case AMDGPU::SGPR_32RegClassID: 1086314564Sdim case AMDGPU::VGPR_32RegClassID: 1087353358Sdim case AMDGPU::VRegOrLds_32RegClassID: 1088353358Sdim case AMDGPU::AGPR_32RegClassID: 1089314564Sdim case AMDGPU::VS_32RegClassID: 1090353358Sdim case AMDGPU::AV_32RegClassID: 1091314564Sdim case AMDGPU::SReg_32RegClassID: 1092314564Sdim case AMDGPU::SReg_32_XM0RegClassID: 1093353358Sdim case AMDGPU::SRegOrLds_32RegClassID: 1094314564Sdim return 32; 1095314564Sdim case AMDGPU::SGPR_64RegClassID: 1096314564Sdim case AMDGPU::VS_64RegClassID: 1097353358Sdim case AMDGPU::AV_64RegClassID: 1098314564Sdim case AMDGPU::SReg_64RegClassID: 1099314564Sdim case AMDGPU::VReg_64RegClassID: 1100353358Sdim case AMDGPU::AReg_64RegClassID: 1101344779Sdim case AMDGPU::SReg_64_XEXECRegClassID: 1102314564Sdim return 64; 1103353358Sdim case AMDGPU::SGPR_96RegClassID: 1104353358Sdim case AMDGPU::SReg_96RegClassID: 1105314564Sdim case AMDGPU::VReg_96RegClassID: 1106314564Sdim return 96; 1107314564Sdim case AMDGPU::SGPR_128RegClassID: 1108314564Sdim case AMDGPU::SReg_128RegClassID: 1109314564Sdim case AMDGPU::VReg_128RegClassID: 1110353358Sdim case AMDGPU::AReg_128RegClassID: 1111314564Sdim return 128; 1112353358Sdim case AMDGPU::SGPR_160RegClassID: 1113353358Sdim case AMDGPU::SReg_160RegClassID: 1114353358Sdim case AMDGPU::VReg_160RegClassID: 1115353358Sdim return 160; 1116314564Sdim case AMDGPU::SReg_256RegClassID: 1117314564Sdim case AMDGPU::VReg_256RegClassID: 1118314564Sdim return 256; 1119314564Sdim case AMDGPU::SReg_512RegClassID: 1120314564Sdim case AMDGPU::VReg_512RegClassID: 1121353358Sdim case AMDGPU::AReg_512RegClassID: 1122314564Sdim return 512; 1123353358Sdim case AMDGPU::SReg_1024RegClassID: 1124353358Sdim case AMDGPU::VReg_1024RegClassID: 1125353358Sdim case AMDGPU::AReg_1024RegClassID: 1126353358Sdim return 1024; 1127314564Sdim default: 1128314564Sdim llvm_unreachable("Unexpected register class"); 1129314564Sdim } 1130314564Sdim} 1131314564Sdim 1132314564Sdimunsigned getRegBitWidth(const MCRegisterClass &RC) { 1133314564Sdim return getRegBitWidth(RC.getID()); 1134314564Sdim} 1135314564Sdim 1136314564Sdimunsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 1137314564Sdim unsigned OpNo) { 1138321369Sdim assert(OpNo < Desc.NumOperands); 1139314564Sdim unsigned RCID = Desc.OpInfo[OpNo].RegClass; 1140314564Sdim return getRegBitWidth(MRI->getRegClass(RCID)) / 8; 1141314564Sdim} 1142314564Sdim 1143314564Sdimbool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { 1144314564Sdim if (Literal >= -16 && Literal <= 64) 1145314564Sdim return true; 1146314564Sdim 1147314564Sdim uint64_t Val = static_cast<uint64_t>(Literal); 1148314564Sdim return (Val == DoubleToBits(0.0)) || 1149314564Sdim (Val == DoubleToBits(1.0)) || 1150314564Sdim (Val == DoubleToBits(-1.0)) || 1151314564Sdim (Val == DoubleToBits(0.5)) || 1152314564Sdim (Val == DoubleToBits(-0.5)) || 1153314564Sdim (Val == DoubleToBits(2.0)) || 1154314564Sdim (Val == DoubleToBits(-2.0)) || 1155314564Sdim (Val == DoubleToBits(4.0)) || 1156314564Sdim (Val == DoubleToBits(-4.0)) || 1157314564Sdim (Val == 0x3fc45f306dc9c882 && HasInv2Pi); 1158314564Sdim} 1159314564Sdim 1160314564Sdimbool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { 1161314564Sdim if (Literal >= -16 && Literal <= 64) 1162314564Sdim return true; 1163314564Sdim 1164314564Sdim // The actual type of the operand does not seem to matter as long 1165314564Sdim // as the bits match one of the inline immediate values. For example: 1166314564Sdim // 1167314564Sdim // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 1168314564Sdim // so it is a legal inline immediate. 1169314564Sdim // 1170314564Sdim // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 1171314564Sdim // floating-point, so it is a legal inline immediate. 1172314564Sdim 1173314564Sdim uint32_t Val = static_cast<uint32_t>(Literal); 1174314564Sdim return (Val == FloatToBits(0.0f)) || 1175314564Sdim (Val == FloatToBits(1.0f)) || 1176314564Sdim (Val == FloatToBits(-1.0f)) || 1177314564Sdim (Val == FloatToBits(0.5f)) || 1178314564Sdim (Val == FloatToBits(-0.5f)) || 1179314564Sdim (Val == FloatToBits(2.0f)) || 1180314564Sdim (Val == FloatToBits(-2.0f)) || 1181314564Sdim (Val == FloatToBits(4.0f)) || 1182314564Sdim (Val == FloatToBits(-4.0f)) || 1183314564Sdim (Val == 0x3e22f983 && HasInv2Pi); 1184314564Sdim} 1185314564Sdim 1186314564Sdimbool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { 1187321369Sdim if (!HasInv2Pi) 1188321369Sdim return false; 1189314564Sdim 1190314564Sdim if (Literal >= -16 && Literal <= 64) 1191314564Sdim return true; 1192314564Sdim 1193314564Sdim uint16_t Val = static_cast<uint16_t>(Literal); 1194314564Sdim return Val == 0x3C00 || // 1.0 1195314564Sdim Val == 0xBC00 || // -1.0 1196314564Sdim Val == 0x3800 || // 0.5 1197314564Sdim Val == 0xB800 || // -0.5 1198314564Sdim Val == 0x4000 || // 2.0 1199314564Sdim Val == 0xC000 || // -2.0 1200314564Sdim Val == 0x4400 || // 4.0 1201314564Sdim Val == 0xC400 || // -4.0 1202314564Sdim Val == 0x3118; // 1/2pi 1203314564Sdim} 1204314564Sdim 1205321369Sdimbool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) { 1206321369Sdim assert(HasInv2Pi); 1207321369Sdim 1208353358Sdim if (isInt<16>(Literal) || isUInt<16>(Literal)) { 1209353358Sdim int16_t Trunc = static_cast<int16_t>(Literal); 1210353358Sdim return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi); 1211353358Sdim } 1212353358Sdim if (!(Literal & 0xffff)) 1213353358Sdim return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi); 1214353358Sdim 1215321369Sdim int16_t Lo16 = static_cast<int16_t>(Literal); 1216321369Sdim int16_t Hi16 = static_cast<int16_t>(Literal >> 16); 1217321369Sdim return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi); 1218321369Sdim} 1219321369Sdim 1220327952Sdimbool isArgPassedInSGPR(const Argument *A) { 1221327952Sdim const Function *F = A->getParent(); 1222327952Sdim 1223327952Sdim // Arguments to compute shaders are never a source of divergence. 1224327952Sdim CallingConv::ID CC = F->getCallingConv(); 1225327952Sdim switch (CC) { 1226327952Sdim case CallingConv::AMDGPU_KERNEL: 1227327952Sdim case CallingConv::SPIR_KERNEL: 1228327952Sdim return true; 1229327952Sdim case CallingConv::AMDGPU_VS: 1230327952Sdim case CallingConv::AMDGPU_LS: 1231327952Sdim case CallingConv::AMDGPU_HS: 1232327952Sdim case CallingConv::AMDGPU_ES: 1233327952Sdim case CallingConv::AMDGPU_GS: 1234327952Sdim case CallingConv::AMDGPU_PS: 1235327952Sdim case CallingConv::AMDGPU_CS: 1236327952Sdim // For non-compute shaders, SGPR inputs are marked with either inreg or byval. 1237327952Sdim // Everything else is in VGPRs. 1238327952Sdim return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) || 1239327952Sdim F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal); 1240327952Sdim default: 1241327952Sdim // TODO: Should calls support inreg for SGPR inputs? 1242327952Sdim return false; 1243327952Sdim } 1244327952Sdim} 1245327952Sdim 1246353358Sdimstatic bool hasSMEMByteOffset(const MCSubtargetInfo &ST) { 1247353358Sdim return isGCN3Encoding(ST) || isGFX10(ST); 1248353358Sdim} 1249353358Sdim 1250321369Sdimint64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { 1251353358Sdim if (hasSMEMByteOffset(ST)) 1252327952Sdim return ByteOffset; 1253327952Sdim return ByteOffset >> 2; 1254321369Sdim} 1255321369Sdim 1256321369Sdimbool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { 1257321369Sdim int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset); 1258353358Sdim return (hasSMEMByteOffset(ST)) ? 1259327952Sdim isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset); 1260321369Sdim} 1261327952Sdim 1262344779Sdim// Given Imm, split it into the values to put into the SOffset and ImmOffset 1263344779Sdim// fields in an MUBUF instruction. Return false if it is not possible (due to a 1264344779Sdim// hardware bug needing a workaround). 1265344779Sdim// 1266344779Sdim// The required alignment ensures that individual address components remain 1267344779Sdim// aligned if they are aligned to begin with. It also ensures that additional 1268344779Sdim// offsets within the given alignment can be added to the resulting ImmOffset. 1269344779Sdimbool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, 1270344779Sdim const GCNSubtarget *Subtarget, uint32_t Align) { 1271344779Sdim const uint32_t MaxImm = alignDown(4095, Align); 1272344779Sdim uint32_t Overflow = 0; 1273321369Sdim 1274344779Sdim if (Imm > MaxImm) { 1275344779Sdim if (Imm <= MaxImm + 64) { 1276344779Sdim // Use an SOffset inline constant for 4..64 1277344779Sdim Overflow = Imm - MaxImm; 1278344779Sdim Imm = MaxImm; 1279344779Sdim } else { 1280344779Sdim // Try to keep the same value in SOffset for adjacent loads, so that 1281344779Sdim // the corresponding register contents can be re-used. 1282344779Sdim // 1283344779Sdim // Load values with all low-bits (except for alignment bits) set into 1284344779Sdim // SOffset, so that a larger range of values can be covered using 1285344779Sdim // s_movk_i32. 1286344779Sdim // 1287344779Sdim // Atomic operations fail to work correctly when individual address 1288344779Sdim // components are unaligned, even if their sum is aligned. 1289344779Sdim uint32_t High = (Imm + Align) & ~4095; 1290344779Sdim uint32_t Low = (Imm + Align) & 4095; 1291344779Sdim Imm = Low; 1292344779Sdim Overflow = High - Align; 1293344779Sdim } 1294344779Sdim } 1295321369Sdim 1296344779Sdim // There is a hardware bug in SI and CI which prevents address clamping in 1297344779Sdim // MUBUF instructions from working correctly with SOffsets. The immediate 1298344779Sdim // offset is unaffected. 1299344779Sdim if (Overflow > 0 && 1300344779Sdim Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) 1301344779Sdim return false; 1302321369Sdim 1303344779Sdim ImmOffset = Imm; 1304344779Sdim SOffset = Overflow; 1305344779Sdim return true; 1306321369Sdim} 1307321369Sdim 1308360784SdimSIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F, 1309360784Sdim const GCNSubtarget &ST) { 1310353358Sdim *this = getDefaultForCallingConv(F.getCallingConv()); 1311353358Sdim 1312353358Sdim StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString(); 1313353358Sdim if (!IEEEAttr.empty()) 1314353358Sdim IEEE = IEEEAttr == "true"; 1315353358Sdim 1316353358Sdim StringRef DX10ClampAttr 1317353358Sdim = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString(); 1318353358Sdim if (!DX10ClampAttr.empty()) 1319353358Sdim DX10Clamp = DX10ClampAttr == "true"; 1320360784Sdim 1321360784Sdim FP32Denormals = ST.hasFP32Denormals(F); 1322360784Sdim FP64FP16Denormals = ST.hasFP64FP16Denormals(F); 1323353358Sdim} 1324353358Sdim 1325341825Sdimnamespace { 1326341825Sdim 1327341825Sdimstruct SourceOfDivergence { 1328341825Sdim unsigned Intr; 1329341825Sdim}; 1330341825Sdimconst SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); 1331341825Sdim 1332341825Sdim#define GET_SourcesOfDivergence_IMPL 1333360784Sdim#define GET_Gfx9BufferFormat_IMPL 1334360784Sdim#define GET_Gfx10PlusBufferFormat_IMPL 1335341825Sdim#include "AMDGPUGenSearchableTables.inc" 1336341825Sdim 1337341825Sdim} // end anonymous namespace 1338341825Sdim 1339341825Sdimbool isIntrinsicSourceOfDivergence(unsigned IntrID) { 1340341825Sdim return lookupSourceOfDivergence(IntrID); 1341341825Sdim} 1342353358Sdim 1343360784Sdimconst GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 1344360784Sdim uint8_t NumComponents, 1345360784Sdim uint8_t NumFormat, 1346360784Sdim const MCSubtargetInfo &STI) { 1347360784Sdim return isGFX10(STI) 1348360784Sdim ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents, 1349360784Sdim NumFormat) 1350360784Sdim : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat); 1351360784Sdim} 1352360784Sdim 1353360784Sdimconst GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 1354360784Sdim const MCSubtargetInfo &STI) { 1355360784Sdim return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format) 1356360784Sdim : getGfx9BufferFormatInfo(Format); 1357360784Sdim} 1358360784Sdim 1359321369Sdim} // namespace AMDGPU 1360321369Sdim} // namespace llvm 1361