1321369Sdim//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2285163Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6285163Sdim//
7285163Sdim//===----------------------------------------------------------------------===//
8321369Sdim
9285163Sdim#include "AMDGPUBaseInfo.h"
10360784Sdim#include "AMDGPU.h"
11360784Sdim#include "AMDGPUAsmUtils.h"
12341825Sdim#include "AMDGPUTargetTransformInfo.h"
13314564Sdim#include "SIDefines.h"
14321369Sdim#include "llvm/ADT/StringRef.h"
15321369Sdim#include "llvm/ADT/Triple.h"
16321369Sdim#include "llvm/BinaryFormat/ELF.h"
17321369Sdim#include "llvm/CodeGen/MachineMemOperand.h"
18321369Sdim#include "llvm/IR/Attributes.h"
19321369Sdim#include "llvm/IR/Constants.h"
20296417Sdim#include "llvm/IR/Function.h"
21296417Sdim#include "llvm/IR/GlobalValue.h"
22321369Sdim#include "llvm/IR/Instruction.h"
23360784Sdim#include "llvm/IR/IntrinsicsAMDGPU.h"
24360784Sdim#include "llvm/IR/IntrinsicsR600.h"
25321369Sdim#include "llvm/IR/LLVMContext.h"
26321369Sdim#include "llvm/IR/Module.h"
27296417Sdim#include "llvm/MC/MCContext.h"
28321369Sdim#include "llvm/MC/MCInstrDesc.h"
29327952Sdim#include "llvm/MC/MCInstrInfo.h"
30314564Sdim#include "llvm/MC/MCRegisterInfo.h"
31296417Sdim#include "llvm/MC/MCSectionELF.h"
32296417Sdim#include "llvm/MC/MCSubtargetInfo.h"
33285163Sdim#include "llvm/MC/SubtargetFeature.h"
34321369Sdim#include "llvm/Support/Casting.h"
35321369Sdim#include "llvm/Support/ErrorHandling.h"
36321369Sdim#include "llvm/Support/MathExtras.h"
37321369Sdim#include <algorithm>
38321369Sdim#include <cassert>
39321369Sdim#include <cstdint>
40321369Sdim#include <cstring>
41321369Sdim#include <utility>
42285163Sdim
43321369Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
44285163Sdim
45314564Sdim#define GET_INSTRINFO_NAMED_OPS
46327952Sdim#define GET_INSTRMAP_INFO
47314564Sdim#include "AMDGPUGenInstrInfo.inc"
48327952Sdim#undef GET_INSTRMAP_INFO
49314564Sdim#undef GET_INSTRINFO_NAMED_OPS
50314564Sdim
51314564Sdimnamespace {
52314564Sdim
/// Builds a mask of \p Width consecutive set bits whose lowest bit is at
/// position \p Shift.
///
/// The computation is carried out on unsigned operands so that wide masks do
/// not overflow a signed int. \p Width and \p Shift must each be smaller than
/// the number of bits in unsigned.
///
/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1u) << Shift;
}
57314564Sdim
58341825Sdim/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
59314564Sdim///
60314564Sdim/// \returns Packed \p Dst.
61314564Sdimunsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
62314564Sdim  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
63314564Sdim  Dst |= (Src << Shift) & getBitMask(Shift, Width);
64314564Sdim  return Dst;
65314564Sdim}
66314564Sdim
67341825Sdim/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
68314564Sdim///
69314564Sdim/// \returns Unpacked bits.
70314564Sdimunsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
71314564Sdim  return (Src & getBitMask(Shift, Width)) >> Shift;
72314564Sdim}
73314564Sdim
/// \returns Bit position where the lower part of Vmcnt starts.
unsigned getVmcntBitShiftLo() {
  const unsigned VmcntLoShift = 0;
  return VmcntLoShift;
}
76314564Sdim
/// \returns Number of bits in the lower part of Vmcnt.
unsigned getVmcntBitWidthLo() {
  const unsigned VmcntLoWidth = 4;
  return VmcntLoWidth;
}
79314564Sdim
/// \returns Bit position where Expcnt starts.
unsigned getExpcntBitShift() {
  const unsigned ExpcntShift = 4;
  return ExpcntShift;
}
82314564Sdim
/// \returns Number of bits in Expcnt.
unsigned getExpcntBitWidth() {
  const unsigned ExpcntWidth = 3;
  return ExpcntWidth;
}
85314564Sdim
/// \returns Bit position where Lgkmcnt starts.
unsigned getLgkmcntBitShift() {
  const unsigned LgkmcntShift = 8;
  return LgkmcntShift;
}
88314564Sdim
/// \returns Number of bits in Lgkmcnt: 6 when \p VersionMajor is 10 or
/// greater, 4 otherwise.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 4;
}
93314564Sdim
/// \returns Bit position where the higher part of Vmcnt starts.
unsigned getVmcntBitShiftHi() {
  const unsigned VmcntHiShift = 14;
  return VmcntHiShift;
}
96314564Sdim
/// \returns Number of bits in the higher part of Vmcnt.
unsigned getVmcntBitWidthHi() {
  const unsigned VmcntHiWidth = 2;
  return VmcntHiWidth;
}
99321369Sdim
100321369Sdim} // end namespace anonymous
101321369Sdim
102285163Sdimnamespace llvm {
103321369Sdim
104285163Sdimnamespace AMDGPU {
105285163Sdim
106341825Sdim#define GET_MIMGBaseOpcodesTable_IMPL
107341825Sdim#define GET_MIMGDimInfoTable_IMPL
108341825Sdim#define GET_MIMGInfoTable_IMPL
109341825Sdim#define GET_MIMGLZMappingTable_IMPL
110353358Sdim#define GET_MIMGMIPMappingTable_IMPL
111341825Sdim#include "AMDGPUGenSearchableTables.inc"
112327952Sdim
113341825Sdimint getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
114341825Sdim                  unsigned VDataDwords, unsigned VAddrDwords) {
115341825Sdim  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
116341825Sdim                                             VDataDwords, VAddrDwords);
117341825Sdim  return Info ? Info->Opcode : -1;
118327952Sdim}
119327952Sdim
120353358Sdimconst MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
121353358Sdim  const MIMGInfo *Info = getMIMGInfo(Opc);
122353358Sdim  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
123353358Sdim}
124353358Sdim
125341825Sdimint getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
126341825Sdim  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
127341825Sdim  const MIMGInfo *NewInfo =
128341825Sdim      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
129341825Sdim                          NewChannels, OrigInfo->VAddrDwords);
130341825Sdim  return NewInfo ? NewInfo->Opcode : -1;
131327952Sdim}
132327952Sdim
/// Row type returned by the MUBUF table lookups used in this file.
///
/// NOTE(review): the table itself comes from AMDGPUGenSearchableTables.inc,
/// so the field order presumably has to match the generated initializers --
/// confirm before reordering.
struct MUBUFInfo {
  uint16_t Opcode;     // Reported by getMUBUFOpcode().
  uint16_t BaseOpcode; // Reported by getMUBUFBaseOpcode().
  uint8_t elements;    // Reported by getMUBUFElements().
  bool has_vaddr;      // Reported by getMUBUFHasVAddr().
  bool has_srsrc;      // Reported by getMUBUFHasSrsrc().
  bool has_soffset;    // Reported by getMUBUFHasSoffset().
};
141344779Sdim
/// Row type returned by the MTBUF table lookups used in this file.
///
/// NOTE(review): the table itself comes from AMDGPUGenSearchableTables.inc,
/// so the field order presumably has to match the generated initializers --
/// confirm before reordering.
struct MTBUFInfo {
  uint16_t Opcode;     // Reported by getMTBUFOpcode().
  uint16_t BaseOpcode; // Reported by getMTBUFBaseOpcode().
  uint8_t elements;    // Reported by getMTBUFElements().
  bool has_vaddr;      // Reported by getMTBUFHasVAddr().
  bool has_srsrc;      // Reported by getMTBUFHasSrsrc().
  bool has_soffset;    // Reported by getMTBUFHasSoffset().
};
150360784Sdim
151360784Sdim#define GET_MTBUFInfoTable_DECL
152360784Sdim#define GET_MTBUFInfoTable_IMPL
153344779Sdim#define GET_MUBUFInfoTable_DECL
154344779Sdim#define GET_MUBUFInfoTable_IMPL
155344779Sdim#include "AMDGPUGenSearchableTables.inc"
156344779Sdim
157360784Sdimint getMTBUFBaseOpcode(unsigned Opc) {
158360784Sdim  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
159360784Sdim  return Info ? Info->BaseOpcode : -1;
160360784Sdim}
161360784Sdim
162360784Sdimint getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
163360784Sdim  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
164360784Sdim  return Info ? Info->Opcode : -1;
165360784Sdim}
166360784Sdim
167360784Sdimint getMTBUFElements(unsigned Opc) {
168360784Sdim  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
169360784Sdim  return Info ? Info->elements : 0;
170360784Sdim}
171360784Sdim
172360784Sdimbool getMTBUFHasVAddr(unsigned Opc) {
173360784Sdim  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
174360784Sdim  return Info ? Info->has_vaddr : false;
175360784Sdim}
176360784Sdim
177360784Sdimbool getMTBUFHasSrsrc(unsigned Opc) {
178360784Sdim  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
179360784Sdim  return Info ? Info->has_srsrc : false;
180360784Sdim}
181360784Sdim
182360784Sdimbool getMTBUFHasSoffset(unsigned Opc) {
183360784Sdim  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
184360784Sdim  return Info ? Info->has_soffset : false;
185360784Sdim}
186360784Sdim
187344779Sdimint getMUBUFBaseOpcode(unsigned Opc) {
188344779Sdim  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
189344779Sdim  return Info ? Info->BaseOpcode : -1;
190344779Sdim}
191344779Sdim
192360784Sdimint getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
193360784Sdim  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
194344779Sdim  return Info ? Info->Opcode : -1;
195344779Sdim}
196344779Sdim
197360784Sdimint getMUBUFElements(unsigned Opc) {
198344779Sdim  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
199360784Sdim  return Info ? Info->elements : 0;
200344779Sdim}
201344779Sdim
202344779Sdimbool getMUBUFHasVAddr(unsigned Opc) {
203344779Sdim  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
204344779Sdim  return Info ? Info->has_vaddr : false;
205344779Sdim}
206344779Sdim
207344779Sdimbool getMUBUFHasSrsrc(unsigned Opc) {
208344779Sdim  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
209344779Sdim  return Info ? Info->has_srsrc : false;
210344779Sdim}
211344779Sdim
212344779Sdimbool getMUBUFHasSoffset(unsigned Opc) {
213344779Sdim  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
214344779Sdim  return Info ? Info->has_soffset : false;
215344779Sdim}
216344779Sdim
217327952Sdim// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
218327952Sdim// header files, so we need to wrap it in a function that takes unsigned
219327952Sdim// instead.
220327952Sdimint getMCOpcode(uint16_t Opcode, unsigned Gen) {
221327952Sdim  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
222327952Sdim}
223327952Sdim
224321369Sdimnamespace IsaInfo {
225321369Sdim
226327952Sdimvoid streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
227327952Sdim  auto TargetTriple = STI->getTargetTriple();
228344779Sdim  auto Version = getIsaVersion(STI->getCPU());
229327952Sdim
230327952Sdim  Stream << TargetTriple.getArchName() << '-'
231327952Sdim         << TargetTriple.getVendorName() << '-'
232327952Sdim         << TargetTriple.getOSName() << '-'
233327952Sdim         << TargetTriple.getEnvironmentName() << '-'
234327952Sdim         << "gfx"
235344779Sdim         << Version.Major
236344779Sdim         << Version.Minor
237344779Sdim         << Version.Stepping;
238341825Sdim
239341825Sdim  if (hasXNACK(*STI))
240341825Sdim    Stream << "+xnack";
241344779Sdim  if (hasSRAMECC(*STI))
242344779Sdim    Stream << "+sram-ecc";
243341825Sdim
244327952Sdim  Stream.flush();
245327952Sdim}
246327952Sdim
247341825Sdimbool hasCodeObjectV3(const MCSubtargetInfo *STI) {
248344779Sdim  return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
249344779Sdim             STI->getFeatureBits().test(FeatureCodeObjectV3);
250327952Sdim}
251327952Sdim
252344779Sdimunsigned getWavefrontSize(const MCSubtargetInfo *STI) {
253344779Sdim  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
254321369Sdim    return 16;
255344779Sdim  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
256321369Sdim    return 32;
257321369Sdim
258321369Sdim  return 64;
259321369Sdim}
260321369Sdim
261344779Sdimunsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
262344779Sdim  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
263321369Sdim    return 32768;
264344779Sdim  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
265321369Sdim    return 65536;
266321369Sdim
267321369Sdim  return 0;
268321369Sdim}
269321369Sdim
270344779Sdimunsigned getEUsPerCU(const MCSubtargetInfo *STI) {
271321369Sdim  return 4;
272321369Sdim}
273321369Sdim
274344779Sdimunsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
275321369Sdim                               unsigned FlatWorkGroupSize) {
276353358Sdim  assert(FlatWorkGroupSize != 0);
277353358Sdim  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
278321369Sdim    return 8;
279344779Sdim  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
280321369Sdim  if (N == 1)
281321369Sdim    return 40;
282321369Sdim  N = 40 / N;
283321369Sdim  return std::min(N, 16u);
284321369Sdim}
285321369Sdim
286344779Sdimunsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
287360784Sdim  return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
288321369Sdim}
289321369Sdim
290344779Sdimunsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
291321369Sdim                          unsigned FlatWorkGroupSize) {
292344779Sdim  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
293321369Sdim}
294321369Sdim
295344779Sdimunsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
296321369Sdim  return 1;
297321369Sdim}
298321369Sdim
299360784Sdimunsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
300321369Sdim  // FIXME: Need to take scratch memory into account.
301360784Sdim  if (!isGFX10(*STI))
302360784Sdim    return 10;
303360784Sdim  return 20;
304321369Sdim}
305321369Sdim
306344779Sdimunsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
307321369Sdim                          unsigned FlatWorkGroupSize) {
308344779Sdim  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
309344779Sdim                 getEUsPerCU(STI)) / getEUsPerCU(STI);
310321369Sdim}
311321369Sdim
312344779Sdimunsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
313321369Sdim  return 1;
314321369Sdim}
315321369Sdim
316344779Sdimunsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
317360784Sdim  // Some subtargets allow encoding 2048, but this isn't tested or supported.
318360784Sdim  return 1024;
319321369Sdim}
320321369Sdim
321344779Sdimunsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
322321369Sdim                              unsigned FlatWorkGroupSize) {
323344779Sdim  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
324344779Sdim                 getWavefrontSize(STI);
325321369Sdim}
326321369Sdim
327344779Sdimunsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
328344779Sdim  IsaVersion Version = getIsaVersion(STI->getCPU());
329353358Sdim  if (Version.Major >= 10)
330353358Sdim    return getAddressableNumSGPRs(STI);
331321369Sdim  if (Version.Major >= 8)
332321369Sdim    return 16;
333321369Sdim  return 8;
334321369Sdim}
335321369Sdim
336344779Sdimunsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
337321369Sdim  return 8;
338321369Sdim}
339321369Sdim
340344779Sdimunsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
341344779Sdim  IsaVersion Version = getIsaVersion(STI->getCPU());
342321369Sdim  if (Version.Major >= 8)
343321369Sdim    return 800;
344321369Sdim  return 512;
345321369Sdim}
346321369Sdim
347344779Sdimunsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
348344779Sdim  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
349321369Sdim    return FIXED_NUM_SGPRS_FOR_INIT_BUG;
350321369Sdim
351344779Sdim  IsaVersion Version = getIsaVersion(STI->getCPU());
352353358Sdim  if (Version.Major >= 10)
353353358Sdim    return 106;
354321369Sdim  if (Version.Major >= 8)
355321369Sdim    return 102;
356321369Sdim  return 104;
357321369Sdim}
358321369Sdim
359344779Sdimunsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
360321369Sdim  assert(WavesPerEU != 0);
361321369Sdim
362353358Sdim  IsaVersion Version = getIsaVersion(STI->getCPU());
363353358Sdim  if (Version.Major >= 10)
364353358Sdim    return 0;
365353358Sdim
366360784Sdim  if (WavesPerEU >= getMaxWavesPerEU(STI))
367321369Sdim    return 0;
368341825Sdim
369344779Sdim  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
370344779Sdim  if (STI->getFeatureBits().test(FeatureTrapHandler))
371341825Sdim    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
372344779Sdim  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
373344779Sdim  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
374321369Sdim}
375321369Sdim
376344779Sdimunsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
377321369Sdim                        bool Addressable) {
378321369Sdim  assert(WavesPerEU != 0);
379321369Sdim
380353358Sdim  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
381344779Sdim  IsaVersion Version = getIsaVersion(STI->getCPU());
382353358Sdim  if (Version.Major >= 10)
383353358Sdim    return Addressable ? AddressableNumSGPRs : 108;
384321369Sdim  if (Version.Major >= 8 && !Addressable)
385321369Sdim    AddressableNumSGPRs = 112;
386344779Sdim  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
387344779Sdim  if (STI->getFeatureBits().test(FeatureTrapHandler))
388341825Sdim    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
389344779Sdim  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
390321369Sdim  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
391321369Sdim}
392321369Sdim
393344779Sdimunsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
394341825Sdim                          bool FlatScrUsed, bool XNACKUsed) {
395341825Sdim  unsigned ExtraSGPRs = 0;
396341825Sdim  if (VCCUsed)
397341825Sdim    ExtraSGPRs = 2;
398341825Sdim
399344779Sdim  IsaVersion Version = getIsaVersion(STI->getCPU());
400353358Sdim  if (Version.Major >= 10)
401353358Sdim    return ExtraSGPRs;
402353358Sdim
403341825Sdim  if (Version.Major < 8) {
404341825Sdim    if (FlatScrUsed)
405341825Sdim      ExtraSGPRs = 4;
406341825Sdim  } else {
407341825Sdim    if (XNACKUsed)
408341825Sdim      ExtraSGPRs = 4;
409341825Sdim
410341825Sdim    if (FlatScrUsed)
411341825Sdim      ExtraSGPRs = 6;
412341825Sdim  }
413341825Sdim
414341825Sdim  return ExtraSGPRs;
415341825Sdim}
416341825Sdim
417344779Sdimunsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
418341825Sdim                          bool FlatScrUsed) {
419344779Sdim  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
420344779Sdim                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
421341825Sdim}
422341825Sdim
423344779Sdimunsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
424344779Sdim  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
425341825Sdim  // SGPRBlocks is actual number of SGPR blocks minus 1.
426344779Sdim  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
427341825Sdim}
428341825Sdim
429353358Sdimunsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
430353358Sdim                             Optional<bool> EnableWavefrontSize32) {
431353358Sdim  bool IsWave32 = EnableWavefrontSize32 ?
432353358Sdim      *EnableWavefrontSize32 :
433353358Sdim      STI->getFeatureBits().test(FeatureWavefrontSize32);
434353358Sdim  return IsWave32 ? 8 : 4;
435321369Sdim}
436321369Sdim
437353358Sdimunsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
438353358Sdim                                Optional<bool> EnableWavefrontSize32) {
439353358Sdim  return getVGPRAllocGranule(STI, EnableWavefrontSize32);
440321369Sdim}
441321369Sdim
442344779Sdimunsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
443360784Sdim  if (!isGFX10(*STI))
444360784Sdim    return 256;
445360784Sdim  return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
446321369Sdim}
447321369Sdim
448344779Sdimunsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
449360784Sdim  return 256;
450321369Sdim}
451321369Sdim
452344779Sdimunsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
453321369Sdim  assert(WavesPerEU != 0);
454321369Sdim
455360784Sdim  if (WavesPerEU >= getMaxWavesPerEU(STI))
456321369Sdim    return 0;
457321369Sdim  unsigned MinNumVGPRs =
458344779Sdim      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
459344779Sdim                getVGPRAllocGranule(STI)) + 1;
460344779Sdim  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
461321369Sdim}
462321369Sdim
463344779Sdimunsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
464321369Sdim  assert(WavesPerEU != 0);
465321369Sdim
466344779Sdim  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
467344779Sdim                                   getVGPRAllocGranule(STI));
468344779Sdim  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
469321369Sdim  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
470321369Sdim}
471321369Sdim
472353358Sdimunsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
473353358Sdim                          Optional<bool> EnableWavefrontSize32) {
474353358Sdim  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
475353358Sdim                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
476341825Sdim  // VGPRBlocks is actual number of VGPR blocks minus 1.
477353358Sdim  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
478341825Sdim}
479341825Sdim
480321369Sdim} // end namespace IsaInfo
481321369Sdim
482285163Sdimvoid initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
483344779Sdim                               const MCSubtargetInfo *STI) {
484344779Sdim  IsaVersion Version = getIsaVersion(STI->getCPU());
485285163Sdim
486285163Sdim  memset(&Header, 0, sizeof(Header));
487285163Sdim
488285163Sdim  Header.amd_kernel_code_version_major = 1;
489341825Sdim  Header.amd_kernel_code_version_minor = 2;
490285163Sdim  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
491344779Sdim  Header.amd_machine_version_major = Version.Major;
492344779Sdim  Header.amd_machine_version_minor = Version.Minor;
493344779Sdim  Header.amd_machine_version_stepping = Version.Stepping;
494285163Sdim  Header.kernel_code_entry_byte_offset = sizeof(Header);
495285163Sdim  Header.wavefront_size = 6;
496321369Sdim
497321369Sdim  // If the code object does not support indirect functions, then the value must
498321369Sdim  // be 0xffffffff.
499321369Sdim  Header.call_convention = -1;
500321369Sdim
501285163Sdim  // These alignment values are specified in powers of two, so alignment =
502285163Sdim  // 2^n.  The minimum alignment is 2^4 = 16.
503285163Sdim  Header.kernarg_segment_alignment = 4;
504285163Sdim  Header.group_segment_alignment = 4;
505285163Sdim  Header.private_segment_alignment = 4;
506353358Sdim
507353358Sdim  if (Version.Major >= 10) {
508353358Sdim    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
509353358Sdim      Header.wavefront_size = 5;
510353358Sdim      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
511353358Sdim    }
512353358Sdim    Header.compute_pgm_resource_registers |=
513353358Sdim      S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
514353358Sdim      S_00B848_MEM_ORDERED(1);
515353358Sdim  }
516285163Sdim}
517285163Sdim
518353358Sdimamdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
519353358Sdim    const MCSubtargetInfo *STI) {
520353358Sdim  IsaVersion Version = getIsaVersion(STI->getCPU());
521353358Sdim
522341825Sdim  amdhsa::kernel_descriptor_t KD;
523341825Sdim  memset(&KD, 0, sizeof(KD));
524353358Sdim
525341825Sdim  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
526341825Sdim                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
527341825Sdim                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
528341825Sdim  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
529341825Sdim                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
530341825Sdim  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
531341825Sdim                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
532341825Sdim  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
533341825Sdim                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
534353358Sdim  if (Version.Major >= 10) {
535353358Sdim    AMDHSA_BITS_SET(KD.kernel_code_properties,
536353358Sdim                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
537353358Sdim                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
538353358Sdim    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
539353358Sdim                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
540353358Sdim                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
541353358Sdim    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
542353358Sdim                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
543353358Sdim  }
544341825Sdim  return KD;
545341825Sdim}
546341825Sdim
547327952Sdimbool isGroupSegment(const GlobalValue *GV) {
548360784Sdim  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
549296417Sdim}
550296417Sdim
551327952Sdimbool isGlobalSegment(const GlobalValue *GV) {
552360784Sdim  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
553296417Sdim}
554296417Sdim
555327952Sdimbool isReadOnlySegment(const GlobalValue *GV) {
556360784Sdim  unsigned AS = GV->getAddressSpace();
557360784Sdim  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
558360784Sdim         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
559296417Sdim}
560296417Sdim
561314564Sdimbool shouldEmitConstantsToTextSection(const Triple &TT) {
562360784Sdim  return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
563314564Sdim}
564314564Sdim
565309124Sdimint getIntegerAttribute(const Function &F, StringRef Name, int Default) {
566296417Sdim  Attribute A = F.getFnAttribute(Name);
567309124Sdim  int Result = Default;
568296417Sdim
569296417Sdim  if (A.isStringAttribute()) {
570296417Sdim    StringRef Str = A.getValueAsString();
571296417Sdim    if (Str.getAsInteger(0, Result)) {
572296417Sdim      LLVMContext &Ctx = F.getContext();
573309124Sdim      Ctx.emitError("can't parse integer attribute " + Name);
574296417Sdim    }
575296417Sdim  }
576309124Sdim
577296417Sdim  return Result;
578296417Sdim}
579296417Sdim
580314564Sdimstd::pair<int, int> getIntegerPairAttribute(const Function &F,
581314564Sdim                                            StringRef Name,
582314564Sdim                                            std::pair<int, int> Default,
583314564Sdim                                            bool OnlyFirstRequired) {
584314564Sdim  Attribute A = F.getFnAttribute(Name);
585314564Sdim  if (!A.isStringAttribute())
586314564Sdim    return Default;
587314564Sdim
588314564Sdim  LLVMContext &Ctx = F.getContext();
589314564Sdim  std::pair<int, int> Ints = Default;
590314564Sdim  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
591314564Sdim  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
592314564Sdim    Ctx.emitError("can't parse first integer attribute " + Name);
593314564Sdim    return Default;
594314564Sdim  }
595314564Sdim  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
596321369Sdim    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
597314564Sdim      Ctx.emitError("can't parse second integer attribute " + Name);
598314564Sdim      return Default;
599314564Sdim    }
600314564Sdim  }
601314564Sdim
602314564Sdim  return Ints;
603296417Sdim}
604296417Sdim
605344779Sdimunsigned getVmcntBitMask(const IsaVersion &Version) {
606321369Sdim  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
607321369Sdim  if (Version.Major < 9)
608321369Sdim    return VmcntLo;
609314564Sdim
610321369Sdim  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
611321369Sdim  return VmcntLo | VmcntHi;
612314564Sdim}
613314564Sdim
614344779Sdimunsigned getExpcntBitMask(const IsaVersion &Version) {
615314564Sdim  return (1 << getExpcntBitWidth()) - 1;
616314564Sdim}
617314564Sdim
618344779Sdimunsigned getLgkmcntBitMask(const IsaVersion &Version) {
619353358Sdim  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
620314564Sdim}
621314564Sdim
622344779Sdimunsigned getWaitcntBitMask(const IsaVersion &Version) {
623321369Sdim  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
624321369Sdim  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
625353358Sdim  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
626353358Sdim                                getLgkmcntBitWidth(Version.Major));
627321369Sdim  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
628321369Sdim  if (Version.Major < 9)
629321369Sdim    return Waitcnt;
630321369Sdim
631321369Sdim  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
632321369Sdim  return Waitcnt | VmcntHi;
633314564Sdim}
634314564Sdim
635344779Sdimunsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
636321369Sdim  unsigned VmcntLo =
637321369Sdim      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
638321369Sdim  if (Version.Major < 9)
639321369Sdim    return VmcntLo;
640321369Sdim
641321369Sdim  unsigned VmcntHi =
642321369Sdim      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
643321369Sdim  VmcntHi <<= getVmcntBitWidthLo();
644321369Sdim  return VmcntLo | VmcntHi;
645321369Sdim}
646321369Sdim
647344779Sdimunsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
648314564Sdim  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
649314564Sdim}
650314564Sdim
651344779Sdimunsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
652353358Sdim  return unpackBits(Waitcnt, getLgkmcntBitShift(),
653353358Sdim                    getLgkmcntBitWidth(Version.Major));
654314564Sdim}
655314564Sdim
656344779Sdimvoid decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
657314564Sdim                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
658314564Sdim  Vmcnt = decodeVmcnt(Version, Waitcnt);
659314564Sdim  Expcnt = decodeExpcnt(Version, Waitcnt);
660314564Sdim  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
661314564Sdim}
662314564Sdim
663344779SdimWaitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
664344779Sdim  Waitcnt Decoded;
665344779Sdim  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
666344779Sdim  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
667344779Sdim  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
668344779Sdim  return Decoded;
669344779Sdim}
670344779Sdim
671344779Sdimunsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
672321369Sdim                     unsigned Vmcnt) {
673321369Sdim  Waitcnt =
674321369Sdim      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
675321369Sdim  if (Version.Major < 9)
676321369Sdim    return Waitcnt;
677321369Sdim
678321369Sdim  Vmcnt >>= getVmcntBitWidthLo();
679321369Sdim  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
680314564Sdim}
681314564Sdim
682344779Sdimunsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
683321369Sdim                      unsigned Expcnt) {
684314564Sdim  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
685314564Sdim}
686314564Sdim
687344779Sdimunsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
688321369Sdim                       unsigned Lgkmcnt) {
689353358Sdim  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
690353358Sdim                                    getLgkmcntBitWidth(Version.Major));
691314564Sdim}
692314564Sdim
693344779Sdimunsigned encodeWaitcnt(const IsaVersion &Version,
694314564Sdim                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
695314564Sdim  unsigned Waitcnt = getWaitcntBitMask(Version);
696314564Sdim  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
697314564Sdim  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
698314564Sdim  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
699314564Sdim  return Waitcnt;
700314564Sdim}
701314564Sdim
702344779Sdimunsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
703344779Sdim  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
704344779Sdim}
705344779Sdim
706353358Sdim//===----------------------------------------------------------------------===//
707353358Sdim// hwreg
708353358Sdim//===----------------------------------------------------------------------===//
709353358Sdim
710353358Sdimnamespace Hwreg {
711353358Sdim
712353358Sdimint64_t getHwregId(const StringRef Name) {
713353358Sdim  for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
714353358Sdim    if (IdSymbolic[Id] && Name == IdSymbolic[Id])
715353358Sdim      return Id;
716353358Sdim  }
717353358Sdim  return ID_UNKNOWN_;
718353358Sdim}
719353358Sdim
720353358Sdimstatic unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
721353358Sdim  if (isSI(STI) || isCI(STI) || isVI(STI))
722353358Sdim    return ID_SYMBOLIC_FIRST_GFX9_;
723353358Sdim  else if (isGFX9(STI))
724353358Sdim    return ID_SYMBOLIC_FIRST_GFX10_;
725353358Sdim  else
726353358Sdim    return ID_SYMBOLIC_LAST_;
727353358Sdim}
728353358Sdim
729353358Sdimbool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
730353358Sdim  return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
731353358Sdim         IdSymbolic[Id];
732353358Sdim}
733353358Sdim
734353358Sdimbool isValidHwreg(int64_t Id) {
735353358Sdim  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
736353358Sdim}
737353358Sdim
738353358Sdimbool isValidHwregOffset(int64_t Offset) {
739353358Sdim  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
740353358Sdim}
741353358Sdim
742353358Sdimbool isValidHwregWidth(int64_t Width) {
743353358Sdim  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
744353358Sdim}
745353358Sdim
746353358Sdimuint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
747353358Sdim  return (Id << ID_SHIFT_) |
748353358Sdim         (Offset << OFFSET_SHIFT_) |
749353358Sdim         ((Width - 1) << WIDTH_M1_SHIFT_);
750353358Sdim}
751353358Sdim
752353358SdimStringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
753353358Sdim  return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
754353358Sdim}
755353358Sdim
756353358Sdimvoid decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
757353358Sdim  Id = (Val & ID_MASK_) >> ID_SHIFT_;
758353358Sdim  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
759353358Sdim  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
760353358Sdim}
761353358Sdim
762353358Sdim} // namespace Hwreg
763353358Sdim
764353358Sdim//===----------------------------------------------------------------------===//
765353358Sdim// SendMsg
766353358Sdim//===----------------------------------------------------------------------===//
767353358Sdim
768353358Sdimnamespace SendMsg {
769353358Sdim
770353358Sdimint64_t getMsgId(const StringRef Name) {
771353358Sdim  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
772353358Sdim    if (IdSymbolic[i] && Name == IdSymbolic[i])
773353358Sdim      return i;
774353358Sdim  }
775353358Sdim  return ID_UNKNOWN_;
776353358Sdim}
777353358Sdim
778353358Sdimstatic bool isValidMsgId(int64_t MsgId) {
779353358Sdim  return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId];
780353358Sdim}
781353358Sdim
782353358Sdimbool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
783353358Sdim  if (Strict) {
784353358Sdim    if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
785353358Sdim      return isGFX9(STI) || isGFX10(STI);
786353358Sdim    else
787353358Sdim      return isValidMsgId(MsgId);
788353358Sdim  } else {
789353358Sdim    return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
790353358Sdim  }
791353358Sdim}
792353358Sdim
793353358SdimStringRef getMsgName(int64_t MsgId) {
794353358Sdim  return isValidMsgId(MsgId)? IdSymbolic[MsgId] : "";
795353358Sdim}
796353358Sdim
797353358Sdimint64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
798353358Sdim  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
799353358Sdim  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
800353358Sdim  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
801353358Sdim  for (int i = F; i < L; ++i) {
802353358Sdim    if (Name == S[i]) {
803353358Sdim      return i;
804353358Sdim    }
805353358Sdim  }
806353358Sdim  return OP_UNKNOWN_;
807353358Sdim}
808353358Sdim
809353358Sdimbool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {
810353358Sdim
811353358Sdim  if (!Strict)
812353358Sdim    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
813353358Sdim
814353358Sdim  switch(MsgId)
815353358Sdim  {
816353358Sdim  case ID_GS:
817353358Sdim    return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
818353358Sdim  case ID_GS_DONE:
819353358Sdim    return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
820353358Sdim  case ID_SYSMSG:
821353358Sdim    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
822353358Sdim  default:
823353358Sdim    return OpId == OP_NONE_;
824353358Sdim  }
825353358Sdim}
826353358Sdim
827353358SdimStringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
828353358Sdim  assert(msgRequiresOp(MsgId));
829353358Sdim  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
830353358Sdim}
831353358Sdim
832353358Sdimbool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {
833353358Sdim
834353358Sdim  if (!Strict)
835353358Sdim    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
836353358Sdim
837353358Sdim  switch(MsgId)
838353358Sdim  {
839353358Sdim  case ID_GS:
840353358Sdim    return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
841353358Sdim  case ID_GS_DONE:
842353358Sdim    return (OpId == OP_GS_NOP)?
843353358Sdim           (StreamId == STREAM_ID_NONE_) :
844353358Sdim           (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
845353358Sdim  default:
846353358Sdim    return StreamId == STREAM_ID_NONE_;
847353358Sdim  }
848353358Sdim}
849353358Sdim
850353358Sdimbool msgRequiresOp(int64_t MsgId) {
851353358Sdim  return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
852353358Sdim}
853353358Sdim
854353358Sdimbool msgSupportsStream(int64_t MsgId, int64_t OpId) {
855353358Sdim  return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
856353358Sdim}
857353358Sdim
858353358Sdimvoid decodeMsg(unsigned Val,
859353358Sdim               uint16_t &MsgId,
860353358Sdim               uint16_t &OpId,
861353358Sdim               uint16_t &StreamId) {
862353358Sdim  MsgId = Val & ID_MASK_;
863353358Sdim  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
864353358Sdim  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
865353358Sdim}
866353358Sdim
867353358Sdimuint64_t encodeMsg(uint64_t MsgId,
868353358Sdim                   uint64_t OpId,
869353358Sdim                   uint64_t StreamId) {
870353358Sdim  return (MsgId << ID_SHIFT_) |
871353358Sdim         (OpId << OP_SHIFT_) |
872353358Sdim         (StreamId << STREAM_ID_SHIFT_);
873353358Sdim}
874353358Sdim
875353358Sdim} // namespace SendMsg
876353358Sdim
877353358Sdim//===----------------------------------------------------------------------===//
878353358Sdim//
879353358Sdim//===----------------------------------------------------------------------===//
880353358Sdim
881296417Sdimunsigned getInitialPSInputAddr(const Function &F) {
882296417Sdim  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
883296417Sdim}
884296417Sdim
885309124Sdimbool isShader(CallingConv::ID cc) {
886309124Sdim  switch(cc) {
887309124Sdim    case CallingConv::AMDGPU_VS:
888327952Sdim    case CallingConv::AMDGPU_LS:
889321369Sdim    case CallingConv::AMDGPU_HS:
890327952Sdim    case CallingConv::AMDGPU_ES:
891309124Sdim    case CallingConv::AMDGPU_GS:
892309124Sdim    case CallingConv::AMDGPU_PS:
893309124Sdim    case CallingConv::AMDGPU_CS:
894309124Sdim      return true;
895309124Sdim    default:
896309124Sdim      return false;
897309124Sdim  }
898309124Sdim}
899309124Sdim
900309124Sdimbool isCompute(CallingConv::ID cc) {
901309124Sdim  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
902309124Sdim}
903309124Sdim
904321369Sdimbool isEntryFunctionCC(CallingConv::ID CC) {
905321369Sdim  switch (CC) {
906321369Sdim  case CallingConv::AMDGPU_KERNEL:
907321369Sdim  case CallingConv::SPIR_KERNEL:
908321369Sdim  case CallingConv::AMDGPU_VS:
909321369Sdim  case CallingConv::AMDGPU_GS:
910321369Sdim  case CallingConv::AMDGPU_PS:
911321369Sdim  case CallingConv::AMDGPU_CS:
912327952Sdim  case CallingConv::AMDGPU_ES:
913321369Sdim  case CallingConv::AMDGPU_HS:
914327952Sdim  case CallingConv::AMDGPU_LS:
915321369Sdim    return true;
916321369Sdim  default:
917321369Sdim    return false;
918321369Sdim  }
919321369Sdim}
920321369Sdim
921341825Sdimbool hasXNACK(const MCSubtargetInfo &STI) {
922341825Sdim  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
923341825Sdim}
924341825Sdim
925344779Sdimbool hasSRAMECC(const MCSubtargetInfo &STI) {
926344779Sdim  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
927344779Sdim}
928344779Sdim
929341825Sdimbool hasMIMG_R128(const MCSubtargetInfo &STI) {
930341825Sdim  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
931341825Sdim}
932341825Sdim
933341825Sdimbool hasPackedD16(const MCSubtargetInfo &STI) {
934341825Sdim  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
935341825Sdim}
936341825Sdim
937296417Sdimbool isSI(const MCSubtargetInfo &STI) {
938296417Sdim  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
939296417Sdim}
940296417Sdim
941296417Sdimbool isCI(const MCSubtargetInfo &STI) {
942296417Sdim  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
943296417Sdim}
944296417Sdim
945296417Sdimbool isVI(const MCSubtargetInfo &STI) {
946296417Sdim  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
947296417Sdim}
948296417Sdim
949321369Sdimbool isGFX9(const MCSubtargetInfo &STI) {
950321369Sdim  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
951321369Sdim}
952321369Sdim
953353358Sdimbool isGFX10(const MCSubtargetInfo &STI) {
954353358Sdim  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
955353358Sdim}
956353358Sdim
957327952Sdimbool isGCN3Encoding(const MCSubtargetInfo &STI) {
958327952Sdim  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
959327952Sdim}
960327952Sdim
961321369Sdimbool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
962321369Sdim  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
963321369Sdim  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
964321369Sdim  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
965321369Sdim    Reg == AMDGPU::SCC;
966321369Sdim}
967321369Sdim
968321369Sdimbool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
969321369Sdim  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
970321369Sdim    if (*R == Reg1) return true;
971321369Sdim  }
972321369Sdim  return false;
973321369Sdim}
974321369Sdim
// Shared switch body for getMCReg() and mc2PseudoReg(). It expands inside a
// function that has a register number named Reg in scope. Each CASE_* line
// expands to whatever CASE_CI_VI / CASE_VI_GFX9_GFX10 are defined as at the
// point of use, and any Reg without a matching case is returned unchanged by
// the default label.
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9_GFX10(TTMP0) \
  CASE_VI_GFX9_GFX10(TTMP1) \
  CASE_VI_GFX9_GFX10(TTMP2) \
  CASE_VI_GFX9_GFX10(TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4) \
  CASE_VI_GFX9_GFX10(TTMP5) \
  CASE_VI_GFX9_GFX10(TTMP6) \
  CASE_VI_GFX9_GFX10(TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8) \
  CASE_VI_GFX9_GFX10(TTMP9) \
  CASE_VI_GFX9_GFX10(TTMP10) \
  CASE_VI_GFX9_GFX10(TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12) \
  CASE_VI_GFX9_GFX10(TTMP13) \
  CASE_VI_GFX9_GFX10(TTMP14) \
  CASE_VI_GFX9_GFX10(TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
  CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
  CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
  CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
  CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

// Forward mapping used by getMCReg(): node becomes node_ci when isCI(STI)
// holds and node_vi otherwise.
// NOTE(review): the assert expands to a statement that sits between the
// previous return and this case label, so a switch that jumps to a label never
// executes it — confirm whether the SI check is meant to be live.
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

// Forward mapping used by getMCReg(): node becomes node_gfx9_gfx10 on GFX9 or
// GFX10 and node_vi otherwise.
#define CASE_VI_GFX9_GFX10(node) \
  case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;

// Maps Reg to its subtarget-specific variant for STI. Returns Reg unchanged
// when the target triple's architecture is r600, or when Reg is not one of
// the registers listed in MAP_REG2REG.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9_GFX10

// Reverse mapping used by mc2PseudoReg(): either subtarget-specific variant of
// a register maps back to the plain node name.
#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;

// Inverse of getMCReg(): maps a subtarget-specific register variant back to
// the plain register, and returns any other Reg unchanged.
unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9_GFX10
#undef MAP_REG2REG
1042327952Sdim
1043314564Sdimbool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1044321369Sdim  assert(OpNo < Desc.NumOperands);
1045314564Sdim  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1046314564Sdim  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
1047314564Sdim         OpType <= AMDGPU::OPERAND_SRC_LAST;
1048314564Sdim}
1049314564Sdim
1050314564Sdimbool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1051321369Sdim  assert(OpNo < Desc.NumOperands);
1052314564Sdim  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1053314564Sdim  switch (OpType) {
1054314564Sdim  case AMDGPU::OPERAND_REG_IMM_FP32:
1055314564Sdim  case AMDGPU::OPERAND_REG_IMM_FP64:
1056314564Sdim  case AMDGPU::OPERAND_REG_IMM_FP16:
1057353358Sdim  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1058353358Sdim  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1059314564Sdim  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1060314564Sdim  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1061314564Sdim  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1062321369Sdim  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1063353358Sdim  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1064353358Sdim  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1065353358Sdim  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1066353358Sdim  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1067353358Sdim  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1068314564Sdim    return true;
1069314564Sdim  default:
1070314564Sdim    return false;
1071314564Sdim  }
1072314564Sdim}
1073314564Sdim
1074314564Sdimbool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1075321369Sdim  assert(OpNo < Desc.NumOperands);
1076314564Sdim  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1077314564Sdim  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
1078314564Sdim         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
1079314564Sdim}
1080314564Sdim
1081314564Sdim// Avoid using MCRegisterClass::getSize, since that function will go away
1082314564Sdim// (move from MC* level to Target* level). Return size in bits.
1083314564Sdimunsigned getRegBitWidth(unsigned RCID) {
1084314564Sdim  switch (RCID) {
1085314564Sdim  case AMDGPU::SGPR_32RegClassID:
1086314564Sdim  case AMDGPU::VGPR_32RegClassID:
1087353358Sdim  case AMDGPU::VRegOrLds_32RegClassID:
1088353358Sdim  case AMDGPU::AGPR_32RegClassID:
1089314564Sdim  case AMDGPU::VS_32RegClassID:
1090353358Sdim  case AMDGPU::AV_32RegClassID:
1091314564Sdim  case AMDGPU::SReg_32RegClassID:
1092314564Sdim  case AMDGPU::SReg_32_XM0RegClassID:
1093353358Sdim  case AMDGPU::SRegOrLds_32RegClassID:
1094314564Sdim    return 32;
1095314564Sdim  case AMDGPU::SGPR_64RegClassID:
1096314564Sdim  case AMDGPU::VS_64RegClassID:
1097353358Sdim  case AMDGPU::AV_64RegClassID:
1098314564Sdim  case AMDGPU::SReg_64RegClassID:
1099314564Sdim  case AMDGPU::VReg_64RegClassID:
1100353358Sdim  case AMDGPU::AReg_64RegClassID:
1101344779Sdim  case AMDGPU::SReg_64_XEXECRegClassID:
1102314564Sdim    return 64;
1103353358Sdim  case AMDGPU::SGPR_96RegClassID:
1104353358Sdim  case AMDGPU::SReg_96RegClassID:
1105314564Sdim  case AMDGPU::VReg_96RegClassID:
1106314564Sdim    return 96;
1107314564Sdim  case AMDGPU::SGPR_128RegClassID:
1108314564Sdim  case AMDGPU::SReg_128RegClassID:
1109314564Sdim  case AMDGPU::VReg_128RegClassID:
1110353358Sdim  case AMDGPU::AReg_128RegClassID:
1111314564Sdim    return 128;
1112353358Sdim  case AMDGPU::SGPR_160RegClassID:
1113353358Sdim  case AMDGPU::SReg_160RegClassID:
1114353358Sdim  case AMDGPU::VReg_160RegClassID:
1115353358Sdim    return 160;
1116314564Sdim  case AMDGPU::SReg_256RegClassID:
1117314564Sdim  case AMDGPU::VReg_256RegClassID:
1118314564Sdim    return 256;
1119314564Sdim  case AMDGPU::SReg_512RegClassID:
1120314564Sdim  case AMDGPU::VReg_512RegClassID:
1121353358Sdim  case AMDGPU::AReg_512RegClassID:
1122314564Sdim    return 512;
1123353358Sdim  case AMDGPU::SReg_1024RegClassID:
1124353358Sdim  case AMDGPU::VReg_1024RegClassID:
1125353358Sdim  case AMDGPU::AReg_1024RegClassID:
1126353358Sdim    return 1024;
1127314564Sdim  default:
1128314564Sdim    llvm_unreachable("Unexpected register class");
1129314564Sdim  }
1130314564Sdim}
1131314564Sdim
1132314564Sdimunsigned getRegBitWidth(const MCRegisterClass &RC) {
1133314564Sdim  return getRegBitWidth(RC.getID());
1134314564Sdim}
1135314564Sdim
1136314564Sdimunsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1137314564Sdim                           unsigned OpNo) {
1138321369Sdim  assert(OpNo < Desc.NumOperands);
1139314564Sdim  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
1140314564Sdim  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
1141314564Sdim}
1142314564Sdim
1143314564Sdimbool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
1144314564Sdim  if (Literal >= -16 && Literal <= 64)
1145314564Sdim    return true;
1146314564Sdim
1147314564Sdim  uint64_t Val = static_cast<uint64_t>(Literal);
1148314564Sdim  return (Val == DoubleToBits(0.0)) ||
1149314564Sdim         (Val == DoubleToBits(1.0)) ||
1150314564Sdim         (Val == DoubleToBits(-1.0)) ||
1151314564Sdim         (Val == DoubleToBits(0.5)) ||
1152314564Sdim         (Val == DoubleToBits(-0.5)) ||
1153314564Sdim         (Val == DoubleToBits(2.0)) ||
1154314564Sdim         (Val == DoubleToBits(-2.0)) ||
1155314564Sdim         (Val == DoubleToBits(4.0)) ||
1156314564Sdim         (Val == DoubleToBits(-4.0)) ||
1157314564Sdim         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
1158314564Sdim}
1159314564Sdim
1160314564Sdimbool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
1161314564Sdim  if (Literal >= -16 && Literal <= 64)
1162314564Sdim    return true;
1163314564Sdim
1164314564Sdim  // The actual type of the operand does not seem to matter as long
1165314564Sdim  // as the bits match one of the inline immediate values.  For example:
1166314564Sdim  //
1167314564Sdim  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
1168314564Sdim  // so it is a legal inline immediate.
1169314564Sdim  //
1170314564Sdim  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
1171314564Sdim  // floating-point, so it is a legal inline immediate.
1172314564Sdim
1173314564Sdim  uint32_t Val = static_cast<uint32_t>(Literal);
1174314564Sdim  return (Val == FloatToBits(0.0f)) ||
1175314564Sdim         (Val == FloatToBits(1.0f)) ||
1176314564Sdim         (Val == FloatToBits(-1.0f)) ||
1177314564Sdim         (Val == FloatToBits(0.5f)) ||
1178314564Sdim         (Val == FloatToBits(-0.5f)) ||
1179314564Sdim         (Val == FloatToBits(2.0f)) ||
1180314564Sdim         (Val == FloatToBits(-2.0f)) ||
1181314564Sdim         (Val == FloatToBits(4.0f)) ||
1182314564Sdim         (Val == FloatToBits(-4.0f)) ||
1183314564Sdim         (Val == 0x3e22f983 && HasInv2Pi);
1184314564Sdim}
1185314564Sdim
/// Returns true when the 16-bit pattern \p Literal is accepted as an inline
/// constant. Always false when \p HasInv2Pi is not set; otherwise true for
/// integers in [-16, 64] and for the half-precision bit patterns listed
/// below.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (-16 <= Literal && Literal <= 64)
    return true;

  switch (static_cast<uint16_t>(Literal)) {
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/2pi
    return true;
  default:
    return false;
  }
}
1204314564Sdim
1205321369Sdimbool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
1206321369Sdim  assert(HasInv2Pi);
1207321369Sdim
1208353358Sdim  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
1209353358Sdim    int16_t Trunc = static_cast<int16_t>(Literal);
1210353358Sdim    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
1211353358Sdim  }
1212353358Sdim  if (!(Literal & 0xffff))
1213353358Sdim    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
1214353358Sdim
1215321369Sdim  int16_t Lo16 = static_cast<int16_t>(Literal);
1216321369Sdim  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
1217321369Sdim  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
1218321369Sdim}
1219321369Sdim
1220327952Sdimbool isArgPassedInSGPR(const Argument *A) {
1221327952Sdim  const Function *F = A->getParent();
1222327952Sdim
1223327952Sdim  // Arguments to compute shaders are never a source of divergence.
1224327952Sdim  CallingConv::ID CC = F->getCallingConv();
1225327952Sdim  switch (CC) {
1226327952Sdim  case CallingConv::AMDGPU_KERNEL:
1227327952Sdim  case CallingConv::SPIR_KERNEL:
1228327952Sdim    return true;
1229327952Sdim  case CallingConv::AMDGPU_VS:
1230327952Sdim  case CallingConv::AMDGPU_LS:
1231327952Sdim  case CallingConv::AMDGPU_HS:
1232327952Sdim  case CallingConv::AMDGPU_ES:
1233327952Sdim  case CallingConv::AMDGPU_GS:
1234327952Sdim  case CallingConv::AMDGPU_PS:
1235327952Sdim  case CallingConv::AMDGPU_CS:
1236327952Sdim    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
1237327952Sdim    // Everything else is in VGPRs.
1238327952Sdim    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
1239327952Sdim           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
1240327952Sdim  default:
1241327952Sdim    // TODO: Should calls support inreg for SGPR inputs?
1242327952Sdim    return false;
1243327952Sdim  }
1244327952Sdim}
1245327952Sdim
1246353358Sdimstatic bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
1247353358Sdim  return isGCN3Encoding(ST) || isGFX10(ST);
1248353358Sdim}
1249353358Sdim
1250321369Sdimint64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
1251353358Sdim  if (hasSMEMByteOffset(ST))
1252327952Sdim    return ByteOffset;
1253327952Sdim  return ByteOffset >> 2;
1254321369Sdim}
1255321369Sdim
1256321369Sdimbool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
1257321369Sdim  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
1258353358Sdim  return (hasSMEMByteOffset(ST)) ?
1259327952Sdim    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
1260321369Sdim}
1261327952Sdim
1262344779Sdim// Given Imm, split it into the values to put into the SOffset and ImmOffset
1263344779Sdim// fields in an MUBUF instruction. Return false if it is not possible (due to a
1264344779Sdim// hardware bug needing a workaround).
1265344779Sdim//
1266344779Sdim// The required alignment ensures that individual address components remain
1267344779Sdim// aligned if they are aligned to begin with. It also ensures that additional
1268344779Sdim// offsets within the given alignment can be added to the resulting ImmOffset.
1269344779Sdimbool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1270344779Sdim                      const GCNSubtarget *Subtarget, uint32_t Align) {
1271344779Sdim  const uint32_t MaxImm = alignDown(4095, Align);
1272344779Sdim  uint32_t Overflow = 0;
1273321369Sdim
1274344779Sdim  if (Imm > MaxImm) {
1275344779Sdim    if (Imm <= MaxImm + 64) {
1276344779Sdim      // Use an SOffset inline constant for 4..64
1277344779Sdim      Overflow = Imm - MaxImm;
1278344779Sdim      Imm = MaxImm;
1279344779Sdim    } else {
1280344779Sdim      // Try to keep the same value in SOffset for adjacent loads, so that
1281344779Sdim      // the corresponding register contents can be re-used.
1282344779Sdim      //
1283344779Sdim      // Load values with all low-bits (except for alignment bits) set into
1284344779Sdim      // SOffset, so that a larger range of values can be covered using
1285344779Sdim      // s_movk_i32.
1286344779Sdim      //
1287344779Sdim      // Atomic operations fail to work correctly when individual address
1288344779Sdim      // components are unaligned, even if their sum is aligned.
1289344779Sdim      uint32_t High = (Imm + Align) & ~4095;
1290344779Sdim      uint32_t Low = (Imm + Align) & 4095;
1291344779Sdim      Imm = Low;
1292344779Sdim      Overflow = High - Align;
1293344779Sdim    }
1294344779Sdim  }
1295321369Sdim
1296344779Sdim  // There is a hardware bug in SI and CI which prevents address clamping in
1297344779Sdim  // MUBUF instructions from working correctly with SOffsets. The immediate
1298344779Sdim  // offset is unaffected.
1299344779Sdim  if (Overflow > 0 &&
1300344779Sdim      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
1301344779Sdim    return false;
1302321369Sdim
1303344779Sdim  ImmOffset = Imm;
1304344779Sdim  SOffset = Overflow;
1305344779Sdim  return true;
1306321369Sdim}
1307321369Sdim
1308360784SdimSIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
1309360784Sdim                                               const GCNSubtarget &ST) {
1310353358Sdim  *this = getDefaultForCallingConv(F.getCallingConv());
1311353358Sdim
1312353358Sdim  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
1313353358Sdim  if (!IEEEAttr.empty())
1314353358Sdim    IEEE = IEEEAttr == "true";
1315353358Sdim
1316353358Sdim  StringRef DX10ClampAttr
1317353358Sdim    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
1318353358Sdim  if (!DX10ClampAttr.empty())
1319353358Sdim    DX10Clamp = DX10ClampAttr == "true";
1320360784Sdim
1321360784Sdim  FP32Denormals = ST.hasFP32Denormals(F);
1322360784Sdim  FP64FP16Denormals = ST.hasFP64FP16Denormals(F);
1323353358Sdim}
1324353358Sdim
namespace {

// Record type with a single intrinsic ID field; lookupSourceOfDivergence()
// returns a pointer to one of these.
struct SourceOfDivergence {
  unsigned Intr;
};
// Declared here ahead of the include below.
// NOTE(review): the definition is presumably emitted by
// AMDGPUGenSearchableTables.inc under GET_SourcesOfDivergence_IMPL — confirm.
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

// These macros are defined before the include so that it sees them; they
// presumably select which table implementations the generated file emits.
#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace
1338341825Sdim
1339341825Sdimbool isIntrinsicSourceOfDivergence(unsigned IntrID) {
1340341825Sdim  return lookupSourceOfDivergence(IntrID);
1341341825Sdim}
1342353358Sdim
1343360784Sdimconst GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
1344360784Sdim                                                  uint8_t NumComponents,
1345360784Sdim                                                  uint8_t NumFormat,
1346360784Sdim                                                  const MCSubtargetInfo &STI) {
1347360784Sdim  return isGFX10(STI)
1348360784Sdim             ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
1349360784Sdim                                            NumFormat)
1350360784Sdim             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
1351360784Sdim}
1352360784Sdim
1353360784Sdimconst GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
1354360784Sdim                                                  const MCSubtargetInfo &STI) {
1355360784Sdim  return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format)
1356360784Sdim                      : getGfx9BufferFormatInfo(Format);
1357360784Sdim}
1358360784Sdim
1359321369Sdim} // namespace AMDGPU
1360321369Sdim} // namespace llvm
1361