1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "AMDGPU.h"
13#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "llvm/IR/CallingConv.h"
16#include "llvm/MC/MCInstrDesc.h"
17#include "llvm/Support/AMDHSAKernelDescriptor.h"
18#include "llvm/Support/Alignment.h"
19#include "llvm/Support/Compiler.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/TargetParser.h"
22#include <cstdint>
23#include <string>
24#include <utility>
25
26namespace llvm {
27
28class Argument;
29class Function;
30class GCNSubtarget;
31class GlobalValue;
32class MCRegisterClass;
33class MCRegisterInfo;
34class MCSubtargetInfo;
35class StringRef;
36class Triple;
37
38namespace AMDGPU {
39
/// Entry of the buffer-format lookup table (see getGcnBufferFormatInfo):
/// relates a combined buffer format value to its per-component layout and to
/// the separate NUM_FORMAT/DATA_FORMAT encodings.
struct GcnBufferFormatInfo {
  unsigned Format;        ///< Combined buffer format value.
  unsigned BitsPerComp;   ///< Bits per component.
  unsigned NumComponents; ///< Number of components.
  unsigned NumFormat;     ///< NUM_FORMAT field encoding.
  unsigned DataFormat;    ///< DATA_FORMAT field encoding.
};
47
48#define GET_MIMGBaseOpcode_DECL
49#define GET_MIMGDim_DECL
50#define GET_MIMGEncoding_DECL
51#define GET_MIMGLZMapping_DECL
52#define GET_MIMGMIPMapping_DECL
53#include "AMDGPUGenSearchableTables.inc"
54
55namespace IsaInfo {
56
/// Fixed SGPR-count constants used by the ISA resource-limit computations.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // SGPRs presumably reserved for the trap handler — TODO(review): confirm
  // against the hardware documentation.
  TRAP_NUM_SGPRS = 16
};
63
64/// Streams isa version string for given subtarget \p STI into \p Stream.
65void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
66
67/// \returns True if given subtarget \p STI supports code object version 3,
68/// false otherwise.
69bool hasCodeObjectV3(const MCSubtargetInfo *STI);
70
71/// \returns Wavefront size for given subtarget \p STI.
72unsigned getWavefrontSize(const MCSubtargetInfo *STI);
73
74/// \returns Local memory size in bytes for given subtarget \p STI.
75unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
76
77/// \returns Number of execution units per compute unit for given subtarget \p
78/// STI.
79unsigned getEUsPerCU(const MCSubtargetInfo *STI);
80
81/// \returns Maximum number of work groups per compute unit for given subtarget
82/// \p STI and limited by given \p FlatWorkGroupSize.
83unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
84                               unsigned FlatWorkGroupSize);
85
86/// \returns Minimum number of waves per execution unit for given subtarget \p
87/// STI.
88unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
89
90/// \returns Maximum number of waves per execution unit for given subtarget \p
91/// STI without any kind of limitation.
92unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
93
94/// \returns Number of waves per execution unit required to support the given \p
95/// FlatWorkGroupSize.
96unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
97                                   unsigned FlatWorkGroupSize);
98
99/// \returns Minimum flat work group size for given subtarget \p STI.
100unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
101
102/// \returns Maximum flat work group size for given subtarget \p STI.
103unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
104
105/// \returns Number of waves per work group for given subtarget \p STI and
106/// \p FlatWorkGroupSize.
107unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
108                              unsigned FlatWorkGroupSize);
109
110/// \returns SGPR allocation granularity for given subtarget \p STI.
111unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
112
113/// \returns SGPR encoding granularity for given subtarget \p STI.
114unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
115
116/// \returns Total number of SGPRs for given subtarget \p STI.
117unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
118
119/// \returns Addressable number of SGPRs for given subtarget \p STI.
120unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
121
122/// \returns Minimum number of SGPRs that meets the given number of waves per
123/// execution unit requirement for given subtarget \p STI.
124unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
125
126/// \returns Maximum number of SGPRs that meets the given number of waves per
127/// execution unit requirement for given subtarget \p STI.
128unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
129                        bool Addressable);
130
131/// \returns Number of extra SGPRs implicitly required by given subtarget \p
132/// STI when the given special registers are used.
133unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
134                          bool FlatScrUsed, bool XNACKUsed);
135
136/// \returns Number of extra SGPRs implicitly required by given subtarget \p
137/// STI when the given special registers are used. XNACK is inferred from
138/// \p STI.
139unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
140                          bool FlatScrUsed);
141
142/// \returns Number of SGPR blocks needed for given subtarget \p STI when
143/// \p NumSGPRs are used. \p NumSGPRs should already include any special
144/// register counts.
145unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
146
147/// \returns VGPR allocation granularity for given subtarget \p STI.
148///
149/// For subtargets which support it, \p EnableWavefrontSize32 should match
150/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
151unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
152                             Optional<bool> EnableWavefrontSize32 = None);
153
154/// \returns VGPR encoding granularity for given subtarget \p STI.
155///
156/// For subtargets which support it, \p EnableWavefrontSize32 should match
157/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
158unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
159                                Optional<bool> EnableWavefrontSize32 = None);
160
161/// \returns Total number of VGPRs for given subtarget \p STI.
162unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
163
164/// \returns Addressable number of VGPRs for given subtarget \p STI.
165unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
166
167/// \returns Minimum number of VGPRs that meets given number of waves per
168/// execution unit requirement for given subtarget \p STI.
169unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
170
171/// \returns Maximum number of VGPRs that meets given number of waves per
172/// execution unit requirement for given subtarget \p STI.
173unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
174
175/// \returns Number of VGPR blocks needed for given subtarget \p STI when
176/// \p NumVGPRs are used.
177///
178/// For subtargets which support it, \p EnableWavefrontSize32 should match the
179/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
180unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
181                          Optional<bool> EnableWavefrontSize32 = None);
182
183} // end namespace IsaInfo
184
/// \returns Index of the operand named \p NamedIdx within instruction
/// \p Opcode (tablegen-generated lookup). NOTE(review): sentinel for a
/// missing operand is not visible here — presumably negative; confirm in
/// the generated tables.
LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

/// \returns The variant of SOPP instruction \p Opcode used by branch
/// relaxation (tablegen-generated lookup).
LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);
190
/// Properties of a MIMG base opcode, retrieved via getMIMGBaseOpcodeInfo.
/// Flag names follow the MIMG instruction definitions.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode; ///< Base opcode this entry describes.
  bool Store;                ///< Image store operation.
  bool Atomic;               ///< Image atomic operation.
  bool AtomicX2;             ///< Two-dword (64-bit) atomic.
  bool Sampler;              ///< Uses a sampler descriptor.
  bool Gather4;              ///< gather4 operation.

  uint8_t NumExtraArgs;      ///< Extra arguments beyond the coordinates.
  bool Gradients;            ///< Takes explicit gradient (derivative) args.
  bool G16;                  ///< Has a 16-bit gradient form (see
                             ///< getMIMGG16MappingInfo).
  bool Coordinates;          ///< Takes coordinate arguments.
  bool LodOrClampOrMip;      ///< Takes an LOD, clamp or mip-level argument.
  bool HasD16;               ///< Has a D16 (16-bit data) variant.
};
206
207LLVM_READONLY
208const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
209
/// Properties of a MIMG dimension (dim) value, retrieved via getMIMGDimInfo.
struct MIMGDimInfo {
  MIMGDim Dim;           ///< Dimension enum value this entry describes.
  uint8_t NumCoords;     ///< Number of coordinate components.
  uint8_t NumGradients;  ///< Number of gradient components.
  bool DA;               ///< DA bit value for this dimension.
  uint8_t Encoding;      ///< Hardware encoding (see
                         ///< getMIMGDimInfoByEncoding).
  const char *AsmSuffix; ///< Assembly suffix (see
                         ///< getMIMGDimInfoByAsmSuffix).
};
218
219LLVM_READONLY
220const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
221
222LLVM_READONLY
223const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
224
225LLVM_READONLY
226const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
227
/// Pairs an *_L MIMG base opcode with its *_LZ counterpart
/// (see getMIMGLZMappingInfo).
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;  ///< Opcode taking an explicit LOD argument.
  MIMGBaseOpcode LZ; ///< Corresponding LOD-zero opcode.
};
232
/// Pairs a *_MIP MIMG base opcode with its non-MIP counterpart
/// (see getMIMGMIPMappingInfo).
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;    ///< Opcode taking a mip-level argument.
  MIMGBaseOpcode NONMIP; ///< Corresponding non-MIP opcode.
};
237
/// Pairs a MIMG base opcode taking gradients with its 16-bit-gradient (G16)
/// counterpart (see getMIMGG16MappingInfo).
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;   ///< Opcode with full-size gradients.
  MIMGBaseOpcode G16; ///< Corresponding 16-bit-gradient opcode.
};
242
/// \returns The *_L -> *_LZ mapping entry for base opcode \p L
/// (tablegen-generated lookup).
LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

/// \returns The *_MIP -> non-MIP mapping entry for base opcode \p MIP
/// (tablegen-generated lookup).
LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

/// \returns The gradient -> G16 mapping entry for base opcode \p G
/// (tablegen-generated lookup).
LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

/// \returns Opcode of the MIMG instruction variant with the given base
/// opcode, encoding, and VData/VAddr dword counts.
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

/// \returns Variant of MIMG opcode \p Opc adjusted for \p NewChannels data
/// channels.
LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
258
/// Per-instruction MIMG properties, retrieved via getMIMGInfo.
struct MIMGInfo {
  uint16_t Opcode;      ///< Instruction opcode this entry describes.
  uint16_t BaseOpcode;  ///< Base opcode (key for getMIMGBaseOpcodeInfo).
  uint8_t MIMGEncoding; ///< Encoding family of the instruction.
  uint8_t VDataDwords;  ///< Number of dwords in the vdata operand.
  uint8_t VAddrDwords;  ///< Number of dwords in the vaddr operand.
};
266
/// \returns MIMG table entry describing opcode \p Opc.
LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

/// \returns Base opcode of MTBUF instruction \p Opc.
LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

/// \returns MTBUF opcode for base opcode \p BaseOpc and \p Elements.
LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

/// \returns Number of elements of MTBUF instruction \p Opc.
LLVM_READONLY
int getMTBUFElements(unsigned Opc);

/// \returns True if MTBUF instruction \p Opc has a vaddr operand.
LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

/// \returns True if MTBUF instruction \p Opc has an srsrc operand.
LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

/// \returns True if MTBUF instruction \p Opc has an soffset operand.
LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

/// \returns Base opcode of MUBUF instruction \p Opc.
LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

/// \returns MUBUF opcode for base opcode \p BaseOpc and \p Elements.
LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

/// \returns Number of elements of MUBUF instruction \p Opc.
LLVM_READONLY
int getMUBUFElements(unsigned Opc);

/// \returns True if MUBUF instruction \p Opc has a vaddr operand.
LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

/// \returns True if MUBUF instruction \p Opc has an srsrc operand.
LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

/// \returns True if MUBUF instruction \p Opc has an soffset operand.
LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

/// \returns True if SMEM instruction \p Opc is a buffer access.
LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

/// \returns Buffer-format table entry matching the split encoding
/// (\p BitsPerComp, \p NumComponents, \p NumFormat) on subtarget \p STI.
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
/// \returns Buffer-format table entry for combined format \p Format on
/// subtarget \p STI.
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

/// \returns MC opcode of pseudo instruction \p Opcode for generation \p Gen.
LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
320
/// Fills \p Header with the default amd_kernel_code_t values for subtarget
/// \p STI.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

/// \returns A default-initialized HSA kernel descriptor for subtarget \p STI.
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

/// \returns True if global \p GV belongs to the group segment.
bool isGroupSegment(const GlobalValue *GV);
/// \returns True if global \p GV belongs to the global segment.
bool isGlobalSegment(const GlobalValue *GV);
/// \returns True if global \p GV belongs to a read-only segment.
bool isReadOnlySegment(const GlobalValue *GV);
330
331/// \returns True if constants should be emitted to .text section for given
332/// target triple \p TT, false otherwise.
333bool shouldEmitConstantsToTextSection(const Triple &TT);
334
335/// \returns Integer value requested using \p F's \p Name attribute.
336///
337/// \returns \p Default if attribute is not present.
338///
339/// \returns \p Default and emits error if requested value cannot be converted
340/// to integer.
341int getIntegerAttribute(const Function &F, StringRef Name, int Default);
342
343/// \returns A pair of integer values requested using \p F's \p Name attribute
344/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
345/// is false).
346///
347/// \returns \p Default if attribute is not present.
348///
349/// \returns \p Default and emits error if one of the requested values cannot be
350/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
351/// not present.
352std::pair<int, int> getIntegerPairAttribute(const Function &F,
353                                            StringRef Name,
354                                            std::pair<int, int> Default,
355                                            bool OnlyFirstRequired = false);
356
357/// Represents the counter values to wait for in an s_waitcnt instruction.
358///
359/// Large values (including the maximum possible integer) can be used to
360/// represent "don't care" waits.
361struct Waitcnt {
362  unsigned VmCnt = ~0u;
363  unsigned ExpCnt = ~0u;
364  unsigned LgkmCnt = ~0u;
365  unsigned VsCnt = ~0u;
366
367  Waitcnt() {}
368  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
369      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
370
371  static Waitcnt allZero(const IsaVersion &Version) {
372    return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
373  }
374  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
375
376  bool hasWait() const {
377    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
378  }
379
380  bool dominates(const Waitcnt &Other) const {
381    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
382           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
383  }
384
385  Waitcnt combined(const Waitcnt &Other) const {
386    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
387                   std::min(LgkmCnt, Other.LgkmCnt),
388                   std::min(VsCnt, Other.VsCnt));
389  }
390};
391
392/// \returns Vmcnt bit mask for given isa \p Version.
393unsigned getVmcntBitMask(const IsaVersion &Version);
394
395/// \returns Expcnt bit mask for given isa \p Version.
396unsigned getExpcntBitMask(const IsaVersion &Version);
397
398/// \returns Lgkmcnt bit mask for given isa \p Version.
399unsigned getLgkmcntBitMask(const IsaVersion &Version);
400
401/// \returns Waitcnt bit mask for given isa \p Version.
402unsigned getWaitcntBitMask(const IsaVersion &Version);
403
404/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
405unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
406
407/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
408unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
409
410/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
411unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
412
413/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
414/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
415/// \p Lgkmcnt respectively.
416///
417/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
418///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
419///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
420///     \p Expcnt = \p Waitcnt[6:4]
421///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
422///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
423void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
424                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
425
/// \returns Decoded Waitcnt structure for given isa \p Version from the raw
/// encoded value \p Encoded.
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
427
428/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
429unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
430                     unsigned Vmcnt);
431
432/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
433unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
434                      unsigned Expcnt);
435
436/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
437unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
438                       unsigned Lgkmcnt);
439
440/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
441/// \p Version.
442///
443/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
444///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
445///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
446///     Waitcnt[6:4]   = \p Expcnt
447///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
448///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
449///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
450///
451/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
452/// isa \p Version.
453unsigned encodeWaitcnt(const IsaVersion &Version,
454                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
455
/// \returns Raw Waitcnt encoding of the counters held in \p Decoded for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
457
458namespace Hwreg {
459
/// \returns Id of the hardware register named \p Name (tablegen-generated
/// lookup; validate with isValidHwreg).
LLVM_READONLY
int64_t getHwregId(const StringRef Name);

/// \returns True if \p Id is a valid hardware register id for subtarget
/// \p STI.
LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

/// \returns True if \p Id is a valid hardware register id.
LLVM_READNONE
bool isValidHwreg(int64_t Id);

/// \returns True if \p Offset is a valid hwreg bit offset.
LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

/// \returns True if \p Width is a valid hwreg bit width.
LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

/// \returns The encoded hwreg operand built from \p Id, \p Offset and
/// \p Width (inverse of decodeHwreg).
LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

/// \returns Name of hardware register \p Id for subtarget \p STI.
LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

/// Unpacks encoded hwreg operand \p Val into \p Id, \p Offset and \p Width
/// (inverse of encodeHwreg).
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
482
483} // namespace Hwreg
484
485namespace SendMsg {
486
/// \returns Id of the message named \p Name (tablegen-generated lookup;
/// validate with isValidMsgId).
LLVM_READONLY
int64_t getMsgId(const StringRef Name);

/// \returns Id of operation \p Name of message \p MsgId.
LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

/// \returns Name of message \p MsgId.
LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

/// \returns Name of operation \p OpId of message \p MsgId.
LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

/// \returns True if \p MsgId is valid for subtarget \p STI; \p Strict
/// presumably relaxes validation when false — confirm in the implementation.
LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);

/// \returns True if \p OpId is a valid operation for message \p MsgId.
LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);

/// \returns True if \p StreamId is valid for message \p MsgId and operation
/// \p OpId.
LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);

/// \returns True if message \p MsgId requires an operation.
LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

/// \returns True if message \p MsgId with operation \p OpId accepts a stream
/// id.
LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

/// Unpacks encoded message operand \p Val into \p MsgId, \p OpId and
/// \p StreamId (inverse of encodeMsg).
void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

/// \returns The encoded message operand built from \p MsgId, \p OpId and
/// \p StreamId (inverse of decodeMsg).
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);
523
524} // namespace SendMsg
525
526
/// \returns Initial PS input address for function \p F.
unsigned getInitialPSInputAddr(const Function &F);

/// \returns True if calling convention \p CC is an AMDGPU shader calling
/// convention.
LLVM_READNONE
bool isShader(CallingConv::ID CC);

/// \returns True if calling convention \p CC is a compute calling convention.
LLVM_READNONE
bool isCompute(CallingConv::ID CC);

/// \returns True if calling convention \p CC denotes an entry function
/// (kernel or shader entry point).
LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);
537
538// FIXME: Remove this when calling conventions cleaned up
539LLVM_READNONE
540inline bool isKernel(CallingConv::ID CC) {
541  switch (CC) {
542  case CallingConv::AMDGPU_KERNEL:
543  case CallingConv::SPIR_KERNEL:
544    return true;
545  default:
546    return false;
547  }
548}
549
// Subtarget feature queries.
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

// Generation and encoding predicates for subtarget \p STI.
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
565
566/// Is Reg - scalar register
567bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
568
569/// Is there any intersection between registers
570bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
571
572/// If \p Reg is a pseudo reg, return the correct hardware register given
573/// \p STI otherwise return \p Reg.
574unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
575
576/// Convert hardware register \p Reg to a pseudo register
577LLVM_READNONE
578unsigned mc2PseudoReg(unsigned Reg);
579
580/// Can this operand also contain immediate values?
581bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
582
583/// Is this floating-point operand?
584bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
585
/// Does this operand support only inlinable literals?
587bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
588
589/// Get the size in bits of a register from the register class \p RC.
590unsigned getRegBitWidth(unsigned RCID);
591
592/// Get the size in bits of a register from the register class \p RC.
593unsigned getRegBitWidth(const MCRegisterClass &RC);
594
595/// Get size of register operand
596unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
597                           unsigned OpNo);
598
599LLVM_READNONE
600inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
601  switch (OpInfo.OperandType) {
602  case AMDGPU::OPERAND_REG_IMM_INT32:
603  case AMDGPU::OPERAND_REG_IMM_FP32:
604  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
605  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
606  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
607  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
608    return 4;
609
610  case AMDGPU::OPERAND_REG_IMM_INT64:
611  case AMDGPU::OPERAND_REG_IMM_FP64:
612  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
613  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
614    return 8;
615
616  case AMDGPU::OPERAND_REG_IMM_INT16:
617  case AMDGPU::OPERAND_REG_IMM_FP16:
618  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
619  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
620  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
621  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
622  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
623  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
624  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
625  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
626  case AMDGPU::OPERAND_REG_IMM_V2INT16:
627  case AMDGPU::OPERAND_REG_IMM_V2FP16:
628    return 2;
629
630  default:
631    llvm_unreachable("unhandled operand type");
632  }
633}
634
635LLVM_READNONE
636inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
637  return getOperandSize(Desc.OpInfo[OpNo]);
638}
639
640/// Is this literal inlinable, and not one of the values intended for floating
641/// point values.
642LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // The integer inline-constant range is [-16, 64].
  return -16 <= Literal && Literal <= 64;
}
646
647/// Is this literal inlinable
648LLVM_READNONE
649bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
650
651LLVM_READNONE
652bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
653
654LLVM_READNONE
655bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
656
657LLVM_READNONE
658bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
659
660LLVM_READNONE
661bool isInlinableIntLiteralV216(int32_t Literal);
662
663LLVM_READNONE
664bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
665
666bool isArgPassedInSGPR(const Argument *Arg);
667
668LLVM_READONLY
669bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
670                                      int64_t EncodedOffset);
671
672LLVM_READONLY
673bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
674                                    int64_t EncodedOffset,
675                                    bool IsBuffer);
676
677/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
678/// offsets.
679uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
680
681/// \returns The encoding that will be used for \p ByteOffset in the
682/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
683/// S_LOAD instructions have a signed offset, on other subtargets it is
684/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
685Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
686                                       int64_t ByteOffset, bool IsBuffer);
687
688/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
690Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
691                                                int64_t ByteOffset);
692
693/// \returns true if this offset is small enough to fit in the SMRD
694/// offset field.  \p ByteOffset should be the offset in bytes and
695/// not the encoded offset.
696bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
697
698bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
699                      const GCNSubtarget *Subtarget,
700                      Align Alignment = Align(4));
701
702/// \returns true if the intrinsic is divergent
703bool isIntrinsicSourceOfDivergence(unsigned IntrID);
704
// Track defaults for fields in the MODE register.
706struct SIModeRegisterDefaults {
707  /// Floating point opcodes that support exception flag gathering quiet and
708  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
709  /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
710  /// quieting.
711  bool IEEE : 1;
712
713  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
714  /// clamp NaN to zero; otherwise, pass NaN through.
715  bool DX10Clamp : 1;
716
717  /// If this is set, neither input or output denormals are flushed for most f32
718  /// instructions.
719  bool FP32InputDenormals : 1;
720  bool FP32OutputDenormals : 1;
721
722  /// If this is set, neither input or output denormals are flushed for both f64
723  /// and f16/v2f16 instructions.
724  bool FP64FP16InputDenormals : 1;
725  bool FP64FP16OutputDenormals : 1;
726
727  SIModeRegisterDefaults() :
728    IEEE(true),
729    DX10Clamp(true),
730    FP32InputDenormals(true),
731    FP32OutputDenormals(true),
732    FP64FP16InputDenormals(true),
733    FP64FP16OutputDenormals(true) {}
734
735  SIModeRegisterDefaults(const Function &F);
736
737  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
738    const bool IsCompute = AMDGPU::isCompute(CC);
739
740    SIModeRegisterDefaults Mode;
741    Mode.IEEE = IsCompute;
742    return Mode;
743  }
744
745  bool operator ==(const SIModeRegisterDefaults Other) const {
746    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
747           FP32InputDenormals == Other.FP32InputDenormals &&
748           FP32OutputDenormals == Other.FP32OutputDenormals &&
749           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
750           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
751  }
752
753  bool allFP32Denormals() const {
754    return FP32InputDenormals && FP32OutputDenormals;
755  }
756
757  bool allFP64FP16Denormals() const {
758    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
759  }
760
761  /// Get the encoding value for the FP_DENORM bits of the mode register for the
762  /// FP32 denormal mode.
763  uint32_t fpDenormModeSPValue() const {
764    if (FP32InputDenormals && FP32OutputDenormals)
765      return FP_DENORM_FLUSH_NONE;
766    if (FP32InputDenormals)
767      return FP_DENORM_FLUSH_OUT;
768    if (FP32OutputDenormals)
769      return FP_DENORM_FLUSH_IN;
770    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
771  }
772
773  /// Get the encoding value for the FP_DENORM bits of the mode register for the
774  /// FP64/FP16 denormal mode.
775  uint32_t fpDenormModeDPValue() const {
776    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
777      return FP_DENORM_FLUSH_NONE;
778    if (FP64FP16InputDenormals)
779      return FP_DENORM_FLUSH_OUT;
780    if (FP64FP16OutputDenormals)
781      return FP_DENORM_FLUSH_IN;
782    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
783  }
784
785  /// Returns true if a flag is compatible if it's enabled in the callee, but
786  /// disabled in the caller.
787  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
788    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
789  }
790
791  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
792  // be able to override.
793  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
794    if (DX10Clamp != CalleeMode.DX10Clamp)
795      return false;
796    if (IEEE != CalleeMode.IEEE)
797      return false;
798
799    // Allow inlining denormals enabled into denormals flushed functions.
800    return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
801           oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
802           oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
803           oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
804  }
805};
806
807} // end namespace AMDGPU
808} // end namespace llvm
809
810#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
811