1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "AMDGPU.h"
13#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "llvm/ADT/StringRef.h"
16#include "llvm/IR/CallingConv.h"
17#include "llvm/MC/MCInstrDesc.h"
18#include "llvm/Support/AMDHSAKernelDescriptor.h"
19#include "llvm/Support/Compiler.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/TargetParser.h"
22#include <cstdint>
23#include <string>
24#include <utility>
25
26namespace llvm {
27
28class Argument;
29class AMDGPUSubtarget;
30class FeatureBitset;
31class Function;
32class GCNSubtarget;
33class GlobalValue;
34class MCContext;
35class MCRegisterClass;
36class MCRegisterInfo;
37class MCSection;
38class MCSubtargetInfo;
39class MachineMemOperand;
40class Triple;
41
42namespace AMDGPU {
43
/// One record of the GCN buffer-format table. Entries are looked up either by
/// the raw \c Format encoding or by the (BitsPerComp, NumComponents,
/// NumFormat) triple via the getGcnBufferFormatInfo overloads declared below.
struct GcnBufferFormatInfo {
  unsigned Format;        // Raw hardware buffer format encoding.
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // Numeric-format field (presumably NFMT) — confirm
                          // against the tablegen definition.
  unsigned DataFormat;    // Data-format field (presumably DFMT) — confirm.
};
51
52#define GET_MIMGBaseOpcode_DECL
53#define GET_MIMGDim_DECL
54#define GET_MIMGEncoding_DECL
55#define GET_MIMGLZMapping_DECL
56#define GET_MIMGMIPMapping_DECL
57#include "AMDGPUGenSearchableTables.inc"
58
59namespace IsaInfo {
60
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // SGPRs set aside for the trap handler (per the name; exact usage lives in
  // the Utils implementation — confirm there).
  TRAP_NUM_SGPRS = 16
};
67
68/// Streams isa version string for given subtarget \p STI into \p Stream.
69void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
70
71/// \returns True if given subtarget \p STI supports code object version 3,
72/// false otherwise.
73bool hasCodeObjectV3(const MCSubtargetInfo *STI);
74
75/// \returns Wavefront size for given subtarget \p STI.
76unsigned getWavefrontSize(const MCSubtargetInfo *STI);
77
78/// \returns Local memory size in bytes for given subtarget \p STI.
79unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
80
81/// \returns Number of execution units per compute unit for given subtarget \p
82/// STI.
83unsigned getEUsPerCU(const MCSubtargetInfo *STI);
84
85/// \returns Maximum number of work groups per compute unit for given subtarget
86/// \p STI and limited by given \p FlatWorkGroupSize.
87unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
88                               unsigned FlatWorkGroupSize);
89
90/// \returns Maximum number of waves per compute unit for given subtarget \p
91/// STI without any kind of limitation.
92unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
93
94/// \returns Maximum number of waves per compute unit for given subtarget \p
95/// STI and limited by given \p FlatWorkGroupSize.
96unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
97                          unsigned FlatWorkGroupSize);
98
99/// \returns Minimum number of waves per execution unit for given subtarget \p
100/// STI.
101unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
102
103/// \returns Maximum number of waves per execution unit for given subtarget \p
104/// STI without any kind of limitation.
105unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
106
107/// \returns Maximum number of waves per execution unit for given subtarget \p
108/// STI and limited by given \p FlatWorkGroupSize.
109unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
110                          unsigned FlatWorkGroupSize);
111
112/// \returns Minimum flat work group size for given subtarget \p STI.
113unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
114
115/// \returns Maximum flat work group size for given subtarget \p STI.
116unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
117
118/// \returns Number of waves per work group for given subtarget \p STI and
119/// limited by given \p FlatWorkGroupSize.
120unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
121                              unsigned FlatWorkGroupSize);
122
123/// \returns SGPR allocation granularity for given subtarget \p STI.
124unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
125
126/// \returns SGPR encoding granularity for given subtarget \p STI.
127unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
128
129/// \returns Total number of SGPRs for given subtarget \p STI.
130unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
131
132/// \returns Addressable number of SGPRs for given subtarget \p STI.
133unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
134
135/// \returns Minimum number of SGPRs that meets the given number of waves per
136/// execution unit requirement for given subtarget \p STI.
137unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
138
139/// \returns Maximum number of SGPRs that meets the given number of waves per
140/// execution unit requirement for given subtarget \p STI.
141unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
142                        bool Addressable);
143
144/// \returns Number of extra SGPRs implicitly required by given subtarget \p
145/// STI when the given special registers are used.
146unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
147                          bool FlatScrUsed, bool XNACKUsed);
148
149/// \returns Number of extra SGPRs implicitly required by given subtarget \p
150/// STI when the given special registers are used. XNACK is inferred from
151/// \p STI.
152unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
153                          bool FlatScrUsed);
154
155/// \returns Number of SGPR blocks needed for given subtarget \p STI when
156/// \p NumSGPRs are used. \p NumSGPRs should already include any special
157/// register counts.
158unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
159
160/// \returns VGPR allocation granularity for given subtarget \p STI.
161///
162/// For subtargets which support it, \p EnableWavefrontSize32 should match
163/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
164unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
165                             Optional<bool> EnableWavefrontSize32 = None);
166
167/// \returns VGPR encoding granularity for given subtarget \p STI.
168///
169/// For subtargets which support it, \p EnableWavefrontSize32 should match
170/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
171unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
172                                Optional<bool> EnableWavefrontSize32 = None);
173
174/// \returns Total number of VGPRs for given subtarget \p STI.
175unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
176
177/// \returns Addressable number of VGPRs for given subtarget \p STI.
178unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
179
180/// \returns Minimum number of VGPRs that meets given number of waves per
181/// execution unit requirement for given subtarget \p STI.
182unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
183
184/// \returns Maximum number of VGPRs that meets given number of waves per
185/// execution unit requirement for given subtarget \p STI.
186unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
187
188/// \returns Number of VGPR blocks needed for given subtarget \p STI when
189/// \p NumVGPRs are used.
190///
191/// For subtargets which support it, \p EnableWavefrontSize32 should match the
192/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
193unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
194                          Optional<bool> EnableWavefrontSize32 = None);
195
196} // end namespace IsaInfo
197
198LLVM_READONLY
199int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
200
201LLVM_READONLY
202int getSOPPWithRelaxation(uint16_t Opcode);
203
/// Per-base-opcode properties of MIMG (image) instructions. The record layout
/// mirrors the tablegen-generated tables (see the AMDGPUGenSearchableTables.inc
/// include above); looked up via getMIMGBaseOpcodeInfo().
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;            // Opcode writes memory.
  bool Atomic;           // Opcode is an atomic operation.
  bool AtomicX2;         // Two-dword atomic variant (e.g. cmpswap) — confirm.
  bool Sampler;          // Uses a sampler resource.
  bool Gather4;          // gather4-family opcode.

  uint8_t NumExtraArgs;  // Extra address arguments beyond the coordinates.
  bool Gradients;        // Takes explicit gradient operands.
  bool Coordinates;      // Takes coordinate operands.
  bool LodOrClampOrMip;  // Takes an LOD / clamp / mip trailing operand.
  bool HasD16;           // A D16 (16-bit data) variant exists.
};
218
219LLVM_READONLY
220const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
221
/// Properties of a MIMG dimension variant. Records can be looked up by enum
/// value, by hardware encoding, or by assembler suffix (see the three
/// getMIMGDimInfo* accessors below).
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;     // Number of coordinate components.
  uint8_t NumGradients;  // Number of gradient values for derivative variants.
  bool DA;               // Hardware DA ("declare array") bit — confirm.
  uint8_t Encoding;      // Hardware encoding (key of getMIMGDimInfoByEncoding).
  const char *AsmSuffix; // Assembler suffix (key of getMIMGDimInfoByAsmSuffix).
};
230
231LLVM_READONLY
232const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
233
234LLVM_READONLY
235const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
236
237LLVM_READONLY
238const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
239
/// Pairs an explicit-LOD MIMG base opcode (\c L) with its zero-LOD (\c LZ)
/// counterpart; looked up via getMIMGLZMappingInfo().
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;  // Opcode taking an explicit LOD operand.
  MIMGBaseOpcode LZ; // Equivalent opcode with LOD implicitly zero.
};
244
/// Pairs a MIP-variant MIMG base opcode with its non-MIP counterpart; looked
/// up via getMIMGMIPMappingInfo().
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;    // The _mip form of the opcode.
  MIMGBaseOpcode NONMIP; // The equivalent opcode without the mip operand.
};
249
250LLVM_READONLY
251const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
252
253LLVM_READONLY
254const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L);
255
256LLVM_READONLY
257int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
258                  unsigned VDataDwords, unsigned VAddrDwords);
259
260LLVM_READONLY
261int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
262
/// Per-instruction MIMG table record, keyed by \c Opcode for getMIMGInfo();
/// getMIMGOpcode() searches by the remaining four fields.
struct MIMGInfo {
  uint16_t Opcode;      // Target instruction opcode.
  uint16_t BaseOpcode;  // MIMGBaseOpcode this encoding belongs to.
  uint8_t MIMGEncoding; // Encoding family (see getMIMGOpcode).
  uint8_t VDataDwords;  // Dwords of vdata.
  uint8_t VAddrDwords;  // Dwords of vaddr.
};
270
271LLVM_READONLY
272const MIMGInfo *getMIMGInfo(unsigned Opc);
273
274LLVM_READONLY
275int getMTBUFBaseOpcode(unsigned Opc);
276
277LLVM_READONLY
278int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
279
280LLVM_READONLY
281int getMTBUFElements(unsigned Opc);
282
283LLVM_READONLY
284bool getMTBUFHasVAddr(unsigned Opc);
285
286LLVM_READONLY
287bool getMTBUFHasSrsrc(unsigned Opc);
288
289LLVM_READONLY
290bool getMTBUFHasSoffset(unsigned Opc);
291
292LLVM_READONLY
293int getMUBUFBaseOpcode(unsigned Opc);
294
295LLVM_READONLY
296int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
297
298LLVM_READONLY
299int getMUBUFElements(unsigned Opc);
300
301LLVM_READONLY
302bool getMUBUFHasVAddr(unsigned Opc);
303
304LLVM_READONLY
305bool getMUBUFHasSrsrc(unsigned Opc);
306
307LLVM_READONLY
308bool getMUBUFHasSoffset(unsigned Opc);
309
310LLVM_READONLY
311const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
312                                                  uint8_t NumComponents,
313                                                  uint8_t NumFormat,
314                                                  const MCSubtargetInfo &STI);
315LLVM_READONLY
316const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
317                                                  const MCSubtargetInfo &STI);
318
319LLVM_READONLY
320int getMCOpcode(uint16_t Opcode, unsigned Gen);
321
322void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
323                               const MCSubtargetInfo *STI);
324
325amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
326    const MCSubtargetInfo *STI);
327
328bool isGroupSegment(const GlobalValue *GV);
329bool isGlobalSegment(const GlobalValue *GV);
330bool isReadOnlySegment(const GlobalValue *GV);
331
332/// \returns True if constants should be emitted to .text section for given
333/// target triple \p TT, false otherwise.
334bool shouldEmitConstantsToTextSection(const Triple &TT);
335
336/// \returns Integer value requested using \p F's \p Name attribute.
337///
338/// \returns \p Default if attribute is not present.
339///
340/// \returns \p Default and emits error if requested value cannot be converted
341/// to integer.
342int getIntegerAttribute(const Function &F, StringRef Name, int Default);
343
344/// \returns A pair of integer values requested using \p F's \p Name attribute
345/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
346/// is false).
347///
348/// \returns \p Default if attribute is not present.
349///
350/// \returns \p Default and emits error if one of the requested values cannot be
351/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
352/// not present.
353std::pair<int, int> getIntegerPairAttribute(const Function &F,
354                                            StringRef Name,
355                                            std::pair<int, int> Default,
356                                            bool OnlyFirstRequired = false);
357
358/// Represents the counter values to wait for in an s_waitcnt instruction.
359///
360/// Large values (including the maximum possible integer) can be used to
361/// represent "don't care" waits.
362struct Waitcnt {
363  unsigned VmCnt = ~0u;
364  unsigned ExpCnt = ~0u;
365  unsigned LgkmCnt = ~0u;
366  unsigned VsCnt = ~0u;
367
368  Waitcnt() {}
369  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
370      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
371
372  static Waitcnt allZero(const IsaVersion &Version) {
373    return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
374  }
375  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
376
377  bool hasWait() const {
378    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
379  }
380
381  bool dominates(const Waitcnt &Other) const {
382    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
383           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
384  }
385
386  Waitcnt combined(const Waitcnt &Other) const {
387    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
388                   std::min(LgkmCnt, Other.LgkmCnt),
389                   std::min(VsCnt, Other.VsCnt));
390  }
391};
392
393/// \returns Vmcnt bit mask for given isa \p Version.
394unsigned getVmcntBitMask(const IsaVersion &Version);
395
396/// \returns Expcnt bit mask for given isa \p Version.
397unsigned getExpcntBitMask(const IsaVersion &Version);
398
399/// \returns Lgkmcnt bit mask for given isa \p Version.
400unsigned getLgkmcntBitMask(const IsaVersion &Version);
401
402/// \returns Waitcnt bit mask for given isa \p Version.
403unsigned getWaitcntBitMask(const IsaVersion &Version);
404
405/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
406unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
407
408/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
409unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
410
411/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
412unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
413
414/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
415/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
416/// \p Lgkmcnt respectively.
417///
418/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
419///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
420///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
421///     \p Expcnt = \p Waitcnt[6:4]
422///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
423///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
424void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
425                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
426
427Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
428
429/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
430unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
431                     unsigned Vmcnt);
432
433/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
434unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
435                      unsigned Expcnt);
436
437/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
438unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
439                       unsigned Lgkmcnt);
440
441/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
442/// \p Version.
443///
444/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
445///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
446///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
447///     Waitcnt[6:4]   = \p Expcnt
448///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
449///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
450///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
451///
452/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
453/// isa \p Version.
454unsigned encodeWaitcnt(const IsaVersion &Version,
455                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
456
457unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
458
459namespace Hwreg {
460
461LLVM_READONLY
462int64_t getHwregId(const StringRef Name);
463
464LLVM_READNONE
465bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
466
467LLVM_READNONE
468bool isValidHwreg(int64_t Id);
469
470LLVM_READNONE
471bool isValidHwregOffset(int64_t Offset);
472
473LLVM_READNONE
474bool isValidHwregWidth(int64_t Width);
475
476LLVM_READNONE
477uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
478
479LLVM_READNONE
480StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
481
482void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
483
484} // namespace Hwreg
485
486namespace SendMsg {
487
488LLVM_READONLY
489int64_t getMsgId(const StringRef Name);
490
491LLVM_READONLY
492int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
493
494LLVM_READNONE
495StringRef getMsgName(int64_t MsgId);
496
497LLVM_READNONE
498StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
499
500LLVM_READNONE
501bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
502
503LLVM_READNONE
504bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);
505
506LLVM_READNONE
507bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);
508
509LLVM_READNONE
510bool msgRequiresOp(int64_t MsgId);
511
512LLVM_READNONE
513bool msgSupportsStream(int64_t MsgId, int64_t OpId);
514
515void decodeMsg(unsigned Val,
516               uint16_t &MsgId,
517               uint16_t &OpId,
518               uint16_t &StreamId);
519
520LLVM_READNONE
521uint64_t encodeMsg(uint64_t MsgId,
522                   uint64_t OpId,
523                   uint64_t StreamId);
524
525} // namespace SendMsg
526
527
528unsigned getInitialPSInputAddr(const Function &F);
529
530LLVM_READNONE
531bool isShader(CallingConv::ID CC);
532
533LLVM_READNONE
534bool isCompute(CallingConv::ID CC);
535
536LLVM_READNONE
537bool isEntryFunctionCC(CallingConv::ID CC);
538
539// FIXME: Remove this when calling conventions cleaned up
540LLVM_READNONE
541inline bool isKernel(CallingConv::ID CC) {
542  switch (CC) {
543  case CallingConv::AMDGPU_KERNEL:
544  case CallingConv::SPIR_KERNEL:
545    return true;
546  default:
547    return false;
548  }
549}
550
551bool hasXNACK(const MCSubtargetInfo &STI);
552bool hasSRAMECC(const MCSubtargetInfo &STI);
553bool hasMIMG_R128(const MCSubtargetInfo &STI);
554bool hasPackedD16(const MCSubtargetInfo &STI);
555
556bool isSI(const MCSubtargetInfo &STI);
557bool isCI(const MCSubtargetInfo &STI);
558bool isVI(const MCSubtargetInfo &STI);
559bool isGFX9(const MCSubtargetInfo &STI);
560bool isGFX10(const MCSubtargetInfo &STI);
561
/// \returns True if \p Reg is a scalar register.
563bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
564
/// \returns True if registers \p Reg0 and \p Reg1 overlap.
566bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
567
568/// If \p Reg is a pseudo reg, return the correct hardware register given
569/// \p STI otherwise return \p Reg.
570unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
571
572/// Convert hardware register \p Reg to a pseudo register
573LLVM_READNONE
574unsigned mc2PseudoReg(unsigned Reg);
575
576/// Can this operand also contain immediate values?
577bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
578
/// Is this a floating-point operand?
580bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
581
/// Does this operand support only inlinable literals?
583bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
584
585/// Get the size in bits of a register from the register class \p RC.
586unsigned getRegBitWidth(unsigned RCID);
587
588/// Get the size in bits of a register from the register class \p RC.
589unsigned getRegBitWidth(const MCRegisterClass &RC);
590
591/// Get size of register operand
592unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
593                           unsigned OpNo);
594
595LLVM_READNONE
596inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
597  switch (OpInfo.OperandType) {
598  case AMDGPU::OPERAND_REG_IMM_INT32:
599  case AMDGPU::OPERAND_REG_IMM_FP32:
600  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
601  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
602  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
603  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
604    return 4;
605
606  case AMDGPU::OPERAND_REG_IMM_INT64:
607  case AMDGPU::OPERAND_REG_IMM_FP64:
608  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
609  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
610    return 8;
611
612  case AMDGPU::OPERAND_REG_IMM_INT16:
613  case AMDGPU::OPERAND_REG_IMM_FP16:
614  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
615  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
616  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
617  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
618  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
619  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
620  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
621  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
622  case AMDGPU::OPERAND_REG_IMM_V2INT16:
623  case AMDGPU::OPERAND_REG_IMM_V2FP16:
624    return 2;
625
626  default:
627    llvm_unreachable("unhandled operand type");
628  }
629}
630
631LLVM_READNONE
632inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
633  return getOperandSize(Desc.OpInfo[OpNo]);
634}
635
636/// Is this literal inlinable
637LLVM_READNONE
638bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
639
640LLVM_READNONE
641bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
642
643LLVM_READNONE
644bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
645
646LLVM_READNONE
647bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
648
649bool isArgPassedInSGPR(const Argument *Arg);
650
651/// \returns The encoding that will be used for \p ByteOffset in the SMRD
652/// offset field.
653int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
654
655/// \returns true if this offset is small enough to fit in the SMRD
656/// offset field.  \p ByteOffset should be the offset in bytes and
657/// not the encoded offset.
658bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
659
660bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
661                      const GCNSubtarget *Subtarget, uint32_t Align = 4);
662
663/// \returns true if the intrinsic is divergent
664bool isIntrinsicSourceOfDivergence(unsigned IntrID);
665
// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  ///
  /// TODO: Split into separate input and output fields if necessary, matching
  /// the separate control bits the hardware provides?
  bool FP32Denormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16Denormals : 1;

  /// Default state: IEEE mode with DX10 clamping and all denormals enabled.
  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32Denormals(true),
    FP64FP16Denormals(true) {}

  // FIXME: Should not depend on the subtarget
  SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);

  /// \returns The default mode for calling convention \p CC: IEEE is enabled
  /// only for compute conventions; f32 denormals are off (see FIXME below).
  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    const bool IsCompute = AMDGPU::isCompute(CC);

    SIModeRegisterDefaults Mode;
    Mode.DX10Clamp = true;
    Mode.IEEE = IsCompute;
    Mode.FP32Denormals = false; // FIXME: Should be on by default.
    Mode.FP64FP16Denormals = true;
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32Denormals == Other.FP32Denormals &&
           FP64FP16Denormals == Other.FP64FP16Denormals;
  }

  /// \returns True if a caller's flag is compatible with a callee's: either
  /// they match, or the flag is enabled in the caller while disabled in the
  /// callee (i.e. the one permitted one-way mismatch).
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (CallerMode && !CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
  // be able to override.
  /// \returns True if a function with mode \p CalleeMode may be inlined into
  /// a function with this mode: IEEE and DX10Clamp must match exactly;
  /// denormal flags may differ one way only.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining a denormal-flushing callee into a caller with denormals
    // enabled (oneWayCompatible permits caller-enabled / callee-disabled).
    return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
           oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
  }
};
734
735} // end namespace AMDGPU
736} // end namespace llvm
737
738#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
739