1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
13#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14#include "llvm/BinaryFormat/ELF.h"
15#include "llvm/IR/Attributes.h"
16#include "llvm/IR/Constants.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/GlobalValue.h"
19#include "llvm/IR/IntrinsicsAMDGPU.h"
20#include "llvm/IR/IntrinsicsR600.h"
21#include "llvm/IR/LLVMContext.h"
22#include "llvm/MC/MCInstrInfo.h"
23#include "llvm/MC/MCRegisterInfo.h"
24#include "llvm/MC/MCSubtargetInfo.h"
25#include "llvm/Support/AMDHSAKernelDescriptor.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/TargetParser/TargetParser.h"
28#include <optional>
29
30#define GET_INSTRINFO_NAMED_OPS
31#define GET_INSTRMAP_INFO
32#include "AMDGPUGenInstrInfo.inc"
33
34static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
35    "amdhsa-code-object-version", llvm::cl::Hidden,
36    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
37    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
38                   "or asm directive still take priority if present)"));
39
40namespace {
41
42/// \returns Bit mask for given bit \p Shift and bit \p Width.
43unsigned getBitMask(unsigned Shift, unsigned Width) {
44  return ((1 << Width) - 1) << Shift;
45}
46
47/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
48///
49/// \returns Packed \p Dst.
50unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
51  unsigned Mask = getBitMask(Shift, Width);
52  return ((Src << Shift) & Mask) | (Dst & ~Mask);
53}
54
55/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
56///
57/// \returns Unpacked bits.
58unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
59  return (Src & getBitMask(Shift, Width)) >> Shift;
60}
61
62/// \returns Vmcnt bit shift (lower bits).
63unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
64  return VersionMajor >= 11 ? 10 : 0;
65}
66
67/// \returns Vmcnt bit width (lower bits).
68unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
69  return VersionMajor >= 11 ? 6 : 4;
70}
71
72/// \returns Expcnt bit shift.
73unsigned getExpcntBitShift(unsigned VersionMajor) {
74  return VersionMajor >= 11 ? 0 : 4;
75}
76
77/// \returns Expcnt bit width.
78unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
79
80/// \returns Lgkmcnt bit shift.
81unsigned getLgkmcntBitShift(unsigned VersionMajor) {
82  return VersionMajor >= 11 ? 4 : 8;
83}
84
85/// \returns Lgkmcnt bit width.
86unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
87  return VersionMajor >= 10 ? 6 : 4;
88}
89
90/// \returns Vmcnt bit shift (higher bits).
91unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
92
93/// \returns Vmcnt bit width (higher bits).
94unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
95  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
96}
97
98/// \returns Loadcnt bit width
99unsigned getLoadcntBitWidth(unsigned VersionMajor) {
100  return VersionMajor >= 12 ? 6 : 0;
101}
102
103/// \returns Samplecnt bit width.
104unsigned getSamplecntBitWidth(unsigned VersionMajor) {
105  return VersionMajor >= 12 ? 6 : 0;
106}
107
108/// \returns Bvhcnt bit width.
109unsigned getBvhcntBitWidth(unsigned VersionMajor) {
110  return VersionMajor >= 12 ? 3 : 0;
111}
112
113/// \returns Dscnt bit width.
114unsigned getDscntBitWidth(unsigned VersionMajor) {
115  return VersionMajor >= 12 ? 6 : 0;
116}
117
118/// \returns Dscnt bit shift in combined S_WAIT instructions.
119unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }
120
121/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
122unsigned getStorecntBitWidth(unsigned VersionMajor) {
123  return VersionMajor >= 10 ? 6 : 0;
124}
125
126/// \returns Kmcnt bit width.
127unsigned getKmcntBitWidth(unsigned VersionMajor) {
128  return VersionMajor >= 12 ? 5 : 0;
129}
130
131/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
132unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
133  return VersionMajor >= 12 ? 8 : 0;
134}
135
136/// \returns VmVsrc bit width
137inline unsigned getVmVsrcBitWidth() { return 3; }
138
139/// \returns VmVsrc bit shift
140inline unsigned getVmVsrcBitShift() { return 2; }
141
142/// \returns VaVdst bit width
143inline unsigned getVaVdstBitWidth() { return 4; }
144
145/// \returns VaVdst bit shift
146inline unsigned getVaVdstBitShift() { return 12; }
147
148/// \returns SaSdst bit width
149inline unsigned getSaSdstBitWidth() { return 1; }
150
151/// \returns SaSdst bit shift
152inline unsigned getSaSdstBitShift() { return 0; }
153
154} // end namespace anonymous
155
156namespace llvm {
157
158namespace AMDGPU {
159
160/// \returns True if \p STI is AMDHSA.
161bool isHsaAbi(const MCSubtargetInfo &STI) {
162  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
163}
164
165unsigned getAMDHSACodeObjectVersion(const Module &M) {
166  if (auto Ver = mdconst::extract_or_null<ConstantInt>(
167          M.getModuleFlag("amdgpu_code_object_version"))) {
168    return (unsigned)Ver->getZExtValue() / 100;
169  }
170
171  return getDefaultAMDHSACodeObjectVersion();
172}
173
174unsigned getDefaultAMDHSACodeObjectVersion() {
175  return DefaultAMDHSACodeObjectVersion;
176}
177
178uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
179  if (T.getOS() != Triple::AMDHSA)
180    return 0;
181
182  switch (CodeObjectVersion) {
183  case 4:
184    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
185  case 5:
186    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
187  default:
188    report_fatal_error("Unsupported AMDHSA Code Object Version " +
189                       Twine(CodeObjectVersion));
190  }
191}
192
193unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
194  switch (CodeObjectVersion) {
195  case AMDHSA_COV4:
196    return 48;
197  case AMDHSA_COV5:
198  default:
199    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
200  }
201}
202
203
204// FIXME: All such magic numbers about the ABI should be in a
205// central TD file.
206unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
207  switch (CodeObjectVersion) {
208  case AMDHSA_COV4:
209    return 24;
210  case AMDHSA_COV5:
211  default:
212    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
213  }
214}
215
216unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
217  switch (CodeObjectVersion) {
218  case AMDHSA_COV4:
219    return 32;
220  case AMDHSA_COV5:
221  default:
222    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
223  }
224}
225
226unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
227  switch (CodeObjectVersion) {
228  case AMDHSA_COV4:
229    return 40;
230  case AMDHSA_COV5:
231  default:
232    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
233  }
234}
235
236#define GET_MIMGBaseOpcodesTable_IMPL
237#define GET_MIMGDimInfoTable_IMPL
238#define GET_MIMGInfoTable_IMPL
239#define GET_MIMGLZMappingTable_IMPL
240#define GET_MIMGMIPMappingTable_IMPL
241#define GET_MIMGBiasMappingTable_IMPL
242#define GET_MIMGOffsetMappingTable_IMPL
243#define GET_MIMGG16MappingTable_IMPL
244#define GET_MAIInstInfoTable_IMPL
245#include "AMDGPUGenSearchableTables.inc"
246
247int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
248                  unsigned VDataDwords, unsigned VAddrDwords) {
249  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
250                                             VDataDwords, VAddrDwords);
251  return Info ? Info->Opcode : -1;
252}
253
254const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
255  const MIMGInfo *Info = getMIMGInfo(Opc);
256  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
257}
258
259int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
260  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
261  const MIMGInfo *NewInfo =
262      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
263                          NewChannels, OrigInfo->VAddrDwords);
264  return NewInfo ? NewInfo->Opcode : -1;
265}
266
267unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
268                           const MIMGDimInfo *Dim, bool IsA16,
269                           bool IsG16Supported) {
270  unsigned AddrWords = BaseOpcode->NumExtraArgs;
271  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
272                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
273  if (IsA16)
274    AddrWords += divideCeil(AddrComponents, 2);
275  else
276    AddrWords += AddrComponents;
277
278  // Note: For subtargets that support A16 but not G16, enabling A16 also
279  // enables 16 bit gradients.
280  // For subtargets that support A16 (operand) and G16 (done with a different
281  // instruction encoding), they are independent.
282
283  if (BaseOpcode->Gradients) {
284    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
285      // There are two gradients per coordinate, we pack them separately.
286      // For the 3d case,
287      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
288      AddrWords += alignTo<2>(Dim->NumGradients / 2);
289    else
290      AddrWords += Dim->NumGradients;
291  }
292  return AddrWords;
293}
294
295struct MUBUFInfo {
296  uint16_t Opcode;
297  uint16_t BaseOpcode;
298  uint8_t elements;
299  bool has_vaddr;
300  bool has_srsrc;
301  bool has_soffset;
302  bool IsBufferInv;
303};
304
305struct MTBUFInfo {
306  uint16_t Opcode;
307  uint16_t BaseOpcode;
308  uint8_t elements;
309  bool has_vaddr;
310  bool has_srsrc;
311  bool has_soffset;
312};
313
314struct SMInfo {
315  uint16_t Opcode;
316  bool IsBuffer;
317};
318
319struct VOPInfo {
320  uint16_t Opcode;
321  bool IsSingle;
322};
323
324struct VOPC64DPPInfo {
325  uint16_t Opcode;
326};
327
328struct VOPDComponentInfo {
329  uint16_t BaseVOP;
330  uint16_t VOPDOp;
331  bool CanBeVOPDX;
332};
333
334struct VOPDInfo {
335  uint16_t Opcode;
336  uint16_t OpX;
337  uint16_t OpY;
338  uint16_t Subtarget;
339};
340
341struct VOPTrue16Info {
342  uint16_t Opcode;
343  bool IsTrue16;
344};
345
346#define GET_MTBUFInfoTable_DECL
347#define GET_MTBUFInfoTable_IMPL
348#define GET_MUBUFInfoTable_DECL
349#define GET_MUBUFInfoTable_IMPL
350#define GET_SMInfoTable_DECL
351#define GET_SMInfoTable_IMPL
352#define GET_VOP1InfoTable_DECL
353#define GET_VOP1InfoTable_IMPL
354#define GET_VOP2InfoTable_DECL
355#define GET_VOP2InfoTable_IMPL
356#define GET_VOP3InfoTable_DECL
357#define GET_VOP3InfoTable_IMPL
358#define GET_VOPC64DPPTable_DECL
359#define GET_VOPC64DPPTable_IMPL
360#define GET_VOPC64DPP8Table_DECL
361#define GET_VOPC64DPP8Table_IMPL
362#define GET_VOPDComponentTable_DECL
363#define GET_VOPDComponentTable_IMPL
364#define GET_VOPDPairs_DECL
365#define GET_VOPDPairs_IMPL
366#define GET_VOPTrue16Table_DECL
367#define GET_VOPTrue16Table_IMPL
368#define GET_WMMAOpcode2AddrMappingTable_DECL
369#define GET_WMMAOpcode2AddrMappingTable_IMPL
370#define GET_WMMAOpcode3AddrMappingTable_DECL
371#define GET_WMMAOpcode3AddrMappingTable_IMPL
372#include "AMDGPUGenSearchableTables.inc"
373
374int getMTBUFBaseOpcode(unsigned Opc) {
375  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
376  return Info ? Info->BaseOpcode : -1;
377}
378
379int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
380  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
381  return Info ? Info->Opcode : -1;
382}
383
384int getMTBUFElements(unsigned Opc) {
385  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
386  return Info ? Info->elements : 0;
387}
388
389bool getMTBUFHasVAddr(unsigned Opc) {
390  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
391  return Info ? Info->has_vaddr : false;
392}
393
394bool getMTBUFHasSrsrc(unsigned Opc) {
395  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
396  return Info ? Info->has_srsrc : false;
397}
398
399bool getMTBUFHasSoffset(unsigned Opc) {
400  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
401  return Info ? Info->has_soffset : false;
402}
403
404int getMUBUFBaseOpcode(unsigned Opc) {
405  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
406  return Info ? Info->BaseOpcode : -1;
407}
408
409int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
410  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
411  return Info ? Info->Opcode : -1;
412}
413
414int getMUBUFElements(unsigned Opc) {
415  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
416  return Info ? Info->elements : 0;
417}
418
419bool getMUBUFHasVAddr(unsigned Opc) {
420  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
421  return Info ? Info->has_vaddr : false;
422}
423
424bool getMUBUFHasSrsrc(unsigned Opc) {
425  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
426  return Info ? Info->has_srsrc : false;
427}
428
429bool getMUBUFHasSoffset(unsigned Opc) {
430  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
431  return Info ? Info->has_soffset : false;
432}
433
434bool getMUBUFIsBufferInv(unsigned Opc) {
435  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
436  return Info ? Info->IsBufferInv : false;
437}
438
439bool getSMEMIsBuffer(unsigned Opc) {
440  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
441  return Info ? Info->IsBuffer : false;
442}
443
444bool getVOP1IsSingle(unsigned Opc) {
445  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
446  return Info ? Info->IsSingle : false;
447}
448
449bool getVOP2IsSingle(unsigned Opc) {
450  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
451  return Info ? Info->IsSingle : false;
452}
453
454bool getVOP3IsSingle(unsigned Opc) {
455  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
456  return Info ? Info->IsSingle : false;
457}
458
459bool isVOPC64DPP(unsigned Opc) {
460  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
461}
462
463bool getMAIIsDGEMM(unsigned Opc) {
464  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
465  return Info ? Info->is_dgemm : false;
466}
467
468bool getMAIIsGFX940XDL(unsigned Opc) {
469  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
470  return Info ? Info->is_gfx940_xdl : false;
471}
472
473unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
474  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
475    return SIEncodingFamily::GFX12;
476  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
477    return SIEncodingFamily::GFX11;
478  llvm_unreachable("Subtarget generation does not support VOPD!");
479}
480
481CanBeVOPD getCanBeVOPD(unsigned Opc) {
482  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
483  if (Info)
484    return {Info->CanBeVOPDX, true};
485  else
486    return {false, false};
487}
488
489unsigned getVOPDOpcode(unsigned Opc) {
490  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
491  return Info ? Info->VOPDOp : ~0u;
492}
493
494bool isVOPD(unsigned Opc) {
495  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
496}
497
498bool isMAC(unsigned Opc) {
499  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
500         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
501         Opc == AMDGPU::V_MAC_F32_e64_vi ||
502         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
503         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
504         Opc == AMDGPU::V_MAC_F16_e64_vi ||
505         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
506         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
507         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
508         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
509         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
510         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
511         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
512         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
513         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
514         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
515         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
516         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
517         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
518         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
519}
520
521bool isPermlane16(unsigned Opc) {
522  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
523         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
524         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
525         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
526         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
527         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
528         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
529         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
530}
531
532bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
533  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
534         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
535         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
536         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
537         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
538         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
539         Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
540         Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
541}
542
543bool isGenericAtomic(unsigned Opc) {
544  return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
545         Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
546         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
547         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
548         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
549         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
550         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
551         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
552         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
553         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
554         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
555         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
556         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
557         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
558         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
559         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
560         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
561         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
562         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
563}
564
565bool isTrue16Inst(unsigned Opc) {
566  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
567  return Info ? Info->IsTrue16 : false;
568}
569
570unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
571  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
572  return Info ? Info->Opcode3Addr : ~0u;
573}
574
575unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
576  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
577  return Info ? Info->Opcode2Addr : ~0u;
578}
579
580// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
581// header files, so we need to wrap it in a function that takes unsigned
582// instead.
583int getMCOpcode(uint16_t Opcode, unsigned Gen) {
584  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
585}
586
587int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
588  const VOPDInfo *Info =
589      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
590  return Info ? Info->Opcode : -1;
591}
592
593std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
594  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
595  assert(Info);
596  auto OpX = getVOPDBaseFromComponent(Info->OpX);
597  auto OpY = getVOPDBaseFromComponent(Info->OpY);
598  assert(OpX && OpY);
599  return {OpX->BaseVOP, OpY->BaseVOP};
600}
601
602namespace VOPD {
603
604ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
605  assert(OpDesc.getNumDefs() == Component::DST_NUM);
606
607  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
608  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
609  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
610  assert(TiedIdx == -1 || TiedIdx == Component::DST);
611  HasSrc2Acc = TiedIdx != -1;
612
613  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
614  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
615
616  auto OperandsNum = OpDesc.getNumOperands();
617  unsigned CompOprIdx;
618  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
619    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
620      MandatoryLiteralIdx = CompOprIdx;
621      break;
622    }
623  }
624}
625
626unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
627  assert(CompOprIdx < Component::MAX_OPR_NUM);
628
629  if (CompOprIdx == Component::DST)
630    return getIndexOfDstInParsedOperands();
631
632  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
633  if (CompSrcIdx < getCompParsedSrcOperandsNum())
634    return getIndexOfSrcInParsedOperands(CompSrcIdx);
635
636  // The specified operand does not exist.
637  return 0;
638}
639
640std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
641    std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {
642
643  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
644  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
645
646  const unsigned CompOprNum =
647      SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
648  unsigned CompOprIdx;
649  for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
650    unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
651    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
652        ((OpXRegs[CompOprIdx] & BanksMasks) ==
653         (OpYRegs[CompOprIdx] & BanksMasks)))
654      return CompOprIdx;
655  }
656
657  return {};
658}
659
660// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
661// by the specified component. If an operand is unused
662// or is not a VGPR, the corresponding value is 0.
663//
664// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
665// for the specified component and MC operand. The callback must return 0
666// if the operand is not a register or not a VGPR.
667InstInfo::RegIndices InstInfo::getRegIndices(
668    unsigned CompIdx,
669    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
670  assert(CompIdx < COMPONENTS_NUM);
671
672  const auto &Comp = CompInfo[CompIdx];
673  InstInfo::RegIndices RegIndices;
674
675  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
676
677  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
678    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
679    RegIndices[CompOprIdx] =
680        Comp.hasRegSrcOperand(CompSrcIdx)
681            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
682            : 0;
683  }
684  return RegIndices;
685}
686
687} // namespace VOPD
688
689VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
690  return VOPD::InstInfo(OpX, OpY);
691}
692
693VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
694                               const MCInstrInfo *InstrInfo) {
695  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
696  const auto &OpXDesc = InstrInfo->get(OpX);
697  const auto &OpYDesc = InstrInfo->get(OpY);
698  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
699  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
700  return VOPD::InstInfo(OpXInfo, OpYInfo);
701}
702
703namespace IsaInfo {
704
705AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
706    : STI(STI), XnackSetting(TargetIDSetting::Any),
707      SramEccSetting(TargetIDSetting::Any) {
708  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
709    XnackSetting = TargetIDSetting::Unsupported;
710  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
711    SramEccSetting = TargetIDSetting::Unsupported;
712}
713
714void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
715  // Check if xnack or sramecc is explicitly enabled or disabled.  In the
716  // absence of the target features we assume we must generate code that can run
717  // in any environment.
718  SubtargetFeatures Features(FS);
719  std::optional<bool> XnackRequested;
720  std::optional<bool> SramEccRequested;
721
722  for (const std::string &Feature : Features.getFeatures()) {
723    if (Feature == "+xnack")
724      XnackRequested = true;
725    else if (Feature == "-xnack")
726      XnackRequested = false;
727    else if (Feature == "+sramecc")
728      SramEccRequested = true;
729    else if (Feature == "-sramecc")
730      SramEccRequested = false;
731  }
732
733  bool XnackSupported = isXnackSupported();
734  bool SramEccSupported = isSramEccSupported();
735
736  if (XnackRequested) {
737    if (XnackSupported) {
738      XnackSetting =
739          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
740    } else {
741      // If a specific xnack setting was requested and this GPU does not support
742      // xnack emit a warning. Setting will remain set to "Unsupported".
743      if (*XnackRequested) {
744        errs() << "warning: xnack 'On' was requested for a processor that does "
745                  "not support it!\n";
746      } else {
747        errs() << "warning: xnack 'Off' was requested for a processor that "
748                  "does not support it!\n";
749      }
750    }
751  }
752
753  if (SramEccRequested) {
754    if (SramEccSupported) {
755      SramEccSetting =
756          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
757    } else {
758      // If a specific sramecc setting was requested and this GPU does not
759      // support sramecc emit a warning. Setting will remain set to
760      // "Unsupported".
761      if (*SramEccRequested) {
762        errs() << "warning: sramecc 'On' was requested for a processor that "
763                  "does not support it!\n";
764      } else {
765        errs() << "warning: sramecc 'Off' was requested for a processor that "
766                  "does not support it!\n";
767      }
768    }
769  }
770}
771
772static TargetIDSetting
773getTargetIDSettingFromFeatureString(StringRef FeatureString) {
774  if (FeatureString.ends_with("-"))
775    return TargetIDSetting::Off;
776  if (FeatureString.ends_with("+"))
777    return TargetIDSetting::On;
778
779  llvm_unreachable("Malformed feature string");
780}
781
782void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
783  SmallVector<StringRef, 3> TargetIDSplit;
784  TargetID.split(TargetIDSplit, ':');
785
786  for (const auto &FeatureString : TargetIDSplit) {
787    if (FeatureString.starts_with("xnack"))
788      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
789    if (FeatureString.starts_with("sramecc"))
790      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
791  }
792}
793
794std::string AMDGPUTargetID::toString() const {
795  std::string StringRep;
796  raw_string_ostream StreamRep(StringRep);
797
798  auto TargetTriple = STI.getTargetTriple();
799  auto Version = getIsaVersion(STI.getCPU());
800
801  StreamRep << TargetTriple.getArchName() << '-'
802            << TargetTriple.getVendorName() << '-'
803            << TargetTriple.getOSName() << '-'
804            << TargetTriple.getEnvironmentName() << '-';
805
806  std::string Processor;
807  // TODO: Following else statement is present here because we used various
808  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
809  // Remove once all aliases are removed from GCNProcessors.td.
810  if (Version.Major >= 9)
811    Processor = STI.getCPU().str();
812  else
813    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
814                 Twine(Version.Stepping))
815                    .str();
816
817  std::string Features;
818  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
819    // sramecc.
820    if (getSramEccSetting() == TargetIDSetting::Off)
821      Features += ":sramecc-";
822    else if (getSramEccSetting() == TargetIDSetting::On)
823      Features += ":sramecc+";
824    // xnack.
825    if (getXnackSetting() == TargetIDSetting::Off)
826      Features += ":xnack-";
827    else if (getXnackSetting() == TargetIDSetting::On)
828      Features += ":xnack+";
829  }
830
831  StreamRep << Processor << Features;
832
833  StreamRep.flush();
834  return StringRep;
835}
836
837unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
838  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
839    return 16;
840  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
841    return 32;
842
843  return 64;
844}
845
846unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
847  unsigned BytesPerCU = 0;
848  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
849    BytesPerCU = 32768;
850  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
851    BytesPerCU = 65536;
852
853  // "Per CU" really means "per whatever functional block the waves of a
854  // workgroup must share". So the effective local memory size is doubled in
855  // WGP mode on gfx10.
856  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
857    BytesPerCU *= 2;
858
859  return BytesPerCU;
860}
861
862unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
863  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
864    return 32768;
865  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
866    return 65536;
867  return 0;
868}
869
870unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
871  // "Per CU" really means "per whatever functional block the waves of a
872  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
873  // two SIMDs.
874  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
875    return 2;
876  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
877  // two CUs, so a total of four SIMDs.
878  return 4;
879}
880
881unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
882                               unsigned FlatWorkGroupSize) {
883  assert(FlatWorkGroupSize != 0);
884  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
885    return 8;
886  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
887  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
888  if (N == 1) {
889    // Single-wave workgroups don't consume barrier resources.
890    return MaxWaves;
891  }
892
893  unsigned MaxBarriers = 16;
894  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
895    MaxBarriers = 32;
896
897  return std::min(MaxWaves / N, MaxBarriers);
898}
899
900unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
901  return 1;
902}
903
904unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
905  // FIXME: Need to take scratch memory into account.
906  if (isGFX90A(*STI))
907    return 8;
908  if (!isGFX10Plus(*STI))
909    return 10;
910  return hasGFX10_3Insts(*STI) ? 16 : 20;
911}
912
913unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
914                                   unsigned FlatWorkGroupSize) {
915  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
916                    getEUsPerCU(STI));
917}
918
919unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
920  return 1;
921}
922
923unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
924  // Some subtargets allow encoding 2048, but this isn't tested or supported.
925  return 1024;
926}
927
928unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
929                              unsigned FlatWorkGroupSize) {
930  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
931}
932
933unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
934  IsaVersion Version = getIsaVersion(STI->getCPU());
935  if (Version.Major >= 10)
936    return getAddressableNumSGPRs(STI);
937  if (Version.Major >= 8)
938    return 16;
939  return 8;
940}
941
942unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
943  return 8;
944}
945
946unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
947  IsaVersion Version = getIsaVersion(STI->getCPU());
948  if (Version.Major >= 8)
949    return 800;
950  return 512;
951}
952
953unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
954  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
955    return FIXED_NUM_SGPRS_FOR_INIT_BUG;
956
957  IsaVersion Version = getIsaVersion(STI->getCPU());
958  if (Version.Major >= 10)
959    return 106;
960  if (Version.Major >= 8)
961    return 102;
962  return 104;
963}
964
965unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
966  assert(WavesPerEU != 0);
967
968  IsaVersion Version = getIsaVersion(STI->getCPU());
969  if (Version.Major >= 10)
970    return 0;
971
972  if (WavesPerEU >= getMaxWavesPerEU(STI))
973    return 0;
974
975  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
976  if (STI->getFeatureBits().test(FeatureTrapHandler))
977    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
978  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
979  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
980}
981
982unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
983                        bool Addressable) {
984  assert(WavesPerEU != 0);
985
986  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
987  IsaVersion Version = getIsaVersion(STI->getCPU());
988  if (Version.Major >= 10)
989    return Addressable ? AddressableNumSGPRs : 108;
990  if (Version.Major >= 8 && !Addressable)
991    AddressableNumSGPRs = 112;
992  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
993  if (STI->getFeatureBits().test(FeatureTrapHandler))
994    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
995  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
996  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
997}
998
999unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1000                          bool FlatScrUsed, bool XNACKUsed) {
1001  unsigned ExtraSGPRs = 0;
1002  if (VCCUsed)
1003    ExtraSGPRs = 2;
1004
1005  IsaVersion Version = getIsaVersion(STI->getCPU());
1006  if (Version.Major >= 10)
1007    return ExtraSGPRs;
1008
1009  if (Version.Major < 8) {
1010    if (FlatScrUsed)
1011      ExtraSGPRs = 4;
1012  } else {
1013    if (XNACKUsed)
1014      ExtraSGPRs = 4;
1015
1016    if (FlatScrUsed ||
1017        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1018      ExtraSGPRs = 6;
1019  }
1020
1021  return ExtraSGPRs;
1022}
1023
1024unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1025                          bool FlatScrUsed) {
1026  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1027                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
1028}
1029
1030unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1031  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
1032  // SGPRBlocks is actual number of SGPR blocks minus 1.
1033  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
1034}
1035
1036unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
1037                             std::optional<bool> EnableWavefrontSize32) {
1038  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1039    return 8;
1040
1041  bool IsWave32 = EnableWavefrontSize32 ?
1042      *EnableWavefrontSize32 :
1043      STI->getFeatureBits().test(FeatureWavefrontSize32);
1044
1045  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
1046    return IsWave32 ? 24 : 12;
1047
1048  if (hasGFX10_3Insts(*STI))
1049    return IsWave32 ? 16 : 8;
1050
1051  return IsWave32 ? 8 : 4;
1052}
1053
1054unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
1055                                std::optional<bool> EnableWavefrontSize32) {
1056  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1057    return 8;
1058
1059  bool IsWave32 = EnableWavefrontSize32 ?
1060      *EnableWavefrontSize32 :
1061      STI->getFeatureBits().test(FeatureWavefrontSize32);
1062
1063  return IsWave32 ? 8 : 4;
1064}
1065
1066unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1067  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1068    return 512;
1069  if (!isGFX10Plus(*STI))
1070    return 256;
1071  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1072  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
1073    return IsWave32 ? 1536 : 768;
1074  return IsWave32 ? 1024 : 512;
1075}
1076
1077unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
1078  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1079    return 512;
1080  return 256;
1081}
1082
1083unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
1084                                      unsigned NumVGPRs) {
1085  unsigned MaxWaves = getMaxWavesPerEU(STI);
1086  unsigned Granule = getVGPRAllocGranule(STI);
1087  if (NumVGPRs < Granule)
1088    return MaxWaves;
1089  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1090  return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
1091}
1092
1093unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1094  assert(WavesPerEU != 0);
1095
1096  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1097  if (WavesPerEU >= MaxWavesPerEU)
1098    return 0;
1099
1100  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1101  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
1102  unsigned Granule = getVGPRAllocGranule(STI);
1103  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1104
1105  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1106    return 0;
1107
1108  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
1109  if (WavesPerEU < MinWavesPerEU)
1110    return getMinNumVGPRs(STI, MinWavesPerEU);
1111
1112  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1113  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1114  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1115}
1116
1117unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1118  assert(WavesPerEU != 0);
1119
1120  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1121                                   getVGPRAllocGranule(STI));
1122  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
1123  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1124}
1125
1126unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1127                          std::optional<bool> EnableWavefrontSize32) {
1128  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
1129                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
1130  // VGPRBlocks is actual number of VGPR blocks minus 1.
1131  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
1132}
1133
1134} // end namespace IsaInfo
1135
1136void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
1137                               const MCSubtargetInfo *STI) {
1138  IsaVersion Version = getIsaVersion(STI->getCPU());
1139
1140  memset(&Header, 0, sizeof(Header));
1141
1142  Header.amd_kernel_code_version_major = 1;
1143  Header.amd_kernel_code_version_minor = 2;
1144  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1145  Header.amd_machine_version_major = Version.Major;
1146  Header.amd_machine_version_minor = Version.Minor;
1147  Header.amd_machine_version_stepping = Version.Stepping;
1148  Header.kernel_code_entry_byte_offset = sizeof(Header);
1149  Header.wavefront_size = 6;
1150
1151  // If the code object does not support indirect functions, then the value must
1152  // be 0xffffffff.
1153  Header.call_convention = -1;
1154
1155  // These alignment values are specified in powers of two, so alignment =
1156  // 2^n.  The minimum alignment is 2^4 = 16.
1157  Header.kernarg_segment_alignment = 4;
1158  Header.group_segment_alignment = 4;
1159  Header.private_segment_alignment = 4;
1160
1161  if (Version.Major >= 10) {
1162    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1163      Header.wavefront_size = 5;
1164      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1165    }
1166    Header.compute_pgm_resource_registers |=
1167      S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1168      S_00B848_MEM_ORDERED(1);
1169  }
1170}
1171
1172amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
1173    const MCSubtargetInfo *STI) {
1174  IsaVersion Version = getIsaVersion(STI->getCPU());
1175
1176  amdhsa::kernel_descriptor_t KD;
1177  memset(&KD, 0, sizeof(KD));
1178
1179  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1180                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
1181                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
1182  if (Version.Major >= 12) {
1183    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1184                    amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
1185    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1186                    amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
1187  } else {
1188    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1189                    amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
1190    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1191                    amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
1192  }
1193  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
1194                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
1195  if (Version.Major >= 10) {
1196    AMDHSA_BITS_SET(KD.kernel_code_properties,
1197                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
1198                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
1199    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1200                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
1201                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
1202    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1203                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
1204  }
1205  if (AMDGPU::isGFX90A(*STI)) {
1206    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
1207                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1208                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
1209  }
1210  return KD;
1211}
1212
1213bool isGroupSegment(const GlobalValue *GV) {
1214  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1215}
1216
1217bool isGlobalSegment(const GlobalValue *GV) {
1218  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1219}
1220
1221bool isReadOnlySegment(const GlobalValue *GV) {
1222  unsigned AS = GV->getAddressSpace();
1223  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1224         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1225}
1226
1227bool shouldEmitConstantsToTextSection(const Triple &TT) {
1228  return TT.getArch() == Triple::r600;
1229}
1230
1231std::pair<unsigned, unsigned>
1232getIntegerPairAttribute(const Function &F, StringRef Name,
1233                        std::pair<unsigned, unsigned> Default,
1234                        bool OnlyFirstRequired) {
1235  Attribute A = F.getFnAttribute(Name);
1236  if (!A.isStringAttribute())
1237    return Default;
1238
1239  LLVMContext &Ctx = F.getContext();
1240  std::pair<unsigned, unsigned> Ints = Default;
1241  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1242  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1243    Ctx.emitError("can't parse first integer attribute " + Name);
1244    return Default;
1245  }
1246  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
1247    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1248      Ctx.emitError("can't parse second integer attribute " + Name);
1249      return Default;
1250    }
1251  }
1252
1253  return Ints;
1254}
1255
1256unsigned getVmcntBitMask(const IsaVersion &Version) {
1257  return (1 << (getVmcntBitWidthLo(Version.Major) +
1258                getVmcntBitWidthHi(Version.Major))) -
1259         1;
1260}
1261
1262unsigned getLoadcntBitMask(const IsaVersion &Version) {
1263  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1264}
1265
1266unsigned getSamplecntBitMask(const IsaVersion &Version) {
1267  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1268}
1269
1270unsigned getBvhcntBitMask(const IsaVersion &Version) {
1271  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1272}
1273
1274unsigned getExpcntBitMask(const IsaVersion &Version) {
1275  return (1 << getExpcntBitWidth(Version.Major)) - 1;
1276}
1277
1278unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1279  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1280}
1281
1282unsigned getDscntBitMask(const IsaVersion &Version) {
1283  return (1 << getDscntBitWidth(Version.Major)) - 1;
1284}
1285
1286unsigned getKmcntBitMask(const IsaVersion &Version) {
1287  return (1 << getKmcntBitWidth(Version.Major)) - 1;
1288}
1289
1290unsigned getStorecntBitMask(const IsaVersion &Version) {
1291  return (1 << getStorecntBitWidth(Version.Major)) - 1;
1292}
1293
1294unsigned getWaitcntBitMask(const IsaVersion &Version) {
1295  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1296                                getVmcntBitWidthLo(Version.Major));
1297  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1298                               getExpcntBitWidth(Version.Major));
1299  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1300                                getLgkmcntBitWidth(Version.Major));
1301  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1302                                getVmcntBitWidthHi(Version.Major));
1303  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1304}
1305
1306unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1307  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1308                                getVmcntBitWidthLo(Version.Major));
1309  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1310                                getVmcntBitWidthHi(Version.Major));
1311  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1312}
1313
1314unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1315  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1316                    getExpcntBitWidth(Version.Major));
1317}
1318
1319unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1320  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1321                    getLgkmcntBitWidth(Version.Major));
1322}
1323
1324void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1325                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
1326  Vmcnt = decodeVmcnt(Version, Waitcnt);
1327  Expcnt = decodeExpcnt(Version, Waitcnt);
1328  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1329}
1330
1331Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1332  Waitcnt Decoded;
1333  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
1334  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1335  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
1336  return Decoded;
1337}
1338
1339unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1340                     unsigned Vmcnt) {
1341  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1342                     getVmcntBitWidthLo(Version.Major));
1343  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1344                  getVmcntBitShiftHi(Version.Major),
1345                  getVmcntBitWidthHi(Version.Major));
1346}
1347
1348unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1349                      unsigned Expcnt) {
1350  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1351                  getExpcntBitWidth(Version.Major));
1352}
1353
1354unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1355                       unsigned Lgkmcnt) {
1356  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1357                  getLgkmcntBitWidth(Version.Major));
1358}
1359
1360unsigned encodeWaitcnt(const IsaVersion &Version,
1361                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
1362  unsigned Waitcnt = getWaitcntBitMask(Version);
1363  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1364  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1365  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1366  return Waitcnt;
1367}
1368
1369unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1370  return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
1371}
1372
1373static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1374                                        bool IsStore) {
1375  unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1376                              getDscntBitWidth(Version.Major));
1377  if (IsStore) {
1378    unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1379                                   getStorecntBitWidth(Version.Major));
1380    return Dscnt | Storecnt;
1381  } else {
1382    unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1383                                  getLoadcntBitWidth(Version.Major));
1384    return Dscnt | Loadcnt;
1385  }
1386}
1387
1388Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1389  Waitcnt Decoded;
1390  Decoded.LoadCnt =
1391      unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1392                 getLoadcntBitWidth(Version.Major));
1393  Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1394                             getDscntBitWidth(Version.Major));
1395  return Decoded;
1396}
1397
1398Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1399  Waitcnt Decoded;
1400  Decoded.StoreCnt =
1401      unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1402                 getStorecntBitWidth(Version.Major));
1403  Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1404                             getDscntBitWidth(Version.Major));
1405  return Decoded;
1406}
1407
1408static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1409                              unsigned Loadcnt) {
1410  return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1411                  getLoadcntBitWidth(Version.Major));
1412}
1413
1414static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1415                               unsigned Storecnt) {
1416  return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1417                  getStorecntBitWidth(Version.Major));
1418}
1419
1420static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1421                            unsigned Dscnt) {
1422  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1423                  getDscntBitWidth(Version.Major));
1424}
1425
1426static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1427                                   unsigned Dscnt) {
1428  unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1429  Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1430  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1431  return Waitcnt;
1432}
1433
1434unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1435  return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
1436}
1437
1438static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1439                                    unsigned Storecnt, unsigned Dscnt) {
1440  unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1441  Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1442  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1443  return Waitcnt;
1444}
1445
1446unsigned encodeStorecntDscnt(const IsaVersion &Version,
1447                             const Waitcnt &Decoded) {
1448  return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
1449}
1450
1451//===----------------------------------------------------------------------===//
1452// Custom Operands.
1453//
1454// A table of custom operands shall describe "primary" operand names
1455// first followed by aliases if any. It is not required but recommended
1456// to arrange operands so that operand encoding match operand position
1457// in the table. This will make disassembly a bit more efficient.
1458// Unused slots in the table shall have an empty name.
1459//
1460//===----------------------------------------------------------------------===//
1461
1462template <class T>
1463static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
1464                       T Context) {
1465  return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
1466         (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
1467}
1468
1469template <class T>
1470static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
1471                     const CustomOperand<T> OpInfo[], int OpInfoSize,
1472                     T Context) {
1473  int InvalidIdx = OPR_ID_UNKNOWN;
1474  for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
1475    if (Test(OpInfo[Idx])) {
1476      if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
1477        return Idx;
1478      InvalidIdx = OPR_ID_UNSUPPORTED;
1479    }
1480  }
1481  return InvalidIdx;
1482}
1483
1484template <class T>
1485static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
1486                     int OpInfoSize, T Context) {
1487  auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
1488  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1489}
1490
1491template <class T>
1492static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
1493                     T Context, bool QuickCheck = true) {
1494  auto Test = [=](const CustomOperand<T> &Op) {
1495    return Op.Encoding == Id && !Op.Name.empty();
1496  };
1497  // This is an optimization that should work in most cases.
1498  // As a side effect, it may cause selection of an alias
1499  // instead of a primary operand name in case of sparse tables.
1500  if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
1501      OpInfo[Id].Encoding == Id) {
1502    return Id;
1503  }
1504  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1505}
1506
1507//===----------------------------------------------------------------------===//
1508// Custom Operand Values
1509//===----------------------------------------------------------------------===//
1510
1511static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1512                                                int Size,
1513                                                const MCSubtargetInfo &STI) {
1514  unsigned Enc = 0;
1515  for (int Idx = 0; Idx < Size; ++Idx) {
1516    const auto &Op = Opr[Idx];
1517    if (Op.isSupported(STI))
1518      Enc |= Op.encode(Op.Default);
1519  }
1520  return Enc;
1521}
1522
1523static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1524                                            int Size, unsigned Code,
1525                                            bool &HasNonDefaultVal,
1526                                            const MCSubtargetInfo &STI) {
1527  unsigned UsedOprMask = 0;
1528  HasNonDefaultVal = false;
1529  for (int Idx = 0; Idx < Size; ++Idx) {
1530    const auto &Op = Opr[Idx];
1531    if (!Op.isSupported(STI))
1532      continue;
1533    UsedOprMask |= Op.getMask();
1534    unsigned Val = Op.decode(Code);
1535    if (!Op.isValid(Val))
1536      return false;
1537    HasNonDefaultVal |= (Val != Op.Default);
1538  }
1539  return (Code & ~UsedOprMask) == 0;
1540}
1541
1542static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1543                                unsigned Code, int &Idx, StringRef &Name,
1544                                unsigned &Val, bool &IsDefault,
1545                                const MCSubtargetInfo &STI) {
1546  while (Idx < Size) {
1547    const auto &Op = Opr[Idx++];
1548    if (Op.isSupported(STI)) {
1549      Name = Op.Name;
1550      Val = Op.decode(Code);
1551      IsDefault = (Val == Op.Default);
1552      return true;
1553    }
1554  }
1555
1556  return false;
1557}
1558
1559static int encodeCustomOperandVal(const CustomOperandVal &Op,
1560                                  int64_t InputVal) {
1561  if (InputVal < 0 || InputVal > Op.Max)
1562    return OPR_VAL_INVALID;
1563  return Op.encode(InputVal);
1564}
1565
1566static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1567                               const StringRef Name, int64_t InputVal,
1568                               unsigned &UsedOprMask,
1569                               const MCSubtargetInfo &STI) {
1570  int InvalidId = OPR_ID_UNKNOWN;
1571  for (int Idx = 0; Idx < Size; ++Idx) {
1572    const auto &Op = Opr[Idx];
1573    if (Op.Name == Name) {
1574      if (!Op.isSupported(STI)) {
1575        InvalidId = OPR_ID_UNSUPPORTED;
1576        continue;
1577      }
1578      auto OprMask = Op.getMask();
1579      if (OprMask & UsedOprMask)
1580        return OPR_ID_DUPLICATE;
1581      UsedOprMask |= OprMask;
1582      return encodeCustomOperandVal(Op, InputVal);
1583    }
1584  }
1585  return InvalidId;
1586}
1587
1588//===----------------------------------------------------------------------===//
1589// DepCtr
1590//===----------------------------------------------------------------------===//
1591
1592namespace DepCtr {
1593
1594int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
1595  static int Default = -1;
1596  if (Default == -1)
1597    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
1598  return Default;
1599}
1600
1601bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1602                              const MCSubtargetInfo &STI) {
1603  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
1604                                         HasNonDefaultVal, STI);
1605}
1606
1607bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1608                  bool &IsDefault, const MCSubtargetInfo &STI) {
1609  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
1610                             IsDefault, STI);
1611}
1612
1613int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1614                 const MCSubtargetInfo &STI) {
1615  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
1616                             STI);
1617}
1618
1619unsigned decodeFieldVmVsrc(unsigned Encoded) {
1620  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1621}
1622
1623unsigned decodeFieldVaVdst(unsigned Encoded) {
1624  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1625}
1626
1627unsigned decodeFieldSaSdst(unsigned Encoded) {
1628  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1629}
1630
1631unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
1632  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1633}
1634
1635unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
1636  return encodeFieldVmVsrc(0xffff, VmVsrc);
1637}
1638
1639unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
1640  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1641}
1642
1643unsigned encodeFieldVaVdst(unsigned VaVdst) {
1644  return encodeFieldVaVdst(0xffff, VaVdst);
1645}
1646
1647unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
1648  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1649}
1650
1651unsigned encodeFieldSaSdst(unsigned SaSdst) {
1652  return encodeFieldSaSdst(0xffff, SaSdst);
1653}
1654
1655} // namespace DepCtr
1656
1657//===----------------------------------------------------------------------===//
1658// hwreg
1659//===----------------------------------------------------------------------===//
1660
1661namespace Hwreg {
1662
1663int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
1664  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
1665  return (Idx < 0) ? Idx : Opr[Idx].Encoding;
1666}
1667
1668bool isValidHwreg(int64_t Id) {
1669  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
1670}
1671
1672bool isValidHwregOffset(int64_t Offset) {
1673  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
1674}
1675
1676bool isValidHwregWidth(int64_t Width) {
1677  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
1678}
1679
1680uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
1681  return (Id << ID_SHIFT_) |
1682         (Offset << OFFSET_SHIFT_) |
1683         ((Width - 1) << WIDTH_M1_SHIFT_);
1684}
1685
1686StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
1687  int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
1688  return (Idx < 0) ? "" : Opr[Idx].Name;
1689}
1690
1691void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
1692  Id = (Val & ID_MASK_) >> ID_SHIFT_;
1693  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
1694  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
1695}
1696
1697} // namespace Hwreg
1698
1699//===----------------------------------------------------------------------===//
1700// exp tgt
1701//===----------------------------------------------------------------------===//
1702
1703namespace Exp {
1704
1705struct ExpTgt {
1706  StringLiteral Name;
1707  unsigned Tgt;
1708  unsigned MaxIndex;
1709};
1710
1711static constexpr ExpTgt ExpTgtInfo[] = {
1712  {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
1713  {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
1714  {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
1715  {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
1716  {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
1717  {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
1718  {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
1719};
1720
1721bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1722  for (const ExpTgt &Val : ExpTgtInfo) {
1723    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1724      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1725      Name = Val.Name;
1726      return true;
1727    }
1728  }
1729  return false;
1730}
1731
1732unsigned getTgtId(const StringRef Name) {
1733
1734  for (const ExpTgt &Val : ExpTgtInfo) {
1735    if (Val.MaxIndex == 0 && Name == Val.Name)
1736      return Val.Tgt;
1737
1738    if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
1739      StringRef Suffix = Name.drop_front(Val.Name.size());
1740
1741      unsigned Id;
1742      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
1743        return ET_INVALID;
1744
1745      // Disable leading zeroes
1746      if (Suffix.size() > 1 && Suffix[0] == '0')
1747        return ET_INVALID;
1748
1749      return Val.Tgt + Id;
1750    }
1751  }
1752  return ET_INVALID;
1753}
1754
1755bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1756  switch (Id) {
1757  case ET_NULL:
1758    return !isGFX11Plus(STI);
1759  case ET_POS4:
1760  case ET_PRIM:
1761    return isGFX10Plus(STI);
1762  case ET_DUAL_SRC_BLEND0:
1763  case ET_DUAL_SRC_BLEND1:
1764    return isGFX11Plus(STI);
1765  default:
1766    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
1767      return !isGFX11Plus(STI);
1768    return true;
1769  }
1770}
1771
1772} // namespace Exp
1773
1774//===----------------------------------------------------------------------===//
1775// MTBUF Format
1776//===----------------------------------------------------------------------===//
1777
1778namespace MTBUFFormat {
1779
1780int64_t getDfmt(const StringRef Name) {
1781  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1782    if (Name == DfmtSymbolic[Id])
1783      return Id;
1784  }
1785  return DFMT_UNDEF;
1786}
1787
1788StringRef getDfmtName(unsigned Id) {
1789  assert(Id <= DFMT_MAX);
1790  return DfmtSymbolic[Id];
1791}
1792
1793static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
1794  if (isSI(STI) || isCI(STI))
1795    return NfmtSymbolicSICI;
1796  if (isVI(STI) || isGFX9(STI))
1797    return NfmtSymbolicVI;
1798  return NfmtSymbolicGFX10;
1799}
1800
1801int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1802  auto lookupTable = getNfmtLookupTable(STI);
1803  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1804    if (Name == lookupTable[Id])
1805      return Id;
1806  }
1807  return NFMT_UNDEF;
1808}
1809
1810StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
1811  assert(Id <= NFMT_MAX);
1812  return getNfmtLookupTable(STI)[Id];
1813}
1814
1815bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1816  unsigned Dfmt;
1817  unsigned Nfmt;
1818  decodeDfmtNfmt(Id, Dfmt, Nfmt);
1819  return isValidNfmt(Nfmt, STI);
1820}
1821
1822bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1823  return !getNfmtName(Id, STI).empty();
1824}
1825
1826int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
1827  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
1828}
1829
1830void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
1831  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
1832  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
1833}
1834
1835int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
1836  if (isGFX11Plus(STI)) {
1837    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1838      if (Name == UfmtSymbolicGFX11[Id])
1839        return Id;
1840    }
1841  } else {
1842    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1843      if (Name == UfmtSymbolicGFX10[Id])
1844        return Id;
1845    }
1846  }
1847  return UFMT_UNDEF;
1848}
1849
1850StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
1851  if(isValidUnifiedFormat(Id, STI))
1852    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1853  return "";
1854}
1855
1856bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1857  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1858}
1859
1860int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1861                             const MCSubtargetInfo &STI) {
1862  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1863  if (isGFX11Plus(STI)) {
1864    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1865      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1866        return Id;
1867    }
1868  } else {
1869    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1870      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1871        return Id;
1872    }
1873  }
1874  return UFMT_UNDEF;
1875}
1876
1877bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1878  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1879}
1880
1881unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1882  if (isGFX10Plus(STI))
1883    return UFMT_DEFAULT;
1884  return DFMT_NFMT_DEFAULT;
1885}
1886
1887} // namespace MTBUFFormat
1888
1889//===----------------------------------------------------------------------===//
1890// SendMsg
1891//===----------------------------------------------------------------------===//
1892
1893namespace SendMsg {
1894
1895static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1896  return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1897}
1898
1899int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
1900  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
1901  return (Idx < 0) ? Idx : Msg[Idx].Encoding;
1902}
1903
1904bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1905  return (MsgId & ~(getMsgIdMask(STI))) == 0;
1906}
1907
1908StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
1909  int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
1910  return (Idx < 0) ? "" : Msg[Idx].Name;
1911}
1912
1913int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
1914  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
1915  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
1916  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
1917  for (int i = F; i < L; ++i) {
1918    if (Name == S[i]) {
1919      return i;
1920    }
1921  }
1922  return OP_UNKNOWN_;
1923}
1924
1925bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1926                  bool Strict) {
1927  assert(isValidMsgId(MsgId, STI));
1928
1929  if (!Strict)
1930    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1931
1932  if (MsgId == ID_SYSMSG)
1933    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
1934  if (!isGFX11Plus(STI)) {
1935    switch (MsgId) {
1936    case ID_GS_PreGFX11:
1937      return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
1938    case ID_GS_DONE_PreGFX11:
1939      return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
1940    }
1941  }
1942  return OpId == OP_NONE_;
1943}
1944
1945StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
1946                       const MCSubtargetInfo &STI) {
1947  assert(msgRequiresOp(MsgId, STI));
1948  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
1949}
1950
1951bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1952                      const MCSubtargetInfo &STI, bool Strict) {
1953  assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1954
1955  if (!Strict)
1956    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1957
1958  if (!isGFX11Plus(STI)) {
1959    switch (MsgId) {
1960    case ID_GS_PreGFX11:
1961      return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
1962    case ID_GS_DONE_PreGFX11:
1963      return (OpId == OP_GS_NOP) ?
1964          (StreamId == STREAM_ID_NONE_) :
1965          (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1966    }
1967  }
1968  return StreamId == STREAM_ID_NONE_;
1969}
1970
1971bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
1972  return MsgId == ID_SYSMSG ||
1973      (!isGFX11Plus(STI) &&
1974       (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
1975}
1976
1977bool msgSupportsStream(int64_t MsgId, int64_t OpId,
1978                       const MCSubtargetInfo &STI) {
1979  return !isGFX11Plus(STI) &&
1980      (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
1981      OpId != OP_GS_NOP;
1982}
1983
1984void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1985               uint16_t &StreamId, const MCSubtargetInfo &STI) {
1986  MsgId = Val & getMsgIdMask(STI);
1987  if (isGFX11Plus(STI)) {
1988    OpId = 0;
1989    StreamId = 0;
1990  } else {
1991    OpId = (Val & OP_MASK_) >> OP_SHIFT_;
1992    StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
1993  }
1994}
1995
1996uint64_t encodeMsg(uint64_t MsgId,
1997                   uint64_t OpId,
1998                   uint64_t StreamId) {
1999  return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2000}
2001
2002} // namespace SendMsg
2003
2004//===----------------------------------------------------------------------===//
2005//
2006//===----------------------------------------------------------------------===//
2007
2008unsigned getInitialPSInputAddr(const Function &F) {
2009  return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2010}
2011
2012bool getHasColorExport(const Function &F) {
2013  // As a safe default always respond as if PS has color exports.
2014  return F.getFnAttributeAsParsedInteger(
2015             "amdgpu-color-export",
2016             F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2017}
2018
2019bool getHasDepthExport(const Function &F) {
2020  return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2021}
2022
2023bool isShader(CallingConv::ID cc) {
2024  switch(cc) {
2025    case CallingConv::AMDGPU_VS:
2026    case CallingConv::AMDGPU_LS:
2027    case CallingConv::AMDGPU_HS:
2028    case CallingConv::AMDGPU_ES:
2029    case CallingConv::AMDGPU_GS:
2030    case CallingConv::AMDGPU_PS:
2031    case CallingConv::AMDGPU_CS_Chain:
2032    case CallingConv::AMDGPU_CS_ChainPreserve:
2033    case CallingConv::AMDGPU_CS:
2034      return true;
2035    default:
2036      return false;
2037  }
2038}
2039
2040bool isGraphics(CallingConv::ID cc) {
2041  return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
2042}
2043
2044bool isCompute(CallingConv::ID cc) {
2045  return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
2046}
2047
2048bool isEntryFunctionCC(CallingConv::ID CC) {
2049  switch (CC) {
2050  case CallingConv::AMDGPU_KERNEL:
2051  case CallingConv::SPIR_KERNEL:
2052  case CallingConv::AMDGPU_VS:
2053  case CallingConv::AMDGPU_GS:
2054  case CallingConv::AMDGPU_PS:
2055  case CallingConv::AMDGPU_CS:
2056  case CallingConv::AMDGPU_ES:
2057  case CallingConv::AMDGPU_HS:
2058  case CallingConv::AMDGPU_LS:
2059    return true;
2060  default:
2061    return false;
2062  }
2063}
2064
2065bool isModuleEntryFunctionCC(CallingConv::ID CC) {
2066  switch (CC) {
2067  case CallingConv::AMDGPU_Gfx:
2068    return true;
2069  default:
2070    return isEntryFunctionCC(CC) || isChainCC(CC);
2071  }
2072}
2073
2074bool isChainCC(CallingConv::ID CC) {
2075  switch (CC) {
2076  case CallingConv::AMDGPU_CS_Chain:
2077  case CallingConv::AMDGPU_CS_ChainPreserve:
2078    return true;
2079  default:
2080    return false;
2081  }
2082}
2083
2084bool isKernelCC(const Function *Func) {
2085  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2086}
2087
2088bool hasXNACK(const MCSubtargetInfo &STI) {
2089  return STI.hasFeature(AMDGPU::FeatureXNACK);
2090}
2091
2092bool hasSRAMECC(const MCSubtargetInfo &STI) {
2093  return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2094}
2095
2096bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2097  return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
2098}
2099
2100bool hasA16(const MCSubtargetInfo &STI) {
2101  return STI.hasFeature(AMDGPU::FeatureA16);
2102}
2103
2104bool hasG16(const MCSubtargetInfo &STI) {
2105  return STI.hasFeature(AMDGPU::FeatureG16);
2106}
2107
2108bool hasPackedD16(const MCSubtargetInfo &STI) {
2109  return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2110         !isSI(STI);
2111}
2112
2113bool hasGDS(const MCSubtargetInfo &STI) {
2114  return STI.hasFeature(AMDGPU::FeatureGDS);
2115}
2116
2117unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2118  auto Version = getIsaVersion(STI.getCPU());
2119  if (Version.Major == 10)
2120    return Version.Minor >= 3 ? 13 : 5;
2121  if (Version.Major == 11)
2122    return 5;
2123  if (Version.Major >= 12)
2124    return HasSampler ? 4 : 5;
2125  return 0;
2126}
2127
2128unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2129
2130bool isSI(const MCSubtargetInfo &STI) {
2131  return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2132}
2133
2134bool isCI(const MCSubtargetInfo &STI) {
2135  return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2136}
2137
2138bool isVI(const MCSubtargetInfo &STI) {
2139  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2140}
2141
2142bool isGFX9(const MCSubtargetInfo &STI) {
2143  return STI.hasFeature(AMDGPU::FeatureGFX9);
2144}
2145
2146bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2147  return isGFX9(STI) || isGFX10(STI);
2148}
2149
2150bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2151  return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2152}
2153
2154bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2155  return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2156}
2157
2158bool isGFX8Plus(const MCSubtargetInfo &STI) {
2159  return isVI(STI) || isGFX9Plus(STI);
2160}
2161
2162bool isGFX9Plus(const MCSubtargetInfo &STI) {
2163  return isGFX9(STI) || isGFX10Plus(STI);
2164}
2165
2166bool isGFX10(const MCSubtargetInfo &STI) {
2167  return STI.hasFeature(AMDGPU::FeatureGFX10);
2168}
2169
2170bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2171  return isGFX10(STI) || isGFX11(STI);
2172}
2173
2174bool isGFX10Plus(const MCSubtargetInfo &STI) {
2175  return isGFX10(STI) || isGFX11Plus(STI);
2176}
2177
2178bool isGFX11(const MCSubtargetInfo &STI) {
2179  return STI.hasFeature(AMDGPU::FeatureGFX11);
2180}
2181
2182bool isGFX11Plus(const MCSubtargetInfo &STI) {
2183  return isGFX11(STI) || isGFX12Plus(STI);
2184}
2185
2186bool isGFX12(const MCSubtargetInfo &STI) {
2187  return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2188}
2189
2190bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2191
2192bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2193
2194bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2195  return !isGFX11Plus(STI);
2196}
2197
2198bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2199  return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2200}
2201
2202bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2203  return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2204}
2205
2206bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2207  return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2208}
2209
2210bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2211  return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2212}
2213
2214bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2215  return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2216}
2217
2218bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2219  return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2220}
2221
2222bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2223  return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2224}
2225
2226bool isGFX90A(const MCSubtargetInfo &STI) {
2227  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2228}
2229
2230bool isGFX940(const MCSubtargetInfo &STI) {
2231  return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2232}
2233
2234bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2235  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2236}
2237
2238bool hasMAIInsts(const MCSubtargetInfo &STI) {
2239  return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2240}
2241
2242bool hasVOPD(const MCSubtargetInfo &STI) {
2243  return STI.hasFeature(AMDGPU::FeatureVOPD);
2244}
2245
2246bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2247  return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2248}
2249
2250unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2251  return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2252}
2253
2254int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2255                         int32_t ArgNumVGPR) {
2256  if (has90AInsts && ArgNumAGPR)
2257    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2258  return std::max(ArgNumVGPR, ArgNumAGPR);
2259}
2260
2261bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2262  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2263  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2264  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2265    Reg == AMDGPU::SCC;
2266}
2267
2268bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2269  return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2270}
2271
2272#define MAP_REG2REG \
2273  using namespace AMDGPU; \
2274  switch(Reg) { \
2275  default: return Reg; \
2276  CASE_CI_VI(FLAT_SCR) \
2277  CASE_CI_VI(FLAT_SCR_LO) \
2278  CASE_CI_VI(FLAT_SCR_HI) \
2279  CASE_VI_GFX9PLUS(TTMP0) \
2280  CASE_VI_GFX9PLUS(TTMP1) \
2281  CASE_VI_GFX9PLUS(TTMP2) \
2282  CASE_VI_GFX9PLUS(TTMP3) \
2283  CASE_VI_GFX9PLUS(TTMP4) \
2284  CASE_VI_GFX9PLUS(TTMP5) \
2285  CASE_VI_GFX9PLUS(TTMP6) \
2286  CASE_VI_GFX9PLUS(TTMP7) \
2287  CASE_VI_GFX9PLUS(TTMP8) \
2288  CASE_VI_GFX9PLUS(TTMP9) \
2289  CASE_VI_GFX9PLUS(TTMP10) \
2290  CASE_VI_GFX9PLUS(TTMP11) \
2291  CASE_VI_GFX9PLUS(TTMP12) \
2292  CASE_VI_GFX9PLUS(TTMP13) \
2293  CASE_VI_GFX9PLUS(TTMP14) \
2294  CASE_VI_GFX9PLUS(TTMP15) \
2295  CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2296  CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2297  CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2298  CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2299  CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2300  CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2301  CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2302  CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2303  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2304  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2305  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2306  CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2307  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2308  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2309  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2310  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2311  CASE_GFXPRE11_GFX11PLUS(M0) \
2312  CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2313  CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2314  }
2315
2316#define CASE_CI_VI(node) \
2317  assert(!isSI(STI)); \
2318  case node: return isCI(STI) ? node##_ci : node##_vi;
2319
2320#define CASE_VI_GFX9PLUS(node) \
2321  case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2322
2323#define CASE_GFXPRE11_GFX11PLUS(node) \
2324  case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2325
2326#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2327  case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2328
2329unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2330  if (STI.getTargetTriple().getArch() == Triple::r600)
2331    return Reg;
2332  MAP_REG2REG
2333}
2334
2335#undef CASE_CI_VI
2336#undef CASE_VI_GFX9PLUS
2337#undef CASE_GFXPRE11_GFX11PLUS
2338#undef CASE_GFXPRE11_GFX11PLUS_TO
2339
2340#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
2341#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2342#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2343#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2344
2345unsigned mc2PseudoReg(unsigned Reg) {
2346  MAP_REG2REG
2347}
2348
2349bool isInlineValue(unsigned Reg) {
2350  switch (Reg) {
2351  case AMDGPU::SRC_SHARED_BASE_LO:
2352  case AMDGPU::SRC_SHARED_BASE:
2353  case AMDGPU::SRC_SHARED_LIMIT_LO:
2354  case AMDGPU::SRC_SHARED_LIMIT:
2355  case AMDGPU::SRC_PRIVATE_BASE_LO:
2356  case AMDGPU::SRC_PRIVATE_BASE:
2357  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2358  case AMDGPU::SRC_PRIVATE_LIMIT:
2359  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2360    return true;
2361  case AMDGPU::SRC_VCCZ:
2362  case AMDGPU::SRC_EXECZ:
2363  case AMDGPU::SRC_SCC:
2364    return true;
2365  case AMDGPU::SGPR_NULL:
2366    return true;
2367  default:
2368    return false;
2369  }
2370}
2371
2372#undef CASE_CI_VI
2373#undef CASE_VI_GFX9PLUS
2374#undef CASE_GFXPRE11_GFX11PLUS
2375#undef CASE_GFXPRE11_GFX11PLUS_TO
2376#undef MAP_REG2REG
2377
2378bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2379  assert(OpNo < Desc.NumOperands);
2380  unsigned OpType = Desc.operands()[OpNo].OperandType;
2381  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2382         OpType <= AMDGPU::OPERAND_SRC_LAST;
2383}
2384
2385bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2386  assert(OpNo < Desc.NumOperands);
2387  unsigned OpType = Desc.operands()[OpNo].OperandType;
2388  return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2389         OpType <= AMDGPU::OPERAND_KIMM_LAST;
2390}
2391
2392bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2393  assert(OpNo < Desc.NumOperands);
2394  unsigned OpType = Desc.operands()[OpNo].OperandType;
2395  switch (OpType) {
2396  case AMDGPU::OPERAND_REG_IMM_FP32:
2397  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2398  case AMDGPU::OPERAND_REG_IMM_FP64:
2399  case AMDGPU::OPERAND_REG_IMM_FP16:
2400  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2401  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2402  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2403  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2404  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2405  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2406  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2407  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2408  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2409  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2410  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2411  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2412    return true;
2413  default:
2414    return false;
2415  }
2416}
2417
2418bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2419  assert(OpNo < Desc.NumOperands);
2420  unsigned OpType = Desc.operands()[OpNo].OperandType;
2421  return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2422          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2423         (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2424          OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2425}
2426
2427// Avoid using MCRegisterClass::getSize, since that function will go away
2428// (move from MC* level to Target* level). Return size in bits.
2429unsigned getRegBitWidth(unsigned RCID) {
2430  switch (RCID) {
2431  case AMDGPU::SGPR_LO16RegClassID:
2432  case AMDGPU::AGPR_LO16RegClassID:
2433    return 16;
2434  case AMDGPU::SGPR_32RegClassID:
2435  case AMDGPU::VGPR_32RegClassID:
2436  case AMDGPU::VRegOrLds_32RegClassID:
2437  case AMDGPU::AGPR_32RegClassID:
2438  case AMDGPU::VS_32RegClassID:
2439  case AMDGPU::AV_32RegClassID:
2440  case AMDGPU::SReg_32RegClassID:
2441  case AMDGPU::SReg_32_XM0RegClassID:
2442  case AMDGPU::SRegOrLds_32RegClassID:
2443    return 32;
2444  case AMDGPU::SGPR_64RegClassID:
2445  case AMDGPU::VS_64RegClassID:
2446  case AMDGPU::SReg_64RegClassID:
2447  case AMDGPU::VReg_64RegClassID:
2448  case AMDGPU::AReg_64RegClassID:
2449  case AMDGPU::SReg_64_XEXECRegClassID:
2450  case AMDGPU::VReg_64_Align2RegClassID:
2451  case AMDGPU::AReg_64_Align2RegClassID:
2452  case AMDGPU::AV_64RegClassID:
2453  case AMDGPU::AV_64_Align2RegClassID:
2454    return 64;
2455  case AMDGPU::SGPR_96RegClassID:
2456  case AMDGPU::SReg_96RegClassID:
2457  case AMDGPU::VReg_96RegClassID:
2458  case AMDGPU::AReg_96RegClassID:
2459  case AMDGPU::VReg_96_Align2RegClassID:
2460  case AMDGPU::AReg_96_Align2RegClassID:
2461  case AMDGPU::AV_96RegClassID:
2462  case AMDGPU::AV_96_Align2RegClassID:
2463    return 96;
2464  case AMDGPU::SGPR_128RegClassID:
2465  case AMDGPU::SReg_128RegClassID:
2466  case AMDGPU::VReg_128RegClassID:
2467  case AMDGPU::AReg_128RegClassID:
2468  case AMDGPU::VReg_128_Align2RegClassID:
2469  case AMDGPU::AReg_128_Align2RegClassID:
2470  case AMDGPU::AV_128RegClassID:
2471  case AMDGPU::AV_128_Align2RegClassID:
2472    return 128;
2473  case AMDGPU::SGPR_160RegClassID:
2474  case AMDGPU::SReg_160RegClassID:
2475  case AMDGPU::VReg_160RegClassID:
2476  case AMDGPU::AReg_160RegClassID:
2477  case AMDGPU::VReg_160_Align2RegClassID:
2478  case AMDGPU::AReg_160_Align2RegClassID:
2479  case AMDGPU::AV_160RegClassID:
2480  case AMDGPU::AV_160_Align2RegClassID:
2481    return 160;
2482  case AMDGPU::SGPR_192RegClassID:
2483  case AMDGPU::SReg_192RegClassID:
2484  case AMDGPU::VReg_192RegClassID:
2485  case AMDGPU::AReg_192RegClassID:
2486  case AMDGPU::VReg_192_Align2RegClassID:
2487  case AMDGPU::AReg_192_Align2RegClassID:
2488  case AMDGPU::AV_192RegClassID:
2489  case AMDGPU::AV_192_Align2RegClassID:
2490    return 192;
2491  case AMDGPU::SGPR_224RegClassID:
2492  case AMDGPU::SReg_224RegClassID:
2493  case AMDGPU::VReg_224RegClassID:
2494  case AMDGPU::AReg_224RegClassID:
2495  case AMDGPU::VReg_224_Align2RegClassID:
2496  case AMDGPU::AReg_224_Align2RegClassID:
2497  case AMDGPU::AV_224RegClassID:
2498  case AMDGPU::AV_224_Align2RegClassID:
2499    return 224;
2500  case AMDGPU::SGPR_256RegClassID:
2501  case AMDGPU::SReg_256RegClassID:
2502  case AMDGPU::VReg_256RegClassID:
2503  case AMDGPU::AReg_256RegClassID:
2504  case AMDGPU::VReg_256_Align2RegClassID:
2505  case AMDGPU::AReg_256_Align2RegClassID:
2506  case AMDGPU::AV_256RegClassID:
2507  case AMDGPU::AV_256_Align2RegClassID:
2508    return 256;
2509  case AMDGPU::SGPR_288RegClassID:
2510  case AMDGPU::SReg_288RegClassID:
2511  case AMDGPU::VReg_288RegClassID:
2512  case AMDGPU::AReg_288RegClassID:
2513  case AMDGPU::VReg_288_Align2RegClassID:
2514  case AMDGPU::AReg_288_Align2RegClassID:
2515  case AMDGPU::AV_288RegClassID:
2516  case AMDGPU::AV_288_Align2RegClassID:
2517    return 288;
2518  case AMDGPU::SGPR_320RegClassID:
2519  case AMDGPU::SReg_320RegClassID:
2520  case AMDGPU::VReg_320RegClassID:
2521  case AMDGPU::AReg_320RegClassID:
2522  case AMDGPU::VReg_320_Align2RegClassID:
2523  case AMDGPU::AReg_320_Align2RegClassID:
2524  case AMDGPU::AV_320RegClassID:
2525  case AMDGPU::AV_320_Align2RegClassID:
2526    return 320;
2527  case AMDGPU::SGPR_352RegClassID:
2528  case AMDGPU::SReg_352RegClassID:
2529  case AMDGPU::VReg_352RegClassID:
2530  case AMDGPU::AReg_352RegClassID:
2531  case AMDGPU::VReg_352_Align2RegClassID:
2532  case AMDGPU::AReg_352_Align2RegClassID:
2533  case AMDGPU::AV_352RegClassID:
2534  case AMDGPU::AV_352_Align2RegClassID:
2535    return 352;
2536  case AMDGPU::SGPR_384RegClassID:
2537  case AMDGPU::SReg_384RegClassID:
2538  case AMDGPU::VReg_384RegClassID:
2539  case AMDGPU::AReg_384RegClassID:
2540  case AMDGPU::VReg_384_Align2RegClassID:
2541  case AMDGPU::AReg_384_Align2RegClassID:
2542  case AMDGPU::AV_384RegClassID:
2543  case AMDGPU::AV_384_Align2RegClassID:
2544    return 384;
2545  case AMDGPU::SGPR_512RegClassID:
2546  case AMDGPU::SReg_512RegClassID:
2547  case AMDGPU::VReg_512RegClassID:
2548  case AMDGPU::AReg_512RegClassID:
2549  case AMDGPU::VReg_512_Align2RegClassID:
2550  case AMDGPU::AReg_512_Align2RegClassID:
2551  case AMDGPU::AV_512RegClassID:
2552  case AMDGPU::AV_512_Align2RegClassID:
2553    return 512;
2554  case AMDGPU::SGPR_1024RegClassID:
2555  case AMDGPU::SReg_1024RegClassID:
2556  case AMDGPU::VReg_1024RegClassID:
2557  case AMDGPU::AReg_1024RegClassID:
2558  case AMDGPU::VReg_1024_Align2RegClassID:
2559  case AMDGPU::AReg_1024_Align2RegClassID:
2560  case AMDGPU::AV_1024RegClassID:
2561  case AMDGPU::AV_1024_Align2RegClassID:
2562    return 1024;
2563  default:
2564    llvm_unreachable("Unexpected register class");
2565  }
2566}
2567
2568unsigned getRegBitWidth(const MCRegisterClass &RC) {
2569  return getRegBitWidth(RC.getID());
2570}
2571
2572unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2573                           unsigned OpNo) {
2574  assert(OpNo < Desc.NumOperands);
2575  unsigned RCID = Desc.operands()[OpNo].RegClass;
2576  return getRegBitWidth(RCID) / 8;
2577}
2578
2579bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2580  if (isInlinableIntLiteral(Literal))
2581    return true;
2582
2583  uint64_t Val = static_cast<uint64_t>(Literal);
2584  return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2585         (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2586         (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2587         (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2588         (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2589         (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2590         (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2591         (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2592         (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2593         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2594}
2595
2596bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2597  if (isInlinableIntLiteral(Literal))
2598    return true;
2599
2600  // The actual type of the operand does not seem to matter as long
2601  // as the bits match one of the inline immediate values.  For example:
2602  //
2603  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2604  // so it is a legal inline immediate.
2605  //
2606  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2607  // floating-point, so it is a legal inline immediate.
2608
2609  uint32_t Val = static_cast<uint32_t>(Literal);
2610  return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2611         (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2612         (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2613         (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2614         (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2615         (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2616         (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2617         (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2618         (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2619         (Val == 0x3e22f983 && HasInv2Pi);
2620}
2621
2622bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2623  if (!HasInv2Pi)
2624    return false;
2625
2626  if (isInlinableIntLiteral(Literal))
2627    return true;
2628
2629  uint16_t Val = static_cast<uint16_t>(Literal);
2630  return Val == 0x3C00 || // 1.0
2631         Val == 0xBC00 || // -1.0
2632         Val == 0x3800 || // 0.5
2633         Val == 0xB800 || // -0.5
2634         Val == 0x4000 || // 2.0
2635         Val == 0xC000 || // -2.0
2636         Val == 0x4400 || // 4.0
2637         Val == 0xC400 || // -4.0
2638         Val == 0x3118;   // 1/2pi
2639}
2640
2641std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
2642  // Unfortunately, the Instruction Set Architecture Reference Guide is
2643  // misleading about how the inline operands work for (packed) 16-bit
2644  // instructions. In a nutshell, the actual HW behavior is:
2645  //
2646  //  - integer encodings (-16 .. 64) are always produced as sign-extended
2647  //    32-bit values
2648  //  - float encodings are produced as:
2649  //    - for F16 instructions: corresponding half-precision float values in
2650  //      the LSBs, 0 in the MSBs
2651  //    - for UI16 instructions: corresponding single-precision float value
2652  int32_t Signed = static_cast<int32_t>(Literal);
2653  if (Signed >= 0 && Signed <= 64)
2654    return 128 + Signed;
2655
2656  if (Signed >= -16 && Signed <= -1)
2657    return 192 + std::abs(Signed);
2658
2659  if (IsFloat) {
2660    // clang-format off
2661    switch (Literal) {
2662    case 0x3800: return 240; // 0.5
2663    case 0xB800: return 241; // -0.5
2664    case 0x3C00: return 242; // 1.0
2665    case 0xBC00: return 243; // -1.0
2666    case 0x4000: return 244; // 2.0
2667    case 0xC000: return 245; // -2.0
2668    case 0x4400: return 246; // 4.0
2669    case 0xC400: return 247; // -4.0
2670    case 0x3118: return 248; // 1.0 / (2.0 * pi)
2671    default: break;
2672    }
2673    // clang-format on
2674  } else {
2675    // clang-format off
2676    switch (Literal) {
2677    case 0x3F000000: return 240; // 0.5
2678    case 0xBF000000: return 241; // -0.5
2679    case 0x3F800000: return 242; // 1.0
2680    case 0xBF800000: return 243; // -1.0
2681    case 0x40000000: return 244; // 2.0
2682    case 0xC0000000: return 245; // -2.0
2683    case 0x40800000: return 246; // 4.0
2684    case 0xC0800000: return 247; // -4.0
2685    case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
2686    default: break;
2687    }
2688    // clang-format on
2689  }
2690
2691  return {};
2692}
2693
2694// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2695// or nullopt.
2696std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2697  return getInlineEncodingV216(false, Literal);
2698}
2699
2700// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2701// or nullopt.
2702std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2703  return getInlineEncodingV216(true, Literal);
2704}
2705
2706// Whether the given literal can be inlined for a V_PK_* instruction.
2707bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2708  switch (OpType) {
2709  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2710  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2711  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2712    return getInlineEncodingV216(false, Literal).has_value();
2713  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2714  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2715  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2716    return getInlineEncodingV216(true, Literal).has_value();
2717  default:
2718    llvm_unreachable("bad packed operand type");
2719  }
2720}
2721
2722// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2723bool isInlinableLiteralV2I16(uint32_t Literal) {
2724  return getInlineEncodingV2I16(Literal).has_value();
2725}
2726
2727// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2728bool isInlinableLiteralV2F16(uint32_t Literal) {
2729  return getInlineEncodingV2F16(Literal).has_value();
2730}
2731
2732bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2733  if (IsFP64)
2734    return !(Val & 0xffffffffu);
2735
2736  return isUInt<32>(Val) || isInt<32>(Val);
2737}
2738
2739bool isArgPassedInSGPR(const Argument *A) {
2740  const Function *F = A->getParent();
2741
2742  // Arguments to compute shaders are never a source of divergence.
2743  CallingConv::ID CC = F->getCallingConv();
2744  switch (CC) {
2745  case CallingConv::AMDGPU_KERNEL:
2746  case CallingConv::SPIR_KERNEL:
2747    return true;
2748  case CallingConv::AMDGPU_VS:
2749  case CallingConv::AMDGPU_LS:
2750  case CallingConv::AMDGPU_HS:
2751  case CallingConv::AMDGPU_ES:
2752  case CallingConv::AMDGPU_GS:
2753  case CallingConv::AMDGPU_PS:
2754  case CallingConv::AMDGPU_CS:
2755  case CallingConv::AMDGPU_Gfx:
2756  case CallingConv::AMDGPU_CS_Chain:
2757  case CallingConv::AMDGPU_CS_ChainPreserve:
2758    // For non-compute shaders, SGPR inputs are marked with either inreg or
2759    // byval. Everything else is in VGPRs.
2760    return A->hasAttribute(Attribute::InReg) ||
2761           A->hasAttribute(Attribute::ByVal);
2762  default:
2763    // TODO: treat i1 as divergent?
2764    return A->hasAttribute(Attribute::InReg);
2765  }
2766}
2767
2768bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2769  // Arguments to compute shaders are never a source of divergence.
2770  CallingConv::ID CC = CB->getCallingConv();
2771  switch (CC) {
2772  case CallingConv::AMDGPU_KERNEL:
2773  case CallingConv::SPIR_KERNEL:
2774    return true;
2775  case CallingConv::AMDGPU_VS:
2776  case CallingConv::AMDGPU_LS:
2777  case CallingConv::AMDGPU_HS:
2778  case CallingConv::AMDGPU_ES:
2779  case CallingConv::AMDGPU_GS:
2780  case CallingConv::AMDGPU_PS:
2781  case CallingConv::AMDGPU_CS:
2782  case CallingConv::AMDGPU_Gfx:
2783  case CallingConv::AMDGPU_CS_Chain:
2784  case CallingConv::AMDGPU_CS_ChainPreserve:
2785    // For non-compute shaders, SGPR inputs are marked with either inreg or
2786    // byval. Everything else is in VGPRs.
2787    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2788           CB->paramHasAttr(ArgNo, Attribute::ByVal);
2789  default:
2790    return CB->paramHasAttr(ArgNo, Attribute::InReg);
2791  }
2792}
2793
2794static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2795  return isGCN3Encoding(ST) || isGFX10Plus(ST);
2796}
2797
2798static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2799  return isGFX9Plus(ST);
2800}
2801
2802bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2803                                      int64_t EncodedOffset) {
2804  if (isGFX12Plus(ST))
2805    return isUInt<23>(EncodedOffset);
2806
2807  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2808                               : isUInt<8>(EncodedOffset);
2809}
2810
2811bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2812                                    int64_t EncodedOffset,
2813                                    bool IsBuffer) {
2814  if (isGFX12Plus(ST))
2815    return isInt<24>(EncodedOffset);
2816
2817  return !IsBuffer &&
2818         hasSMRDSignedImmOffset(ST) &&
2819         isInt<21>(EncodedOffset);
2820}
2821
2822static bool isDwordAligned(uint64_t ByteOffset) {
2823  return (ByteOffset & 3) == 0;
2824}
2825
2826uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2827                                uint64_t ByteOffset) {
2828  if (hasSMEMByteOffset(ST))
2829    return ByteOffset;
2830
2831  assert(isDwordAligned(ByteOffset));
2832  return ByteOffset >> 2;
2833}
2834
2835std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2836                                            int64_t ByteOffset, bool IsBuffer) {
2837  if (isGFX12Plus(ST)) // 24 bit signed offsets
2838    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2839                                 : std::nullopt;
2840
2841  // The signed version is always a byte offset.
2842  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2843    assert(hasSMEMByteOffset(ST));
2844    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2845                                 : std::nullopt;
2846  }
2847
2848  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2849    return std::nullopt;
2850
2851  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2852  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2853             ? std::optional<int64_t>(EncodedOffset)
2854             : std::nullopt;
2855}
2856
2857std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2858                                                     int64_t ByteOffset) {
2859  if (!isCI(ST) || !isDwordAligned(ByteOffset))
2860    return std::nullopt;
2861
2862  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2863  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2864                                   : std::nullopt;
2865}
2866
2867unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2868  if (AMDGPU::isGFX10(ST))
2869    return 12;
2870
2871  if (AMDGPU::isGFX12(ST))
2872    return 24;
2873  return 13;
2874}
2875
2876namespace {
2877
2878struct SourceOfDivergence {
2879  unsigned Intr;
2880};
2881const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2882
2883struct AlwaysUniform {
2884  unsigned Intr;
2885};
2886const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2887
2888#define GET_SourcesOfDivergence_IMPL
2889#define GET_UniformIntrinsics_IMPL
2890#define GET_Gfx9BufferFormat_IMPL
2891#define GET_Gfx10BufferFormat_IMPL
2892#define GET_Gfx11PlusBufferFormat_IMPL
2893#include "AMDGPUGenSearchableTables.inc"
2894
2895} // end anonymous namespace
2896
2897bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2898  return lookupSourceOfDivergence(IntrID);
2899}
2900
2901bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2902  return lookupAlwaysUniform(IntrID);
2903}
2904
2905const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2906                                                  uint8_t NumComponents,
2907                                                  uint8_t NumFormat,
2908                                                  const MCSubtargetInfo &STI) {
2909  return isGFX11Plus(STI)
2910             ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2911                                            NumFormat)
2912             : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2913                                                       NumComponents, NumFormat)
2914                            : getGfx9BufferFormatInfo(BitsPerComp,
2915                                                      NumComponents, NumFormat);
2916}
2917
2918const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
2919                                                  const MCSubtargetInfo &STI) {
2920  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2921                          : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2922                                         : getGfx9BufferFormatInfo(Format);
2923}
2924
2925bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
2926  for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
2927                       OpName::src2 }) {
2928    int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
2929    if (Idx == -1)
2930      continue;
2931
2932    if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
2933        OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
2934      return true;
2935  }
2936
2937  return false;
2938}
2939
2940bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
2941  return hasAny64BitVGPROperands(OpDesc);
2942}
2943
2944} // namespace AMDGPU
2945
2946raw_ostream &operator<<(raw_ostream &OS,
2947                        const AMDGPU::IsaInfo::TargetIDSetting S) {
2948  switch (S) {
2949  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
2950    OS << "Unsupported";
2951    break;
2952  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
2953    OS << "Any";
2954    break;
2955  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
2956    OS << "Off";
2957    break;
2958  case (AMDGPU::IsaInfo::TargetIDSetting::On):
2959    OS << "On";
2960    break;
2961  }
2962  return OS;
2963}
2964
2965} // namespace llvm
2966