1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains definition for AMDGPU ISA disassembler
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
19#include "Disassembler/AMDGPUDisassembler.h"
20#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21#include "SIDefines.h"
22#include "SIRegisterInfo.h"
23#include "TargetInfo/AMDGPUTargetInfo.h"
24#include "Utils/AMDGPUBaseInfo.h"
25#include "llvm-c/DisassemblerTypes.h"
26#include "llvm/BinaryFormat/ELF.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCDecoderOps.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInstrDesc.h"
32#include "llvm/MC/MCRegisterInfo.h"
33#include "llvm/MC/MCSubtargetInfo.h"
34#include "llvm/MC/TargetRegistry.h"
35#include "llvm/Support/AMDHSAKernelDescriptor.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "amdgpu-disassembler"
40
41#define SGPR_MAX                                                               \
42  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
43                 : AMDGPU::EncValues::SGPR_MAX_SI)
44
45using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
46
47AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
48                                       MCContext &Ctx, MCInstrInfo const *MCII)
49    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
50      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) {
51  // ToDo: AMDGPUDisassembler supports only VI ISA.
52  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
53    report_fatal_error("Disassembly not yet supported for subtarget");
54}
55
56inline static MCDisassembler::DecodeStatus
57addOperand(MCInst &Inst, const MCOperand& Opnd) {
58  Inst.addOperand(Opnd);
59  return Opnd.isValid() ?
60    MCDisassembler::Success :
61    MCDisassembler::Fail;
62}
63
64static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
65                                uint16_t NameIdx) {
66  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
67  if (OpIdx != -1) {
68    auto I = MI.begin();
69    std::advance(I, OpIdx);
70    MI.insert(I, Op);
71  }
72  return OpIdx;
73}
74
75static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
76                                       uint64_t Addr,
77                                       const MCDisassembler *Decoder) {
78  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
79
80  // Our branches take a simm16, but we need two extra bits to account for the
81  // factor of 4.
82  APInt SignedOffset(18, Imm * 4, true);
83  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
84
85  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
86    return MCDisassembler::Success;
87  return addOperand(Inst, MCOperand::createImm(Imm));
88}
89
90static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
91                                     const MCDisassembler *Decoder) {
92  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
93  int64_t Offset;
94  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
95    Offset = SignExtend64<24>(Imm);
96  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
97    Offset = Imm & 0xFFFFF;
98  } else { // GFX9+ supports 21-bit signed offsets.
99    Offset = SignExtend64<21>(Imm);
100  }
101  return addOperand(Inst, MCOperand::createImm(Offset));
102}
103
104static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
105                                  const MCDisassembler *Decoder) {
106  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
107  return addOperand(Inst, DAsm->decodeBoolReg(Val));
108}
109
110static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
111                                       uint64_t Addr,
112                                       const MCDisassembler *Decoder) {
113  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
114  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
115}
116
117#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
118  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
119                                        uint64_t /*Addr*/,                     \
120                                        const MCDisassembler *Decoder) {       \
121    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
122    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
123  }
124
125// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
126// number of register. Used by VGPR only and AGPR only operands.
127#define DECODE_OPERAND_REG_8(RegClass)                                         \
128  static DecodeStatus Decode##RegClass##RegisterClass(                         \
129      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
130      const MCDisassembler *Decoder) {                                         \
131    assert(Imm < (1 << 8) && "8-bit encoding");                                \
132    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
133    return addOperand(                                                         \
134        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
135  }
136
137#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
138                     ImmWidth)                                                 \
139  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
140                           const MCDisassembler *Decoder) {                    \
141    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
142    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
143    return addOperand(Inst,                                                    \
144                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
145                                        MandatoryLiteral, ImmWidth));          \
146  }
147
148// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
149// get register class. Used by SGPR only operands.
150#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
151  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
152
153// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
154// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
155// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
156// Used by AV_ register classes (AGPR or VGPR only register operands).
157#define DECODE_OPERAND_REG_AV10(RegClass, OpWidth)                             \
158  DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth,                   \
159               Imm | AMDGPU::EncValues::IS_VGPR, false, 0)
160
161// Decoder for Src(9-bit encoding) registers only.
162#define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth)                            \
163  DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0)
164
165// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
166// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
167// only.
168#define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth)                           \
169  DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0)
170
171// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
172// Imm{9} is acc, registers only.
173#define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth)                         \
174  DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0)
175
176// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
177// register from RegClass or immediate. Registers that don't belong to RegClass
178// will be decoded and InstPrinter will report warning. Immediate will be
179// decoded into constant of size ImmWidth, should match width of immediate used
180// by OperandType (important for floating point types).
181#define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth)           \
182  DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm,      \
183               false, ImmWidth)
184
185#define DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(Name, OpWidth, ImmWidth)         \
186  DECODE_SrcOp(decodeOperand_##Name, 9, OpWidth, Imm, false, ImmWidth)
187
188// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
189// and decode using 'enum10' from decodeSrcOp.
190#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth)          \
191  DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth,           \
192               Imm | 512, false, ImmWidth)
193
194#define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth)  \
195  DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9,         \
196               OpWidth, Imm, true, ImmWidth)
197
198// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
199// when RegisterClass is used as an operand. Most often used for destination
200// operands.
201
202DECODE_OPERAND_REG_8(VGPR_32)
203DECODE_OPERAND_REG_8(VGPR_32_Lo128)
204DECODE_OPERAND_REG_8(VReg_64)
205DECODE_OPERAND_REG_8(VReg_96)
206DECODE_OPERAND_REG_8(VReg_128)
207DECODE_OPERAND_REG_8(VReg_256)
208DECODE_OPERAND_REG_8(VReg_288)
209DECODE_OPERAND_REG_8(VReg_352)
210DECODE_OPERAND_REG_8(VReg_384)
211DECODE_OPERAND_REG_8(VReg_512)
212DECODE_OPERAND_REG_8(VReg_1024)
213
214DECODE_OPERAND_REG_7(SReg_32, OPW32)
215DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
216DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
217DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
218DECODE_OPERAND_REG_7(SReg_64, OPW64)
219DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
220DECODE_OPERAND_REG_7(SReg_96, OPW96)
221DECODE_OPERAND_REG_7(SReg_128, OPW128)
222DECODE_OPERAND_REG_7(SReg_256, OPW256)
223DECODE_OPERAND_REG_7(SReg_512, OPW512)
224
225DECODE_OPERAND_REG_8(AGPR_32)
226DECODE_OPERAND_REG_8(AReg_64)
227DECODE_OPERAND_REG_8(AReg_128)
228DECODE_OPERAND_REG_8(AReg_256)
229DECODE_OPERAND_REG_8(AReg_512)
230DECODE_OPERAND_REG_8(AReg_1024)
231
232DECODE_OPERAND_REG_AV10(AVDst_128, OPW128)
233DECODE_OPERAND_REG_AV10(AVDst_512, OPW512)
234
235// Decoders for register only source RegisterOperands that use use 9-bit Src
236// encoding: 'decodeOperand_<RegClass>'.
237
238DECODE_OPERAND_SRC_REG_9(VGPR_32, OPW32)
239DECODE_OPERAND_SRC_REG_9(VReg_64, OPW64)
240DECODE_OPERAND_SRC_REG_9(VReg_128, OPW128)
241DECODE_OPERAND_SRC_REG_9(VReg_256, OPW256)
242DECODE_OPERAND_SRC_REG_9(VRegOrLds_32, OPW32)
243
244DECODE_OPERAND_SRC_REG_A9(AGPR_32, OPW32)
245
246DECODE_SRC_OPERAND_REG_AV10(AV_32, OPW32)
247DECODE_SRC_OPERAND_REG_AV10(AV_64, OPW64)
248DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)
249
250// Decoders for register or immediate RegisterOperands that use 9-bit Src
251// encoding: 'decodeOperand_<RegClass>_Imm<ImmWidth>'.
252
253DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
254DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
255DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16)
256DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
257DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
258DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
259DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32)
260DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64)
261DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32)
262DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64)
263DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 32)
264DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 16)
265DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32)
266DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 16)
267DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64)
268DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 32)
269DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32)
270DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32)
271
272DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(VS_32_ImmV2I16, OPW32, 32)
273DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(VS_32_ImmV2F16, OPW32, 16)
274
275DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64)
276DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32)
277DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64)
278DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_512, OPW512, 32)
279DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)
280
281DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
282DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
283DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
284DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32)
285
286static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
287                                               uint64_t /*Addr*/,
288                                               const MCDisassembler *Decoder) {
289  assert(isUInt<10>(Imm) && "10-bit encoding expected");
290  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
291
292  bool IsHi = Imm & (1 << 9);
293  unsigned RegIdx = Imm & 0xff;
294  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
295  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
296}
297
298static DecodeStatus
299DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
300                                 const MCDisassembler *Decoder) {
301  assert(isUInt<8>(Imm) && "8-bit encoding expected");
302
303  bool IsHi = Imm & (1 << 7);
304  unsigned RegIdx = Imm & 0x7f;
305  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
306  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
307}
308
309static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
310                                                uint64_t /*Addr*/,
311                                                const MCDisassembler *Decoder) {
312  assert(isUInt<9>(Imm) && "9-bit encoding expected");
313
314  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
315  bool IsVGPR = Imm & (1 << 8);
316  if (IsVGPR) {
317    bool IsHi = Imm & (1 << 7);
318    unsigned RegIdx = Imm & 0x7f;
319    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
320  }
321  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
322                                                   Imm & 0xFF, false, 16));
323}
324
325static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
326                                          uint64_t /*Addr*/,
327                                          const MCDisassembler *Decoder) {
328  assert(isUInt<10>(Imm) && "10-bit encoding expected");
329
330  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
331  bool IsVGPR = Imm & (1 << 8);
332  if (IsVGPR) {
333    bool IsHi = Imm & (1 << 9);
334    unsigned RegIdx = Imm & 0xff;
335    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
336  }
337  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
338                                                   Imm & 0xFF, false, 16));
339}
340
341static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
342                                         uint64_t Addr,
343                                         const MCDisassembler *Decoder) {
344  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
345  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
346}
347
348static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
349                                          uint64_t Addr, const void *Decoder) {
350  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
351  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
352}
353
354static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
355                          const MCRegisterInfo *MRI) {
356  if (OpIdx < 0)
357    return false;
358
359  const MCOperand &Op = Inst.getOperand(OpIdx);
360  if (!Op.isReg())
361    return false;
362
363  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
364  auto Reg = Sub ? Sub : Op.getReg();
365  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
366}
367
368static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
369                                             AMDGPUDisassembler::OpWidthTy Opw,
370                                             const MCDisassembler *Decoder) {
371  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
372  if (!DAsm->isGFX90A()) {
373    Imm &= 511;
374  } else {
375    // If atomic has both vdata and vdst their register classes are tied.
376    // The bit is decoded along with the vdst, first operand. We need to
377    // change register class to AGPR if vdst was AGPR.
378    // If a DS instruction has both data0 and data1 their register classes
379    // are also tied.
380    unsigned Opc = Inst.getOpcode();
381    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
382    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
383                                                        : AMDGPU::OpName::vdata;
384    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
385    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
386    if ((int)Inst.getNumOperands() == DataIdx) {
387      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
388      if (IsAGPROperand(Inst, DstIdx, MRI))
389        Imm |= 512;
390    }
391
392    if (TSFlags & SIInstrFlags::DS) {
393      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
394      if ((int)Inst.getNumOperands() == Data2Idx &&
395          IsAGPROperand(Inst, DataIdx, MRI))
396        Imm |= 512;
397    }
398  }
399  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
400}
401
402static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
403                                           uint64_t Addr,
404                                           const MCDisassembler *Decoder) {
405  assert(Imm < (1 << 9) && "9-bit encoding");
406  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
407  return addOperand(
408      Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true));
409}
410
411static DecodeStatus
412DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
413                             const MCDisassembler *Decoder) {
414  return decodeOperand_AVLdSt_Any(Inst, Imm,
415                                  AMDGPUDisassembler::OPW32, Decoder);
416}
417
418static DecodeStatus
419DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
420                             const MCDisassembler *Decoder) {
421  return decodeOperand_AVLdSt_Any(Inst, Imm,
422                                  AMDGPUDisassembler::OPW64, Decoder);
423}
424
425static DecodeStatus
426DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
427                             const MCDisassembler *Decoder) {
428  return decodeOperand_AVLdSt_Any(Inst, Imm,
429                                  AMDGPUDisassembler::OPW96, Decoder);
430}
431
432static DecodeStatus
433DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
434                              const MCDisassembler *Decoder) {
435  return decodeOperand_AVLdSt_Any(Inst, Imm,
436                                  AMDGPUDisassembler::OPW128, Decoder);
437}
438
439static DecodeStatus
440DecodeAVLdSt_160RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
441                              const MCDisassembler *Decoder) {
442  return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW160,
443                                  Decoder);
444}
445
446#define DECODE_SDWA(DecName) \
447DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
448
449DECODE_SDWA(Src32)
450DECODE_SDWA(Src16)
451DECODE_SDWA(VopcDst)
452
453#include "AMDGPUGenDisassemblerTables.inc"
454
455//===----------------------------------------------------------------------===//
456//
457//===----------------------------------------------------------------------===//
458
459template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
460  assert(Bytes.size() >= sizeof(T));
461  const auto Res =
462      support::endian::read<T, llvm::endianness::little>(Bytes.data());
463  Bytes = Bytes.slice(sizeof(T));
464  return Res;
465}
466
467static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
468  assert(Bytes.size() >= 12);
469  uint64_t Lo =
470      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
471  Bytes = Bytes.slice(8);
472  uint64_t Hi =
473      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
474  Bytes = Bytes.slice(4);
475  return DecoderUInt128(Lo, Hi);
476}
477
478// The disassembler is greedy, so we need to check FI operand value to
479// not parse a dpp if the correct literal is not set. For dpp16 the
480// autogenerated decoder checks the dpp literal
481static bool isValidDPP8(const MCInst &MI) {
482  using namespace llvm::AMDGPU::DPP;
483  int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
484  assert(FiIdx != -1);
485  if ((unsigned)FiIdx >= MI.getNumOperands())
486    return false;
487  unsigned Fi = MI.getOperand(FiIdx).getImm();
488  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
489}
490
491DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
492                                                ArrayRef<uint8_t> Bytes_,
493                                                uint64_t Address,
494                                                raw_ostream &CS) const {
495  bool IsSDWA = false;
496
497  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
498  Bytes = Bytes_.slice(0, MaxInstBytesNum);
499
500  DecodeStatus Res = MCDisassembler::Fail;
501  do {
502    // ToDo: better to switch encoding length using some bit predicate
503    // but it is unknown yet, so try all we can
504
505    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
506    // encodings
507    if (isGFX11Plus() && Bytes.size() >= 12 ) {
508      DecoderUInt128 DecW = eat12Bytes(Bytes);
509      Res =
510          tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
511                        MI, DecW, Address, CS);
512      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
513        break;
514      MI = MCInst(); // clear
515      Res =
516          tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696,
517                        MI, DecW, Address, CS);
518      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
519        break;
520      MI = MCInst(); // clear
521
522      const auto convertVOPDPP = [&]() {
523        if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) {
524          convertVOP3PDPPInst(MI);
525        } else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) {
526          convertVOPCDPPInst(MI); // Special VOP3 case
527        } else {
528          assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
529          convertVOP3DPPInst(MI); // Regular VOP3 case
530        }
531      };
532      Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
533                          MI, DecW, Address, CS);
534      if (Res) {
535        convertVOPDPP();
536        break;
537      }
538      Res = tryDecodeInst(DecoderTableDPPGFX1296, DecoderTableDPPGFX12_FAKE1696,
539                          MI, DecW, Address, CS);
540      if (Res) {
541        convertVOPDPP();
542        break;
543      }
544      Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
545      if (Res)
546        break;
547
548      Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS);
549      if (Res)
550        break;
551
552      Res = tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS);
553      if (Res)
554        break;
555    }
556    // Reinitialize Bytes
557    Bytes = Bytes_.slice(0, MaxInstBytesNum);
558
559    if (Bytes.size() >= 8) {
560      const uint64_t QW = eatBytes<uint64_t>(Bytes);
561
562      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
563        Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS);
564        if (Res) {
565          if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
566              == -1)
567            break;
568          if (convertDPP8Inst(MI) == MCDisassembler::Success)
569            break;
570          MI = MCInst(); // clear
571        }
572      }
573
574      Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS);
575      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
576        break;
577      MI = MCInst(); // clear
578
579      Res = tryDecodeInst(DecoderTableDPP8GFX1164,
580                          DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
581      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
582        break;
583      MI = MCInst(); // clear
584
585      Res = tryDecodeInst(DecoderTableDPP8GFX1264,
586                          DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS);
587      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
588        break;
589      MI = MCInst(); // clear
590
591      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
592      if (Res) break;
593
594      Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
595                          MI, QW, Address, CS);
596      if (Res) {
597        if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
598          convertVOPCDPPInst(MI);
599        break;
600      }
601
602      Res = tryDecodeInst(DecoderTableDPPGFX1264, DecoderTableDPPGFX12_FAKE1664,
603                          MI, QW, Address, CS);
604      if (Res) {
605        if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
606          convertVOPCDPPInst(MI);
607        break;
608      }
609
610      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
611      if (Res) { IsSDWA = true;  break; }
612
613      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS);
614      if (Res) { IsSDWA = true;  break; }
615
616      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS);
617      if (Res) { IsSDWA = true;  break; }
618
619      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
620        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
621        if (Res)
622          break;
623      }
624
625      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
626      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
627      // table first so we print the correct name.
628      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) {
629        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS);
630        if (Res)
631          break;
632      }
633    }
634
635    // Reinitialize Bytes as DPP64 could have eaten too much
636    Bytes = Bytes_.slice(0, MaxInstBytesNum);
637
638    // Try decode 32-bit instruction
639    if (Bytes.size() < 4) break;
640    const uint32_t DW = eatBytes<uint32_t>(Bytes);
641    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS);
642    if (Res) break;
643
644    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS);
645    if (Res) break;
646
647    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS);
648    if (Res) break;
649
650    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
651      Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS);
652      if (Res)
653        break;
654    }
655
656    if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
657      Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS);
658      if (Res) break;
659    }
660
661    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
662    if (Res) break;
663
664    Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
665                        Address, CS);
666    if (Res) break;
667
668    Res = tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
669                        Address, CS);
670    if (Res)
671      break;
672
673    if (Bytes.size() < 4) break;
674    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
675
676    if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
677      Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
678      if (Res)
679        break;
680    }
681
682    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
683      Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
684      if (Res)
685        break;
686    }
687
688    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
689    if (Res) break;
690
691    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
692    if (Res) break;
693
694    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
695    if (Res) break;
696
697    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
698    if (Res) break;
699
700    Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
701                        Address, CS);
702    if (Res)
703      break;
704
705    Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
706                        Address, CS);
707    if (Res)
708      break;
709
710    Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS);
711    if (Res)
712      break;
713
714    Res = tryDecodeInst(DecoderTableWMMAGFX1264, MI, QW, Address, CS);
715  } while (false);
716
717  if (Res && AMDGPU::isMAC(MI.getOpcode())) {
718    // Insert dummy unused src2_modifiers.
719    insertNamedMCOperand(MI, MCOperand::createImm(0),
720                         AMDGPU::OpName::src2_modifiers);
721  }
722
723  if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
724              MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) {
725    // Insert dummy unused src2_modifiers.
726    insertNamedMCOperand(MI, MCOperand::createImm(0),
727                         AMDGPU::OpName::src2_modifiers);
728  }
729
730  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
731      !AMDGPU::hasGDS(STI)) {
732    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
733  }
734
735  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
736          (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
737    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
738                                             AMDGPU::OpName::cpol);
739    if (CPolPos != -1) {
740      unsigned CPol =
741          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
742              AMDGPU::CPol::GLC : 0;
743      if (MI.getNumOperands() <= (unsigned)CPolPos) {
744        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
745                             AMDGPU::OpName::cpol);
746      } else if (CPol) {
747        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
748      }
749    }
750  }
751
752  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
753              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
754             (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
755    // GFX90A lost TFE, its place is occupied by ACC.
756    int TFEOpIdx =
757        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
758    if (TFEOpIdx != -1) {
759      auto TFEIter = MI.begin();
760      std::advance(TFEIter, TFEOpIdx);
761      MI.insert(TFEIter, MCOperand::createImm(0));
762    }
763  }
764
765  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
766              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
767    int SWZOpIdx =
768        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
769    if (SWZOpIdx != -1) {
770      auto SWZIter = MI.begin();
771      std::advance(SWZIter, SWZOpIdx);
772      MI.insert(SWZIter, MCOperand::createImm(0));
773    }
774  }
775
776  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
777    int VAddr0Idx =
778        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
779    int RsrcIdx =
780        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
781    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
782    if (VAddr0Idx >= 0 && NSAArgs > 0) {
783      unsigned NSAWords = (NSAArgs + 3) / 4;
784      if (Bytes.size() < 4 * NSAWords) {
785        Res = MCDisassembler::Fail;
786      } else {
787        for (unsigned i = 0; i < NSAArgs; ++i) {
788          const unsigned VAddrIdx = VAddr0Idx + 1 + i;
789          auto VAddrRCID =
790              MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
791          MI.insert(MI.begin() + VAddrIdx,
792                    createRegOperand(VAddrRCID, Bytes[i]));
793        }
794        Bytes = Bytes.slice(4 * NSAWords);
795      }
796    }
797
798    if (Res)
799      Res = convertMIMGInst(MI);
800  }
801
802  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
803              (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE)))
804    Res = convertMIMGInst(MI);
805
806  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
807    Res = convertEXPInst(MI);
808
809  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
810    Res = convertVINTERPInst(MI);
811
812  if (Res && IsSDWA)
813    Res = convertSDWAInst(MI);
814
815  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
816                                              AMDGPU::OpName::vdst_in);
817  if (VDstIn_Idx != -1) {
818    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
819                           MCOI::OperandConstraint::TIED_TO);
820    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
821         !MI.getOperand(VDstIn_Idx).isReg() ||
822         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
823      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
824        MI.erase(&MI.getOperand(VDstIn_Idx));
825      insertNamedMCOperand(MI,
826        MCOperand::createReg(MI.getOperand(Tied).getReg()),
827        AMDGPU::OpName::vdst_in);
828    }
829  }
830
831  int ImmLitIdx =
832      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
833  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
834  if (Res && ImmLitIdx != -1 && !IsSOPK)
835    Res = convertFMAanyK(MI, ImmLitIdx);
836
837  // if the opcode was not recognized we'll assume a Size of 4 bytes
838  // (unless there are fewer bytes left)
839  Size = Res ? (MaxInstBytesNum - Bytes.size())
840             : std::min((size_t)4, Bytes_.size());
841  return Res;
842}
843
844DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
845  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
846    // The MCInst still has these fields even though they are no longer encoded
847    // in the GFX11 instruction.
848    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
849    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
850  }
851  return MCDisassembler::Success;
852}
853
854DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
855  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
856      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
857      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
858      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
859      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
860      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
861      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
862      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
863    // The MCInst has this field that is not directly encoded in the
864    // instruction.
865    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
866  }
867  return MCDisassembler::Success;
868}
869
870DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
871  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
872      STI.hasFeature(AMDGPU::FeatureGFX10)) {
873    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
874      // VOPC - insert clamp
875      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
876  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
877    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
878    if (SDst != -1) {
879      // VOPC - insert VCC register as sdst
880      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
881                           AMDGPU::OpName::sdst);
882    } else {
883      // VOP1/2 - insert omod if present in instruction
884      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
885    }
886  }
887  return MCDisassembler::Success;
888}
889
890struct VOPModifiers {
891  unsigned OpSel = 0;
892  unsigned OpSelHi = 0;
893  unsigned NegLo = 0;
894  unsigned NegHi = 0;
895};
896
897// Reconstruct values of VOP3/VOP3P operands such as op_sel.
898// Note that these values do not affect disassembler output,
899// so this is only necessary for consistency with src_modifiers.
900static VOPModifiers collectVOPModifiers(const MCInst &MI,
901                                        bool IsVOP3P = false) {
902  VOPModifiers Modifiers;
903  unsigned Opc = MI.getOpcode();
904  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
905                        AMDGPU::OpName::src1_modifiers,
906                        AMDGPU::OpName::src2_modifiers};
907  for (int J = 0; J < 3; ++J) {
908    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
909    if (OpIdx == -1)
910      continue;
911
912    unsigned Val = MI.getOperand(OpIdx).getImm();
913
914    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
915    if (IsVOP3P) {
916      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
917      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
918      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
919    } else if (J == 0) {
920      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
921    }
922  }
923
924  return Modifiers;
925}
926
927// MAC opcodes have special old and src2 operands.
928// src2 is tied to dst, while old is not tied (but assumed to be).
929bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
930  constexpr int DST_IDX = 0;
931  auto Opcode = MI.getOpcode();
932  const auto &Desc = MCII->get(Opcode);
933  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
934
935  if (OldIdx != -1 && Desc.getOperandConstraint(
936                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
937    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
938    assert(Desc.getOperandConstraint(
939               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
940               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
941    (void)DST_IDX;
942    return true;
943  }
944
945  return false;
946}
947
948// Create dummy old operand and insert dummy unused src2_modifiers
949void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
950  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
951  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
952  insertNamedMCOperand(MI, MCOperand::createImm(0),
953                       AMDGPU::OpName::src2_modifiers);
954}
955
956// We must check FI == literal to reject not genuine dpp8 insts, and we must
957// first add optional MI operands to check FI
958DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
959  unsigned Opc = MI.getOpcode();
960
961  if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
962    convertVOP3PDPPInst(MI);
963  } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
964             AMDGPU::isVOPC64DPP(Opc)) {
965    convertVOPCDPPInst(MI);
966  } else {
967    if (isMacDPP(MI))
968      convertMacDPPInst(MI);
969
970    int VDstInIdx =
971        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
972    if (VDstInIdx != -1)
973      insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
974
975    if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
976        MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
977      insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
978
979    unsigned DescNumOps = MCII->get(Opc).getNumOperands();
980    if (MI.getNumOperands() < DescNumOps &&
981        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
982      auto Mods = collectVOPModifiers(MI);
983      insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
984                           AMDGPU::OpName::op_sel);
985    } else {
986      // Insert dummy unused src modifiers.
987      if (MI.getNumOperands() < DescNumOps &&
988          AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
989        insertNamedMCOperand(MI, MCOperand::createImm(0),
990                             AMDGPU::OpName::src0_modifiers);
991
992      if (MI.getNumOperands() < DescNumOps &&
993          AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
994        insertNamedMCOperand(MI, MCOperand::createImm(0),
995                             AMDGPU::OpName::src1_modifiers);
996    }
997  }
998  return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
999}
1000
1001DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
1002  if (isMacDPP(MI))
1003    convertMacDPPInst(MI);
1004
1005  int VDstInIdx =
1006      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1007  if (VDstInIdx != -1)
1008    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1009
1010  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
1011      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
1012    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
1013
1014  unsigned Opc = MI.getOpcode();
1015  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1016  if (MI.getNumOperands() < DescNumOps &&
1017      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1018    auto Mods = collectVOPModifiers(MI);
1019    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1020                         AMDGPU::OpName::op_sel);
1021  }
1022  return MCDisassembler::Success;
1023}
1024
1025// Note that before gfx10, the MIMG encoding provided no information about
1026// VADDR size. Consequently, decoded instructions always show address as if it
1027// has 1 dword, which could be not really so.
1028DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
1029  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
1030
1031  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1032                                           AMDGPU::OpName::vdst);
1033
1034  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1035                                            AMDGPU::OpName::vdata);
1036  int VAddr0Idx =
1037      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
1038  int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
1039                                                : AMDGPU::OpName::rsrc;
1040  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
1041  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1042                                            AMDGPU::OpName::dmask);
1043
1044  int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1045                                            AMDGPU::OpName::tfe);
1046  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1047                                            AMDGPU::OpName::d16);
1048
1049  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1050  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1051      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
1052
1053  assert(VDataIdx != -1);
1054  if (BaseOpcode->BVH) {
1055    // Add A16 operand for intersect_ray instructions
1056    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
1057    return MCDisassembler::Success;
1058  }
1059
1060  bool IsAtomic = (VDstIdx != -1);
1061  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1062  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1063  bool IsNSA = false;
1064  bool IsPartialNSA = false;
1065  unsigned AddrSize = Info->VAddrDwords;
1066
1067  if (isGFX10Plus()) {
1068    unsigned DimIdx =
1069        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
1070    int A16Idx =
1071        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
1072    const AMDGPU::MIMGDimInfo *Dim =
1073        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
1074    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1075
1076    AddrSize =
1077        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
1078
1079    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1080    // VIMAGE insts other than BVH never use vaddr4.
1081    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1082            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1083            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1084    if (!IsNSA) {
1085      if (!IsVSample && AddrSize > 12)
1086        AddrSize = 16;
1087    } else {
1088      if (AddrSize > Info->VAddrDwords) {
1089        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1090          // The NSA encoding does not contain enough operands for the
1091          // combination of base opcode / dimension. Should this be an error?
1092          return MCDisassembler::Success;
1093        }
1094        IsPartialNSA = true;
1095      }
1096    }
1097  }
1098
1099  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1100  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1101
1102  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1103  if (D16 && AMDGPU::hasPackedD16(STI)) {
1104    DstSize = (DstSize + 1) / 2;
1105  }
1106
1107  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1108    DstSize += 1;
1109
1110  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1111    return MCDisassembler::Success;
1112
1113  int NewOpcode =
1114      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1115  if (NewOpcode == -1)
1116    return MCDisassembler::Success;
1117
1118  // Widen the register to the correct number of enabled channels.
1119  unsigned NewVdata = AMDGPU::NoRegister;
1120  if (DstSize != Info->VDataDwords) {
1121    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1122
1123    // Get first subregister of VData
1124    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
1125    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1126    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1127
1128    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
1129                                       &MRI.getRegClass(DataRCID));
1130    if (NewVdata == AMDGPU::NoRegister) {
1131      // It's possible to encode this such that the low register + enabled
1132      // components exceeds the register count.
1133      return MCDisassembler::Success;
1134    }
1135  }
1136
1137  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1138  // If using partial NSA on GFX11+ widen last address register.
1139  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1140  unsigned NewVAddrSA = AMDGPU::NoRegister;
1141  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1142      AddrSize != Info->VAddrDwords) {
1143    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1144    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1145    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1146
1147    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1148    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
1149                                        &MRI.getRegClass(AddrRCID));
1150    if (!NewVAddrSA)
1151      return MCDisassembler::Success;
1152  }
1153
1154  MI.setOpcode(NewOpcode);
1155
1156  if (NewVdata != AMDGPU::NoRegister) {
1157    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1158
1159    if (IsAtomic) {
1160      // Atomic operations have an additional operand (a copy of data)
1161      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1162    }
1163  }
1164
1165  if (NewVAddrSA) {
1166    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1167  } else if (IsNSA) {
1168    assert(AddrSize <= Info->VAddrDwords);
1169    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1170             MI.begin() + VAddr0Idx + Info->VAddrDwords);
1171  }
1172
1173  return MCDisassembler::Success;
1174}
1175
1176// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1177// decoder only adds to src_modifiers, so manually add the bits to the other
1178// operands.
1179DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1180  unsigned Opc = MI.getOpcode();
1181  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1182  auto Mods = collectVOPModifiers(MI, true);
1183
1184  if (MI.getNumOperands() < DescNumOps &&
1185      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1186    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1187
1188  if (MI.getNumOperands() < DescNumOps &&
1189      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1190    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1191                         AMDGPU::OpName::op_sel);
1192  if (MI.getNumOperands() < DescNumOps &&
1193      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1194    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
1195                         AMDGPU::OpName::op_sel_hi);
1196  if (MI.getNumOperands() < DescNumOps &&
1197      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1198    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
1199                         AMDGPU::OpName::neg_lo);
1200  if (MI.getNumOperands() < DescNumOps &&
1201      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1202    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
1203                         AMDGPU::OpName::neg_hi);
1204
1205  return MCDisassembler::Success;
1206}
1207
1208// Create dummy old operand and insert optional operands
1209DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1210  unsigned Opc = MI.getOpcode();
1211  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1212
1213  if (MI.getNumOperands() < DescNumOps &&
1214      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1215    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1216
1217  if (MI.getNumOperands() < DescNumOps &&
1218      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1219    insertNamedMCOperand(MI, MCOperand::createImm(0),
1220                         AMDGPU::OpName::src0_modifiers);
1221
1222  if (MI.getNumOperands() < DescNumOps &&
1223      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1224    insertNamedMCOperand(MI, MCOperand::createImm(0),
1225                         AMDGPU::OpName::src1_modifiers);
1226  return MCDisassembler::Success;
1227}
1228
1229DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
1230                                                int ImmLitIdx) const {
1231  assert(HasLiteral && "Should have decoded a literal");
1232  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
1233  unsigned DescNumOps = Desc.getNumOperands();
1234  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
1235                       AMDGPU::OpName::immDeferred);
1236  assert(DescNumOps == MI.getNumOperands());
1237  for (unsigned I = 0; I < DescNumOps; ++I) {
1238    auto &Op = MI.getOperand(I);
1239    auto OpType = Desc.operands()[I].OperandType;
1240    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
1241                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
1242    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
1243        IsDeferredOp)
1244      Op.setImm(Literal);
1245  }
1246  return MCDisassembler::Success;
1247}
1248
1249const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1250  return getContext().getRegisterInfo()->
1251    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1252}
1253
1254inline
1255MCOperand AMDGPUDisassembler::errOperand(unsigned V,
1256                                         const Twine& ErrMsg) const {
1257  *CommentStream << "Error: " + ErrMsg;
1258
1259  // ToDo: add support for error operands to MCInst.h
1260  // return MCOperand::createError(V);
1261  return MCOperand();
1262}
1263
1264inline
1265MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
1266  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
1267}
1268
1269inline
1270MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1271                                               unsigned Val) const {
1272  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1273  if (Val >= RegCl.getNumRegs())
1274    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1275                           ": unknown register " + Twine(Val));
1276  return createRegOperand(RegCl.getRegister(Val));
1277}
1278
1279inline
1280MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
1281                                                unsigned Val) const {
1282  // ToDo: SI/CI have 104 SGPRs, VI - 102
1283  // Valery: here we accepting as much as we can, let assembler sort it out
1284  int shift = 0;
1285  switch (SRegClassID) {
1286  case AMDGPU::SGPR_32RegClassID:
1287  case AMDGPU::TTMP_32RegClassID:
1288    break;
1289  case AMDGPU::SGPR_64RegClassID:
1290  case AMDGPU::TTMP_64RegClassID:
1291    shift = 1;
1292    break;
1293  case AMDGPU::SGPR_96RegClassID:
1294  case AMDGPU::TTMP_96RegClassID:
1295  case AMDGPU::SGPR_128RegClassID:
1296  case AMDGPU::TTMP_128RegClassID:
1297  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1298  // this bundle?
1299  case AMDGPU::SGPR_256RegClassID:
1300  case AMDGPU::TTMP_256RegClassID:
1301    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1302  // this bundle?
1303  case AMDGPU::SGPR_288RegClassID:
1304  case AMDGPU::TTMP_288RegClassID:
1305  case AMDGPU::SGPR_320RegClassID:
1306  case AMDGPU::TTMP_320RegClassID:
1307  case AMDGPU::SGPR_352RegClassID:
1308  case AMDGPU::TTMP_352RegClassID:
1309  case AMDGPU::SGPR_384RegClassID:
1310  case AMDGPU::TTMP_384RegClassID:
1311  case AMDGPU::SGPR_512RegClassID:
1312  case AMDGPU::TTMP_512RegClassID:
1313    shift = 2;
1314    break;
1315  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1316  // this bundle?
1317  default:
1318    llvm_unreachable("unhandled register class");
1319  }
1320
1321  if (Val % (1 << shift)) {
1322    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1323                   << ": scalar reg isn't aligned " << Val;
1324  }
1325
1326  return createRegOperand(SRegClassID, Val >> shift);
1327}
1328
1329MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1330                                                  bool IsHi) const {
1331  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1332  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1333}
1334
1335// Decode Literals for insts which always have a literal in the encoding
1336MCOperand
1337AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1338  if (HasLiteral) {
1339    assert(
1340        AMDGPU::hasVOPD(STI) &&
1341        "Should only decode multiple kimm with VOPD, check VSrc operand types");
1342    if (Literal != Val)
1343      return errOperand(Val, "More than one unique literal is illegal");
1344  }
1345  HasLiteral = true;
1346  Literal = Val;
1347  return MCOperand::createImm(Literal);
1348}
1349
1350MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
1351  // For now all literal constants are supposed to be unsigned integer
1352  // ToDo: deal with signed/unsigned 64-bit integer constants
1353  // ToDo: deal with float/double constants
1354  if (!HasLiteral) {
1355    if (Bytes.size() < 4) {
1356      return errOperand(0, "cannot read literal, inst bytes left " +
1357                        Twine(Bytes.size()));
1358    }
1359    HasLiteral = true;
1360    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1361    if (ExtendFP64)
1362      Literal64 <<= 32;
1363  }
1364  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
1365}
1366
1367MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1368  using namespace AMDGPU::EncValues;
1369
1370  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1371  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1372    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1373    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1374      // Cast prevents negative overflow.
1375}
1376
1377static int64_t getInlineImmVal32(unsigned Imm) {
1378  switch (Imm) {
1379  case 240:
1380    return llvm::bit_cast<uint32_t>(0.5f);
1381  case 241:
1382    return llvm::bit_cast<uint32_t>(-0.5f);
1383  case 242:
1384    return llvm::bit_cast<uint32_t>(1.0f);
1385  case 243:
1386    return llvm::bit_cast<uint32_t>(-1.0f);
1387  case 244:
1388    return llvm::bit_cast<uint32_t>(2.0f);
1389  case 245:
1390    return llvm::bit_cast<uint32_t>(-2.0f);
1391  case 246:
1392    return llvm::bit_cast<uint32_t>(4.0f);
1393  case 247:
1394    return llvm::bit_cast<uint32_t>(-4.0f);
1395  case 248: // 1 / (2 * PI)
1396    return 0x3e22f983;
1397  default:
1398    llvm_unreachable("invalid fp inline imm");
1399  }
1400}
1401
1402static int64_t getInlineImmVal64(unsigned Imm) {
1403  switch (Imm) {
1404  case 240:
1405    return llvm::bit_cast<uint64_t>(0.5);
1406  case 241:
1407    return llvm::bit_cast<uint64_t>(-0.5);
1408  case 242:
1409    return llvm::bit_cast<uint64_t>(1.0);
1410  case 243:
1411    return llvm::bit_cast<uint64_t>(-1.0);
1412  case 244:
1413    return llvm::bit_cast<uint64_t>(2.0);
1414  case 245:
1415    return llvm::bit_cast<uint64_t>(-2.0);
1416  case 246:
1417    return llvm::bit_cast<uint64_t>(4.0);
1418  case 247:
1419    return llvm::bit_cast<uint64_t>(-4.0);
1420  case 248: // 1 / (2 * PI)
1421    return 0x3fc45f306dc9c882;
1422  default:
1423    llvm_unreachable("invalid fp inline imm");
1424  }
1425}
1426
1427static int64_t getInlineImmVal16(unsigned Imm) {
1428  switch (Imm) {
1429  case 240:
1430    return 0x3800;
1431  case 241:
1432    return 0xB800;
1433  case 242:
1434    return 0x3C00;
1435  case 243:
1436    return 0xBC00;
1437  case 244:
1438    return 0x4000;
1439  case 245:
1440    return 0xC000;
1441  case 246:
1442    return 0x4400;
1443  case 247:
1444    return 0xC400;
1445  case 248: // 1 / (2 * PI)
1446    return 0x3118;
1447  default:
1448    llvm_unreachable("invalid fp inline imm");
1449  }
1450}
1451
1452MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
1453  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
1454      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1455
1456  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
1457  // ImmWidth 0 is a default case where operand should not allow immediates.
1458  // Imm value is still decoded into 32 bit immediate operand, inst printer will
1459  // use it to print verbose error message.
1460  switch (ImmWidth) {
1461  case 0:
1462  case 32:
1463    return MCOperand::createImm(getInlineImmVal32(Imm));
1464  case 64:
1465    return MCOperand::createImm(getInlineImmVal64(Imm));
1466  case 16:
1467    return MCOperand::createImm(getInlineImmVal16(Imm));
1468  default:
1469    llvm_unreachable("implement me");
1470  }
1471}
1472
1473unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
1474  using namespace AMDGPU;
1475
1476  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1477  switch (Width) {
1478  default: // fall
1479  case OPW32:
1480  case OPW16:
1481  case OPWV216:
1482    return VGPR_32RegClassID;
1483  case OPW64:
1484  case OPWV232: return VReg_64RegClassID;
1485  case OPW96: return VReg_96RegClassID;
1486  case OPW128: return VReg_128RegClassID;
1487  case OPW160: return VReg_160RegClassID;
1488  case OPW256: return VReg_256RegClassID;
1489  case OPW288: return VReg_288RegClassID;
1490  case OPW320: return VReg_320RegClassID;
1491  case OPW352: return VReg_352RegClassID;
1492  case OPW384: return VReg_384RegClassID;
1493  case OPW512: return VReg_512RegClassID;
1494  case OPW1024: return VReg_1024RegClassID;
1495  }
1496}
1497
1498unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
1499  using namespace AMDGPU;
1500
1501  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1502  switch (Width) {
1503  default: // fall
1504  case OPW32:
1505  case OPW16:
1506  case OPWV216:
1507    return AGPR_32RegClassID;
1508  case OPW64:
1509  case OPWV232: return AReg_64RegClassID;
1510  case OPW96: return AReg_96RegClassID;
1511  case OPW128: return AReg_128RegClassID;
1512  case OPW160: return AReg_160RegClassID;
1513  case OPW256: return AReg_256RegClassID;
1514  case OPW288: return AReg_288RegClassID;
1515  case OPW320: return AReg_320RegClassID;
1516  case OPW352: return AReg_352RegClassID;
1517  case OPW384: return AReg_384RegClassID;
1518  case OPW512: return AReg_512RegClassID;
1519  case OPW1024: return AReg_1024RegClassID;
1520  }
1521}
1522
1523
1524unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
1525  using namespace AMDGPU;
1526
1527  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1528  switch (Width) {
1529  default: // fall
1530  case OPW32:
1531  case OPW16:
1532  case OPWV216:
1533    return SGPR_32RegClassID;
1534  case OPW64:
1535  case OPWV232: return SGPR_64RegClassID;
1536  case OPW96: return SGPR_96RegClassID;
1537  case OPW128: return SGPR_128RegClassID;
1538  case OPW160: return SGPR_160RegClassID;
1539  case OPW256: return SGPR_256RegClassID;
1540  case OPW288: return SGPR_288RegClassID;
1541  case OPW320: return SGPR_320RegClassID;
1542  case OPW352: return SGPR_352RegClassID;
1543  case OPW384: return SGPR_384RegClassID;
1544  case OPW512: return SGPR_512RegClassID;
1545  }
1546}
1547
1548unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
1549  using namespace AMDGPU;
1550
1551  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1552  switch (Width) {
1553  default: // fall
1554  case OPW32:
1555  case OPW16:
1556  case OPWV216:
1557    return TTMP_32RegClassID;
1558  case OPW64:
1559  case OPWV232: return TTMP_64RegClassID;
1560  case OPW128: return TTMP_128RegClassID;
1561  case OPW256: return TTMP_256RegClassID;
1562  case OPW288: return TTMP_288RegClassID;
1563  case OPW320: return TTMP_320RegClassID;
1564  case OPW352: return TTMP_352RegClassID;
1565  case OPW384: return TTMP_384RegClassID;
1566  case OPW512: return TTMP_512RegClassID;
1567  }
1568}
1569
1570int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1571  using namespace AMDGPU::EncValues;
1572
1573  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1574  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1575
1576  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1577}
1578
1579MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
1580                                          bool MandatoryLiteral,
1581                                          unsigned ImmWidth, bool IsFP) const {
1582  using namespace AMDGPU::EncValues;
1583
1584  assert(Val < 1024); // enum10
1585
1586  bool IsAGPR = Val & 512;
1587  Val &= 511;
1588
1589  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1590    return createRegOperand(IsAGPR ? getAgprClassId(Width)
1591                                   : getVgprClassId(Width), Val - VGPR_MIN);
1592  }
1593  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1594                            IsFP);
1595}
1596
1597MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
1598                                                 unsigned Val,
1599                                                 bool MandatoryLiteral,
1600                                                 unsigned ImmWidth,
1601                                                 bool IsFP) const {
1602  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1603  // decoded earlier.
1604  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1605  using namespace AMDGPU::EncValues;
1606
1607  if (Val <= SGPR_MAX) {
1608    // "SGPR_MIN <= Val" is always true and causes compilation warning.
1609    static_assert(SGPR_MIN == 0);
1610    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1611  }
1612
1613  int TTmpIdx = getTTmpIdx(Val);
1614  if (TTmpIdx >= 0) {
1615    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1616  }
1617
1618  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1619    return decodeIntImmed(Val);
1620
1621  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1622    return decodeFPImmed(ImmWidth, Val);
1623
1624  if (Val == LITERAL_CONST) {
1625    if (MandatoryLiteral)
1626      // Keep a sentinel value for deferred setting
1627      return MCOperand::createImm(LITERAL_CONST);
1628    else
1629      return decodeLiteralConstant(IsFP && ImmWidth == 64);
1630  }
1631
1632  switch (Width) {
1633  case OPW32:
1634  case OPW16:
1635  case OPWV216:
1636    return decodeSpecialReg32(Val);
1637  case OPW64:
1638  case OPWV232:
1639    return decodeSpecialReg64(Val);
1640  default:
1641    llvm_unreachable("unexpected immediate type");
1642  }
1643}
1644
1645// Bit 0 of DstY isn't stored in the instruction, because it's always the
1646// opposite of bit 0 of DstX.
1647MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1648                                               unsigned Val) const {
1649  int VDstXInd =
1650      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1651  assert(VDstXInd != -1);
1652  assert(Inst.getOperand(VDstXInd).isReg());
1653  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1654  Val |= ~XDstReg & 1;
1655  auto Width = llvm::AMDGPUDisassembler::OPW32;
1656  return createRegOperand(getVgprClassId(Width), Val);
1657}
1658
1659MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1660  using namespace AMDGPU;
1661
1662  switch (Val) {
1663  // clang-format off
1664  case 102: return createRegOperand(FLAT_SCR_LO);
1665  case 103: return createRegOperand(FLAT_SCR_HI);
1666  case 104: return createRegOperand(XNACK_MASK_LO);
1667  case 105: return createRegOperand(XNACK_MASK_HI);
1668  case 106: return createRegOperand(VCC_LO);
1669  case 107: return createRegOperand(VCC_HI);
1670  case 108: return createRegOperand(TBA_LO);
1671  case 109: return createRegOperand(TBA_HI);
1672  case 110: return createRegOperand(TMA_LO);
1673  case 111: return createRegOperand(TMA_HI);
1674  case 124:
1675    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1676  case 125:
1677    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1678  case 126: return createRegOperand(EXEC_LO);
1679  case 127: return createRegOperand(EXEC_HI);
1680  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1681  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1682  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1683  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1684  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1685  case 251: return createRegOperand(SRC_VCCZ);
1686  case 252: return createRegOperand(SRC_EXECZ);
1687  case 253: return createRegOperand(SRC_SCC);
1688  case 254: return createRegOperand(LDS_DIRECT);
1689  default: break;
1690    // clang-format on
1691  }
1692  return errOperand(Val, "unknown operand encoding " + Twine(Val));
1693}
1694
1695MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1696  using namespace AMDGPU;
1697
1698  switch (Val) {
1699  case 102: return createRegOperand(FLAT_SCR);
1700  case 104: return createRegOperand(XNACK_MASK);
1701  case 106: return createRegOperand(VCC);
1702  case 108: return createRegOperand(TBA);
1703  case 110: return createRegOperand(TMA);
1704  case 124:
1705    if (isGFX11Plus())
1706      return createRegOperand(SGPR_NULL);
1707    break;
1708  case 125:
1709    if (!isGFX11Plus())
1710      return createRegOperand(SGPR_NULL);
1711    break;
1712  case 126: return createRegOperand(EXEC);
1713  case 235: return createRegOperand(SRC_SHARED_BASE);
1714  case 236: return createRegOperand(SRC_SHARED_LIMIT);
1715  case 237: return createRegOperand(SRC_PRIVATE_BASE);
1716  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1717  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1718  case 251: return createRegOperand(SRC_VCCZ);
1719  case 252: return createRegOperand(SRC_EXECZ);
1720  case 253: return createRegOperand(SRC_SCC);
1721  default: break;
1722  }
1723  return errOperand(Val, "unknown operand encoding " + Twine(Val));
1724}
1725
1726MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
1727                                            const unsigned Val,
1728                                            unsigned ImmWidth) const {
1729  using namespace AMDGPU::SDWA;
1730  using namespace AMDGPU::EncValues;
1731
1732  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1733      STI.hasFeature(AMDGPU::FeatureGFX10)) {
1734    // XXX: cast to int is needed to avoid stupid warning:
1735    // compare with unsigned is always true
1736    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1737        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1738      return createRegOperand(getVgprClassId(Width),
1739                              Val - SDWA9EncValues::SRC_VGPR_MIN);
1740    }
1741    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1742        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1743                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1744      return createSRegOperand(getSgprClassId(Width),
1745                               Val - SDWA9EncValues::SRC_SGPR_MIN);
1746    }
1747    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1748        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1749      return createSRegOperand(getTtmpClassId(Width),
1750                               Val - SDWA9EncValues::SRC_TTMP_MIN);
1751    }
1752
1753    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1754
1755    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1756      return decodeIntImmed(SVal);
1757
1758    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1759      return decodeFPImmed(ImmWidth, SVal);
1760
1761    return decodeSpecialReg32(SVal);
1762  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1763    return createRegOperand(getVgprClassId(Width), Val);
1764  }
1765  llvm_unreachable("unsupported target");
1766}
1767
1768MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1769  return decodeSDWASrc(OPW16, Val, 16);
1770}
1771
1772MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1773  return decodeSDWASrc(OPW32, Val, 32);
1774}
1775
1776MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1777  using namespace AMDGPU::SDWA;
1778
1779  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
1780          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1781         "SDWAVopcDst should be present only on GFX9+");
1782
1783  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
1784
1785  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1786    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1787
1788    int TTmpIdx = getTTmpIdx(Val);
1789    if (TTmpIdx >= 0) {
1790      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
1791      return createSRegOperand(TTmpClsId, TTmpIdx);
1792    } else if (Val > SGPR_MAX) {
1793      return IsWave64 ? decodeSpecialReg64(Val)
1794                      : decodeSpecialReg32(Val);
1795    } else {
1796      return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1797    }
1798  } else {
1799    return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1800  }
1801}
1802
1803MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
1804  return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
1805             ? decodeSrcOp(OPW64, Val)
1806             : decodeSrcOp(OPW32, Val);
1807}
1808
1809MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
1810  return decodeSrcOp(OPW32, Val);
1811}
1812
1813bool AMDGPUDisassembler::isVI() const {
1814  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1815}
1816
1817bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1818
1819bool AMDGPUDisassembler::isGFX90A() const {
1820  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1821}
1822
1823bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1824
1825bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1826
1827bool AMDGPUDisassembler::isGFX10Plus() const {
1828  return AMDGPU::isGFX10Plus(STI);
1829}
1830
1831bool AMDGPUDisassembler::isGFX11() const {
1832  return STI.hasFeature(AMDGPU::FeatureGFX11);
1833}
1834
1835bool AMDGPUDisassembler::isGFX11Plus() const {
1836  return AMDGPU::isGFX11Plus(STI);
1837}
1838
1839bool AMDGPUDisassembler::isGFX12Plus() const {
1840  return AMDGPU::isGFX12Plus(STI);
1841}
1842
1843bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
1844  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1845}
1846
1847bool AMDGPUDisassembler::hasKernargPreload() const {
1848  return AMDGPU::hasKernargPreload(STI);
1849}
1850
1851//===----------------------------------------------------------------------===//
1852// AMDGPU specific symbol handling
1853//===----------------------------------------------------------------------===//
1854#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1855#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
1856  do {                                                                         \
1857    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
1858  } while (0)
1859#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
1860  do {                                                                         \
1861    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
1862             << GET_FIELD(MASK) << '\n';                                       \
1863  } while (0)
1864
1865// NOLINTNEXTLINE(readability-identifier-naming)
1866MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1867    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1868  using namespace amdhsa;
1869  StringRef Indent = "\t";
1870
1871  // We cannot accurately backward compute #VGPRs used from
1872  // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1873  // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1874  // simply calculate the inverse of what the assembler does.
1875
1876  uint32_t GranulatedWorkitemVGPRCount =
1877      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1878
1879  uint32_t NextFreeVGPR =
1880      (GranulatedWorkitemVGPRCount + 1) *
1881      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1882
1883  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
1884
1885  // We cannot backward compute values used to calculate
1886  // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
1887  // directives can't be computed:
1888  // .amdhsa_reserve_vcc
1889  // .amdhsa_reserve_flat_scratch
1890  // .amdhsa_reserve_xnack_mask
1891  // They take their respective default values if not specified in the assembly.
1892  //
1893  // GRANULATED_WAVEFRONT_SGPR_COUNT
1894  //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1895  //
1896  // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1897  // are set to 0. So while disassembling we consider that:
1898  //
1899  // GRANULATED_WAVEFRONT_SGPR_COUNT
1900  //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1901  //
1902  // The disassembler cannot recover the original values of those 3 directives.
1903
1904  uint32_t GranulatedWavefrontSGPRCount =
1905      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1906
1907  if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
1908    return MCDisassembler::Fail;
1909
1910  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1911                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1912
1913  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1914  if (!hasArchitectedFlatScratch())
1915    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1916  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1917  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
1918
1919  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
1920    return MCDisassembler::Fail;
1921
1922  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1923                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1924  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1925                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1926  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1927                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1928  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1929                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1930
1931  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
1932    return MCDisassembler::Fail;
1933
1934  if (!isGFX12Plus())
1935    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1936                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1937
1938  if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
1939    return MCDisassembler::Fail;
1940
1941  if (!isGFX12Plus())
1942    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1943                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1944
1945  if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
1946    return MCDisassembler::Fail;
1947
1948  if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
1949    return MCDisassembler::Fail;
1950
1951  if (isGFX9Plus())
1952    PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1953
1954  if (!isGFX9Plus())
1955    if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
1956      return MCDisassembler::Fail;
1957  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
1958    return MCDisassembler::Fail;
1959  if (!isGFX10Plus())
1960    if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
1961      return MCDisassembler::Fail;
1962
1963  if (isGFX10Plus()) {
1964    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1965                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1966    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1967    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1968  }
1969
1970  if (isGFX12Plus())
1971    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1972                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1973
1974  return MCDisassembler::Success;
1975}
1976
1977// NOLINTNEXTLINE(readability-identifier-naming)
1978MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
1979    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1980  using namespace amdhsa;
1981  StringRef Indent = "\t";
1982  if (hasArchitectedFlatScratch())
1983    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1984                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1985  else
1986    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1987                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1988  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1989                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1990  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1991                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
1992  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
1993                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
1994  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
1995                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
1996  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
1997                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
1998
1999  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
2000    return MCDisassembler::Fail;
2001
2002  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
2003    return MCDisassembler::Fail;
2004
2005  if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
2006    return MCDisassembler::Fail;
2007
2008  PRINT_DIRECTIVE(
2009      ".amdhsa_exception_fp_ieee_invalid_op",
2010      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2011  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2012                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2013  PRINT_DIRECTIVE(
2014      ".amdhsa_exception_fp_ieee_div_zero",
2015      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2016  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2017                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2018  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2019                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2020  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2021                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2022  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2023                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2024
2025  if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
2026    return MCDisassembler::Fail;
2027
2028  return MCDisassembler::Success;
2029}
2030
2031// NOLINTNEXTLINE(readability-identifier-naming)
2032MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2033    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2034  using namespace amdhsa;
2035  StringRef Indent = "\t";
2036  if (isGFX90A()) {
2037    KdStream << Indent << ".amdhsa_accum_offset "
2038             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2039             << '\n';
2040    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0)
2041      return MCDisassembler::Fail;
2042    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2043    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
2044      return MCDisassembler::Fail;
2045  } else if (isGFX10Plus()) {
2046    // Bits [0-3].
2047    if (!isGFX12Plus()) {
2048      if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2049        PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2050                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2051      } else {
2052        PRINT_PSEUDO_DIRECTIVE_COMMENT(
2053            "SHARED_VGPR_COUNT",
2054            COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2055      }
2056    } else {
2057      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0)
2058        return MCDisassembler::Fail;
2059    }
2060
2061    // Bits [4-11].
2062    if (isGFX11()) {
2063      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
2064                                     COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2065      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2066                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2067      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2068                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2069    } else if (isGFX12Plus()) {
2070      PRINT_PSEUDO_DIRECTIVE_COMMENT(
2071          "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2072    } else {
2073      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED1)
2074        return MCDisassembler::Fail;
2075    }
2076
2077    // Bits [12].
2078    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2)
2079      return MCDisassembler::Fail;
2080
2081    // Bits [13].
2082    if (isGFX12Plus()) {
2083      PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2084                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2085    } else {
2086      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3)
2087        return MCDisassembler::Fail;
2088    }
2089
2090    // Bits [14-30].
2091    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4)
2092      return MCDisassembler::Fail;
2093
2094    // Bits [31].
2095    if (isGFX11Plus()) {
2096      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2097                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2098    } else {
2099      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED5)
2100        return MCDisassembler::Fail;
2101    }
2102  } else if (FourByteBuffer) {
2103    return MCDisassembler::Fail;
2104  }
2105  return MCDisassembler::Success;
2106}
2107#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2108#undef PRINT_DIRECTIVE
2109#undef GET_FIELD
2110
2111MCDisassembler::DecodeStatus
2112AMDGPUDisassembler::decodeKernelDescriptorDirective(
2113    DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2114    raw_string_ostream &KdStream) const {
2115#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
2116  do {                                                                         \
2117    KdStream << Indent << DIRECTIVE " "                                        \
2118             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
2119  } while (0)
2120
2121  uint16_t TwoByteBuffer = 0;
2122  uint32_t FourByteBuffer = 0;
2123
2124  StringRef ReservedBytes;
2125  StringRef Indent = "\t";
2126
2127  assert(Bytes.size() == 64);
2128  DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2129
2130  switch (Cursor.tell()) {
2131  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2132    FourByteBuffer = DE.getU32(Cursor);
2133    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2134             << '\n';
2135    return MCDisassembler::Success;
2136
2137  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2138    FourByteBuffer = DE.getU32(Cursor);
2139    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2140             << FourByteBuffer << '\n';
2141    return MCDisassembler::Success;
2142
2143  case amdhsa::KERNARG_SIZE_OFFSET:
2144    FourByteBuffer = DE.getU32(Cursor);
2145    KdStream << Indent << ".amdhsa_kernarg_size "
2146             << FourByteBuffer << '\n';
2147    return MCDisassembler::Success;
2148
2149  case amdhsa::RESERVED0_OFFSET:
2150    // 4 reserved bytes, must be 0.
2151    ReservedBytes = DE.getBytes(Cursor, 4);
2152    for (int I = 0; I < 4; ++I) {
2153      if (ReservedBytes[I] != 0) {
2154        return MCDisassembler::Fail;
2155      }
2156    }
2157    return MCDisassembler::Success;
2158
2159  case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2160    // KERNEL_CODE_ENTRY_BYTE_OFFSET
2161    // So far no directive controls this for Code Object V3, so simply skip for
2162    // disassembly.
2163    DE.skip(Cursor, 8);
2164    return MCDisassembler::Success;
2165
2166  case amdhsa::RESERVED1_OFFSET:
2167    // 20 reserved bytes, must be 0.
2168    ReservedBytes = DE.getBytes(Cursor, 20);
2169    for (int I = 0; I < 20; ++I) {
2170      if (ReservedBytes[I] != 0) {
2171        return MCDisassembler::Fail;
2172      }
2173    }
2174    return MCDisassembler::Success;
2175
2176  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2177    FourByteBuffer = DE.getU32(Cursor);
2178    return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2179
2180  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2181    FourByteBuffer = DE.getU32(Cursor);
2182    return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2183
2184  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2185    FourByteBuffer = DE.getU32(Cursor);
2186    return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2187
2188  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2189    using namespace amdhsa;
2190    TwoByteBuffer = DE.getU16(Cursor);
2191
2192    if (!hasArchitectedFlatScratch())
2193      PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2194                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2195    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2196                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2197    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2198                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2199    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2200                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2201    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2202                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2203    if (!hasArchitectedFlatScratch())
2204      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2205                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2206    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2207                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2208
2209    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2210      return MCDisassembler::Fail;
2211
2212    // Reserved for GFX9
2213    if (isGFX9() &&
2214        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2215      return MCDisassembler::Fail;
2216    } else if (isGFX10Plus()) {
2217      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2218                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2219    }
2220
2221    // FIXME: We should be looking at the ELF header ABI version for this.
2222    if (AMDGPU::getDefaultAMDHSACodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
2223      PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2224                      KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2225
2226    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
2227      return MCDisassembler::Fail;
2228
2229    return MCDisassembler::Success;
2230
2231  case amdhsa::KERNARG_PRELOAD_OFFSET:
2232    using namespace amdhsa;
2233    TwoByteBuffer = DE.getU16(Cursor);
2234    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2235      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2236                      KERNARG_PRELOAD_SPEC_LENGTH);
2237    }
2238
2239    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2240      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2241                      KERNARG_PRELOAD_SPEC_OFFSET);
2242    }
2243    return MCDisassembler::Success;
2244
2245  case amdhsa::RESERVED3_OFFSET:
2246    // 4 bytes from here are reserved, must be 0.
2247    ReservedBytes = DE.getBytes(Cursor, 4);
2248    for (int I = 0; I < 4; ++I) {
2249      if (ReservedBytes[I] != 0)
2250        return MCDisassembler::Fail;
2251    }
2252    return MCDisassembler::Success;
2253
2254  default:
2255    llvm_unreachable("Unhandled index. Case statements cover everything.");
2256    return MCDisassembler::Fail;
2257  }
2258#undef PRINT_DIRECTIVE
2259}
2260
2261MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
2262    StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2263  // CP microcode requires the kernel descriptor to be 64 aligned.
2264  if (Bytes.size() != 64 || KdAddress % 64 != 0)
2265    return MCDisassembler::Fail;
2266
2267  // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2268  // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2269  // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2270  // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2271  // when required.
2272  if (isGFX10Plus()) {
2273    uint16_t KernelCodeProperties =
2274        support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2275                                llvm::endianness::little);
2276    EnableWavefrontSize32 =
2277        AMDHSA_BITS_GET(KernelCodeProperties,
2278                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2279  }
2280
2281  std::string Kd;
2282  raw_string_ostream KdStream(Kd);
2283  KdStream << ".amdhsa_kernel " << KdName << '\n';
2284
2285  DataExtractor::Cursor C(0);
2286  while (C && C.tell() < Bytes.size()) {
2287    MCDisassembler::DecodeStatus Status =
2288        decodeKernelDescriptorDirective(C, Bytes, KdStream);
2289
2290    cantFail(C.takeError());
2291
2292    if (Status == MCDisassembler::Fail)
2293      return MCDisassembler::Fail;
2294  }
2295  KdStream << ".end_amdhsa_kernel\n";
2296  outs() << KdStream.str();
2297  return MCDisassembler::Success;
2298}
2299
2300std::optional<MCDisassembler::DecodeStatus>
2301AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
2302                                  ArrayRef<uint8_t> Bytes, uint64_t Address,
2303                                  raw_ostream &CStream) const {
2304  // Right now only kernel descriptor needs to be handled.
2305  // We ignore all other symbols for target specific handling.
2306  // TODO:
2307  // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2308  // Object V2 and V3 when symbols are marked protected.
2309
2310  // amd_kernel_code_t for Code Object V2.
2311  if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2312    Size = 256;
2313    return MCDisassembler::Fail;
2314  }
2315
2316  // Code Object V3 kernel descriptors.
2317  StringRef Name = Symbol.Name;
2318  if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2319    Size = 64; // Size = 64 regardless of success or failure.
2320    return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2321  }
2322  return std::nullopt;
2323}
2324
2325//===----------------------------------------------------------------------===//
2326// AMDGPUSymbolizer
2327//===----------------------------------------------------------------------===//
2328
2329// Try to find symbol name for specified label
2330bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2331    MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2332    uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2333    uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2334
2335  if (!IsBranch) {
2336    return false;
2337  }
2338
2339  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2340  if (!Symbols)
2341    return false;
2342
2343  auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2344    return Val.Addr == static_cast<uint64_t>(Value) &&
2345           Val.Type == ELF::STT_NOTYPE;
2346  });
2347  if (Result != Symbols->end()) {
2348    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2349    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2350    Inst.addOperand(MCOperand::createExpr(Add));
2351    return true;
2352  }
2353  // Add to list of referenced addresses, so caller can synthesize a label.
2354  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2355  return false;
2356}
2357
2358void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2359                                                       int64_t Value,
2360                                                       uint64_t Address) {
2361  llvm_unreachable("unimplemented");
2362}
2363
2364//===----------------------------------------------------------------------===//
2365// Initialization
2366//===----------------------------------------------------------------------===//
2367
2368static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2369                              LLVMOpInfoCallback /*GetOpInfo*/,
2370                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
2371                              void *DisInfo,
2372                              MCContext *Ctx,
2373                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2374  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2375}
2376
2377static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2378                                                const MCSubtargetInfo &STI,
2379                                                MCContext &Ctx) {
2380  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2381}
2382
2383extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2384  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2385                                         createAMDGPUDisassembler);
2386  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2387                                       createAMDGPUSymbolizer);
2388}
2389