1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
/// This file contains the definition of the AMDGPU ISA disassembler.
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
19#include "Disassembler/AMDGPUDisassembler.h"
20#include "AMDGPU.h"
21#include "AMDGPURegisterInfo.h"
22#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
23#include "SIDefines.h"
24#include "TargetInfo/AMDGPUTargetInfo.h"
25#include "Utils/AMDGPUBaseInfo.h"
26#include "llvm-c/Disassembler.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/Twine.h"
30#include "llvm/BinaryFormat/ELF.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCContext.h"
33#include "llvm/MC/MCDisassembler/MCDisassembler.h"
34#include "llvm/MC/MCExpr.h"
35#include "llvm/MC/MCFixedLenDisassembler.h"
36#include "llvm/MC/MCInst.h"
37#include "llvm/MC/MCSubtargetInfo.h"
38#include "llvm/Support/Endian.h"
39#include "llvm/Support/ErrorHandling.h"
40#include "llvm/Support/MathExtras.h"
41#include "llvm/Support/TargetRegistry.h"
42#include "llvm/Support/raw_ostream.h"
43#include <algorithm>
44#include <cassert>
45#include <cstddef>
46#include <cstdint>
47#include <iterator>
48#include <tuple>
49#include <vector>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "amdgpu-disassembler"
54
// Largest SGPR encoding value for the current subtarget: the GFX10 constant
// when isGFX10() is true, the SI constant otherwise. The expansion calls
// isGFX10() unqualified, so the macro is only usable where that name resolves
// (e.g. inside AMDGPUDisassembler member functions).
#define SGPR_MAX (isGFX10() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
                            : AMDGPU::EncValues::SGPR_MAX_SI)
57
58using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
59
60AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
61                                       MCContext &Ctx,
62                                       MCInstrInfo const *MCII) :
63  MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
64  TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {
65
66  // ToDo: AMDGPUDisassembler supports only VI ISA.
67  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10())
68    report_fatal_error("Disassembly not yet supported for subtarget");
69}
70
71inline static MCDisassembler::DecodeStatus
72addOperand(MCInst &Inst, const MCOperand& Opnd) {
73  Inst.addOperand(Opnd);
74  return Opnd.isValid() ?
75    MCDisassembler::Success :
76    MCDisassembler::Fail;
77}
78
79static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
80                                uint16_t NameIdx) {
81  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
82  if (OpIdx != -1) {
83    auto I = MI.begin();
84    std::advance(I, OpIdx);
85    MI.insert(I, Op);
86  }
87  return OpIdx;
88}
89
90static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
91                                       uint64_t Addr, const void *Decoder) {
92  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
93
94  // Our branches take a simm16, but we need two extra bits to account for the
95  // factor of 4.
96  APInt SignedOffset(18, Imm * 4, true);
97  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
98
99  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
100    return MCDisassembler::Success;
101  return addOperand(Inst, MCOperand::createImm(Imm));
102}
103
104static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val,
105                                  uint64_t Addr, const void *Decoder) {
106  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
107  return addOperand(Inst, DAsm->decodeBoolReg(Val));
108}
109
// Generates a static decoder callback named StaticDecoderName. The callback
// casts the opaque Decoder pointer to AMDGPUDisassembler, invokes its
// DecoderName(Imm) member and appends the resulting operand to Inst. The
// address argument is ignored.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const void *Decoder) {                 \
    const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);     \
    return addOperand(Inst, Disasm->DecoderName(Imm));                         \
  }
118
// Defines the static callback Decode<RegClass>RegisterClass, which forwards to
// the AMDGPUDisassembler member decodeOperand_<RegClass>.
#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
121
// Instantiate Decode<RegClass>RegisterClass callbacks for each register class
// listed below. NOTE(review): presumably these names are what the generated
// decoder tables reference — confirm against AMDGPUGenDisassemblerTables.inc.

// 32-bit vector register and vector-or-scalar source classes.
DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

// Wider vector register tuples.
DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

// Scalar register classes.
DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

// AGPR classes and the combined AV classes.
DECODE_OPERAND_REG(AGPR_32)
DECODE_OPERAND_REG(AReg_128)
DECODE_OPERAND_REG(AReg_512)
DECODE_OPERAND_REG(AReg_1024)
DECODE_OPERAND_REG(AV_32)
DECODE_OPERAND_REG(AV_64)
148
149static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
150                                         unsigned Imm,
151                                         uint64_t Addr,
152                                         const void *Decoder) {
153  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
154  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
155}
156
157static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
158                                         unsigned Imm,
159                                         uint64_t Addr,
160                                         const void *Decoder) {
161  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
162  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
163}
164
165static DecodeStatus decodeOperand_VS_16(MCInst &Inst,
166                                        unsigned Imm,
167                                        uint64_t Addr,
168                                        const void *Decoder) {
169  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
170  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
171}
172
173static DecodeStatus decodeOperand_VS_32(MCInst &Inst,
174                                        unsigned Imm,
175                                        uint64_t Addr,
176                                        const void *Decoder) {
177  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
178  return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
179}
180
181static DecodeStatus decodeOperand_AReg_128(MCInst &Inst,
182                                           unsigned Imm,
183                                           uint64_t Addr,
184                                           const void *Decoder) {
185  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
186  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
187}
188
189static DecodeStatus decodeOperand_AReg_512(MCInst &Inst,
190                                           unsigned Imm,
191                                           uint64_t Addr,
192                                           const void *Decoder) {
193  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
194  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512));
195}
196
197static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst,
198                                            unsigned Imm,
199                                            uint64_t Addr,
200                                            const void *Decoder) {
201  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
202  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
203}
204
205static DecodeStatus decodeOperand_SReg_32(MCInst &Inst,
206                                          unsigned Imm,
207                                          uint64_t Addr,
208                                          const void *Decoder) {
209  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
210  return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
211}
212
213static DecodeStatus decodeOperand_VGPR_32(MCInst &Inst,
214                                         unsigned Imm,
215                                         uint64_t Addr,
216                                         const void *Decoder) {
217  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
218  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm));
219}
220
// Defines the static callback decodeSDWA<DecName>, which forwards to the
// AMDGPUDisassembler member of the same name.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

// SDWA operand decoders: 32-bit source, 16-bit source, and VOPC destination.
DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
227
228#include "AMDGPUGenDisassemblerTables.inc"
229
230//===----------------------------------------------------------------------===//
231//
232//===----------------------------------------------------------------------===//
233
234template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
235  assert(Bytes.size() >= sizeof(T));
236  const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
237  Bytes = Bytes.slice(sizeof(T));
238  return Res;
239}
240
241DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
242                                               MCInst &MI,
243                                               uint64_t Inst,
244                                               uint64_t Address) const {
245  assert(MI.getOpcode() == 0);
246  assert(MI.getNumOperands() == 0);
247  MCInst TmpInst;
248  HasLiteral = false;
249  const auto SavedBytes = Bytes;
250  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
251    MI = TmpInst;
252    return MCDisassembler::Success;
253  }
254  Bytes = SavedBytes;
255  return MCDisassembler::Fail;
256}
257
258static bool isValidDPP8(const MCInst &MI) {
259  using namespace llvm::AMDGPU::DPP;
260  int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
261  assert(FiIdx != -1);
262  if ((unsigned)FiIdx >= MI.getNumOperands())
263    return false;
264  unsigned Fi = MI.getOperand(FiIdx).getImm();
265  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
266}
267
/// Decodes a single instruction from the front of \p Bytes_ into \p MI.
///
/// At most TargetMaxInstBytes bytes are examined. Decoder tables are probed in
/// a fixed order: the 64-bit DPP8, DPP and SDWA tables plus the
/// feature-gated 64-bit tables first, then the 32-bit tables, then the
/// remaining 64-bit tables. After a successful decode the instruction is
/// post-processed: a dummy src2_modifiers operand for selected MAC/FMAC
/// opcodes, NSA address operands and opcode fix-up for MIMG, SDWA operand
/// fix-up, and repair of a tied vdst_in operand.
///
/// \param MI      Receives the decoded instruction.
/// \param Size    Set to the number of bytes consumed on success; on failure
///                set to min(4, Bytes_.size()).
/// \param Bytes_  Raw bytes to decode from.
/// \param Address Passed through to the table decoders.
/// \param CS      Stored in CommentStream; operand decoders write warnings
///                and errors to it.
/// \returns the status of the last decode or conversion step.
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // Never look further than the target's maximum instruction length.
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  // Single-pass do/while so that `break` can be used to leave as soon as one
  // table matches.
  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      // A DPP8 match is only accepted when the DPP8 fix-up reports Success;
      // otherwise the partial result is discarded below.
      Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;

      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try decode 32-bit instruction
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
    if (Res) break;

    // No 32-bit table matched: extend to 64 bits by reading the next dword
    // into the high half and probe the remaining 64-bit tables.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
  } while (false);

  // When 12 bytes were consumed but either no literal was read or the opcode
  // is not VOP3, clamp the consumed length to 8 bytes: Bytes is reset to the
  // first 8 input bytes and all of them are eaten.
  // NOTE(review): the exact encodings this guards against are not visible
  // here — confirm before changing.
  if (Res && (MaxInstBytesNum - Bytes.size()) == 12 && (!HasLiteral ||
        !(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3))) {
    MaxInstBytesNum = 8;
    Bytes = Bytes_.slice(0, MaxInstBytesNum);
    eatBytes<uint64_t>(Bytes);
  }

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    // Number of operand slots between vaddr0 and srsrc; only meaningful when
    // vaddr0 exists, which is checked right below.
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      // One byte per extra address, rounded up to whole dwords.
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords) {
        Res = MCDisassembler::Fail;
      } else {
        // Each trailing byte is decoded as a VGPR_32 and inserted right after
        // vaddr0, in order.
        for (unsigned i = 0; i < NSAArgs; ++i) {
          MI.insert(MI.begin() + VAddr0Idx + 1 + i,
                    decodeOperand_VGPR_32(Bytes[i]));
        }
        Bytes = Bytes.slice(4 * NSAWords);
      }
    }

    if (Res)
      Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  // If the opcode has a vdst_in operand tied to another operand, make sure it
  // is present and holds the same register as the operand it is tied to:
  // a missing, non-register or mismatching vdst_in is replaced by a copy of
  // the tied register.
  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                           MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
        MCOperand::createReg(MI.getOperand(Tied).getReg()),
        AMDGPU::OpName::vdst_in);
    }
  }

  // if the opcode was not recognized we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left)
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}
421
422DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
423  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
424      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
425    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
426      // VOPC - insert clamp
427      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
428  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
429    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
430    if (SDst != -1) {
431      // VOPC - insert VCC register as sdst
432      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
433                           AMDGPU::OpName::sdst);
434    } else {
435      // VOP1/2 - insert omod if present in instruction
436      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
437    }
438  }
439  return MCDisassembler::Success;
440}
441
442DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
443  unsigned Opc = MI.getOpcode();
444  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
445
446  // Insert dummy unused src modifiers.
447  if (MI.getNumOperands() < DescNumOps &&
448      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
449    insertNamedMCOperand(MI, MCOperand::createImm(0),
450                         AMDGPU::OpName::src0_modifiers);
451
452  if (MI.getNumOperands() < DescNumOps &&
453      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
454    insertNamedMCOperand(MI, MCOperand::createImm(0),
455                         AMDGPU::OpName::src1_modifiers);
456
457  return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
458}
459
// Rewrites a decoded MIMG instruction so that its opcode and its vdata /
// vaddr operands match the data size implied by dmask/d16 (and, on GFX10, the
// address size implied by the base opcode and dim). Returns Success in every
// path; when no matching opcode or register exists, or tfe is set, MI is left
// as decoded.
//
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::tfe);
  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::d16);

  // vdata, dmask and tfe are required; vdst and d16 are optional.
  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  // The presence of a vdst operand is what marks the instruction as atomic.
  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

  bool IsNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    // NOTE(review): DimIdx is used unchecked below; this assumes every GFX10
    // MIMG opcode has a dim operand — confirm.
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
        AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());

    // Address dword count = extra args + gradients + coordinates + optional
    // lod/clamp/mip, as described by the base opcode and dimension tables.
    AddrSize = BaseOpcode->NumExtraArgs +
               (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
               (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
               (BaseOpcode->LodOrClampOrMip ? 1 : 0);
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;
    if (!IsNSA) {
      // Non-NSA: sizes above 4 are rounded up to 8, sizes above 8 to 16.
      if (AddrSize > 8)
        AddrSize = 16;
      else if (AddrSize > 4)
        AddrSize = 8;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        // The NSA encoding does not contain enough operands for the combination
        // of base opcode / dimension. Should this be an error?
        return MCDisassembler::Success;
      }
    }
  }

  // Destination size in dwords: 4 for gather4, otherwise the number of set
  // bits in the low 4 bits of dmask, with a minimum of 1.
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u);

  // With packed D16 the destination size is halved, rounding up.
  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  // Nothing to do if the decoded opcode already has the right sizes.
  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return MCDisassembler::Success;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return MCDisassembler::Success;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    // Find the register in the new opcode's vdata class whose sub0 is Vdata0.
    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return MCDisassembler::Success;
    }
  }

  // On GFX10 non-NSA encodings the address register is resized the same way.
  unsigned NewVAddr0 = AMDGPU::NoRegister;
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
    unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
    VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;

    auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass;
    NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
                                        &MRI.getRegClass(AddrRCID));
    if (NewVAddr0 == AMDGPU::NoRegister)
      return MCDisassembler::Success;
  }

  // All lookups succeeded; MI is only mutated from this point on.
  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddr0 != AMDGPU::NoRegister) {
    MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
  } else if (IsNSA) {
    // Drop the NSA address operands beyond the first AddrSize ones.
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }

  return MCDisassembler::Success;
}
592
593const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
594  return getContext().getRegisterInfo()->
595    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
596}
597
598inline
599MCOperand AMDGPUDisassembler::errOperand(unsigned V,
600                                         const Twine& ErrMsg) const {
601  *CommentStream << "Error: " + ErrMsg;
602
603  // ToDo: add support for error operands to MCInst.h
604  // return MCOperand::createError(V);
605  return MCOperand();
606}
607
608inline
609MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
610  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
611}
612
613inline
614MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
615                                               unsigned Val) const {
616  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
617  if (Val >= RegCl.getNumRegs())
618    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
619                           ": unknown register " + Twine(Val));
620  return createRegOperand(RegCl.getRegister(Val));
621}
622
623inline
624MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
625                                                unsigned Val) const {
626  // ToDo: SI/CI have 104 SGPRs, VI - 102
627  // Valery: here we accepting as much as we can, let assembler sort it out
628  int shift = 0;
629  switch (SRegClassID) {
630  case AMDGPU::SGPR_32RegClassID:
631  case AMDGPU::TTMP_32RegClassID:
632    break;
633  case AMDGPU::SGPR_64RegClassID:
634  case AMDGPU::TTMP_64RegClassID:
635    shift = 1;
636    break;
637  case AMDGPU::SGPR_128RegClassID:
638  case AMDGPU::TTMP_128RegClassID:
639  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
640  // this bundle?
641  case AMDGPU::SGPR_256RegClassID:
642  case AMDGPU::TTMP_256RegClassID:
643    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
644  // this bundle?
645  case AMDGPU::SGPR_512RegClassID:
646  case AMDGPU::TTMP_512RegClassID:
647    shift = 2;
648    break;
649  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
650  // this bundle?
651  default:
652    llvm_unreachable("unhandled register class");
653  }
654
655  if (Val % (1 << shift)) {
656    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
657                   << ": scalar reg isn't aligned " << Val;
658  }
659
660  return createRegOperand(SRegClassID, Val >> shift);
661}
662
/// Decodes a VS_32 operand: forwards \p Val to decodeSrcOp with width OPW32.
MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}
666
/// Decodes a VS_64 operand: forwards \p Val to decodeSrcOp with width OPW64.
MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}
670
/// Decodes a VS_128 operand: forwards \p Val to decodeSrcOp with width OPW128.
MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}
674
/// Decodes a 16-bit VSrc operand: forwards \p Val to decodeSrcOp with width
/// OPW16.
MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}
678
/// Decodes a VSrcV216 operand: forwards \p Val to decodeSrcOp with width
/// OPWV216.
MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}
682
683MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
684  // Some instructions have operand restrictions beyond what the encoding
685  // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
686  // high bit.
687  Val &= 255;
688
689  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
690}
691
/// Decodes a VRegOrLds_32 operand: forwards \p Val to decodeSrcOp with width
/// OPW32.
MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}
695
696MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
697  return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);
698}
699
700MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
701  return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
702}
703
704MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
705  return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
706}
707
708MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {
709  return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);
710}
711
/// Decodes an AV_32 operand: forwards \p Val to decodeSrcOp with width OPW32.
MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}
715
/// Decodes an AV_64 operand: forwards \p Val to decodeSrcOp with width OPW64.
MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}
719
/// Decodes a VReg_64 operand: \p Val is used unmasked as the index into the
/// VReg_64 register class.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}
723
/// Decodes a VReg_96 operand: \p Val is used unmasked as the index into the
/// VReg_96 register class.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}
727
/// Decodes a VReg_128 operand: \p Val is used unmasked as the index into the
/// VReg_128 register class.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}
731
/// Decodes a VReg_256 operand: \p Val is used unmasked as the index into the
/// VReg_256 register class.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
}
735
/// Decodes a VReg_512 operand: \p Val is used unmasked as the index into the
/// VReg_512 register class.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
}
739
/// Decodes an SReg_32 operand: forwards \p Val to decodeSrcOp with width
/// OPW32.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The table-gen generated disassembler doesn't care about operand types; it
  // keeps only the register class, so an SSrc_32 operand turns into SReg_32.
  // Therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}
746
/// Decodes an SReg_32_XM0_XEXEC operand exactly like SReg_32; the excluded
/// registers are not rejected here.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0_XEXEC is SReg_32 without M0 or EXEC_LO/EXEC_HI
  return decodeOperand_SReg_32(Val);
}
752
/// Decodes an SReg_32_XEXEC_HI operand exactly like SReg_32; the excluded
/// register is not rejected here.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI
  return decodeOperand_SReg_32(Val);
}
758
/// Decodes an SRegOrLds_32 operand: forwards \p Val to decodeSrcOp with width
/// OPW32.
MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
  // The table-gen generated disassembler doesn't care about operand types; it
  // keeps only the register class, so an SSrc_32 operand turns into SReg_32.
  // Therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}
765
/// Decodes an SReg_64 operand: forwards \p Val to decodeSrcOp with width
/// OPW64.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}
769
/// Decodes an SReg_64_XEXEC operand: forwards \p Val to decodeSrcOp with
/// width OPW64, i.e. the same decoding as SReg_64.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}
773
/// Decodes an SReg_128 operand: forwards \p Val to decodeSrcOp with width
/// OPW128.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}
777
/// Decodes an SReg_256 operand: forwards \p Val to decodeDstOp with width
/// OPW256. Note that this goes through decodeDstOp, not decodeSrcOp as the
/// narrower SReg decoders in this file do.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}
781
/// Decodes an SReg_512 operand: forwards \p Val to decodeDstOp with width
/// OPW512. Note that this goes through decodeDstOp, not decodeSrcOp as the
/// narrower SReg decoders in this file do.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}
785
786MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
787  // For now all literal constants are supposed to be unsigned integer
788  // ToDo: deal with signed/unsigned 64-bit integer constants
789  // ToDo: deal with float/double constants
790  if (!HasLiteral) {
791    if (Bytes.size() < 4) {
792      return errOperand(0, "cannot read literal, inst bytes left " +
793                        Twine(Bytes.size()));
794    }
795    HasLiteral = true;
796    Literal = eatBytes<uint32_t>(Bytes);
797  }
798  return MCOperand::createImm(Literal);
799}
800
801MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
802  using namespace AMDGPU::EncValues;
803
804  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
805  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
806    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
807    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
808      // Cast prevents negative overflow.
809}
810
811static int64_t getInlineImmVal32(unsigned Imm) {
812  switch (Imm) {
813  case 240:
814    return FloatToBits(0.5f);
815  case 241:
816    return FloatToBits(-0.5f);
817  case 242:
818    return FloatToBits(1.0f);
819  case 243:
820    return FloatToBits(-1.0f);
821  case 244:
822    return FloatToBits(2.0f);
823  case 245:
824    return FloatToBits(-2.0f);
825  case 246:
826    return FloatToBits(4.0f);
827  case 247:
828    return FloatToBits(-4.0f);
829  case 248: // 1 / (2 * PI)
830    return 0x3e22f983;
831  default:
832    llvm_unreachable("invalid fp inline imm");
833  }
834}
835
836static int64_t getInlineImmVal64(unsigned Imm) {
837  switch (Imm) {
838  case 240:
839    return DoubleToBits(0.5);
840  case 241:
841    return DoubleToBits(-0.5);
842  case 242:
843    return DoubleToBits(1.0);
844  case 243:
845    return DoubleToBits(-1.0);
846  case 244:
847    return DoubleToBits(2.0);
848  case 245:
849    return DoubleToBits(-2.0);
850  case 246:
851    return DoubleToBits(4.0);
852  case 247:
853    return DoubleToBits(-4.0);
854  case 248: // 1 / (2 * PI)
855    return 0x3fc45f306dc9c882;
856  default:
857    llvm_unreachable("invalid fp inline imm");
858  }
859}
860
861static int64_t getInlineImmVal16(unsigned Imm) {
862  switch (Imm) {
863  case 240:
864    return 0x3800;
865  case 241:
866    return 0xB800;
867  case 242:
868    return 0x3C00;
869  case 243:
870    return 0xBC00;
871  case 244:
872    return 0x4000;
873  case 245:
874    return 0xC000;
875  case 246:
876    return 0x4400;
877  case 247:
878    return 0xC400;
879  case 248: // 1 / (2 * PI)
880    return 0x3118;
881  default:
882    llvm_unreachable("invalid fp inline imm");
883  }
884}
885
886MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
887  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
888      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
889
890  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
891  switch (Width) {
892  case OPW32:
893  case OPW128: // splat constants
894  case OPW512:
895  case OPW1024:
896    return MCOperand::createImm(getInlineImmVal32(Imm));
897  case OPW64:
898    return MCOperand::createImm(getInlineImmVal64(Imm));
899  case OPW16:
900  case OPWV216:
901    return MCOperand::createImm(getInlineImmVal16(Imm));
902  default:
903    llvm_unreachable("implement me");
904  }
905}
906
907unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
908  using namespace AMDGPU;
909
910  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
911  switch (Width) {
912  default: // fall
913  case OPW32:
914  case OPW16:
915  case OPWV216:
916    return VGPR_32RegClassID;
917  case OPW64: return VReg_64RegClassID;
918  case OPW128: return VReg_128RegClassID;
919  }
920}
921
922unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
923  using namespace AMDGPU;
924
925  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
926  switch (Width) {
927  default: // fall
928  case OPW32:
929  case OPW16:
930  case OPWV216:
931    return AGPR_32RegClassID;
932  case OPW64: return AReg_64RegClassID;
933  case OPW128: return AReg_128RegClassID;
934  case OPW512: return AReg_512RegClassID;
935  case OPW1024: return AReg_1024RegClassID;
936  }
937}
938
939
940unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
941  using namespace AMDGPU;
942
943  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
944  switch (Width) {
945  default: // fall
946  case OPW32:
947  case OPW16:
948  case OPWV216:
949    return SGPR_32RegClassID;
950  case OPW64: return SGPR_64RegClassID;
951  case OPW128: return SGPR_128RegClassID;
952  case OPW256: return SGPR_256RegClassID;
953  case OPW512: return SGPR_512RegClassID;
954  }
955}
956
957unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
958  using namespace AMDGPU;
959
960  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
961  switch (Width) {
962  default: // fall
963  case OPW32:
964  case OPW16:
965  case OPWV216:
966    return TTMP_32RegClassID;
967  case OPW64: return TTMP_64RegClassID;
968  case OPW128: return TTMP_128RegClassID;
969  case OPW256: return TTMP_256RegClassID;
970  case OPW512: return TTMP_512RegClassID;
971  }
972}
973
974int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
975  using namespace AMDGPU::EncValues;
976
977  unsigned TTmpMin =
978      (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN;
979  unsigned TTmpMax =
980      (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX;
981
982  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
983}
984
985MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
986  using namespace AMDGPU::EncValues;
987
988  assert(Val < 1024); // enum10
989
990  bool IsAGPR = Val & 512;
991  Val &= 511;
992
993  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
994    return createRegOperand(IsAGPR ? getAgprClassId(Width)
995                                   : getVgprClassId(Width), Val - VGPR_MIN);
996  }
997  if (Val <= SGPR_MAX) {
998    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
999    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1000  }
1001
1002  int TTmpIdx = getTTmpIdx(Val);
1003  if (TTmpIdx >= 0) {
1004    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1005  }
1006
1007  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1008    return decodeIntImmed(Val);
1009
1010  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1011    return decodeFPImmed(Width, Val);
1012
1013  if (Val == LITERAL_CONST)
1014    return decodeLiteralConstant();
1015
1016  switch (Width) {
1017  case OPW32:
1018  case OPW16:
1019  case OPWV216:
1020    return decodeSpecialReg32(Val);
1021  case OPW64:
1022    return decodeSpecialReg64(Val);
1023  default:
1024    llvm_unreachable("unexpected immediate type");
1025  }
1026}
1027
1028MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
1029  using namespace AMDGPU::EncValues;
1030
1031  assert(Val < 128);
1032  assert(Width == OPW256 || Width == OPW512);
1033
1034  if (Val <= SGPR_MAX) {
1035    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
1036    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1037  }
1038
1039  int TTmpIdx = getTTmpIdx(Val);
1040  if (TTmpIdx >= 0) {
1041    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1042  }
1043
1044  llvm_unreachable("unknown dst register");
1045}
1046
1047MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1048  using namespace AMDGPU;
1049
1050  switch (Val) {
1051  case 102: return createRegOperand(FLAT_SCR_LO);
1052  case 103: return createRegOperand(FLAT_SCR_HI);
1053  case 104: return createRegOperand(XNACK_MASK_LO);
1054  case 105: return createRegOperand(XNACK_MASK_HI);
1055  case 106: return createRegOperand(VCC_LO);
1056  case 107: return createRegOperand(VCC_HI);
1057  case 108: return createRegOperand(TBA_LO);
1058  case 109: return createRegOperand(TBA_HI);
1059  case 110: return createRegOperand(TMA_LO);
1060  case 111: return createRegOperand(TMA_HI);
1061  case 124: return createRegOperand(M0);
1062  case 125: return createRegOperand(SGPR_NULL);
1063  case 126: return createRegOperand(EXEC_LO);
1064  case 127: return createRegOperand(EXEC_HI);
1065  case 235: return createRegOperand(SRC_SHARED_BASE);
1066  case 236: return createRegOperand(SRC_SHARED_LIMIT);
1067  case 237: return createRegOperand(SRC_PRIVATE_BASE);
1068  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1069  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1070  case 251: return createRegOperand(SRC_VCCZ);
1071  case 252: return createRegOperand(SRC_EXECZ);
1072  case 253: return createRegOperand(SRC_SCC);
1073  case 254: return createRegOperand(LDS_DIRECT);
1074  default: break;
1075  }
1076  return errOperand(Val, "unknown operand encoding " + Twine(Val));
1077}
1078
1079MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1080  using namespace AMDGPU;
1081
1082  switch (Val) {
1083  case 102: return createRegOperand(FLAT_SCR);
1084  case 104: return createRegOperand(XNACK_MASK);
1085  case 106: return createRegOperand(VCC);
1086  case 108: return createRegOperand(TBA);
1087  case 110: return createRegOperand(TMA);
1088  case 125: return createRegOperand(SGPR_NULL);
1089  case 126: return createRegOperand(EXEC);
1090  case 235: return createRegOperand(SRC_SHARED_BASE);
1091  case 236: return createRegOperand(SRC_SHARED_LIMIT);
1092  case 237: return createRegOperand(SRC_PRIVATE_BASE);
1093  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1094  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1095  case 251: return createRegOperand(SRC_VCCZ);
1096  case 252: return createRegOperand(SRC_EXECZ);
1097  case 253: return createRegOperand(SRC_SCC);
1098  default: break;
1099  }
1100  return errOperand(Val, "unknown operand encoding " + Twine(Val));
1101}
1102
1103MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
1104                                            const unsigned Val) const {
1105  using namespace AMDGPU::SDWA;
1106  using namespace AMDGPU::EncValues;
1107
1108  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
1109      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
1110    // XXX: cast to int is needed to avoid stupid warning:
1111    // compare with unsigned is always true
1112    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1113        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1114      return createRegOperand(getVgprClassId(Width),
1115                              Val - SDWA9EncValues::SRC_VGPR_MIN);
1116    }
1117    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1118        Val <= (isGFX10() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1119                          : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1120      return createSRegOperand(getSgprClassId(Width),
1121                               Val - SDWA9EncValues::SRC_SGPR_MIN);
1122    }
1123    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1124        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1125      return createSRegOperand(getTtmpClassId(Width),
1126                               Val - SDWA9EncValues::SRC_TTMP_MIN);
1127    }
1128
1129    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1130
1131    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1132      return decodeIntImmed(SVal);
1133
1134    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1135      return decodeFPImmed(Width, SVal);
1136
1137    return decodeSpecialReg32(SVal);
1138  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
1139    return createRegOperand(getVgprClassId(Width), Val);
1140  }
1141  llvm_unreachable("unsupported target");
1142}
1143
1144MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1145  return decodeSDWASrc(OPW16, Val);
1146}
1147
1148MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1149  return decodeSDWASrc(OPW32, Val);
1150}
1151
1152MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1153  using namespace AMDGPU::SDWA;
1154
1155  assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
1156          STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
1157         "SDWAVopcDst should be present only on GFX9+");
1158
1159  bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64];
1160
1161  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1162    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1163
1164    int TTmpIdx = getTTmpIdx(Val);
1165    if (TTmpIdx >= 0) {
1166      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
1167      return createSRegOperand(TTmpClsId, TTmpIdx);
1168    } else if (Val > SGPR_MAX) {
1169      return IsWave64 ? decodeSpecialReg64(Val)
1170                      : decodeSpecialReg32(Val);
1171    } else {
1172      return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1173    }
1174  } else {
1175    return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1176  }
1177}
1178
1179MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
1180  return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
1181    decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val);
1182}
1183
1184bool AMDGPUDisassembler::isVI() const {
1185  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
1186}
1187
1188bool AMDGPUDisassembler::isGFX9() const {
1189  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
1190}
1191
1192bool AMDGPUDisassembler::isGFX10() const {
1193  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
1194}
1195
1196//===----------------------------------------------------------------------===//
1197// AMDGPUSymbolizer
1198//===----------------------------------------------------------------------===//
1199
1200// Try to find symbol name for specified label
1201bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
1202                                raw_ostream &/*cStream*/, int64_t Value,
1203                                uint64_t /*Address*/, bool IsBranch,
1204                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {
1205  using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
1206  using SectionSymbolsTy = std::vector<SymbolInfoTy>;
1207
1208  if (!IsBranch) {
1209    return false;
1210  }
1211
1212  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
1213  if (!Symbols)
1214    return false;
1215
1216  auto Result = std::find_if(Symbols->begin(), Symbols->end(),
1217                             [Value](const SymbolInfoTy& Val) {
1218                                return std::get<0>(Val) == static_cast<uint64_t>(Value)
1219                                    && std::get<2>(Val) == ELF::STT_NOTYPE;
1220                             });
1221  if (Result != Symbols->end()) {
1222    auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
1223    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
1224    Inst.addOperand(MCOperand::createExpr(Add));
1225    return true;
1226  }
1227  return false;
1228}
1229
1230void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
1231                                                       int64_t Value,
1232                                                       uint64_t Address) {
1233  llvm_unreachable("unimplemented");
1234}
1235
1236//===----------------------------------------------------------------------===//
1237// Initialization
1238//===----------------------------------------------------------------------===//
1239
1240static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
1241                              LLVMOpInfoCallback /*GetOpInfo*/,
1242                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
1243                              void *DisInfo,
1244                              MCContext *Ctx,
1245                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
1246  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
1247}
1248
1249static MCDisassembler *createAMDGPUDisassembler(const Target &T,
1250                                                const MCSubtargetInfo &STI,
1251                                                MCContext &Ctx) {
1252  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
1253}
1254
1255extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
1256  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
1257                                         createAMDGPUDisassembler);
1258  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
1259                                       createAMDGPUSymbolizer);
1260}
1261