1327952Sdim//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===// 2303231Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6303231Sdim// 7303231Sdim//===----------------------------------------------------------------------===// 8303231Sdim// 9303231Sdim//===----------------------------------------------------------------------===// 10303231Sdim// 11303231Sdim/// \file 12303231Sdim/// 13303231Sdim/// This file contains definition for AMDGPU ISA disassembler 14303231Sdim// 15303231Sdim//===----------------------------------------------------------------------===// 16303231Sdim 17303231Sdim// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)? 18303231Sdim 19327952Sdim#include "Disassembler/AMDGPUDisassembler.h" 20303231Sdim#include "AMDGPU.h" 21303231Sdim#include "AMDGPURegisterInfo.h" 22341825Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 23303231Sdim#include "SIDefines.h" 24353358Sdim#include "TargetInfo/AMDGPUTargetInfo.h" 25303231Sdim#include "Utils/AMDGPUBaseInfo.h" 26327952Sdim#include "llvm-c/Disassembler.h" 27327952Sdim#include "llvm/ADT/APInt.h" 28327952Sdim#include "llvm/ADT/ArrayRef.h" 29327952Sdim#include "llvm/ADT/Twine.h" 30321369Sdim#include "llvm/BinaryFormat/ELF.h" 31353358Sdim#include "llvm/MC/MCAsmInfo.h" 32303231Sdim#include "llvm/MC/MCContext.h" 33327952Sdim#include "llvm/MC/MCDisassembler/MCDisassembler.h" 34327952Sdim#include "llvm/MC/MCExpr.h" 35303231Sdim#include "llvm/MC/MCFixedLenDisassembler.h" 36303231Sdim#include "llvm/MC/MCInst.h" 37303231Sdim#include "llvm/MC/MCSubtargetInfo.h" 38303231Sdim#include "llvm/Support/Endian.h" 39327952Sdim#include "llvm/Support/ErrorHandling.h" 40327952Sdim#include "llvm/Support/MathExtras.h" 41303231Sdim#include "llvm/Support/TargetRegistry.h" 42327952Sdim#include "llvm/Support/raw_ostream.h" 43327952Sdim#include <algorithm> 44327952Sdim#include <cassert> 45327952Sdim#include <cstddef> 46327952Sdim#include <cstdint> 47327952Sdim#include <iterator> 48327952Sdim#include <tuple> 49327952Sdim#include <vector> 50303231Sdim 51303231Sdimusing namespace llvm; 52303231Sdim 53303231Sdim#define DEBUG_TYPE "amdgpu-disassembler" 54303231Sdim 55353358Sdim#define SGPR_MAX (isGFX10() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \ 56353358Sdim : AMDGPU::EncValues::SGPR_MAX_SI) 57353358Sdim 58327952Sdimusing DecodeStatus = llvm::MCDisassembler::DecodeStatus; 59303231Sdim 60353358SdimAMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI, 61353358Sdim MCContext &Ctx, 62353358Sdim MCInstrInfo const *MCII) : 63353358Sdim MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()), 64353358Sdim TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) { 65353358Sdim 66353358Sdim // ToDo: AMDGPUDisassembler supports only VI ISA. 67353358Sdim if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10()) 68353358Sdim report_fatal_error("Disassembly not yet supported for subtarget"); 69353358Sdim} 70353358Sdim 71303231Sdiminline static MCDisassembler::DecodeStatus 72303231SdimaddOperand(MCInst &Inst, const MCOperand& Opnd) { 73303231Sdim Inst.addOperand(Opnd); 74303231Sdim return Opnd.isValid() ? 75303231Sdim MCDisassembler::Success : 76360784Sdim MCDisassembler::Fail; 77303231Sdim} 78303231Sdim 79321369Sdimstatic int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, 80321369Sdim uint16_t NameIdx) { 81321369Sdim int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx); 82321369Sdim if (OpIdx != -1) { 83321369Sdim auto I = MI.begin(); 84321369Sdim std::advance(I, OpIdx); 85321369Sdim MI.insert(I, Op); 86321369Sdim } 87321369Sdim return OpIdx; 88321369Sdim} 89321369Sdim 90314564Sdimstatic DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, 91314564Sdim uint64_t Addr, const void *Decoder) { 92314564Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 93314564Sdim 94353358Sdim // Our branches take a simm16, but we need two extra bits to account for the 95353358Sdim // factor of 4. 96314564Sdim APInt SignedOffset(18, Imm * 4, true); 97314564Sdim int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue(); 98314564Sdim 99314564Sdim if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2)) 100314564Sdim return MCDisassembler::Success; 101314564Sdim return addOperand(Inst, MCOperand::createImm(Imm)); 102314564Sdim} 103314564Sdim 104353358Sdimstatic DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, 105353358Sdim uint64_t Addr, const void *Decoder) { 106353358Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 107353358Sdim return addOperand(Inst, DAsm->decodeBoolReg(Val)); 108353358Sdim} 109353358Sdim 110321369Sdim#define DECODE_OPERAND(StaticDecoderName, DecoderName) \ 111321369Sdimstatic DecodeStatus StaticDecoderName(MCInst &Inst, \ 112321369Sdim unsigned Imm, \ 113321369Sdim uint64_t /*Addr*/, \ 114321369Sdim const void *Decoder) { \ 115303231Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \ 116321369Sdim return addOperand(Inst, DAsm->DecoderName(Imm)); \ 117303231Sdim} 118303231Sdim 119321369Sdim#define DECODE_OPERAND_REG(RegClass) \ 120321369SdimDECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) 121303231Sdim 122321369SdimDECODE_OPERAND_REG(VGPR_32) 123353358SdimDECODE_OPERAND_REG(VRegOrLds_32) 124321369SdimDECODE_OPERAND_REG(VS_32) 125321369SdimDECODE_OPERAND_REG(VS_64) 126321369SdimDECODE_OPERAND_REG(VS_128) 127303231Sdim 128321369SdimDECODE_OPERAND_REG(VReg_64) 129321369SdimDECODE_OPERAND_REG(VReg_96) 130321369SdimDECODE_OPERAND_REG(VReg_128) 131303231Sdim 132321369SdimDECODE_OPERAND_REG(SReg_32) 133321369SdimDECODE_OPERAND_REG(SReg_32_XM0_XEXEC) 134327952SdimDECODE_OPERAND_REG(SReg_32_XEXEC_HI) 135353358SdimDECODE_OPERAND_REG(SRegOrLds_32) 136321369SdimDECODE_OPERAND_REG(SReg_64) 137321369SdimDECODE_OPERAND_REG(SReg_64_XEXEC) 138321369SdimDECODE_OPERAND_REG(SReg_128) 139321369SdimDECODE_OPERAND_REG(SReg_256) 140321369SdimDECODE_OPERAND_REG(SReg_512) 141303231Sdim 142353358SdimDECODE_OPERAND_REG(AGPR_32) 143353358SdimDECODE_OPERAND_REG(AReg_128) 144353358SdimDECODE_OPERAND_REG(AReg_512) 145353358SdimDECODE_OPERAND_REG(AReg_1024) 146353358SdimDECODE_OPERAND_REG(AV_32) 147353358SdimDECODE_OPERAND_REG(AV_64) 148353358Sdim 149314564Sdimstatic DecodeStatus decodeOperand_VSrc16(MCInst &Inst, 150314564Sdim unsigned Imm, 151314564Sdim uint64_t Addr, 152314564Sdim const void *Decoder) { 153314564Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 154314564Sdim return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm)); 155314564Sdim} 156314564Sdim 157321369Sdimstatic DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, 158321369Sdim unsigned Imm, 159321369Sdim uint64_t Addr, 160321369Sdim const void *Decoder) { 161321369Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 162321369Sdim return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm)); 163321369Sdim} 164303231Sdim 165353358Sdimstatic DecodeStatus decodeOperand_VS_16(MCInst &Inst, 166353358Sdim unsigned Imm, 167353358Sdim uint64_t Addr, 168353358Sdim const void *Decoder) { 169353358Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 170353358Sdim return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm)); 171353358Sdim} 172353358Sdim 173353358Sdimstatic DecodeStatus decodeOperand_VS_32(MCInst &Inst, 174353358Sdim unsigned Imm, 175353358Sdim uint64_t Addr, 176353358Sdim const void *Decoder) { 177353358Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 178353358Sdim return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm)); 179353358Sdim} 180353358Sdim 181353358Sdimstatic DecodeStatus decodeOperand_AReg_128(MCInst &Inst, 182353358Sdim unsigned Imm, 183353358Sdim uint64_t Addr, 184353358Sdim const void *Decoder) { 185353358Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 186353358Sdim return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512)); 187353358Sdim} 188353358Sdim 189353358Sdimstatic DecodeStatus decodeOperand_AReg_512(MCInst &Inst, 190353358Sdim unsigned Imm, 191353358Sdim uint64_t Addr, 192353358Sdim const void *Decoder) { 193353358Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 194353358Sdim return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512)); 195353358Sdim} 196353358Sdim 197353358Sdimstatic DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, 198353358Sdim unsigned Imm, 199353358Sdim uint64_t Addr, 200353358Sdim const void *Decoder) { 201353358Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 202353358Sdim return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512)); 203353358Sdim} 204353358Sdim 205353358Sdimstatic DecodeStatus decodeOperand_SReg_32(MCInst &Inst, 206353358Sdim unsigned Imm, 207353358Sdim uint64_t Addr, 208353358Sdim const void *Decoder) { 209353358Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 210353358Sdim return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm)); 211353358Sdim} 212353358Sdim 213353358Sdimstatic DecodeStatus decodeOperand_VGPR_32(MCInst &Inst, 214353358Sdim unsigned Imm, 215353358Sdim uint64_t Addr, 216353358Sdim const void *Decoder) { 217353358Sdim auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); 218353358Sdim return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm)); 219353358Sdim} 220353358Sdim 221321369Sdim#define DECODE_SDWA(DecName) \ 222321369SdimDECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName) 223321369Sdim 224321369SdimDECODE_SDWA(Src32) 225321369SdimDECODE_SDWA(Src16) 226321369SdimDECODE_SDWA(VopcDst) 227321369Sdim 228303231Sdim#include "AMDGPUGenDisassemblerTables.inc" 229303231Sdim 230303231Sdim//===----------------------------------------------------------------------===// 231303231Sdim// 232303231Sdim//===----------------------------------------------------------------------===// 233303231Sdim 234303231Sdimtemplate <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) { 235303231Sdim assert(Bytes.size() >= sizeof(T)); 236303231Sdim const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data()); 237303231Sdim Bytes = Bytes.slice(sizeof(T)); 238303231Sdim return Res; 239303231Sdim} 240303231Sdim 241303231SdimDecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table, 242303231Sdim MCInst &MI, 243303231Sdim uint64_t Inst, 244303231Sdim uint64_t Address) const { 245303231Sdim assert(MI.getOpcode() == 0); 246303231Sdim assert(MI.getNumOperands() == 0); 247303231Sdim MCInst TmpInst; 248321369Sdim HasLiteral = false; 249303231Sdim const auto SavedBytes = Bytes; 250303231Sdim if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) { 251303231Sdim MI = TmpInst; 252303231Sdim return MCDisassembler::Success; 253303231Sdim } 254303231Sdim Bytes = SavedBytes; 255303231Sdim return MCDisassembler::Fail; 256303231Sdim} 257303231Sdim 258353358Sdimstatic bool isValidDPP8(const MCInst &MI) { 259353358Sdim using namespace llvm::AMDGPU::DPP; 260353358Sdim int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi); 261353358Sdim assert(FiIdx != -1); 262353358Sdim if ((unsigned)FiIdx >= MI.getNumOperands()) 263353358Sdim return false; 264353358Sdim unsigned Fi = MI.getOperand(FiIdx).getImm(); 265353358Sdim return Fi == DPP8_FI_0 || Fi == DPP8_FI_1; 266353358Sdim} 267353358Sdim 268303231SdimDecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, 269303231Sdim ArrayRef<uint8_t> Bytes_, 270303231Sdim uint64_t Address, 271303231Sdim raw_ostream &CS) const { 272303231Sdim CommentStream = &CS; 273321369Sdim bool IsSDWA = false; 274303231Sdim 275353358Sdim unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size()); 276303231Sdim Bytes = Bytes_.slice(0, MaxInstBytesNum); 277303231Sdim 278303231Sdim DecodeStatus Res = MCDisassembler::Fail; 279303231Sdim do { 280303231Sdim // ToDo: better to switch encoding length using some bit predicate 281303231Sdim // but it is unknown yet, so try all we can 282303231Sdim 283303231Sdim // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2 284303231Sdim // encodings 285303231Sdim if (Bytes.size() >= 8) { 286303231Sdim const uint64_t QW = eatBytes<uint64_t>(Bytes); 287353358Sdim 288353358Sdim Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address); 289353358Sdim if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) 290353358Sdim break; 291353358Sdim 292353358Sdim MI = MCInst(); // clear 293353358Sdim 294303231Sdim Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address); 295303231Sdim if (Res) break; 296303231Sdim 297303231Sdim Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address); 298321369Sdim if (Res) { IsSDWA = true; break; } 299321369Sdim 300321369Sdim Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address); 301321369Sdim if (Res) { IsSDWA = true; break; } 302341825Sdim 303353358Sdim Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address); 304353358Sdim if (Res) { IsSDWA = true; break; } 305353358Sdim 306341825Sdim if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) { 307341825Sdim Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address); 308341825Sdim if (Res) 309341825Sdim break; 310341825Sdim } 311341825Sdim 312341825Sdim // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and 313341825Sdim // v_mad_mixhi_f16 for FMA variants. Try to decode using this special 314341825Sdim // table first so we print the correct name. 315341825Sdim if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) { 316341825Sdim Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address); 317341825Sdim if (Res) 318341825Sdim break; 319341825Sdim } 320303231Sdim } 321303231Sdim 322303231Sdim // Reinitialize Bytes as DPP64 could have eaten too much 323303231Sdim Bytes = Bytes_.slice(0, MaxInstBytesNum); 324303231Sdim 325303231Sdim // Try decode 32-bit instruction 326303231Sdim if (Bytes.size() < 4) break; 327303231Sdim const uint32_t DW = eatBytes<uint32_t>(Bytes); 328353358Sdim Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address); 329303231Sdim if (Res) break; 330303231Sdim 331303231Sdim Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address); 332303231Sdim if (Res) break; 333303231Sdim 334327952Sdim Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address); 335327952Sdim if (Res) break; 336327952Sdim 337353358Sdim Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address); 338353358Sdim if (Res) break; 339353358Sdim 340303231Sdim if (Bytes.size() < 4) break; 341303231Sdim const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW; 342353358Sdim Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address); 343303231Sdim if (Res) break; 344303231Sdim 345303231Sdim Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address); 346327952Sdim if (Res) break; 347327952Sdim 348327952Sdim Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address); 349353358Sdim if (Res) break; 350353358Sdim 351353358Sdim Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address); 352303231Sdim } while (false); 353303231Sdim 354353358Sdim if (Res && (MaxInstBytesNum - Bytes.size()) == 12 && (!HasLiteral || 355353358Sdim !(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3))) { 356353358Sdim MaxInstBytesNum = 8; 357353358Sdim Bytes = Bytes_.slice(0, MaxInstBytesNum); 358353358Sdim eatBytes<uint64_t>(Bytes); 359353358Sdim } 360353358Sdim 361321369Sdim if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi || 362353358Sdim MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 363353358Sdim MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 || 364341825Sdim MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi || 365353358Sdim MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi || 366353358Sdim MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 || 367353358Sdim MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) { 368321369Sdim // Insert dummy unused src2_modifiers. 369321369Sdim insertNamedMCOperand(MI, MCOperand::createImm(0), 370321369Sdim AMDGPU::OpName::src2_modifiers); 371321369Sdim } 372321369Sdim 373327952Sdim if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) { 374353358Sdim int VAddr0Idx = 375353358Sdim AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); 376353358Sdim int RsrcIdx = 377353358Sdim AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); 378353358Sdim unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1; 379353358Sdim if (VAddr0Idx >= 0 && NSAArgs > 0) { 380353358Sdim unsigned NSAWords = (NSAArgs + 3) / 4; 381353358Sdim if (Bytes.size() < 4 * NSAWords) { 382353358Sdim Res = MCDisassembler::Fail; 383353358Sdim } else { 384353358Sdim for (unsigned i = 0; i < NSAArgs; ++i) { 385353358Sdim MI.insert(MI.begin() + VAddr0Idx + 1 + i, 386353358Sdim decodeOperand_VGPR_32(Bytes[i])); 387353358Sdim } 388353358Sdim Bytes = Bytes.slice(4 * NSAWords); 389353358Sdim } 390353358Sdim } 391353358Sdim 392353358Sdim if (Res) 393353358Sdim Res = convertMIMGInst(MI); 394327952Sdim } 395327952Sdim 396321369Sdim if (Res && IsSDWA) 397321369Sdim Res = convertSDWAInst(MI); 398321369Sdim 399353358Sdim int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 400353358Sdim AMDGPU::OpName::vdst_in); 401353358Sdim if (VDstIn_Idx != -1) { 402353358Sdim int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx, 403353358Sdim MCOI::OperandConstraint::TIED_TO); 404353358Sdim if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx || 405353358Sdim !MI.getOperand(VDstIn_Idx).isReg() || 406353358Sdim MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) { 407353358Sdim if (MI.getNumOperands() > (unsigned)VDstIn_Idx) 408353358Sdim MI.erase(&MI.getOperand(VDstIn_Idx)); 409353358Sdim insertNamedMCOperand(MI, 410353358Sdim MCOperand::createReg(MI.getOperand(Tied).getReg()), 411353358Sdim AMDGPU::OpName::vdst_in); 412353358Sdim } 413353358Sdim } 414353358Sdim 415341825Sdim // if the opcode was not recognized we'll assume a Size of 4 bytes 416341825Sdim // (unless there are fewer bytes left) 417341825Sdim Size = Res ? (MaxInstBytesNum - Bytes.size()) 418341825Sdim : std::min((size_t)4, Bytes_.size()); 419303231Sdim return Res; 420303231Sdim} 421303231Sdim 422321369SdimDecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { 423353358Sdim if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] || 424353358Sdim STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { 425321369Sdim if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1) 426321369Sdim // VOPC - insert clamp 427321369Sdim insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp); 428321369Sdim } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { 429321369Sdim int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst); 430321369Sdim if (SDst != -1) { 431321369Sdim // VOPC - insert VCC register as sdst 432327952Sdim insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC), 433321369Sdim AMDGPU::OpName::sdst); 434321369Sdim } else { 435321369Sdim // VOP1/2 - insert omod if present in instruction 436321369Sdim insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod); 437321369Sdim } 438321369Sdim } 439321369Sdim return MCDisassembler::Success; 440321369Sdim} 441321369Sdim 442353358SdimDecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { 443353358Sdim unsigned Opc = MI.getOpcode(); 444353358Sdim unsigned DescNumOps = MCII->get(Opc).getNumOperands(); 445353358Sdim 446353358Sdim // Insert dummy unused src modifiers. 447353358Sdim if (MI.getNumOperands() < DescNumOps && 448353358Sdim AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) 449353358Sdim insertNamedMCOperand(MI, MCOperand::createImm(0), 450353358Sdim AMDGPU::OpName::src0_modifiers); 451353358Sdim 452353358Sdim if (MI.getNumOperands() < DescNumOps && 453353358Sdim AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1) 454353358Sdim insertNamedMCOperand(MI, MCOperand::createImm(0), 455353358Sdim AMDGPU::OpName::src1_modifiers); 456353358Sdim 457353358Sdim return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail; 458353358Sdim} 459353358Sdim 460353358Sdim// Note that before gfx10, the MIMG encoding provided no information about 461353358Sdim// VADDR size. Consequently, decoded instructions always show address as if it 462353358Sdim// has 1 dword, which could be not really so. 463327952SdimDecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { 464341825Sdim 465341825Sdim int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 466341825Sdim AMDGPU::OpName::vdst); 467341825Sdim 468327952Sdim int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 469327952Sdim AMDGPU::OpName::vdata); 470353358Sdim int VAddr0Idx = 471353358Sdim AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); 472327952Sdim int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 473327952Sdim AMDGPU::OpName::dmask); 474341825Sdim 475341825Sdim int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 476341825Sdim AMDGPU::OpName::tfe); 477341825Sdim int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 478341825Sdim AMDGPU::OpName::d16); 479341825Sdim 480341825Sdim assert(VDataIdx != -1); 481341825Sdim assert(DMaskIdx != -1); 482341825Sdim assert(TFEIdx != -1); 483341825Sdim 484353358Sdim const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 485341825Sdim bool IsAtomic = (VDstIdx != -1); 486341825Sdim bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4; 487341825Sdim 488353358Sdim bool IsNSA = false; 489353358Sdim unsigned AddrSize = Info->VAddrDwords; 490353358Sdim 491353358Sdim if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { 492353358Sdim unsigned DimIdx = 493353358Sdim AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim); 494353358Sdim const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 495353358Sdim AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 496353358Sdim const AMDGPU::MIMGDimInfo *Dim = 497353358Sdim AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm()); 498353358Sdim 499353358Sdim AddrSize = BaseOpcode->NumExtraArgs + 500353358Sdim (BaseOpcode->Gradients ? Dim->NumGradients : 0) + 501353358Sdim (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + 502353358Sdim (BaseOpcode->LodOrClampOrMip ? 1 : 0); 503353358Sdim IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA; 504353358Sdim if (!IsNSA) { 505353358Sdim if (AddrSize > 8) 506353358Sdim AddrSize = 16; 507353358Sdim else if (AddrSize > 4) 508353358Sdim AddrSize = 8; 509353358Sdim } else { 510353358Sdim if (AddrSize > Info->VAddrDwords) { 511353358Sdim // The NSA encoding does not contain enough operands for the combination 512353358Sdim // of base opcode / dimension. Should this be an error? 513353358Sdim return MCDisassembler::Success; 514353358Sdim } 515353358Sdim } 516353358Sdim } 517353358Sdim 518327952Sdim unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf; 519353358Sdim unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u); 520327952Sdim 521341825Sdim bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm(); 522341825Sdim if (D16 && AMDGPU::hasPackedD16(STI)) { 523341825Sdim DstSize = (DstSize + 1) / 2; 524341825Sdim } 525341825Sdim 526341825Sdim // FIXME: Add tfe support 527341825Sdim if (MI.getOperand(TFEIdx).getImm()) 528341825Sdim return MCDisassembler::Success; 529341825Sdim 530353358Sdim if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords) 531353358Sdim return MCDisassembler::Success; 532341825Sdim 533353358Sdim int NewOpcode = 534353358Sdim AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize); 535353358Sdim if (NewOpcode == -1) 536353358Sdim return MCDisassembler::Success; 537353358Sdim 538353358Sdim // Widen the register to the correct number of enabled channels. 539353358Sdim unsigned NewVdata = AMDGPU::NoRegister; 540353358Sdim if (DstSize != Info->VDataDwords) { 541353358Sdim auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass; 542353358Sdim 543353358Sdim // Get first subregister of VData 544353358Sdim unsigned Vdata0 = MI.getOperand(VDataIdx).getReg(); 545353358Sdim unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0); 546353358Sdim Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0; 547353358Sdim 548353358Sdim NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, 549353358Sdim &MRI.getRegClass(DataRCID)); 550353358Sdim if (NewVdata == AMDGPU::NoRegister) { 551353358Sdim // It's possible to encode this such that the low register + enabled 552353358Sdim // components exceeds the register count. 553341825Sdim return MCDisassembler::Success; 554353358Sdim } 555341825Sdim } 556341825Sdim 557353358Sdim unsigned NewVAddr0 = AMDGPU::NoRegister; 558353358Sdim if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA && 559353358Sdim AddrSize != Info->VAddrDwords) { 560353358Sdim unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg(); 561353358Sdim unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0); 562353358Sdim VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0; 563327952Sdim 564353358Sdim auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass; 565353358Sdim NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0, 566353358Sdim &MRI.getRegClass(AddrRCID)); 567353358Sdim if (NewVAddr0 == AMDGPU::NoRegister) 568353358Sdim return MCDisassembler::Success; 569327952Sdim } 570327952Sdim 571327952Sdim MI.setOpcode(NewOpcode); 572341825Sdim 573353358Sdim if (NewVdata != AMDGPU::NoRegister) { 574353358Sdim MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata); 575353358Sdim 576353358Sdim if (IsAtomic) { 577353358Sdim // Atomic operations have an additional operand (a copy of data) 578353358Sdim MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata); 579353358Sdim } 580341825Sdim } 581341825Sdim 582353358Sdim if (NewVAddr0 != AMDGPU::NoRegister) { 583353358Sdim MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0); 584353358Sdim } else if (IsNSA) { 585353358Sdim assert(AddrSize <= Info->VAddrDwords); 586353358Sdim MI.erase(MI.begin() + VAddr0Idx + AddrSize, 587353358Sdim MI.begin() + VAddr0Idx + Info->VAddrDwords); 588353358Sdim } 589353358Sdim 590327952Sdim return MCDisassembler::Success; 591327952Sdim} 592327952Sdim 593303231Sdimconst char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const { 594303231Sdim return getContext().getRegisterInfo()-> 595303231Sdim getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]); 596303231Sdim} 597303231Sdim 598303231Sdiminline 599303231SdimMCOperand AMDGPUDisassembler::errOperand(unsigned V, 600303231Sdim const Twine& ErrMsg) const { 601303231Sdim *CommentStream << "Error: " + ErrMsg; 602303231Sdim 603303231Sdim // ToDo: add support for error operands to MCInst.h 604303231Sdim // return MCOperand::createError(V); 605303231Sdim return MCOperand(); 606303231Sdim} 607303231Sdim 608303231Sdiminline 609303231SdimMCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const { 610327952Sdim return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI)); 611303231Sdim} 612303231Sdim 613303231Sdiminline 614303231SdimMCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID, 615303231Sdim unsigned Val) const { 616303231Sdim const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID]; 617303231Sdim if (Val >= RegCl.getNumRegs()) 618303231Sdim return errOperand(Val, Twine(getRegClassName(RegClassID)) + 619303231Sdim ": unknown register " + Twine(Val)); 620303231Sdim return createRegOperand(RegCl.getRegister(Val)); 621303231Sdim} 622303231Sdim 623303231Sdiminline 624303231SdimMCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, 625303231Sdim unsigned Val) const { 626303231Sdim // ToDo: SI/CI have 104 SGPRs, VI - 102 627303231Sdim // Valery: here we accepting as much as we can, let assembler sort it out 628303231Sdim int shift = 0; 629303231Sdim switch (SRegClassID) { 630303231Sdim case AMDGPU::SGPR_32RegClassID: 631303231Sdim case AMDGPU::TTMP_32RegClassID: 632303231Sdim break; 633303231Sdim case AMDGPU::SGPR_64RegClassID: 634303231Sdim case AMDGPU::TTMP_64RegClassID: 635303231Sdim shift = 1; 636303231Sdim break; 637303231Sdim case AMDGPU::SGPR_128RegClassID: 638303231Sdim case AMDGPU::TTMP_128RegClassID: 639303231Sdim // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in 640303231Sdim // this bundle? 641327952Sdim case AMDGPU::SGPR_256RegClassID: 642327952Sdim case AMDGPU::TTMP_256RegClassID: 643327952Sdim // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in 644303231Sdim // this bundle? 645327952Sdim case AMDGPU::SGPR_512RegClassID: 646327952Sdim case AMDGPU::TTMP_512RegClassID: 647303231Sdim shift = 2; 648303231Sdim break; 649303231Sdim // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in 650303231Sdim // this bundle? 651303231Sdim default: 652314564Sdim llvm_unreachable("unhandled register class"); 653303231Sdim } 654314564Sdim 655314564Sdim if (Val % (1 << shift)) { 656303231Sdim *CommentStream << "Warning: " << getRegClassName(SRegClassID) 657303231Sdim << ": scalar reg isn't aligned " << Val; 658314564Sdim } 659314564Sdim 660303231Sdim return createRegOperand(SRegClassID, Val >> shift); 661303231Sdim} 662303231Sdim 663303231SdimMCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const { 664303231Sdim return decodeSrcOp(OPW32, Val); 665303231Sdim} 666303231Sdim 667303231SdimMCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const { 668303231Sdim return decodeSrcOp(OPW64, Val); 669303231Sdim} 670303231Sdim 671321369SdimMCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const { 672321369Sdim return decodeSrcOp(OPW128, Val); 673321369Sdim} 674321369Sdim 675314564SdimMCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const { 676314564Sdim return decodeSrcOp(OPW16, Val); 677314564Sdim} 678314564Sdim 679321369SdimMCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const { 680321369Sdim return decodeSrcOp(OPWV216, Val); 681321369Sdim} 682321369Sdim 683303231SdimMCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const { 684314564Sdim // Some instructions have operand restrictions beyond what the encoding 685314564Sdim // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra 686314564Sdim // high bit. 687314564Sdim Val &= 255; 688314564Sdim 689303231Sdim return createRegOperand(AMDGPU::VGPR_32RegClassID, Val); 690303231Sdim} 691303231Sdim 692353358SdimMCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const { 693353358Sdim return decodeSrcOp(OPW32, Val); 694353358Sdim} 695353358Sdim 696353358SdimMCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const { 697353358Sdim return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255); 698353358Sdim} 699353358Sdim 700353358SdimMCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const { 701353358Sdim return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255); 702353358Sdim} 703353358Sdim 704353358SdimMCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const { 705353358Sdim return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255); 706353358Sdim} 707353358Sdim 708353358SdimMCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const { 709353358Sdim return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255); 710353358Sdim} 711353358Sdim 712353358SdimMCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const { 713353358Sdim return decodeSrcOp(OPW32, Val); 714353358Sdim} 715353358Sdim 716353358SdimMCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const { 717353358Sdim return decodeSrcOp(OPW64, Val); 718353358Sdim} 719353358Sdim 720303231SdimMCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const { 721303231Sdim return createRegOperand(AMDGPU::VReg_64RegClassID, Val); 722303231Sdim} 723303231Sdim 724303231SdimMCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const { 725303231Sdim return createRegOperand(AMDGPU::VReg_96RegClassID, Val); 726303231Sdim} 727303231Sdim 728303231SdimMCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const { 729303231Sdim return createRegOperand(AMDGPU::VReg_128RegClassID, Val); 730303231Sdim} 731303231Sdim 732353358SdimMCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const { 733353358Sdim return createRegOperand(AMDGPU::VReg_256RegClassID, Val); 734353358Sdim} 735353358Sdim 736353358SdimMCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const { 737353358Sdim return createRegOperand(AMDGPU::VReg_512RegClassID, Val); 738353358Sdim} 739353358Sdim 740303231SdimMCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const { 741303231Sdim // table-gen generated disassembler doesn't care about operand types 742303231Sdim // leaving only registry class so SSrc_32 operand turns into SReg_32 743303231Sdim // and therefore we accept immediates and literals here as well 744303231Sdim return decodeSrcOp(OPW32, Val); 745303231Sdim} 746303231Sdim 747314564SdimMCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC( 748314564Sdim unsigned Val) const { 749314564Sdim // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI 750303231Sdim return decodeOperand_SReg_32(Val); 751303231Sdim} 752303231Sdim 753327952SdimMCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI( 754327952Sdim unsigned Val) const { 755327952Sdim // SReg_32_XM0 is SReg_32 without EXEC_HI 756327952Sdim return decodeOperand_SReg_32(Val); 757327952Sdim} 758327952Sdim 759353358SdimMCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const { 760353358Sdim // table-gen generated disassembler doesn't care about operand types 761353358Sdim // leaving only registry class so SSrc_32 operand turns into SReg_32 762353358Sdim // and therefore we accept immediates and literals here as well 763353358Sdim return decodeSrcOp(OPW32, Val); 764353358Sdim} 765353358Sdim 766303231SdimMCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const { 767303231Sdim return decodeSrcOp(OPW64, Val); 768303231Sdim} 769303231Sdim 770314564SdimMCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const { 771314564Sdim return decodeSrcOp(OPW64, Val); 772314564Sdim} 773314564Sdim 774303231SdimMCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const { 775303231Sdim return decodeSrcOp(OPW128, Val); 776303231Sdim} 777303231Sdim 778303231SdimMCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const { 779327952Sdim return decodeDstOp(OPW256, Val); 780303231Sdim} 781303231Sdim 782303231SdimMCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const { 783327952Sdim return decodeDstOp(OPW512, Val); 784303231Sdim} 785303231Sdim 786303231SdimMCOperand AMDGPUDisassembler::decodeLiteralConstant() const { 787303231Sdim // For now all literal constants are supposed to be unsigned integer 788303231Sdim // ToDo: deal with signed/unsigned 64-bit integer constants 789303231Sdim // ToDo: deal with float/double constants 790321369Sdim if (!HasLiteral) { 791321369Sdim if (Bytes.size() < 4) { 792321369Sdim return errOperand(0, "cannot read literal, inst bytes left " + 793321369Sdim Twine(Bytes.size())); 794321369Sdim } 795321369Sdim HasLiteral = true; 796321369Sdim Literal = eatBytes<uint32_t>(Bytes); 797321369Sdim } 798321369Sdim return MCOperand::createImm(Literal); 799303231Sdim} 800303231Sdim 801303231SdimMCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) { 802303231Sdim using namespace AMDGPU::EncValues; 803327952Sdim 804303231Sdim assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX); 805303231Sdim return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ? 806303231Sdim (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) : 807303231Sdim (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm))); 808303231Sdim // Cast prevents negative overflow. 809303231Sdim} 810303231Sdim 811314564Sdimstatic int64_t getInlineImmVal32(unsigned Imm) { 812314564Sdim switch (Imm) { 813314564Sdim case 240: 814314564Sdim return FloatToBits(0.5f); 815314564Sdim case 241: 816314564Sdim return FloatToBits(-0.5f); 817314564Sdim case 242: 818314564Sdim return FloatToBits(1.0f); 819314564Sdim case 243: 820314564Sdim return FloatToBits(-1.0f); 821314564Sdim case 244: 822314564Sdim return FloatToBits(2.0f); 823314564Sdim case 245: 824314564Sdim return FloatToBits(-2.0f); 825314564Sdim case 246: 826314564Sdim return FloatToBits(4.0f); 827314564Sdim case 247: 828314564Sdim return FloatToBits(-4.0f); 829314564Sdim case 248: // 1 / (2 * PI) 830314564Sdim return 0x3e22f983; 831314564Sdim default: 832314564Sdim llvm_unreachable("invalid fp inline imm"); 833314564Sdim } 834314564Sdim} 835314564Sdim 836314564Sdimstatic int64_t getInlineImmVal64(unsigned Imm) { 837314564Sdim switch (Imm) { 838314564Sdim case 240: 839314564Sdim return DoubleToBits(0.5); 840314564Sdim case 241: 841314564Sdim return DoubleToBits(-0.5); 842314564Sdim case 242: 843314564Sdim return DoubleToBits(1.0); 844314564Sdim case 243: 845314564Sdim return DoubleToBits(-1.0); 846314564Sdim case 244: 847314564Sdim return DoubleToBits(2.0); 848314564Sdim case 245: 849314564Sdim return DoubleToBits(-2.0); 850314564Sdim case 246: 851314564Sdim return DoubleToBits(4.0); 852314564Sdim case 247: 853314564Sdim return DoubleToBits(-4.0); 854314564Sdim case 248: // 1 / (2 * PI) 855314564Sdim return 0x3fc45f306dc9c882; 856314564Sdim default: 857314564Sdim llvm_unreachable("invalid fp inline imm"); 858314564Sdim } 859314564Sdim} 860314564Sdim 861314564Sdimstatic int64_t getInlineImmVal16(unsigned Imm) { 862314564Sdim switch (Imm) { 863314564Sdim case 240: 864314564Sdim return 0x3800; 865314564Sdim case 241: 866314564Sdim return 0xB800; 867314564Sdim case 242: 868314564Sdim return 0x3C00; 869314564Sdim case 243: 870314564Sdim return 0xBC00; 871314564Sdim case 244: 872314564Sdim return 0x4000; 873314564Sdim case 245: 874314564Sdim return 0xC000; 875314564Sdim case 246: 876314564Sdim return 0x4400; 877314564Sdim case 247: 878314564Sdim return 0xC400; 879314564Sdim case 248: // 1 / (2 * PI) 880314564Sdim return 0x3118; 881314564Sdim default: 882314564Sdim llvm_unreachable("invalid fp inline imm"); 883314564Sdim } 884314564Sdim} 885314564Sdim 886314564SdimMCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) { 887303231Sdim assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN 888303231Sdim && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX); 889314564Sdim 890303231Sdim // ToDo: case 248: 1/(2*PI) - is allowed only on VI 891314564Sdim switch (Width) { 892314564Sdim case OPW32: 893353358Sdim case OPW128: // splat constants 894353358Sdim case OPW512: 895353358Sdim case OPW1024: 896314564Sdim return MCOperand::createImm(getInlineImmVal32(Imm)); 897314564Sdim case OPW64: 898314564Sdim return MCOperand::createImm(getInlineImmVal64(Imm)); 899314564Sdim case OPW16: 900321369Sdim case OPWV216: 901314564Sdim return MCOperand::createImm(getInlineImmVal16(Imm)); 902314564Sdim default: 903314564Sdim llvm_unreachable("implement me"); 904303231Sdim } 905303231Sdim} 906303231Sdim 907303231Sdimunsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const { 908303231Sdim using namespace AMDGPU; 909327952Sdim 910303231Sdim assert(OPW_FIRST_ <= Width && Width < OPW_LAST_); 911303231Sdim switch (Width) { 912303231Sdim default: // fall 913314564Sdim case OPW32: 914314564Sdim case OPW16: 915321369Sdim case OPWV216: 916314564Sdim return VGPR_32RegClassID; 917303231Sdim case OPW64: return VReg_64RegClassID; 918303231Sdim case OPW128: return VReg_128RegClassID; 919303231Sdim } 920303231Sdim} 921303231Sdim 922353358Sdimunsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const { 923353358Sdim using namespace AMDGPU; 924353358Sdim 925353358Sdim assert(OPW_FIRST_ <= Width && Width < OPW_LAST_); 926353358Sdim switch (Width) { 927353358Sdim default: // fall 928353358Sdim case OPW32: 929353358Sdim case OPW16: 930353358Sdim case OPWV216: 931353358Sdim return AGPR_32RegClassID; 932353358Sdim case OPW64: return AReg_64RegClassID; 933353358Sdim case OPW128: return AReg_128RegClassID; 934353358Sdim case OPW512: return AReg_512RegClassID; 935353358Sdim case OPW1024: return AReg_1024RegClassID; 936353358Sdim } 937353358Sdim} 938353358Sdim 939353358Sdim 940303231Sdimunsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const { 941303231Sdim using namespace AMDGPU; 942327952Sdim 943303231Sdim assert(OPW_FIRST_ <= Width && Width < OPW_LAST_); 944303231Sdim switch (Width) { 945303231Sdim default: // fall 946314564Sdim case OPW32: 947314564Sdim case OPW16: 948321369Sdim case OPWV216: 949314564Sdim return SGPR_32RegClassID; 950303231Sdim case OPW64: return SGPR_64RegClassID; 951303231Sdim case OPW128: return SGPR_128RegClassID; 952327952Sdim case OPW256: return SGPR_256RegClassID; 953327952Sdim case OPW512: return SGPR_512RegClassID; 954303231Sdim } 955303231Sdim} 956303231Sdim 957303231Sdimunsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const { 958303231Sdim using namespace AMDGPU; 959327952Sdim 960303231Sdim assert(OPW_FIRST_ <= Width && Width < OPW_LAST_); 961303231Sdim switch (Width) { 962303231Sdim default: // fall 963314564Sdim case OPW32: 964314564Sdim case OPW16: 965321369Sdim case OPWV216: 966314564Sdim return TTMP_32RegClassID; 967303231Sdim case OPW64: return TTMP_64RegClassID; 968303231Sdim case OPW128: return TTMP_128RegClassID; 969327952Sdim case OPW256: return TTMP_256RegClassID; 970327952Sdim case OPW512: return TTMP_512RegClassID; 971303231Sdim } 972303231Sdim} 973303231Sdim 974327952Sdimint AMDGPUDisassembler::getTTmpIdx(unsigned Val) const { 975327952Sdim using namespace AMDGPU::EncValues; 976327952Sdim 977353358Sdim unsigned TTmpMin = 978353358Sdim (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN; 979353358Sdim unsigned TTmpMax = 980353358Sdim (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX; 981327952Sdim 982327952Sdim return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1; 983327952Sdim} 984327952Sdim 985303231SdimMCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const { 986303231Sdim using namespace AMDGPU::EncValues; 987327952Sdim 988353358Sdim assert(Val < 1024); // enum10 989303231Sdim 990353358Sdim bool IsAGPR = Val & 512; 991353358Sdim Val &= 511; 992353358Sdim 993303231Sdim if (VGPR_MIN <= Val && Val <= VGPR_MAX) { 994353358Sdim return createRegOperand(IsAGPR ? getAgprClassId(Width) 995353358Sdim : getVgprClassId(Width), Val - VGPR_MIN); 996303231Sdim } 997303231Sdim if (Val <= SGPR_MAX) { 998303231Sdim assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning. 999303231Sdim return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN); 1000303231Sdim } 1001327952Sdim 1002327952Sdim int TTmpIdx = getTTmpIdx(Val); 1003327952Sdim if (TTmpIdx >= 0) { 1004327952Sdim return createSRegOperand(getTtmpClassId(Width), TTmpIdx); 1005303231Sdim } 1006303231Sdim 1007303231Sdim if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) 1008303231Sdim return decodeIntImmed(Val); 1009303231Sdim 1010303231Sdim if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) 1011314564Sdim return decodeFPImmed(Width, Val); 1012303231Sdim 1013303231Sdim if (Val == LITERAL_CONST) 1014303231Sdim return decodeLiteralConstant(); 1015303231Sdim 1016314564Sdim switch (Width) { 1017314564Sdim case OPW32: 1018314564Sdim case OPW16: 1019321369Sdim case OPWV216: 1020314564Sdim return decodeSpecialReg32(Val); 1021314564Sdim case OPW64: 1022314564Sdim return decodeSpecialReg64(Val); 1023314564Sdim default: 1024314564Sdim llvm_unreachable("unexpected immediate type"); 1025314564Sdim } 1026303231Sdim} 1027303231Sdim 1028327952SdimMCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const { 1029327952Sdim using namespace AMDGPU::EncValues; 1030327952Sdim 1031327952Sdim assert(Val < 128); 1032327952Sdim assert(Width == OPW256 || Width == OPW512); 1033327952Sdim 1034327952Sdim if (Val <= SGPR_MAX) { 1035327952Sdim assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning. 1036327952Sdim return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN); 1037327952Sdim } 1038327952Sdim 1039327952Sdim int TTmpIdx = getTTmpIdx(Val); 1040327952Sdim if (TTmpIdx >= 0) { 1041327952Sdim return createSRegOperand(getTtmpClassId(Width), TTmpIdx); 1042327952Sdim } 1043327952Sdim 1044327952Sdim llvm_unreachable("unknown dst register"); 1045327952Sdim} 1046327952Sdim 1047303231SdimMCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const { 1048303231Sdim using namespace AMDGPU; 1049327952Sdim 1050303231Sdim switch (Val) { 1051327952Sdim case 102: return createRegOperand(FLAT_SCR_LO); 1052327952Sdim case 103: return createRegOperand(FLAT_SCR_HI); 1053341825Sdim case 104: return createRegOperand(XNACK_MASK_LO); 1054341825Sdim case 105: return createRegOperand(XNACK_MASK_HI); 1055303231Sdim case 106: return createRegOperand(VCC_LO); 1056303231Sdim case 107: return createRegOperand(VCC_HI); 1057353358Sdim case 108: return createRegOperand(TBA_LO); 1058353358Sdim case 109: return createRegOperand(TBA_HI); 1059353358Sdim case 110: return createRegOperand(TMA_LO); 1060353358Sdim case 111: return createRegOperand(TMA_HI); 1061303231Sdim case 124: return createRegOperand(M0); 1062353358Sdim case 125: return createRegOperand(SGPR_NULL); 1063303231Sdim case 126: return createRegOperand(EXEC_LO); 1064303231Sdim case 127: return createRegOperand(EXEC_HI); 1065321369Sdim case 235: return createRegOperand(SRC_SHARED_BASE); 1066321369Sdim case 236: return createRegOperand(SRC_SHARED_LIMIT); 1067321369Sdim case 237: return createRegOperand(SRC_PRIVATE_BASE); 1068321369Sdim case 238: return createRegOperand(SRC_PRIVATE_LIMIT); 1069353358Sdim case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID); 1070353358Sdim case 251: return createRegOperand(SRC_VCCZ); 1071353358Sdim case 252: return createRegOperand(SRC_EXECZ); 1072353358Sdim case 253: return createRegOperand(SRC_SCC); 1073353358Sdim case 254: return createRegOperand(LDS_DIRECT); 1074303231Sdim default: break; 1075303231Sdim } 1076303231Sdim return errOperand(Val, "unknown operand encoding " + Twine(Val)); 1077303231Sdim} 1078303231Sdim 1079303231SdimMCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { 1080303231Sdim using namespace AMDGPU; 1081327952Sdim 1082303231Sdim switch (Val) { 1083327952Sdim case 102: return createRegOperand(FLAT_SCR); 1084341825Sdim case 104: return createRegOperand(XNACK_MASK); 1085303231Sdim case 106: return createRegOperand(VCC); 1086353358Sdim case 108: return createRegOperand(TBA); 1087353358Sdim case 110: return createRegOperand(TMA); 1088360784Sdim case 125: return createRegOperand(SGPR_NULL); 1089303231Sdim case 126: return createRegOperand(EXEC); 1090353358Sdim case 235: return createRegOperand(SRC_SHARED_BASE); 1091353358Sdim case 236: return createRegOperand(SRC_SHARED_LIMIT); 1092353358Sdim case 237: return createRegOperand(SRC_PRIVATE_BASE); 1093353358Sdim case 238: return createRegOperand(SRC_PRIVATE_LIMIT); 1094353358Sdim case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID); 1095353358Sdim case 251: return createRegOperand(SRC_VCCZ); 1096353358Sdim case 252: return createRegOperand(SRC_EXECZ); 1097353358Sdim case 253: return createRegOperand(SRC_SCC); 1098303231Sdim default: break; 1099303231Sdim } 1100303231Sdim return errOperand(Val, "unknown operand encoding " + Twine(Val)); 1101303231Sdim} 1102303231Sdim 1103321369SdimMCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, 1104341825Sdim const unsigned Val) const { 1105321369Sdim using namespace AMDGPU::SDWA; 1106341825Sdim using namespace AMDGPU::EncValues; 1107321369Sdim 1108353358Sdim if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] || 1109353358Sdim STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { 1110353358Sdim // XXX: cast to int is needed to avoid stupid warning: 1111321369Sdim // compare with unsigned is always true 1112353358Sdim if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) && 1113321369Sdim Val <= SDWA9EncValues::SRC_VGPR_MAX) { 1114321369Sdim return createRegOperand(getVgprClassId(Width), 1115321369Sdim Val - SDWA9EncValues::SRC_VGPR_MIN); 1116321369Sdim } 1117321369Sdim if (SDWA9EncValues::SRC_SGPR_MIN <= Val && 1118353358Sdim Val <= (isGFX10() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10 1119353358Sdim : SDWA9EncValues::SRC_SGPR_MAX_SI)) { 1120321369Sdim return createSRegOperand(getSgprClassId(Width), 1121321369Sdim Val - SDWA9EncValues::SRC_SGPR_MIN); 1122321369Sdim } 1123327952Sdim if (SDWA9EncValues::SRC_TTMP_MIN <= Val && 1124327952Sdim Val <= SDWA9EncValues::SRC_TTMP_MAX) { 1125327952Sdim return createSRegOperand(getTtmpClassId(Width), 1126327952Sdim Val - SDWA9EncValues::SRC_TTMP_MIN); 1127327952Sdim } 1128321369Sdim 1129341825Sdim const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN; 1130341825Sdim 1131341825Sdim if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) 1132341825Sdim return decodeIntImmed(SVal); 1133341825Sdim 1134341825Sdim if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX) 1135341825Sdim return decodeFPImmed(Width, SVal); 1136341825Sdim 1137341825Sdim return decodeSpecialReg32(SVal); 1138321369Sdim } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { 1139321369Sdim return createRegOperand(getVgprClassId(Width), Val); 1140321369Sdim } 1141321369Sdim llvm_unreachable("unsupported target"); 1142321369Sdim} 1143321369Sdim 1144321369SdimMCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const { 1145321369Sdim return decodeSDWASrc(OPW16, Val); 1146321369Sdim} 1147321369Sdim 1148321369SdimMCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const { 1149321369Sdim return decodeSDWASrc(OPW32, Val); 1150321369Sdim} 1151321369Sdim 1152321369SdimMCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { 1153321369Sdim using namespace AMDGPU::SDWA; 1154321369Sdim 1155353358Sdim assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] || 1156353358Sdim STI.getFeatureBits()[AMDGPU::FeatureGFX10]) && 1157353358Sdim "SDWAVopcDst should be present only on GFX9+"); 1158353358Sdim 1159353358Sdim bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64]; 1160353358Sdim 1161321369Sdim if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { 1162321369Sdim Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK; 1163327952Sdim 1164327952Sdim int TTmpIdx = getTTmpIdx(Val); 1165327952Sdim if (TTmpIdx >= 0) { 1166360784Sdim auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32); 1167360784Sdim return createSRegOperand(TTmpClsId, TTmpIdx); 1168353358Sdim } else if (Val > SGPR_MAX) { 1169353358Sdim return IsWave64 ? decodeSpecialReg64(Val) 1170353358Sdim : decodeSpecialReg32(Val); 1171321369Sdim } else { 1172353358Sdim return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val); 1173321369Sdim } 1174321369Sdim } else { 1175353358Sdim return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO); 1176321369Sdim } 1177321369Sdim} 1178321369Sdim 1179353358SdimMCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const { 1180353358Sdim return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ? 1181353358Sdim decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val); 1182353358Sdim} 1183353358Sdim 1184327952Sdimbool AMDGPUDisassembler::isVI() const { 1185327952Sdim return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; 1186327952Sdim} 1187327952Sdim 1188327952Sdimbool AMDGPUDisassembler::isGFX9() const { 1189327952Sdim return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; 1190327952Sdim} 1191327952Sdim 1192353358Sdimbool AMDGPUDisassembler::isGFX10() const { 1193353358Sdim return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; 1194353358Sdim} 1195353358Sdim 1196314564Sdim//===----------------------------------------------------------------------===// 1197314564Sdim// AMDGPUSymbolizer 1198314564Sdim//===----------------------------------------------------------------------===// 1199314564Sdim 1200314564Sdim// Try to find symbol name for specified label 1201314564Sdimbool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst, 1202314564Sdim raw_ostream &/*cStream*/, int64_t Value, 1203314564Sdim uint64_t /*Address*/, bool IsBranch, 1204314564Sdim uint64_t /*Offset*/, uint64_t /*InstSize*/) { 1205327952Sdim using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>; 1206327952Sdim using SectionSymbolsTy = std::vector<SymbolInfoTy>; 1207314564Sdim 1208314564Sdim if (!IsBranch) { 1209314564Sdim return false; 1210314564Sdim } 1211314564Sdim 1212314564Sdim auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo); 1213341825Sdim if (!Symbols) 1214341825Sdim return false; 1215341825Sdim 1216314564Sdim auto Result = std::find_if(Symbols->begin(), Symbols->end(), 1217314564Sdim [Value](const SymbolInfoTy& Val) { 1218314564Sdim return std::get<0>(Val) == static_cast<uint64_t>(Value) 1219314564Sdim && std::get<2>(Val) == ELF::STT_NOTYPE; 1220314564Sdim }); 1221314564Sdim if (Result != Symbols->end()) { 1222314564Sdim auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result)); 1223314564Sdim const auto *Add = MCSymbolRefExpr::create(Sym, Ctx); 1224314564Sdim Inst.addOperand(MCOperand::createExpr(Add)); 1225314564Sdim return true; 1226314564Sdim } 1227314564Sdim return false; 1228314564Sdim} 1229314564Sdim 1230314564Sdimvoid AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream, 1231314564Sdim int64_t Value, 1232314564Sdim uint64_t Address) { 1233314564Sdim llvm_unreachable("unimplemented"); 1234314564Sdim} 1235314564Sdim 1236314564Sdim//===----------------------------------------------------------------------===// 1237314564Sdim// Initialization 1238314564Sdim//===----------------------------------------------------------------------===// 1239314564Sdim 1240314564Sdimstatic MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/, 1241314564Sdim LLVMOpInfoCallback /*GetOpInfo*/, 1242314564Sdim LLVMSymbolLookupCallback /*SymbolLookUp*/, 1243314564Sdim void *DisInfo, 1244314564Sdim MCContext *Ctx, 1245314564Sdim std::unique_ptr<MCRelocationInfo> &&RelInfo) { 1246314564Sdim return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo); 1247314564Sdim} 1248314564Sdim 1249303231Sdimstatic MCDisassembler *createAMDGPUDisassembler(const Target &T, 1250303231Sdim const MCSubtargetInfo &STI, 1251303231Sdim MCContext &Ctx) { 1252327952Sdim return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo()); 1253303231Sdim} 1254303231Sdim 1255360784Sdimextern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() { 1256314564Sdim TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(), 1257314564Sdim createAMDGPUDisassembler); 1258314564Sdim TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(), 1259314564Sdim createAMDGPUSymbolizer); 1260303231Sdim} 1261