1//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file is part of the WebAssembly Disassembler.
11///
12/// It contains code to translate the data produced by the decoder into
13/// MCInsts.
14///
15//===----------------------------------------------------------------------===//
16
17#include "MCTargetDesc/WebAssemblyInstPrinter.h"
18#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
19#include "TargetInfo/WebAssemblyTargetInfo.h"
20#include "llvm/MC/MCContext.h"
21#include "llvm/MC/MCDisassembler/MCDisassembler.h"
22#include "llvm/MC/MCFixedLenDisassembler.h"
23#include "llvm/MC/MCInst.h"
24#include "llvm/MC/MCInstrInfo.h"
25#include "llvm/MC/MCSubtargetInfo.h"
26#include "llvm/MC/MCSymbol.h"
27#include "llvm/MC/MCSymbolWasm.h"
28#include "llvm/Support/Endian.h"
29#include "llvm/Support/LEB128.h"
30#include "llvm/Support/TargetRegistry.h"
31
32using namespace llvm;
33
34#define DEBUG_TYPE "wasm-disassembler"
35
36using DecodeStatus = MCDisassembler::DecodeStatus;
37
38#include "WebAssemblyGenDisassemblerTables.inc"
39
40namespace {
41static constexpr int WebAssemblyInstructionTableSize = 256;
42
43class WebAssemblyDisassembler final : public MCDisassembler {
44  std::unique_ptr<const MCInstrInfo> MCII;
45
46  DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
47                              ArrayRef<uint8_t> Bytes, uint64_t Address,
48                              raw_ostream &CStream) const override;
49  Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
50                                       ArrayRef<uint8_t> Bytes,
51                                       uint64_t Address,
52                                       raw_ostream &CStream) const override;
53
54public:
55  WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
56                          std::unique_ptr<const MCInstrInfo> MCII)
57      : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
58};
59} // end anonymous namespace
60
61static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
62                                                     const MCSubtargetInfo &STI,
63                                                     MCContext &Ctx) {
64  std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
65  return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
66}
67
68extern "C" LLVM_EXTERNAL_VISIBILITY void
69LLVMInitializeWebAssemblyDisassembler() {
70  // Register the disassembler for each target.
71  TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
72                                         createWebAssemblyDisassembler);
73  TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
74                                         createWebAssemblyDisassembler);
75}
76
77static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
78  if (Size >= Bytes.size())
79    return -1;
80  auto V = Bytes[Size];
81  Size++;
82  return V;
83}
84
85static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
86                    bool Signed) {
87  unsigned N = 0;
88  const char *Error = nullptr;
89  Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
90                               Bytes.data() + Bytes.size(), &Error)
91               : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
92                                                    Bytes.data() + Bytes.size(),
93                                                    &Error));
94  if (Error)
95    return false;
96  Size += N;
97  return true;
98}
99
100static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
101                              ArrayRef<uint8_t> Bytes, bool Signed) {
102  int64_t Val;
103  if (!nextLEB(Val, Bytes, Size, Signed))
104    return false;
105  MI.addOperand(MCOperand::createImm(Val));
106  return true;
107}
108
109template <typename T>
110bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
111  if (Size + sizeof(T) > Bytes.size())
112    return false;
113  T Val = support::endian::read<T, support::endianness::little, 1>(
114      Bytes.data() + Size);
115  Size += sizeof(T);
116  if (std::is_floating_point<T>::value) {
117    MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
118  } else {
119    MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
120  }
121  return true;
122}
123
124Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
125    SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
126    uint64_t Address, raw_ostream &CStream) const {
127  Size = 0;
128  if (Address == 0) {
129    // Start of a code section: we're parsing only the function count.
130    int64_t FunctionCount;
131    if (!nextLEB(FunctionCount, Bytes, Size, false))
132      return None;
133    outs() << "        # " << FunctionCount << " functions in section.";
134  } else {
135    // Parse the start of a single function.
136    int64_t BodySize, LocalEntryCount;
137    if (!nextLEB(BodySize, Bytes, Size, false) ||
138        !nextLEB(LocalEntryCount, Bytes, Size, false))
139      return None;
140    if (LocalEntryCount) {
141      outs() << "        .local ";
142      for (int64_t I = 0; I < LocalEntryCount; I++) {
143        int64_t Count, Type;
144        if (!nextLEB(Count, Bytes, Size, false) ||
145            !nextLEB(Type, Bytes, Size, false))
146          return None;
147        for (int64_t J = 0; J < Count; J++) {
148          if (I || J)
149            outs() << ", ";
150          outs() << WebAssembly::anyTypeToString(Type);
151        }
152      }
153    }
154  }
155  outs() << "\n";
156  return MCDisassembler::Success;
157}
158
159MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
160    MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
161    raw_ostream &CS) const {
162  CommentStream = &CS;
163  Size = 0;
164  int Opc = nextByte(Bytes, Size);
165  if (Opc < 0)
166    return MCDisassembler::Fail;
167  const auto *WasmInst = &InstructionTable0[Opc];
168  // If this is a prefix byte, indirect to another table.
169  if (WasmInst->ET == ET_Prefix) {
170    WasmInst = nullptr;
171    // Linear search, so far only 2 entries.
172    for (auto PT = PrefixTable; PT->Table; PT++) {
173      if (PT->Prefix == Opc) {
174        WasmInst = PT->Table;
175        break;
176      }
177    }
178    if (!WasmInst)
179      return MCDisassembler::Fail;
180    int64_t PrefixedOpc;
181    if (!nextLEB(PrefixedOpc, Bytes, Size, false))
182      return MCDisassembler::Fail;
183    if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
184      return MCDisassembler::Fail;
185    WasmInst += PrefixedOpc;
186  }
187  if (WasmInst->ET == ET_Unused)
188    return MCDisassembler::Fail;
189  // At this point we must have a valid instruction to decode.
190  assert(WasmInst->ET == ET_Instruction);
191  MI.setOpcode(WasmInst->Opcode);
192  // Parse any operands.
193  for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
194    auto OT = OperandTable[WasmInst->OperandStart + OPI];
195    switch (OT) {
196    // ULEB operands:
197    case WebAssembly::OPERAND_BASIC_BLOCK:
198    case WebAssembly::OPERAND_LOCAL:
199    case WebAssembly::OPERAND_GLOBAL:
200    case WebAssembly::OPERAND_FUNCTION32:
201    case WebAssembly::OPERAND_OFFSET32:
202    case WebAssembly::OPERAND_OFFSET64:
203    case WebAssembly::OPERAND_P2ALIGN:
204    case WebAssembly::OPERAND_TYPEINDEX:
205    case WebAssembly::OPERAND_EVENT:
206    case MCOI::OPERAND_IMMEDIATE: {
207      if (!parseLEBImmediate(MI, Size, Bytes, false))
208        return MCDisassembler::Fail;
209      break;
210    }
211    // SLEB operands:
212    case WebAssembly::OPERAND_I32IMM:
213    case WebAssembly::OPERAND_I64IMM: {
214      if (!parseLEBImmediate(MI, Size, Bytes, true))
215        return MCDisassembler::Fail;
216      break;
217    }
218    // block_type operands:
219    case WebAssembly::OPERAND_SIGNATURE: {
220      int64_t Val;
221      uint64_t PrevSize = Size;
222      if (!nextLEB(Val, Bytes, Size, true))
223        return MCDisassembler::Fail;
224      if (Val < 0) {
225        // Negative values are single septet value types or empty types
226        if (Size != PrevSize + 1) {
227          MI.addOperand(
228              MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
229        } else {
230          MI.addOperand(MCOperand::createImm(Val & 0x7f));
231        }
232      } else {
233        // We don't have access to the signature, so create a symbol without one
234        MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
235        auto *WasmSym = cast<MCSymbolWasm>(Sym);
236        WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
237        const MCExpr *Expr = MCSymbolRefExpr::create(
238            WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
239        MI.addOperand(MCOperand::createExpr(Expr));
240      }
241      break;
242    }
243    // FP operands.
244    case WebAssembly::OPERAND_F32IMM: {
245      if (!parseImmediate<float>(MI, Size, Bytes))
246        return MCDisassembler::Fail;
247      break;
248    }
249    case WebAssembly::OPERAND_F64IMM: {
250      if (!parseImmediate<double>(MI, Size, Bytes))
251        return MCDisassembler::Fail;
252      break;
253    }
254    // Vector lane operands (not LEB encoded).
255    case WebAssembly::OPERAND_VEC_I8IMM: {
256      if (!parseImmediate<uint8_t>(MI, Size, Bytes))
257        return MCDisassembler::Fail;
258      break;
259    }
260    case WebAssembly::OPERAND_VEC_I16IMM: {
261      if (!parseImmediate<uint16_t>(MI, Size, Bytes))
262        return MCDisassembler::Fail;
263      break;
264    }
265    case WebAssembly::OPERAND_VEC_I32IMM: {
266      if (!parseImmediate<uint32_t>(MI, Size, Bytes))
267        return MCDisassembler::Fail;
268      break;
269    }
270    case WebAssembly::OPERAND_VEC_I64IMM: {
271      if (!parseImmediate<uint64_t>(MI, Size, Bytes))
272        return MCDisassembler::Fail;
273      break;
274    }
275    case WebAssembly::OPERAND_BRLIST: {
276      int64_t TargetTableLen;
277      if (!nextLEB(TargetTableLen, Bytes, Size, false))
278        return MCDisassembler::Fail;
279      for (int64_t I = 0; I < TargetTableLen; I++) {
280        if (!parseLEBImmediate(MI, Size, Bytes, false))
281          return MCDisassembler::Fail;
282      }
283      // Default case.
284      if (!parseLEBImmediate(MI, Size, Bytes, false))
285        return MCDisassembler::Fail;
286      break;
287    }
288    case MCOI::OPERAND_REGISTER:
289      // The tablegen header currently does not have any register operands since
290      // we use only the stack (_S) instructions.
291      // If you hit this that probably means a bad instruction definition in
292      // tablegen.
293      llvm_unreachable("Register operand in WebAssemblyDisassembler");
294    default:
295      llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
296    }
297  }
298  return MCDisassembler::Success;
299}
300