1//===- Disassembler.cpp - Disassembler for hex strings --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This class implements the disassembler of strings of bytes written in
10// hexadecimal, from standard input or from a file.
11//
12//===----------------------------------------------------------------------===//
13
14#include "Disassembler.h"
15#include "llvm/ADT/Triple.h"
16#include "llvm/MC/MCAsmInfo.h"
17#include "llvm/MC/MCContext.h"
18#include "llvm/MC/MCDisassembler/MCDisassembler.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/MC/MCObjectFileInfo.h"
21#include "llvm/MC/MCRegisterInfo.h"
22#include "llvm/MC/MCStreamer.h"
23#include "llvm/MC/MCSubtargetInfo.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/SourceMgr.h"
26#include "llvm/Support/TargetRegistry.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
// Parsed input bytes paired with their origins: `first` holds the byte values
// and `second` holds, at the same index, a pointer to the input text each byte
// was parsed from (used to attach diagnostics to the right source location).
using ByteArrayTy =
    std::pair<std::vector<unsigned char>, std::vector<const char *>>;
33
34static bool PrintInsts(const MCDisassembler &DisAsm,
35                       const ByteArrayTy &Bytes,
36                       SourceMgr &SM, raw_ostream &Out,
37                       MCStreamer &Streamer, bool InAtomicBlock,
38                       const MCSubtargetInfo &STI) {
39  ArrayRef<uint8_t> Data(Bytes.first.data(), Bytes.first.size());
40
41  // Disassemble it to strings.
42  uint64_t Size;
43  uint64_t Index;
44
45  for (Index = 0; Index < Bytes.first.size(); Index += Size) {
46    MCInst Inst;
47
48    MCDisassembler::DecodeStatus S;
49    S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
50    switch (S) {
51    case MCDisassembler::Fail:
52      SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
53                      SourceMgr::DK_Warning,
54                      "invalid instruction encoding");
55      // Don't try to resynchronise the stream in a block
56      if (InAtomicBlock)
57        return true;
58
59      if (Size == 0)
60        Size = 1; // skip illegible bytes
61
62      break;
63
64    case MCDisassembler::SoftFail:
65      SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
66                      SourceMgr::DK_Warning,
67                      "potentially undefined instruction encoding");
68      LLVM_FALLTHROUGH;
69
70    case MCDisassembler::Success:
71      Streamer.emitInstruction(Inst, STI);
72      break;
73    }
74  }
75
76  return false;
77}
78
79static bool SkipToToken(StringRef &Str) {
80  for (;;) {
81    if (Str.empty())
82      return false;
83
84    // Strip horizontal whitespace and commas.
85    if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) {
86      Str = Str.substr(Pos);
87      continue;
88    }
89
90    // If this is the start of a comment, remove the rest of the line.
91    if (Str[0] == '#') {
92        Str = Str.substr(Str.find_first_of('\n'));
93      continue;
94    }
95    return true;
96  }
97}
98
99
100static bool ByteArrayFromString(ByteArrayTy &ByteArray,
101                                StringRef &Str,
102                                SourceMgr &SM) {
103  while (SkipToToken(Str)) {
104    // Handled by higher level
105    if (Str[0] == '[' || Str[0] == ']')
106      return false;
107
108    // Get the current token.
109    size_t Next = Str.find_first_of(" \t\n\r,#[]");
110    StringRef Value = Str.substr(0, Next);
111
112    // Convert to a byte and add to the byte vector.
113    unsigned ByteVal;
114    if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
115      // If we have an error, print it and skip to the end of line.
116      SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
117                      "invalid input token");
118      Str = Str.substr(Str.find('\n'));
119      ByteArray.first.clear();
120      ByteArray.second.clear();
121      continue;
122    }
123
124    ByteArray.first.push_back(ByteVal);
125    ByteArray.second.push_back(Value.data());
126    Str = Str.substr(Next);
127  }
128
129  return false;
130}
131
132int Disassembler::disassemble(const Target &T, const std::string &Triple,
133                              MCSubtargetInfo &STI, MCStreamer &Streamer,
134                              MemoryBuffer &Buffer, SourceMgr &SM,
135                              MCContext &Ctx, raw_ostream &Out,
136                              const MCTargetOptions &MCOptions) {
137
138  std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
139  if (!MRI) {
140    errs() << "error: no register info for target " << Triple << "\n";
141    return -1;
142  }
143
144  std::unique_ptr<const MCAsmInfo> MAI(
145      T.createMCAsmInfo(*MRI, Triple, MCOptions));
146  if (!MAI) {
147    errs() << "error: no assembly info for target " << Triple << "\n";
148    return -1;
149  }
150
151  std::unique_ptr<const MCDisassembler> DisAsm(
152    T.createMCDisassembler(STI, Ctx));
153  if (!DisAsm) {
154    errs() << "error: no disassembler for target " << Triple << "\n";
155    return -1;
156  }
157
158  // Set up initial section manually here
159  Streamer.InitSections(false);
160
161  bool ErrorOccurred = false;
162
163  // Convert the input to a vector for disassembly.
164  ByteArrayTy ByteArray;
165  StringRef Str = Buffer.getBuffer();
166  bool InAtomicBlock = false;
167
168  while (SkipToToken(Str)) {
169    ByteArray.first.clear();
170    ByteArray.second.clear();
171
172    if (Str[0] == '[') {
173      if (InAtomicBlock) {
174        SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
175                        "nested atomic blocks make no sense");
176        ErrorOccurred = true;
177      }
178      InAtomicBlock = true;
179      Str = Str.drop_front();
180      continue;
181    } else if (Str[0] == ']') {
182      if (!InAtomicBlock) {
183        SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
184                        "attempt to close atomic block without opening");
185        ErrorOccurred = true;
186      }
187      InAtomicBlock = false;
188      Str = Str.drop_front();
189      continue;
190    }
191
192    // It's a real token, get the bytes and emit them
193    ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
194
195    if (!ByteArray.first.empty())
196      ErrorOccurred |= PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer,
197                                  InAtomicBlock, STI);
198  }
199
200  if (InAtomicBlock) {
201    SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
202                    "unclosed atomic block");
203    ErrorOccurred = true;
204  }
205
206  return ErrorOccurred;
207}
208