1//===- Disassembler.cpp - Disassembler for hex strings --------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This class implements the disassembler of strings of bytes written in
11// hexadecimal, from standard input or from a file.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Disassembler.h"
16#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
17#include "../../lib/MC/MCDisassembler/EDInst.h"
18#include "../../lib/MC/MCDisassembler/EDOperand.h"
19#include "../../lib/MC/MCDisassembler/EDToken.h"
20#include "llvm/MC/MCDisassembler.h"
21#include "llvm/MC/MCInst.h"
22#include "llvm/MC/MCStreamer.h"
23#include "llvm/MC/MCSubtargetInfo.h"
24#include "llvm/ADT/OwningPtr.h"
25#include "llvm/ADT/Triple.h"
26#include "llvm/Support/MemoryBuffer.h"
27#include "llvm/Support/MemoryObject.h"
28#include "llvm/Support/SourceMgr.h"
29#include "llvm/Support/TargetRegistry.h"
30#include "llvm/Support/raw_ostream.h"
31
32using namespace llvm;
33
// One entry per parsed input byte: the byte value, paired with a pointer to
// the start of the input token it was parsed from.  The pointer is used to
// build source locations for diagnostics about that byte.
typedef std::vector<std::pair<unsigned char, const char*> > ByteArrayTy;
35
36namespace {
37class VectorMemoryObject : public MemoryObject {
38private:
39  const ByteArrayTy &Bytes;
40public:
41  VectorMemoryObject(const ByteArrayTy &bytes) : Bytes(bytes) {}
42
43  uint64_t getBase() const { return 0; }
44  uint64_t getExtent() const { return Bytes.size(); }
45
46  int readByte(uint64_t Addr, uint8_t *Byte) const {
47    if (Addr >= getExtent())
48      return -1;
49    *Byte = Bytes[Addr].first;
50    return 0;
51  }
52};
53}
54
55static bool PrintInsts(const MCDisassembler &DisAsm,
56                       const ByteArrayTy &Bytes,
57                       SourceMgr &SM, raw_ostream &Out,
58                       MCStreamer &Streamer) {
59  // Wrap the vector in a MemoryObject.
60  VectorMemoryObject memoryObject(Bytes);
61
62  // Disassemble it to strings.
63  uint64_t Size;
64  uint64_t Index;
65
66  for (Index = 0; Index < Bytes.size(); Index += Size) {
67    MCInst Inst;
68
69    MCDisassembler::DecodeStatus S;
70    S = DisAsm.getInstruction(Inst, Size, memoryObject, Index,
71                              /*REMOVE*/ nulls(), nulls());
72    switch (S) {
73    case MCDisassembler::Fail:
74      SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
75                      SourceMgr::DK_Warning,
76                      "invalid instruction encoding");
77      if (Size == 0)
78        Size = 1; // skip illegible bytes
79      break;
80
81    case MCDisassembler::SoftFail:
82      SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
83                      SourceMgr::DK_Warning,
84                      "potentially undefined instruction encoding");
85      // Fall through
86
87    case MCDisassembler::Success:
88      Streamer.EmitInstruction(Inst);
89      break;
90    }
91  }
92
93  return false;
94}
95
96static bool ByteArrayFromString(ByteArrayTy &ByteArray,
97                                StringRef &Str,
98                                SourceMgr &SM) {
99  while (!Str.empty()) {
100    // Strip horizontal whitespace.
101    if (size_t Pos = Str.find_first_not_of(" \t\r")) {
102      Str = Str.substr(Pos);
103      continue;
104    }
105
106    // If this is the end of a line or start of a comment, remove the rest of
107    // the line.
108    if (Str[0] == '\n' || Str[0] == '#') {
109      // Strip to the end of line if we already processed any bytes on this
110      // line.  This strips the comment and/or the \n.
111      if (Str[0] == '\n') {
112        Str = Str.substr(1);
113      } else {
114        Str = Str.substr(Str.find_first_of('\n'));
115        if (!Str.empty())
116          Str = Str.substr(1);
117      }
118      continue;
119    }
120
121    // Get the current token.
122    size_t Next = Str.find_first_of(" \t\n\r#");
123    StringRef Value = Str.substr(0, Next);
124
125    // Convert to a byte and add to the byte vector.
126    unsigned ByteVal;
127    if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
128      // If we have an error, print it and skip to the end of line.
129      SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
130                      "invalid input token");
131      Str = Str.substr(Str.find('\n'));
132      ByteArray.clear();
133      continue;
134    }
135
136    ByteArray.push_back(std::make_pair((unsigned char)ByteVal, Value.data()));
137    Str = Str.substr(Next);
138  }
139
140  return false;
141}
142
143int Disassembler::disassemble(const Target &T,
144                              const std::string &Triple,
145                              MCSubtargetInfo &STI,
146                              MCStreamer &Streamer,
147                              MemoryBuffer &Buffer,
148                              SourceMgr &SM,
149                              raw_ostream &Out) {
150  OwningPtr<const MCDisassembler> DisAsm(T.createMCDisassembler(STI));
151  if (!DisAsm) {
152    errs() << "error: no disassembler for target " << Triple << "\n";
153    return -1;
154  }
155
156  // Set up initial section manually here
157  Streamer.InitSections();
158
159  bool ErrorOccurred = false;
160
161  // Convert the input to a vector for disassembly.
162  ByteArrayTy ByteArray;
163  StringRef Str = Buffer.getBuffer();
164
165  ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
166
167  if (!ByteArray.empty())
168    ErrorOccurred |= PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer);
169
170  return ErrorOccurred;
171}
172
173static int byteArrayReader(uint8_t *B, uint64_t A, void *Arg) {
174  ByteArrayTy &ByteArray = *((ByteArrayTy*)Arg);
175
176  if (A >= ByteArray.size())
177    return -1;
178
179  *B = ByteArray[A].first;
180
181  return 0;
182}
183
184static int verboseEvaluator(uint64_t *V, unsigned R, void *Arg) {
185  EDDisassembler &disassembler = *(EDDisassembler *)((void **)Arg)[0];
186  raw_ostream &Out = *(raw_ostream *)((void **)Arg)[1];
187
188  if (const char *regName = disassembler.nameWithRegisterID(R))
189    Out << "[" << regName << "/" << R << "]";
190
191  if (disassembler.registerIsStackPointer(R))
192    Out << "(sp)";
193  if (disassembler.registerIsProgramCounter(R))
194    Out << "(pc)";
195
196  *V = 0;
197  return 0;
198}
199
200int Disassembler::disassembleEnhanced(const std::string &TS,
201                                      MemoryBuffer &Buffer,
202                                      SourceMgr &SM,
203                                      raw_ostream &Out) {
204  ByteArrayTy ByteArray;
205  StringRef Str = Buffer.getBuffer();
206
207  if (ByteArrayFromString(ByteArray, Str, SM)) {
208    return -1;
209  }
210
211  Triple T(TS);
212  EDDisassembler::AssemblySyntax AS;
213
214  switch (T.getArch()) {
215  default:
216    errs() << "error: no default assembly syntax for " << TS.c_str() << "\n";
217    return -1;
218  case Triple::arm:
219  case Triple::thumb:
220    AS = EDDisassembler::kEDAssemblySyntaxARMUAL;
221    break;
222  case Triple::x86:
223  case Triple::x86_64:
224    AS = EDDisassembler::kEDAssemblySyntaxX86ATT;
225    break;
226  }
227
228  OwningPtr<EDDisassembler>
229    disassembler(EDDisassembler::getDisassembler(TS.c_str(), AS));
230
231  if (disassembler == 0) {
232    errs() << "error: couldn't get disassembler for " << TS << '\n';
233    return -1;
234  }
235
236  while (ByteArray.size()) {
237    OwningPtr<EDInst>
238      inst(disassembler->createInst(byteArrayReader, 0, &ByteArray));
239
240    if (inst == 0) {
241      errs() << "error: Didn't get an instruction\n";
242      return -1;
243    }
244
245    ByteArray.erase (ByteArray.begin(), ByteArray.begin() + inst->byteSize());
246
247    unsigned numTokens = inst->numTokens();
248    if ((int)numTokens < 0) {
249      errs() << "error: couldn't count the instruction's tokens\n";
250      return -1;
251    }
252
253    for (unsigned tokenIndex = 0; tokenIndex != numTokens; ++tokenIndex) {
254      EDToken *token;
255
256      if (inst->getToken(token, tokenIndex)) {
257        errs() << "error: Couldn't get token\n";
258        return -1;
259      }
260
261      const char *buf;
262      if (token->getString(buf)) {
263        errs() << "error: Couldn't get string for token\n";
264        return -1;
265      }
266
267      Out << '[';
268      int operandIndex = token->operandID();
269
270      if (operandIndex >= 0)
271        Out << operandIndex << "-";
272
273      switch (token->type()) {
274      case EDToken::kTokenWhitespace: Out << "w"; break;
275      case EDToken::kTokenPunctuation: Out << "p"; break;
276      case EDToken::kTokenOpcode: Out << "o"; break;
277      case EDToken::kTokenLiteral: Out << "l"; break;
278      case EDToken::kTokenRegister: Out << "r"; break;
279      }
280
281      Out << ":" << buf;
282
283      if (token->type() == EDToken::kTokenLiteral) {
284        Out << "=";
285        if (token->literalSign())
286          Out << "-";
287        uint64_t absoluteValue;
288        if (token->literalAbsoluteValue(absoluteValue)) {
289          errs() << "error: Couldn't get the value of a literal token\n";
290          return -1;
291        }
292        Out << absoluteValue;
293      } else if (token->type() == EDToken::kTokenRegister) {
294        Out << "=";
295        unsigned regID;
296        if (token->registerID(regID)) {
297          errs() << "error: Couldn't get the ID of a register token\n";
298          return -1;
299        }
300        Out << "r" << regID;
301      }
302
303      Out << "]";
304    }
305
306    Out << " ";
307
308    if (inst->isBranch())
309      Out << "<br> ";
310    if (inst->isMove())
311      Out << "<mov> ";
312
313    unsigned numOperands = inst->numOperands();
314
315    if ((int)numOperands < 0) {
316      errs() << "error: Couldn't count operands\n";
317      return -1;
318    }
319
320    for (unsigned operandIndex = 0; operandIndex != numOperands;
321         ++operandIndex) {
322      Out << operandIndex << ":";
323
324      EDOperand *operand;
325      if (inst->getOperand(operand, operandIndex)) {
326        errs() << "error: couldn't get operand\n";
327        return -1;
328      }
329
330      uint64_t evaluatedResult;
331      void *Arg[] = { disassembler.get(), &Out };
332      if (operand->evaluate(evaluatedResult, verboseEvaluator, Arg)) {
333        errs() << "error: Couldn't evaluate an operand\n";
334        return -1;
335      }
336      Out << "=" << evaluatedResult << " ";
337    }
338
339    Out << '\n';
340  }
341
342  return 0;
343}
344