Disassembler.cpp revision 221337
1//===- Disassembler.cpp - Disassembler for hex strings --------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This class implements the disassembler of strings of bytes written in
11// hexadecimal, from standard input or from a file.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Disassembler.h"
16#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
17#include "../../lib/MC/MCDisassembler/EDInst.h"
18#include "../../lib/MC/MCDisassembler/EDOperand.h"
19#include "../../lib/MC/MCDisassembler/EDToken.h"
20#include "llvm/MC/MCAsmInfo.h"
21#include "llvm/MC/MCDisassembler.h"
22#include "llvm/MC/MCInst.h"
23#include "llvm/MC/MCInstPrinter.h"
24#include "llvm/Target/TargetRegistry.h"
25#include "llvm/ADT/OwningPtr.h"
26#include "llvm/ADT/Triple.h"
27#include "llvm/ADT/Twine.h"
28#include "llvm/Support/MemoryBuffer.h"
29#include "llvm/Support/MemoryObject.h"
30#include "llvm/Support/raw_ostream.h"
31#include "llvm/Support/SourceMgr.h"
32using namespace llvm;
33
34typedef std::vector<std::pair<unsigned char, const char*> > ByteArrayTy;
35
36namespace {
37class VectorMemoryObject : public MemoryObject {
38private:
39  const ByteArrayTy &Bytes;
40public:
41  VectorMemoryObject(const ByteArrayTy &bytes) : Bytes(bytes) {}
42
43  uint64_t getBase() const { return 0; }
44  uint64_t getExtent() const { return Bytes.size(); }
45
46  int readByte(uint64_t Addr, uint8_t *Byte) const {
47    if (Addr >= getExtent())
48      return -1;
49    *Byte = Bytes[Addr].first;
50    return 0;
51  }
52};
53}
54
55static bool PrintInsts(const MCDisassembler &DisAsm,
56                       MCInstPrinter &Printer, const ByteArrayTy &Bytes,
57                       SourceMgr &SM, raw_ostream &Out) {
58  // Wrap the vector in a MemoryObject.
59  VectorMemoryObject memoryObject(Bytes);
60
61  // Disassemble it to strings.
62  uint64_t Size;
63  uint64_t Index;
64
65  for (Index = 0; Index < Bytes.size(); Index += Size) {
66    MCInst Inst;
67
68    if (DisAsm.getInstruction(Inst, Size, memoryObject, Index,
69                               /*REMOVE*/ nulls())) {
70      Printer.printInst(&Inst, Out);
71      Out << "\n";
72    } else {
73      SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
74                      "invalid instruction encoding", "warning");
75      if (Size == 0)
76        Size = 1; // skip illegible bytes
77    }
78  }
79
80  return false;
81}
82
83static bool ByteArrayFromString(ByteArrayTy &ByteArray,
84                                StringRef &Str,
85                                SourceMgr &SM) {
86  while (!Str.empty()) {
87    // Strip horizontal whitespace.
88    if (size_t Pos = Str.find_first_not_of(" \t\r")) {
89      Str = Str.substr(Pos);
90      continue;
91    }
92
93    // If this is the end of a line or start of a comment, remove the rest of
94    // the line.
95    if (Str[0] == '\n' || Str[0] == '#') {
96      // Strip to the end of line if we already processed any bytes on this
97      // line.  This strips the comment and/or the \n.
98      if (Str[0] == '\n') {
99        Str = Str.substr(1);
100      } else {
101        Str = Str.substr(Str.find_first_of('\n'));
102        if (!Str.empty())
103          Str = Str.substr(1);
104      }
105      continue;
106    }
107
108    // Get the current token.
109    size_t Next = Str.find_first_of(" \t\n\r#");
110    StringRef Value = Str.substr(0, Next);
111
112    // Convert to a byte and add to the byte vector.
113    unsigned ByteVal;
114    if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
115      // If we have an error, print it and skip to the end of line.
116      SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
117                      "invalid input token", "error");
118      Str = Str.substr(Str.find('\n'));
119      ByteArray.clear();
120      continue;
121    }
122
123    ByteArray.push_back(std::make_pair((unsigned char)ByteVal, Value.data()));
124    Str = Str.substr(Next);
125  }
126
127  return false;
128}
129
130int Disassembler::disassemble(const Target &T,  TargetMachine &TM,
131                              const std::string &Triple,
132                              MemoryBuffer &Buffer,
133                              raw_ostream &Out) {
134  // Set up disassembler.
135  OwningPtr<const MCAsmInfo> AsmInfo(T.createAsmInfo(Triple));
136
137  if (!AsmInfo) {
138    errs() << "error: no assembly info for target " << Triple << "\n";
139    return -1;
140  }
141
142  OwningPtr<const MCDisassembler> DisAsm(T.createMCDisassembler());
143  if (!DisAsm) {
144    errs() << "error: no disassembler for target " << Triple << "\n";
145    return -1;
146  }
147
148  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
149  OwningPtr<MCInstPrinter> IP(T.createMCInstPrinter(TM, AsmPrinterVariant,
150                                                    *AsmInfo));
151  if (!IP) {
152    errs() << "error: no instruction printer for target " << Triple << '\n';
153    return -1;
154  }
155
156  bool ErrorOccurred = false;
157
158  SourceMgr SM;
159  SM.AddNewSourceBuffer(&Buffer, SMLoc());
160
161  // Convert the input to a vector for disassembly.
162  ByteArrayTy ByteArray;
163  StringRef Str = Buffer.getBuffer();
164
165  ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
166
167  if (!ByteArray.empty())
168    ErrorOccurred |= PrintInsts(*DisAsm, *IP, ByteArray, SM, Out);
169
170  return ErrorOccurred;
171}
172
173static int byteArrayReader(uint8_t *B, uint64_t A, void *Arg) {
174  ByteArrayTy &ByteArray = *((ByteArrayTy*)Arg);
175
176  if (A >= ByteArray.size())
177    return -1;
178
179  *B = ByteArray[A].first;
180
181  return 0;
182}
183
184static int verboseEvaluator(uint64_t *V, unsigned R, void *Arg) {
185  EDDisassembler &disassembler = *(EDDisassembler *)((void **)Arg)[0];
186  raw_ostream &Out = *(raw_ostream *)((void **)Arg)[1];
187
188  if (const char *regName = disassembler.nameWithRegisterID(R))
189    Out << "[" << regName << "/" << R << "]";
190
191  if (disassembler.registerIsStackPointer(R))
192    Out << "(sp)";
193  if (disassembler.registerIsProgramCounter(R))
194    Out << "(pc)";
195
196  *V = 0;
197  return 0;
198}
199
200int Disassembler::disassembleEnhanced(const std::string &TS,
201                                      MemoryBuffer &Buffer,
202                                      raw_ostream &Out) {
203  ByteArrayTy ByteArray;
204  StringRef Str = Buffer.getBuffer();
205  SourceMgr SM;
206
207  SM.AddNewSourceBuffer(&Buffer, SMLoc());
208
209  if (ByteArrayFromString(ByteArray, Str, SM)) {
210    return -1;
211  }
212
213  Triple T(TS);
214  EDDisassembler::AssemblySyntax AS;
215
216  switch (T.getArch()) {
217  default:
218    errs() << "error: no default assembly syntax for " << TS.c_str() << "\n";
219    return -1;
220  case Triple::arm:
221  case Triple::thumb:
222    AS = EDDisassembler::kEDAssemblySyntaxARMUAL;
223    break;
224  case Triple::x86:
225  case Triple::x86_64:
226    AS = EDDisassembler::kEDAssemblySyntaxX86ATT;
227    break;
228  }
229
230  EDDisassembler::initialize();
231  OwningPtr<EDDisassembler>
232    disassembler(EDDisassembler::getDisassembler(TS.c_str(), AS));
233
234  if (disassembler == 0) {
235    errs() << "error: couldn't get disassembler for " << TS << '\n';
236    return -1;
237  }
238
239  while (ByteArray.size()) {
240    OwningPtr<EDInst>
241      inst(disassembler->createInst(byteArrayReader, 0, &ByteArray));
242
243    if (inst == 0) {
244      errs() << "error: Didn't get an instruction\n";
245      return -1;
246    }
247
248    ByteArray.erase (ByteArray.begin(), ByteArray.begin() + inst->byteSize());
249
250    unsigned numTokens = inst->numTokens();
251    if ((int)numTokens < 0) {
252      errs() << "error: couldn't count the instruction's tokens\n";
253      return -1;
254    }
255
256    for (unsigned tokenIndex = 0; tokenIndex != numTokens; ++tokenIndex) {
257      EDToken *token;
258
259      if (inst->getToken(token, tokenIndex)) {
260        errs() << "error: Couldn't get token\n";
261        return -1;
262      }
263
264      const char *buf;
265      if (token->getString(buf)) {
266        errs() << "error: Couldn't get string for token\n";
267        return -1;
268      }
269
270      Out << '[';
271      int operandIndex = token->operandID();
272
273      if (operandIndex >= 0)
274        Out << operandIndex << "-";
275
276      switch (token->type()) {
277      default: Out << "?"; break;
278      case EDToken::kTokenWhitespace: Out << "w"; break;
279      case EDToken::kTokenPunctuation: Out << "p"; break;
280      case EDToken::kTokenOpcode: Out << "o"; break;
281      case EDToken::kTokenLiteral: Out << "l"; break;
282      case EDToken::kTokenRegister: Out << "r"; break;
283      }
284
285      Out << ":" << buf;
286
287      if (token->type() == EDToken::kTokenLiteral) {
288        Out << "=";
289        if (token->literalSign())
290          Out << "-";
291        uint64_t absoluteValue;
292        if (token->literalAbsoluteValue(absoluteValue)) {
293          errs() << "error: Couldn't get the value of a literal token\n";
294          return -1;
295        }
296        Out << absoluteValue;
297      } else if (token->type() == EDToken::kTokenRegister) {
298        Out << "=";
299        unsigned regID;
300        if (token->registerID(regID)) {
301          errs() << "error: Couldn't get the ID of a register token\n";
302          return -1;
303        }
304        Out << "r" << regID;
305      }
306
307      Out << "]";
308    }
309
310    Out << " ";
311
312    if (inst->isBranch())
313      Out << "<br> ";
314    if (inst->isMove())
315      Out << "<mov> ";
316
317    unsigned numOperands = inst->numOperands();
318
319    if ((int)numOperands < 0) {
320      errs() << "error: Couldn't count operands\n";
321      return -1;
322    }
323
324    for (unsigned operandIndex = 0; operandIndex != numOperands; ++operandIndex) {
325      Out << operandIndex << ":";
326
327      EDOperand *operand;
328      if (inst->getOperand(operand, operandIndex)) {
329        errs() << "error: couldn't get operand\n";
330        return -1;
331      }
332
333      uint64_t evaluatedResult;
334      void *Arg[] = { disassembler.get(), &Out };
335      if (operand->evaluate(evaluatedResult, verboseEvaluator, Arg)) {
336        errs() << "error: Couldn't evaluate an operand\n";
337        return -1;
338      }
339      Out << "=" << evaluatedResult << " ";
340    }
341
342    Out << '\n';
343  }
344
345  return 0;
346}
347
348