1//===-- MCFunction.cpp ----------------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the algorithm to break down a region of machine code
11// into basic blocks and try to reconstruct a CFG from it.
12//
13//===----------------------------------------------------------------------===//
14
15#include "MCFunction.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/MC/MCDisassembler.h"
18#include "llvm/MC/MCInst.h"
19#include "llvm/MC/MCInstPrinter.h"
20#include "llvm/MC/MCInstrAnalysis.h"
21#include "llvm/MC/MCInstrDesc.h"
22#include "llvm/MC/MCInstrInfo.h"
23#include "llvm/Support/MemoryObject.h"
24#include "llvm/Support/raw_ostream.h"
25#include "llvm/Support/system_error.h"
26#include <set>
27using namespace llvm;
28
29MCFunction
30MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
31                                 const MemoryObject &Region, uint64_t Start,
32                                 uint64_t End, const MCInstrAnalysis *Ana,
33                                 raw_ostream &DebugOut,
34                                 SmallVectorImpl<uint64_t> &Calls) {
35  std::vector<MCDecodedInst> Instructions;
36  std::set<uint64_t> Splits;
37  Splits.insert(Start);
38  uint64_t Size;
39
40  MCFunction f(Name);
41
42  {
43  DenseSet<uint64_t> VisitedInsts;
44  SmallVector<uint64_t, 16> WorkList;
45  WorkList.push_back(Start);
46  // Disassemble code and gather basic block split points.
47  while (!WorkList.empty()) {
48    uint64_t Index = WorkList.pop_back_val();
49    if (VisitedInsts.find(Index) != VisitedInsts.end())
50      continue; // Already visited this location.
51
52    for (;Index < End; Index += Size) {
53      VisitedInsts.insert(Index);
54
55      MCInst Inst;
56      if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
57        Instructions.push_back(MCDecodedInst(Index, Size, Inst));
58        if (Ana->isBranch(Inst)) {
59          uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
60          if (targ != -1ULL && targ == Index+Size)
61            continue; // Skip nop jumps.
62
63          // If we could determine the branch target, make a note to start a
64          // new basic block there and add the target to the worklist.
65          if (targ != -1ULL) {
66            Splits.insert(targ);
67            WorkList.push_back(targ);
68            WorkList.push_back(Index+Size);
69          }
70          Splits.insert(Index+Size);
71          break;
72        } else if (Ana->isReturn(Inst)) {
73          // Return instruction. This basic block ends here.
74          Splits.insert(Index+Size);
75          break;
76        } else if (Ana->isCall(Inst)) {
77          uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
78          // Add the call to the call list if the destination is known.
79          if (targ != -1ULL && targ != Index+Size)
80            Calls.push_back(targ);
81        }
82      } else {
83        errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
84        if (Size == 0)
85          Size = 1; // skip illegible bytes
86      }
87    }
88  }
89  }
90
91  // Make sure the instruction list is sorted.
92  std::sort(Instructions.begin(), Instructions.end());
93
94  // Create basic blocks.
95  unsigned ii = 0, ie = Instructions.size();
96  for (std::set<uint64_t>::iterator spi = Splits.begin(),
97       spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
98    MCBasicBlock BB;
99    uint64_t BlockEnd = *llvm::next(spi);
100    // Add instructions to the BB.
101    for (; ii != ie; ++ii) {
102      if (Instructions[ii].Address < *spi ||
103          Instructions[ii].Address >= BlockEnd)
104        break;
105      BB.addInst(Instructions[ii]);
106    }
107    f.addBlock(*spi, BB);
108  }
109
110  std::sort(f.Blocks.begin(), f.Blocks.end());
111
112  // Calculate successors of each block.
113  for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
114    MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second);
115    if (BB.getInsts().empty()) continue;
116    const MCDecodedInst &Inst = BB.getInsts().back();
117
118    if (Ana->isBranch(Inst.Inst)) {
119      uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size);
120      if (targ == -1ULL) {
121        // Indirect branch. Bail and add all blocks of the function as a
122        // successor.
123        for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
124          BB.addSucc(i->first);
125      } else if (targ != Inst.Address+Inst.Size)
126        BB.addSucc(targ);
127      // Conditional branches can also fall through to the next block.
128      if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
129        BB.addSucc(llvm::next(i)->first);
130    } else {
131      // No branch. Fall through to the next block.
132      if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
133        BB.addSucc(llvm::next(i)->first);
134    }
135  }
136
137  return f;
138}
139