1259698Sdim//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===//
2259698Sdim//
3259698Sdim//                     The LLVM Compiler Infrastructure
4259698Sdim//
5259698Sdim// This file is distributed under the University of Illinois Open Source
6259698Sdim// License. See LICENSE.TXT for details.
7259698Sdim//
8259698Sdim//===----------------------------------------------------------------------===//
9259698Sdim
10259698Sdim#include "llvm/MC/MCObjectDisassembler.h"
11259698Sdim#include "llvm/ADT/SetVector.h"
12259698Sdim#include "llvm/ADT/SmallPtrSet.h"
13259698Sdim#include "llvm/ADT/StringExtras.h"
14259698Sdim#include "llvm/ADT/StringRef.h"
15259698Sdim#include "llvm/ADT/Twine.h"
16259698Sdim#include "llvm/MC/MCAtom.h"
17259698Sdim#include "llvm/MC/MCDisassembler.h"
18259698Sdim#include "llvm/MC/MCFunction.h"
19259698Sdim#include "llvm/MC/MCInstrAnalysis.h"
20259698Sdim#include "llvm/MC/MCModule.h"
21259698Sdim#include "llvm/MC/MCObjectSymbolizer.h"
22259698Sdim#include "llvm/Object/MachO.h"
23259698Sdim#include "llvm/Object/ObjectFile.h"
24259698Sdim#include "llvm/Support/Debug.h"
25259698Sdim#include "llvm/Support/MachO.h"
26259698Sdim#include "llvm/Support/MemoryObject.h"
27259698Sdim#include "llvm/Support/StringRefMemoryObject.h"
28259698Sdim#include "llvm/Support/raw_ostream.h"
29259698Sdim#include <map>
30259698Sdim
31259698Sdimusing namespace llvm;
32259698Sdimusing namespace object;
33259698Sdim
34259698SdimMCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
35259698Sdim                                           const MCDisassembler &Dis,
36259698Sdim                                           const MCInstrAnalysis &MIA)
37259698Sdim    : Obj(Obj), Dis(Dis), MIA(MIA), MOS(0) {}
38259698Sdim
39259698Sdimuint64_t MCObjectDisassembler::getEntrypoint() {
40259698Sdim  error_code ec;
41259698Sdim  for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols();
42259698Sdim       SI != SE; SI.increment(ec)) {
43259698Sdim    if (ec)
44259698Sdim      break;
45259698Sdim    StringRef Name;
46259698Sdim    SI->getName(Name);
47259698Sdim    if (Name == "main" || Name == "_main") {
48259698Sdim      uint64_t Entrypoint;
49259698Sdim      SI->getAddress(Entrypoint);
50259698Sdim      return getEffectiveLoadAddr(Entrypoint);
51259698Sdim    }
52259698Sdim  }
53259698Sdim  return 0;
54259698Sdim}
55259698Sdim
56259698SdimArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() {
57259698Sdim  return ArrayRef<uint64_t>();
58259698Sdim}
59259698Sdim
60259698SdimArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() {
61259698Sdim  return ArrayRef<uint64_t>();
62259698Sdim}
63259698Sdim
64259698SdimMemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) {
65259698Sdim  // FIXME: Keep track of object sections.
66259698Sdim  return FallbackRegion.get();
67259698Sdim}
68259698Sdim
69259698Sdimuint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
70259698Sdim  return Addr;
71259698Sdim}
72259698Sdim
73259698Sdimuint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) {
74259698Sdim  return Addr;
75259698Sdim}
76259698Sdim
77259698SdimMCModule *MCObjectDisassembler::buildEmptyModule() {
78259698Sdim  MCModule *Module = new MCModule;
79259698Sdim  Module->Entrypoint = getEntrypoint();
80259698Sdim  return Module;
81259698Sdim}
82259698Sdim
83259698SdimMCModule *MCObjectDisassembler::buildModule(bool withCFG) {
84259698Sdim  MCModule *Module = buildEmptyModule();
85259698Sdim
86259698Sdim  buildSectionAtoms(Module);
87259698Sdim  if (withCFG)
88259698Sdim    buildCFG(Module);
89259698Sdim  return Module;
90259698Sdim}
91259698Sdim
92259698Sdimvoid MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
93259698Sdim  error_code ec;
94259698Sdim  for (section_iterator SI = Obj.begin_sections(),
95259698Sdim                        SE = Obj.end_sections();
96259698Sdim                        SI != SE;
97259698Sdim                        SI.increment(ec)) {
98259698Sdim    if (ec) break;
99259698Sdim
100259698Sdim    bool isText; SI->isText(isText);
101259698Sdim    bool isData; SI->isData(isData);
102259698Sdim    if (!isData && !isText)
103259698Sdim      continue;
104259698Sdim
105259698Sdim    uint64_t StartAddr; SI->getAddress(StartAddr);
106259698Sdim    uint64_t SecSize; SI->getSize(SecSize);
107259698Sdim    if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
108259698Sdim      continue;
109259698Sdim    StartAddr = getEffectiveLoadAddr(StartAddr);
110259698Sdim
111259698Sdim    StringRef Contents; SI->getContents(Contents);
112259698Sdim    StringRefMemoryObject memoryObject(Contents, StartAddr);
113259698Sdim
114259698Sdim    // We don't care about things like non-file-backed sections yet.
115259698Sdim    if (Contents.size() != SecSize || !SecSize)
116259698Sdim      continue;
117259698Sdim    uint64_t EndAddr = StartAddr + SecSize - 1;
118259698Sdim
119259698Sdim    StringRef SecName; SI->getName(SecName);
120259698Sdim
121259698Sdim    if (isText) {
122259698Sdim      MCTextAtom *Text = 0;
123259698Sdim      MCDataAtom *InvalidData = 0;
124259698Sdim
125259698Sdim      uint64_t InstSize;
126259698Sdim      for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
127259698Sdim        const uint64_t CurAddr = StartAddr + Index;
128259698Sdim        MCInst Inst;
129259698Sdim        if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(),
130259698Sdim                               nulls())) {
131259698Sdim          if (!Text) {
132259698Sdim            Text = Module->createTextAtom(CurAddr, CurAddr);
133259698Sdim            Text->setName(SecName);
134259698Sdim          }
135259698Sdim          Text->addInst(Inst, InstSize);
136259698Sdim          InvalidData = 0;
137259698Sdim        } else {
138259698Sdim          assert(InstSize && "getInstruction() consumed no bytes");
139259698Sdim          if (!InvalidData) {
140259698Sdim            Text = 0;
141259698Sdim            InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1);
142259698Sdim          }
143259698Sdim          for (uint64_t I = 0; I < InstSize; ++I)
144259698Sdim            InvalidData->addData(Contents[Index+I]);
145259698Sdim        }
146259698Sdim      }
147259698Sdim    } else {
148259698Sdim      MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
149259698Sdim      Data->setName(SecName);
150259698Sdim      for (uint64_t Index = 0; Index < SecSize; ++Index)
151259698Sdim        Data->addData(Contents[Index]);
152259698Sdim    }
153259698Sdim  }
154259698Sdim}
155259698Sdim
156259698Sdimnamespace {
157259698Sdim  struct BBInfo;
158259698Sdim  typedef SmallPtrSet<BBInfo*, 2> BBInfoSetTy;
159259698Sdim
160259698Sdim  struct BBInfo {
161259698Sdim    MCTextAtom *Atom;
162259698Sdim    MCBasicBlock *BB;
163259698Sdim    BBInfoSetTy Succs;
164259698Sdim    BBInfoSetTy Preds;
165259698Sdim    MCObjectDisassembler::AddressSetTy SuccAddrs;
166259698Sdim
167259698Sdim    BBInfo() : Atom(0), BB(0) {}
168259698Sdim
169259698Sdim    void addSucc(BBInfo &Succ) {
170259698Sdim      Succs.insert(&Succ);
171259698Sdim      Succ.Preds.insert(this);
172259698Sdim    }
173259698Sdim  };
174259698Sdim}
175259698Sdim
176259698Sdimstatic void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) {
177259698Sdim  std::sort(V.begin(), V.end());
178259698Sdim  V.erase(std::unique(V.begin(), V.end()), V.end());
179259698Sdim}
180259698Sdim
181259698Sdimvoid MCObjectDisassembler::buildCFG(MCModule *Module) {
182259698Sdim  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
183259698Sdim  BBInfoByAddrTy BBInfos;
184259698Sdim  AddressSetTy Splits;
185259698Sdim  AddressSetTy Calls;
186259698Sdim
187259698Sdim  error_code ec;
188259698Sdim  for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols();
189259698Sdim       SI != SE; SI.increment(ec)) {
190259698Sdim    if (ec)
191259698Sdim      break;
192259698Sdim    SymbolRef::Type SymType;
193259698Sdim    SI->getType(SymType);
194259698Sdim    if (SymType == SymbolRef::ST_Function) {
195259698Sdim      uint64_t SymAddr;
196259698Sdim      SI->getAddress(SymAddr);
197259698Sdim      SymAddr = getEffectiveLoadAddr(SymAddr);
198259698Sdim      Calls.push_back(SymAddr);
199259698Sdim      Splits.push_back(SymAddr);
200259698Sdim    }
201259698Sdim  }
202259698Sdim
203259698Sdim  assert(Module->func_begin() == Module->func_end()
204259698Sdim         && "Module already has a CFG!");
205259698Sdim
206259698Sdim  // First, determine the basic block boundaries and call targets.
207259698Sdim  for (MCModule::atom_iterator AI = Module->atom_begin(),
208259698Sdim                               AE = Module->atom_end();
209259698Sdim       AI != AE; ++AI) {
210259698Sdim    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
211259698Sdim    if (!TA) continue;
212259698Sdim    Calls.push_back(TA->getBeginAddr());
213259698Sdim    BBInfos[TA->getBeginAddr()].Atom = TA;
214259698Sdim    for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
215259698Sdim         II != IE; ++II) {
216259698Sdim      if (MIA.isTerminator(II->Inst))
217259698Sdim        Splits.push_back(II->Address + II->Size);
218259698Sdim      uint64_t Target;
219259698Sdim      if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
220259698Sdim        if (MIA.isCall(II->Inst))
221259698Sdim          Calls.push_back(Target);
222259698Sdim        Splits.push_back(Target);
223259698Sdim      }
224259698Sdim    }
225259698Sdim  }
226259698Sdim
227259698Sdim  RemoveDupsFromAddressVector(Splits);
228259698Sdim  RemoveDupsFromAddressVector(Calls);
229259698Sdim
230259698Sdim  // Split text atoms into basic block atoms.
231259698Sdim  for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
232259698Sdim       SI != SE; ++SI) {
233259698Sdim    MCAtom *A = Module->findAtomContaining(*SI);
234259698Sdim    if (!A) continue;
235259698Sdim    MCTextAtom *TA = cast<MCTextAtom>(A);
236259698Sdim    if (TA->getBeginAddr() == *SI)
237259698Sdim      continue;
238259698Sdim    MCTextAtom *NewAtom = TA->split(*SI);
239259698Sdim    BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
240259698Sdim    StringRef BBName = TA->getName();
241259698Sdim    BBName = BBName.substr(0, BBName.find_last_of(':'));
242259698Sdim    NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
243259698Sdim  }
244259698Sdim
245259698Sdim  // Compute succs/preds.
246259698Sdim  for (MCModule::atom_iterator AI = Module->atom_begin(),
247259698Sdim                               AE = Module->atom_end();
248259698Sdim                               AI != AE; ++AI) {
249259698Sdim    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
250259698Sdim    if (!TA) continue;
251259698Sdim    BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
252259698Sdim    const MCDecodedInst &LI = TA->back();
253259698Sdim    if (MIA.isBranch(LI.Inst)) {
254259698Sdim      uint64_t Target;
255259698Sdim      if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
256259698Sdim        CurBB.addSucc(BBInfos[Target]);
257259698Sdim      if (MIA.isConditionalBranch(LI.Inst))
258259698Sdim        CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
259259698Sdim    } else if (!MIA.isTerminator(LI.Inst))
260259698Sdim      CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
261259698Sdim  }
262259698Sdim
263259698Sdim
264259698Sdim  // Create functions and basic blocks.
265259698Sdim  for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
266259698Sdim       CI != CE; ++CI) {
267259698Sdim    BBInfo &BBI = BBInfos[*CI];
268259698Sdim    if (!BBI.Atom) continue;
269259698Sdim
270259698Sdim    MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName());
271259698Sdim
272259698Sdim    // Create MCBBs.
273259698Sdim    SmallSetVector<BBInfo*, 16> Worklist;
274259698Sdim    Worklist.insert(&BBI);
275259698Sdim    for (size_t wi = 0; wi < Worklist.size(); ++wi) {
276259698Sdim      BBInfo *BBI = Worklist[wi];
277259698Sdim      if (!BBI->Atom)
278259698Sdim        continue;
279259698Sdim      BBI->BB = &MCFN.createBlock(*BBI->Atom);
280259698Sdim      // Add all predecessors and successors to the worklist.
281259698Sdim      for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
282259698Sdim                                 SI != SE; ++SI)
283259698Sdim        Worklist.insert(*SI);
284259698Sdim      for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
285259698Sdim                                 PI != PE; ++PI)
286259698Sdim        Worklist.insert(*PI);
287259698Sdim    }
288259698Sdim
289259698Sdim    // Set preds/succs.
290259698Sdim    for (size_t wi = 0; wi < Worklist.size(); ++wi) {
291259698Sdim      BBInfo *BBI = Worklist[wi];
292259698Sdim      MCBasicBlock *MCBB = BBI->BB;
293259698Sdim      if (!MCBB)
294259698Sdim        continue;
295259698Sdim      for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
296259698Sdim           SI != SE; ++SI)
297259698Sdim        if ((*SI)->BB)
298259698Sdim          MCBB->addSuccessor((*SI)->BB);
299259698Sdim      for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
300259698Sdim           PI != PE; ++PI)
301259698Sdim        if ((*PI)->BB)
302259698Sdim          MCBB->addPredecessor((*PI)->BB);
303259698Sdim    }
304259698Sdim  }
305259698Sdim}
306259698Sdim
307259698Sdim// Basic idea of the disassembly + discovery:
308259698Sdim//
309259698Sdim// start with the wanted address, insert it in the worklist
310259698Sdim// while worklist not empty, take next address in the worklist:
311259698Sdim// - check if atom exists there
312259698Sdim//   - if middle of atom:
313259698Sdim//     - split basic blocks referencing the atom
314259698Sdim//     - look for an already encountered BBInfo (using a map<atom, bbinfo>)
315259698Sdim//       - if there is, split it (new one, fallthrough, move succs, etc..)
316259698Sdim//   - if start of atom: nothing else to do
317259698Sdim//   - if no atom: create new atom and new bbinfo
318259698Sdim// - look at the last instruction in the atom, add succs to worklist
319259698Sdim// for all elements in the worklist:
320259698Sdim// - create basic block, update preds/succs, etc..
321259698Sdim//
322259698SdimMCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN,
323259698Sdim                                            uint64_t BBBeginAddr,
324259698Sdim                                            AddressSetTy &CallTargets,
325259698Sdim                                            AddressSetTy &TailCallTargets) {
326259698Sdim  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
327259698Sdim  typedef SmallSetVector<uint64_t, 16> AddrWorklistTy;
328259698Sdim  BBInfoByAddrTy BBInfos;
329259698Sdim  AddrWorklistTy Worklist;
330259698Sdim
331259698Sdim  Worklist.insert(BBBeginAddr);
332259698Sdim  for (size_t wi = 0; wi < Worklist.size(); ++wi) {
333259698Sdim    const uint64_t BeginAddr = Worklist[wi];
334259698Sdim    BBInfo *BBI = &BBInfos[BeginAddr];
335259698Sdim
336259698Sdim    MCTextAtom *&TA = BBI->Atom;
337259698Sdim    assert(!TA && "Discovered basic block already has an associated atom!");
338259698Sdim
339259698Sdim    // Look for an atom at BeginAddr.
340259698Sdim    if (MCAtom *A = Module->findAtomContaining(BeginAddr)) {
341259698Sdim      // FIXME: We don't care about mixed atoms, see above.
342259698Sdim      TA = cast<MCTextAtom>(A);
343259698Sdim
344259698Sdim      // The found atom doesn't begin at BeginAddr, we have to split it.
345259698Sdim      if (TA->getBeginAddr() != BeginAddr) {
346259698Sdim        // FIXME: Handle overlapping atoms: middle-starting instructions, etc..
347259698Sdim        MCTextAtom *NewTA = TA->split(BeginAddr);
348259698Sdim
349259698Sdim        // Look for an already encountered basic block that needs splitting
350259698Sdim        BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr());
351259698Sdim        if (It != BBInfos.end() && It->second.Atom) {
352259698Sdim          BBI->SuccAddrs = It->second.SuccAddrs;
353259698Sdim          It->second.SuccAddrs.clear();
354259698Sdim          It->second.SuccAddrs.push_back(BeginAddr);
355259698Sdim        }
356259698Sdim        TA = NewTA;
357259698Sdim      }
358259698Sdim      BBI->Atom = TA;
359259698Sdim    } else {
360259698Sdim      // If we didn't find an atom, then we have to disassemble to create one!
361259698Sdim
362259698Sdim      MemoryObject *Region = getRegionFor(BeginAddr);
363259698Sdim      if (!Region)
364259698Sdim        llvm_unreachable(("Couldn't find suitable region for disassembly at " +
365259698Sdim                          utostr(BeginAddr)).c_str());
366259698Sdim
367259698Sdim      uint64_t InstSize;
368259698Sdim      uint64_t EndAddr = Region->getBase() + Region->getExtent();
369259698Sdim
370259698Sdim      // We want to stop before the next atom and have a fallthrough to it.
371259698Sdim      if (MCTextAtom *NextAtom =
372259698Sdim              cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr)))
373259698Sdim        EndAddr = std::min(EndAddr, NextAtom->getBeginAddr());
374259698Sdim
375259698Sdim      for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) {
376259698Sdim        MCInst Inst;
377259698Sdim        if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(),
378259698Sdim                               nulls())) {
379259698Sdim          if (!TA)
380259698Sdim            TA = Module->createTextAtom(Addr, Addr);
381259698Sdim          TA->addInst(Inst, InstSize);
382259698Sdim        } else {
383259698Sdim          // We don't care about splitting mixed atoms either.
384259698Sdim          llvm_unreachable("Couldn't disassemble instruction in atom.");
385259698Sdim        }
386259698Sdim
387259698Sdim        uint64_t BranchTarget;
388259698Sdim        if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) {
389259698Sdim          if (MIA.isCall(Inst))
390259698Sdim            CallTargets.push_back(BranchTarget);
391259698Sdim        }
392259698Sdim
393259698Sdim        if (MIA.isTerminator(Inst))
394259698Sdim          break;
395259698Sdim      }
396259698Sdim      BBI->Atom = TA;
397259698Sdim    }
398259698Sdim
399259698Sdim    assert(TA && "Couldn't disassemble atom, none was created!");
400259698Sdim    assert(TA->begin() != TA->end() && "Empty atom!");
401259698Sdim
402259698Sdim    MemoryObject *Region = getRegionFor(TA->getBeginAddr());
403259698Sdim    assert(Region && "Couldn't find region for already disassembled code!");
404259698Sdim    uint64_t EndRegion = Region->getBase() + Region->getExtent();
405259698Sdim
406259698Sdim    // Now we have a basic block atom, add successors.
407259698Sdim    // Add the fallthrough block.
408259698Sdim    if ((MIA.isConditionalBranch(TA->back().Inst) ||
409259698Sdim         !MIA.isTerminator(TA->back().Inst)) &&
410259698Sdim        (TA->getEndAddr() + 1 < EndRegion)) {
411259698Sdim      BBI->SuccAddrs.push_back(TA->getEndAddr() + 1);
412259698Sdim      Worklist.insert(TA->getEndAddr() + 1);
413259698Sdim    }
414259698Sdim
415259698Sdim    // If the terminator is a branch, add the target block.
416259698Sdim    if (MIA.isBranch(TA->back().Inst)) {
417259698Sdim      uint64_t BranchTarget;
418259698Sdim      if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address,
419259698Sdim                             TA->back().Size, BranchTarget)) {
420259698Sdim        StringRef ExtFnName;
421259698Sdim        if (MOS)
422259698Sdim          ExtFnName =
423259698Sdim              MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget));
424259698Sdim        if (!ExtFnName.empty()) {
425259698Sdim          TailCallTargets.push_back(BranchTarget);
426259698Sdim          CallTargets.push_back(BranchTarget);
427259698Sdim        } else {
428259698Sdim          BBI->SuccAddrs.push_back(BranchTarget);
429259698Sdim          Worklist.insert(BranchTarget);
430259698Sdim        }
431259698Sdim      }
432259698Sdim    }
433259698Sdim  }
434259698Sdim
435259698Sdim  for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
436259698Sdim    const uint64_t BeginAddr = Worklist[wi];
437259698Sdim    BBInfo *BBI = &BBInfos[BeginAddr];
438259698Sdim
439259698Sdim    assert(BBI->Atom && "Found a basic block without an associated atom!");
440259698Sdim
441259698Sdim    // Look for a basic block at BeginAddr.
442259698Sdim    BBI->BB = MCFN->find(BeginAddr);
443259698Sdim    if (BBI->BB) {
444259698Sdim      // FIXME: check that the succs/preds are the same
445259698Sdim      continue;
446259698Sdim    }
447259698Sdim    // If there was none, we have to create one from the atom.
448259698Sdim    BBI->BB = &MCFN->createBlock(*BBI->Atom);
449259698Sdim  }
450259698Sdim
451259698Sdim  for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
452259698Sdim    const uint64_t BeginAddr = Worklist[wi];
453259698Sdim    BBInfo *BBI = &BBInfos[BeginAddr];
454259698Sdim    MCBasicBlock *BB = BBI->BB;
455259698Sdim
456259698Sdim    RemoveDupsFromAddressVector(BBI->SuccAddrs);
457259698Sdim    for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(),
458259698Sdim         SE = BBI->SuccAddrs.end();
459259698Sdim         SE != SE; ++SI) {
460259698Sdim      MCBasicBlock *Succ = BBInfos[*SI].BB;
461259698Sdim      BB->addSuccessor(Succ);
462259698Sdim      Succ->addPredecessor(BB);
463259698Sdim    }
464259698Sdim  }
465259698Sdim
466259698Sdim  assert(BBInfos[Worklist[0]].BB &&
467259698Sdim         "No basic block created at requested address?");
468259698Sdim
469259698Sdim  return BBInfos[Worklist[0]].BB;
470259698Sdim}
471259698Sdim
472259698SdimMCFunction *
473259698SdimMCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr,
474259698Sdim                                     AddressSetTy &CallTargets,
475259698Sdim                                     AddressSetTy &TailCallTargets) {
476259698Sdim  // First, check if this is an external function.
477259698Sdim  StringRef ExtFnName;
478259698Sdim  if (MOS)
479259698Sdim    ExtFnName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr));
480259698Sdim  if (!ExtFnName.empty())
481259698Sdim    return Module->createFunction(ExtFnName);
482259698Sdim
483259698Sdim  // If it's not, look for an existing function.
484259698Sdim  for (MCModule::func_iterator FI = Module->func_begin(),
485259698Sdim                               FE = Module->func_end();
486259698Sdim       FI != FE; ++FI) {
487259698Sdim    if ((*FI)->empty())
488259698Sdim      continue;
489259698Sdim    // FIXME: MCModule should provide a findFunctionByAddr()
490259698Sdim    if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr)
491259698Sdim      return *FI;
492259698Sdim  }
493259698Sdim
494259698Sdim  // Finally, just create a new one.
495259698Sdim  MCFunction *MCFN = Module->createFunction("");
496259698Sdim  getBBAt(Module, MCFN, BeginAddr, CallTargets, TailCallTargets);
497259698Sdim  return MCFN;
498259698Sdim}
499259698Sdim
500259698Sdim// MachO MCObjectDisassembler implementation.
501259698Sdim
502259698SdimMCMachOObjectDisassembler::MCMachOObjectDisassembler(
503259698Sdim    const MachOObjectFile &MOOF, const MCDisassembler &Dis,
504259698Sdim    const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
505259698Sdim    uint64_t HeaderLoadAddress)
506259698Sdim    : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF),
507259698Sdim      VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) {
508259698Sdim
509259698Sdim  error_code ec;
510259698Sdim  for (section_iterator SI = MOOF.begin_sections(), SE = MOOF.end_sections();
511259698Sdim       SI != SE; SI.increment(ec)) {
512259698Sdim    if (ec)
513259698Sdim      break;
514259698Sdim    StringRef Name;
515259698Sdim    SI->getName(Name);
516259698Sdim    // FIXME: We should use the S_ section type instead of the name.
517259698Sdim    if (Name == "__mod_init_func") {
518259698Sdim      DEBUG(dbgs() << "Found __mod_init_func section!\n");
519259698Sdim      SI->getContents(ModInitContents);
520259698Sdim    } else if (Name == "__mod_exit_func") {
521259698Sdim      DEBUG(dbgs() << "Found __mod_exit_func section!\n");
522259698Sdim      SI->getContents(ModExitContents);
523259698Sdim    }
524259698Sdim  }
525259698Sdim}
526259698Sdim
527259698Sdim// FIXME: Only do the translations for addresses actually inside the object.
528259698Sdimuint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
529259698Sdim  return Addr + VMAddrSlide;
530259698Sdim}
531259698Sdim
532259698Sdimuint64_t
533259698SdimMCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) {
534259698Sdim  return EffectiveAddr - VMAddrSlide;
535259698Sdim}
536259698Sdim
537259698Sdimuint64_t MCMachOObjectDisassembler::getEntrypoint() {
538259698Sdim  uint64_t EntryFileOffset = 0;
539259698Sdim
540259698Sdim  // Look for LC_MAIN.
541259698Sdim  {
542259698Sdim    uint32_t LoadCommandCount = MOOF.getHeader().ncmds;
543259698Sdim    MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo();
544259698Sdim    for (unsigned I = 0;; ++I) {
545259698Sdim      if (Load.C.cmd == MachO::LC_MAIN) {
546259698Sdim        EntryFileOffset =
547259698Sdim            ((const MachO::entry_point_command *)Load.Ptr)->entryoff;
548259698Sdim        break;
549259698Sdim      }
550259698Sdim
551259698Sdim      if (I == LoadCommandCount - 1)
552259698Sdim        break;
553259698Sdim      else
554259698Sdim        Load = MOOF.getNextLoadCommandInfo(Load);
555259698Sdim    }
556259698Sdim  }
557259698Sdim
558259698Sdim  // If we didn't find anything, default to the common implementation.
559259698Sdim  // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends?
560259698Sdim  if (EntryFileOffset)
561259698Sdim    return MCObjectDisassembler::getEntrypoint();
562259698Sdim
563259698Sdim  return EntryFileOffset + HeaderLoadAddress;
564259698Sdim}
565259698Sdim
566259698SdimArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() {
567259698Sdim  // FIXME: We only handle 64bit mach-o
568259698Sdim  assert(MOOF.is64Bit());
569259698Sdim
570259698Sdim  size_t EntrySize = 8;
571259698Sdim  size_t EntryCount = ModInitContents.size() / EntrySize;
572259698Sdim  return ArrayRef<uint64_t>(
573259698Sdim      reinterpret_cast<const uint64_t *>(ModInitContents.data()), EntryCount);
574259698Sdim}
575259698Sdim
576259698SdimArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() {
577259698Sdim  // FIXME: We only handle 64bit mach-o
578259698Sdim  assert(MOOF.is64Bit());
579259698Sdim
580259698Sdim  size_t EntrySize = 8;
581259698Sdim  size_t EntryCount = ModExitContents.size() / EntrySize;
582259698Sdim  return ArrayRef<uint64_t>(
583259698Sdim      reinterpret_cast<const uint64_t *>(ModExitContents.data()), EntryCount);
584259698Sdim}
585