1//===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "FileAnalysis.h"
10#include "GraphBuilder.h"
11
12#include "llvm/BinaryFormat/ELF.h"
13#include "llvm/DebugInfo/DWARF/DWARFContext.h"
14#include "llvm/MC/MCAsmInfo.h"
15#include "llvm/MC/MCContext.h"
16#include "llvm/MC/MCDisassembler/MCDisassembler.h"
17#include "llvm/MC/MCInst.h"
18#include "llvm/MC/MCInstPrinter.h"
19#include "llvm/MC/MCInstrAnalysis.h"
20#include "llvm/MC/MCInstrDesc.h"
21#include "llvm/MC/MCInstrInfo.h"
22#include "llvm/MC/MCObjectFileInfo.h"
23#include "llvm/MC/MCRegisterInfo.h"
24#include "llvm/MC/MCSubtargetInfo.h"
25#include "llvm/MC/MCTargetOptions.h"
26#include "llvm/Object/Binary.h"
27#include "llvm/Object/COFF.h"
28#include "llvm/Object/ELFObjectFile.h"
29#include "llvm/Object/ObjectFile.h"
30#include "llvm/Support/Casting.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/Error.h"
33#include "llvm/Support/MemoryBuffer.h"
34#include "llvm/Support/TargetRegistry.h"
35#include "llvm/Support/TargetSelect.h"
36#include "llvm/Support/raw_ostream.h"
37
38
39using Instr = llvm::cfi_verify::FileAnalysis::Instr;
40using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
41
42namespace llvm {
43namespace cfi_verify {
44
45bool IgnoreDWARFFlag;
46
47static cl::opt<bool, true> IgnoreDWARFArg(
48    "ignore-dwarf",
49    cl::desc(
50        "Ignore all DWARF data. This relaxes the requirements for all "
51        "statically linked libraries to have been compiled with '-g', but "
52        "will result in false positives for 'CFI unprotected' instructions."),
53    cl::location(IgnoreDWARFFlag), cl::init(false));
54
55StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
56  switch (Status) {
57  case CFIProtectionStatus::PROTECTED:
58    return "PROTECTED";
59  case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
60    return "FAIL_NOT_INDIRECT_CF";
61  case CFIProtectionStatus::FAIL_ORPHANS:
62    return "FAIL_ORPHANS";
63  case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
64    return "FAIL_BAD_CONDITIONAL_BRANCH";
65  case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
66    return "FAIL_REGISTER_CLOBBERED";
67  case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
68    return "FAIL_INVALID_INSTRUCTION";
69  }
70  llvm_unreachable("Attempted to stringify an unknown enum value.");
71}
72
73Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
74  // Open the filename provided.
75  Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
76      object::createBinary(Filename);
77  if (!BinaryOrErr)
78    return BinaryOrErr.takeError();
79
80  // Construct the object and allow it to take ownership of the binary.
81  object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get());
82  FileAnalysis Analysis(std::move(Binary));
83
84  Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
85  if (!Analysis.Object)
86    return make_error<UnsupportedDisassembly>("Failed to cast object");
87
88  switch (Analysis.Object->getArch()) {
89    case Triple::x86:
90    case Triple::x86_64:
91    case Triple::aarch64:
92    case Triple::aarch64_be:
93      break;
94    default:
95      return make_error<UnsupportedDisassembly>("Unsupported architecture.");
96  }
97
98  Analysis.ObjectTriple = Analysis.Object->makeTriple();
99  Analysis.Features = Analysis.Object->getFeatures();
100
101  // Init the rest of the object.
102  if (auto InitResponse = Analysis.initialiseDisassemblyMembers())
103    return std::move(InitResponse);
104
105  if (auto SectionParseResponse = Analysis.parseCodeSections())
106    return std::move(SectionParseResponse);
107
108  if (auto SymbolTableParseResponse = Analysis.parseSymbolTable())
109    return std::move(SymbolTableParseResponse);
110
111  return std::move(Analysis);
112}
113
114FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
115    : Binary(std::move(Binary)) {}
116
117FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
118                           const SubtargetFeatures &Features)
119    : ObjectTriple(ObjectTriple), Features(Features) {}
120
121const Instr *
122FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
123  std::map<uint64_t, Instr>::const_iterator KV =
124      Instructions.find(InstrMeta.VMAddress);
125  if (KV == Instructions.end() || KV == Instructions.begin())
126    return nullptr;
127
128  if (!(--KV)->second.Valid)
129    return nullptr;
130
131  return &KV->second;
132}
133
134const Instr *
135FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
136  std::map<uint64_t, Instr>::const_iterator KV =
137      Instructions.find(InstrMeta.VMAddress);
138  if (KV == Instructions.end() || ++KV == Instructions.end())
139    return nullptr;
140
141  if (!KV->second.Valid)
142    return nullptr;
143
144  return &KV->second;
145}
146
147bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
148  for (const auto &Operand : InstrMeta.Instruction) {
149    if (Operand.isReg())
150      return true;
151  }
152  return false;
153}
154
155const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
156  const auto &InstrKV = Instructions.find(Address);
157  if (InstrKV == Instructions.end())
158    return nullptr;
159
160  return &InstrKV->second;
161}
162
163const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
164  const auto &InstrKV = Instructions.find(Address);
165  assert(InstrKV != Instructions.end() && "Address doesn't exist.");
166  return InstrKV->second;
167}
168
169bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
170  const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
171  return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta);
172}
173
174bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const {
175  const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
176  if (!InstrDesc.isCall())
177    return false;
178  uint64_t Target;
179  if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
180                           InstrMeta.InstructionSize, Target))
181    return false;
182  return TrapOnFailFunctionAddresses.contains(Target);
183}
184
185bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
186  if (!InstrMeta.Valid)
187    return false;
188
189  if (isCFITrap(InstrMeta))
190    return false;
191
192  const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
193  if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo))
194    return InstrDesc.isConditionalBranch();
195
196  return true;
197}
198
199const Instr *
200FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
201  if (!InstrMeta.Valid)
202    return nullptr;
203
204  if (isCFITrap(InstrMeta))
205    return nullptr;
206
207  const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
208  const Instr *NextMetaPtr;
209  if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
210    if (InstrDesc.isConditionalBranch())
211      return nullptr;
212
213    uint64_t Target;
214    if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
215                             InstrMeta.InstructionSize, Target))
216      return nullptr;
217
218    NextMetaPtr = getInstruction(Target);
219  } else {
220    NextMetaPtr =
221        getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize);
222  }
223
224  if (!NextMetaPtr || !NextMetaPtr->Valid)
225    return nullptr;
226
227  return NextMetaPtr;
228}
229
230std::set<const Instr *>
231FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
232  std::set<const Instr *> CFCrossReferences;
233  const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
234
235  if (PrevInstruction && canFallThrough(*PrevInstruction))
236    CFCrossReferences.insert(PrevInstruction);
237
238  const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
239  if (TargetRefsKV == StaticBranchTargetings.end())
240    return CFCrossReferences;
241
242  for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
243    const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
244    if (SourceInstrKV == Instructions.end()) {
245      errs() << "Failed to find source instruction at address "
246             << format_hex(SourceInstrAddress, 2)
247             << " for the cross-reference to instruction at address "
248             << format_hex(InstrMeta.VMAddress, 2) << ".\n";
249      continue;
250    }
251
252    CFCrossReferences.insert(&SourceInstrKV->second);
253  }
254
255  return CFCrossReferences;
256}
257
258const std::set<object::SectionedAddress> &
259FileAnalysis::getIndirectInstructions() const {
260  return IndirectInstructions;
261}
262
263const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
264  return RegisterInfo.get();
265}
266
267const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
268
269const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
270  return MIA.get();
271}
272
273Expected<DIInliningInfo>
274FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address) {
275  assert(Symbolizer != nullptr && "Symbolizer is invalid.");
276
277  return Symbolizer->symbolizeInlinedCode(std::string(Object->getFileName()),
278                                          Address);
279}
280
281CFIProtectionStatus
282FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
283  const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress);
284  if (!InstrMetaPtr)
285    return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;
286
287  const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode());
288  if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo))
289    return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
290
291  if (!usesRegisterOperand(*InstrMetaPtr))
292    return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
293
294  if (!Graph.OrphanedNodes.empty())
295    return CFIProtectionStatus::FAIL_ORPHANS;
296
297  for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
298    if (!BranchNode.CFIProtection)
299      return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
300  }
301
302  if (indirectCFOperandClobber(Graph) != Graph.BaseAddress)
303    return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED;
304
305  return CFIProtectionStatus::PROTECTED;
306}
307
308uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
309  assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");
310
311  // Get the set of registers we must check to ensure they're not clobbered.
312  const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
313  DenseSet<unsigned> RegisterNumbers;
314  for (const auto &Operand : IndirectCF.Instruction) {
315    if (Operand.isReg())
316      RegisterNumbers.insert(Operand.getReg());
317  }
318  assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");
319
320  // Now check all branches to indirect CFs and ensure no clobbering happens.
321  for (const auto &Branch : Graph.ConditionalBranchNodes) {
322    uint64_t Node;
323    if (Branch.IndirectCFIsOnTargetPath)
324      Node = Branch.Target;
325    else
326      Node = Branch.Fallthrough;
327
328    // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
329    // we allow them one load.
330    bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad();
331
332    // We walk backwards from the indirect CF.  It is the last node returned by
333    // Graph.flattenAddress, so we skip it since we already handled it.
334    DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers;
335    std::vector<uint64_t> Nodes = Graph.flattenAddress(Node);
336    for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) {
337      Node = *I;
338      const Instr &NodeInstr = getInstructionOrDie(Node);
339      const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());
340
341      for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end();
342           RI != RE; ++RI) {
343        unsigned RegNum = *RI;
344        if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
345                                      *RegisterInfo)) {
346          if (!canLoad || !InstrDesc.mayLoad())
347            return Node;
348          canLoad = false;
349          CurRegisterNumbers.erase(RI);
350          // Add the registers this load reads to those we check for clobbers.
351          for (unsigned i = InstrDesc.getNumDefs(),
352                        e = InstrDesc.getNumOperands(); i != e; i++) {
353            const auto Operand = NodeInstr.Instruction.getOperand(i);
354            if (Operand.isReg())
355              CurRegisterNumbers.insert(Operand.getReg());
356          }
357          break;
358        }
359      }
360    }
361  }
362
363  return Graph.BaseAddress;
364}
365
366void FileAnalysis::printInstruction(const Instr &InstrMeta,
367                                    raw_ostream &OS) const {
368  Printer->printInst(&InstrMeta.Instruction, 0, "", *SubtargetInfo.get(), OS);
369}
370
371Error FileAnalysis::initialiseDisassemblyMembers() {
372  std::string TripleName = ObjectTriple.getTriple();
373  ArchName = "";
374  MCPU = "";
375  std::string ErrorString;
376
377  LLVMSymbolizer::Options Opt;
378  Opt.UseSymbolTable = false;
379  Symbolizer.reset(new LLVMSymbolizer(Opt));
380
381  ObjectTarget =
382      TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
383  if (!ObjectTarget)
384    return make_error<UnsupportedDisassembly>(
385        (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
386         "\", failed with error: " + ErrorString)
387            .str());
388
389  RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
390  if (!RegisterInfo)
391    return make_error<UnsupportedDisassembly>(
392        "Failed to initialise RegisterInfo.");
393
394  MCTargetOptions MCOptions;
395  AsmInfo.reset(
396      ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName, MCOptions));
397  if (!AsmInfo)
398    return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");
399
400  SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
401      TripleName, MCPU, Features.getString()));
402  if (!SubtargetInfo)
403    return make_error<UnsupportedDisassembly>(
404        "Failed to initialise SubtargetInfo.");
405
406  MII.reset(ObjectTarget->createMCInstrInfo());
407  if (!MII)
408    return make_error<UnsupportedDisassembly>("Failed to initialise MII.");
409
410  Context.reset(new MCContext(Triple(TripleName), AsmInfo.get(),
411                              RegisterInfo.get(), SubtargetInfo.get()));
412
413  Disassembler.reset(
414      ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
415
416  if (!Disassembler)
417    return make_error<UnsupportedDisassembly>(
418        "No disassembler available for target");
419
420  MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
421
422  Printer.reset(ObjectTarget->createMCInstPrinter(
423      ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
424      *RegisterInfo));
425
426  return Error::success();
427}
428
429Error FileAnalysis::parseCodeSections() {
430  if (!IgnoreDWARFFlag) {
431    std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object);
432    if (!DWARF)
433      return make_error<StringError>("Could not create DWARF information.",
434                                     inconvertibleErrorCode());
435
436    bool LineInfoValid = false;
437
438    for (auto &Unit : DWARF->compile_units()) {
439      const auto &LineTable = DWARF->getLineTableForUnit(Unit.get());
440      if (LineTable && !LineTable->Rows.empty()) {
441        LineInfoValid = true;
442        break;
443      }
444    }
445
446    if (!LineInfoValid)
447      return make_error<StringError>(
448          "DWARF line information missing. Did you compile with '-g'?",
449          inconvertibleErrorCode());
450  }
451
452  for (const object::SectionRef &Section : Object->sections()) {
453    // Ensure only executable sections get analysed.
454    if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
455      continue;
456
457    // Avoid checking the PLT since it produces spurious failures on AArch64
458    // when ignoring DWARF data.
459    Expected<StringRef> NameOrErr = Section.getName();
460    if (NameOrErr && *NameOrErr == ".plt")
461      continue;
462    consumeError(NameOrErr.takeError());
463
464    Expected<StringRef> Contents = Section.getContents();
465    if (!Contents)
466      return Contents.takeError();
467    ArrayRef<uint8_t> SectionBytes = arrayRefFromStringRef(*Contents);
468
469    parseSectionContents(SectionBytes,
470                         {Section.getAddress(), Section.getIndex()});
471  }
472  return Error::success();
473}
474
475void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
476                                        object::SectionedAddress Address) {
477  assert(Symbolizer && "Symbolizer is uninitialised.");
478  MCInst Instruction;
479  Instr InstrMeta;
480  uint64_t InstructionSize;
481
482  for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
483    bool ValidInstruction =
484        Disassembler->getInstruction(Instruction, InstructionSize,
485                                     SectionBytes.drop_front(Byte), 0,
486                                     outs()) == MCDisassembler::Success;
487
488    Byte += InstructionSize;
489
490    uint64_t VMAddress = Address.Address + Byte - InstructionSize;
491    InstrMeta.Instruction = Instruction;
492    InstrMeta.VMAddress = VMAddress;
493    InstrMeta.InstructionSize = InstructionSize;
494    InstrMeta.Valid = ValidInstruction;
495
496    addInstruction(InstrMeta);
497
498    if (!ValidInstruction)
499      continue;
500
501    // Skip additional parsing for instructions that do not affect the control
502    // flow.
503    const auto &InstrDesc = MII->get(Instruction.getOpcode());
504    if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
505      continue;
506
507    uint64_t Target;
508    if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
509      // If the target can be evaluated, it's not indirect.
510      StaticBranchTargetings[Target].push_back(VMAddress);
511      continue;
512    }
513
514    if (!usesRegisterOperand(InstrMeta))
515      continue;
516
517    if (InstrDesc.isReturn())
518      continue;
519
520    // Check if this instruction exists in the range of the DWARF metadata.
521    if (!IgnoreDWARFFlag) {
522      auto LineInfo =
523          Symbolizer->symbolizeCode(std::string(Object->getFileName()),
524                                    {VMAddress, Address.SectionIndex});
525      if (!LineInfo) {
526        handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
527          errs() << "Symbolizer failed to get line: " << E.message() << "\n";
528        });
529        continue;
530      }
531
532      if (LineInfo->FileName == DILineInfo::BadString)
533        continue;
534    }
535
536    IndirectInstructions.insert({VMAddress, Address.SectionIndex});
537  }
538}
539
540void FileAnalysis::addInstruction(const Instr &Instruction) {
541  const auto &KV =
542      Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction));
543  if (!KV.second) {
544    errs() << "Failed to add instruction at address "
545           << format_hex(Instruction.VMAddress, 2)
546           << ": Instruction at this address already exists.\n";
547    exit(EXIT_FAILURE);
548  }
549}
550
551Error FileAnalysis::parseSymbolTable() {
552  // Functions that will trap on CFI violations.
553  SmallSet<StringRef, 4> TrapOnFailFunctions;
554  TrapOnFailFunctions.insert("__cfi_slowpath");
555  TrapOnFailFunctions.insert("__cfi_slowpath_diag");
556  TrapOnFailFunctions.insert("abort");
557
558  // Look through the list of symbols for functions that will trap on CFI
559  // violations.
560  for (auto &Sym : Object->symbols()) {
561    auto SymNameOrErr = Sym.getName();
562    if (!SymNameOrErr)
563      consumeError(SymNameOrErr.takeError());
564    else if (TrapOnFailFunctions.contains(*SymNameOrErr)) {
565      auto AddrOrErr = Sym.getAddress();
566      if (!AddrOrErr)
567        consumeError(AddrOrErr.takeError());
568      else
569        TrapOnFailFunctionAddresses.insert(*AddrOrErr);
570    }
571  }
572  if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) {
573    for (const auto &Addr : ElfObject->getPltAddresses()) {
574      if (!Addr.first)
575        continue;
576      object::SymbolRef Sym(*Addr.first, Object);
577      auto SymNameOrErr = Sym.getName();
578      if (!SymNameOrErr)
579        consumeError(SymNameOrErr.takeError());
580      else if (TrapOnFailFunctions.contains(*SymNameOrErr))
581        TrapOnFailFunctionAddresses.insert(Addr.second);
582    }
583  }
584  return Error::success();
585}
586
587UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text)
588    : Text(std::string(Text)) {}
589
590char UnsupportedDisassembly::ID;
591void UnsupportedDisassembly::log(raw_ostream &OS) const {
592  OS << "Could not initialise disassembler: " << Text;
593}
594
595std::error_code UnsupportedDisassembly::convertToErrorCode() const {
596  return std::error_code();
597}
598
599} // namespace cfi_verify
600} // namespace llvm
601