1//===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "FileAnalysis.h" 10#include "GraphBuilder.h" 11 12#include "llvm/BinaryFormat/ELF.h" 13#include "llvm/DebugInfo/DWARF/DWARFContext.h" 14#include "llvm/MC/MCAsmInfo.h" 15#include "llvm/MC/MCContext.h" 16#include "llvm/MC/MCDisassembler/MCDisassembler.h" 17#include "llvm/MC/MCInst.h" 18#include "llvm/MC/MCInstPrinter.h" 19#include "llvm/MC/MCInstrAnalysis.h" 20#include "llvm/MC/MCInstrDesc.h" 21#include "llvm/MC/MCInstrInfo.h" 22#include "llvm/MC/MCObjectFileInfo.h" 23#include "llvm/MC/MCRegisterInfo.h" 24#include "llvm/MC/MCSubtargetInfo.h" 25#include "llvm/MC/MCTargetOptions.h" 26#include "llvm/Object/Binary.h" 27#include "llvm/Object/COFF.h" 28#include "llvm/Object/ELFObjectFile.h" 29#include "llvm/Object/ObjectFile.h" 30#include "llvm/Support/Casting.h" 31#include "llvm/Support/CommandLine.h" 32#include "llvm/Support/Error.h" 33#include "llvm/Support/MemoryBuffer.h" 34#include "llvm/Support/TargetRegistry.h" 35#include "llvm/Support/TargetSelect.h" 36#include "llvm/Support/raw_ostream.h" 37 38 39using Instr = llvm::cfi_verify::FileAnalysis::Instr; 40using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer; 41 42namespace llvm { 43namespace cfi_verify { 44 45bool IgnoreDWARFFlag; 46 47static cl::opt<bool, true> IgnoreDWARFArg( 48 "ignore-dwarf", 49 cl::desc( 50 "Ignore all DWARF data. This relaxes the requirements for all " 51 "statically linked libraries to have been compiled with '-g', but " 52 "will result in false positives for 'CFI unprotected' instructions."), 53 cl::location(IgnoreDWARFFlag), cl::init(false)); 54 55StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) { 56 switch (Status) { 57 case CFIProtectionStatus::PROTECTED: 58 return "PROTECTED"; 59 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF: 60 return "FAIL_NOT_INDIRECT_CF"; 61 case CFIProtectionStatus::FAIL_ORPHANS: 62 return "FAIL_ORPHANS"; 63 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH: 64 return "FAIL_BAD_CONDITIONAL_BRANCH"; 65 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED: 66 return "FAIL_REGISTER_CLOBBERED"; 67 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION: 68 return "FAIL_INVALID_INSTRUCTION"; 69 } 70 llvm_unreachable("Attempted to stringify an unknown enum value."); 71} 72 73Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) { 74 // Open the filename provided. 75 Expected<object::OwningBinary<object::Binary>> BinaryOrErr = 76 object::createBinary(Filename); 77 if (!BinaryOrErr) 78 return BinaryOrErr.takeError(); 79 80 // Construct the object and allow it to take ownership of the binary. 81 object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get()); 82 FileAnalysis Analysis(std::move(Binary)); 83 84 Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary()); 85 if (!Analysis.Object) 86 return make_error<UnsupportedDisassembly>("Failed to cast object"); 87 88 switch (Analysis.Object->getArch()) { 89 case Triple::x86: 90 case Triple::x86_64: 91 case Triple::aarch64: 92 case Triple::aarch64_be: 93 break; 94 default: 95 return make_error<UnsupportedDisassembly>("Unsupported architecture."); 96 } 97 98 Analysis.ObjectTriple = Analysis.Object->makeTriple(); 99 Analysis.Features = Analysis.Object->getFeatures(); 100 101 // Init the rest of the object. 102 if (auto InitResponse = Analysis.initialiseDisassemblyMembers()) 103 return std::move(InitResponse); 104 105 if (auto SectionParseResponse = Analysis.parseCodeSections()) 106 return std::move(SectionParseResponse); 107 108 if (auto SymbolTableParseResponse = Analysis.parseSymbolTable()) 109 return std::move(SymbolTableParseResponse); 110 111 return std::move(Analysis); 112} 113 114FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary) 115 : Binary(std::move(Binary)) {} 116 117FileAnalysis::FileAnalysis(const Triple &ObjectTriple, 118 const SubtargetFeatures &Features) 119 : ObjectTriple(ObjectTriple), Features(Features) {} 120 121const Instr * 122FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const { 123 std::map<uint64_t, Instr>::const_iterator KV = 124 Instructions.find(InstrMeta.VMAddress); 125 if (KV == Instructions.end() || KV == Instructions.begin()) 126 return nullptr; 127 128 if (!(--KV)->second.Valid) 129 return nullptr; 130 131 return &KV->second; 132} 133 134const Instr * 135FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const { 136 std::map<uint64_t, Instr>::const_iterator KV = 137 Instructions.find(InstrMeta.VMAddress); 138 if (KV == Instructions.end() || ++KV == Instructions.end()) 139 return nullptr; 140 141 if (!KV->second.Valid) 142 return nullptr; 143 144 return &KV->second; 145} 146 147bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const { 148 for (const auto &Operand : InstrMeta.Instruction) { 149 if (Operand.isReg()) 150 return true; 151 } 152 return false; 153} 154 155const Instr *FileAnalysis::getInstruction(uint64_t Address) const { 156 const auto &InstrKV = Instructions.find(Address); 157 if (InstrKV == Instructions.end()) 158 return nullptr; 159 160 return &InstrKV->second; 161} 162 163const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const { 164 const auto &InstrKV = Instructions.find(Address); 165 assert(InstrKV != Instructions.end() && "Address doesn't exist."); 166 return InstrKV->second; 167} 168 169bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const { 170 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 171 return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta); 172} 173 174bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const { 175 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 176 if (!InstrDesc.isCall()) 177 return false; 178 uint64_t Target; 179 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 180 InstrMeta.InstructionSize, Target)) 181 return false; 182 return TrapOnFailFunctionAddresses.contains(Target); 183} 184 185bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const { 186 if (!InstrMeta.Valid) 187 return false; 188 189 if (isCFITrap(InstrMeta)) 190 return false; 191 192 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 193 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) 194 return InstrDesc.isConditionalBranch(); 195 196 return true; 197} 198 199const Instr * 200FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const { 201 if (!InstrMeta.Valid) 202 return nullptr; 203 204 if (isCFITrap(InstrMeta)) 205 return nullptr; 206 207 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); 208 const Instr *NextMetaPtr; 209 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) { 210 if (InstrDesc.isConditionalBranch()) 211 return nullptr; 212 213 uint64_t Target; 214 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, 215 InstrMeta.InstructionSize, Target)) 216 return nullptr; 217 218 NextMetaPtr = getInstruction(Target); 219 } else { 220 NextMetaPtr = 221 getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize); 222 } 223 224 if (!NextMetaPtr || !NextMetaPtr->Valid) 225 return nullptr; 226 227 return NextMetaPtr; 228} 229 230std::set<const Instr *> 231FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const { 232 std::set<const Instr *> CFCrossReferences; 233 const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta); 234 235 if (PrevInstruction && canFallThrough(*PrevInstruction)) 236 CFCrossReferences.insert(PrevInstruction); 237 238 const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress); 239 if (TargetRefsKV == StaticBranchTargetings.end()) 240 return CFCrossReferences; 241 242 for (uint64_t SourceInstrAddress : TargetRefsKV->second) { 243 const auto &SourceInstrKV = Instructions.find(SourceInstrAddress); 244 if (SourceInstrKV == Instructions.end()) { 245 errs() << "Failed to find source instruction at address " 246 << format_hex(SourceInstrAddress, 2) 247 << " for the cross-reference to instruction at address " 248 << format_hex(InstrMeta.VMAddress, 2) << ".\n"; 249 continue; 250 } 251 252 CFCrossReferences.insert(&SourceInstrKV->second); 253 } 254 255 return CFCrossReferences; 256} 257 258const std::set<object::SectionedAddress> & 259FileAnalysis::getIndirectInstructions() const { 260 return IndirectInstructions; 261} 262 263const MCRegisterInfo *FileAnalysis::getRegisterInfo() const { 264 return RegisterInfo.get(); 265} 266 267const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); } 268 269const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const { 270 return MIA.get(); 271} 272 273Expected<DIInliningInfo> 274FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address) { 275 assert(Symbolizer != nullptr && "Symbolizer is invalid."); 276 277 return Symbolizer->symbolizeInlinedCode(std::string(Object->getFileName()), 278 Address); 279} 280 281CFIProtectionStatus 282FileAnalysis::validateCFIProtection(const GraphResult &Graph) const { 283 const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress); 284 if (!InstrMetaPtr) 285 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION; 286 287 const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode()); 288 if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo)) 289 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 290 291 if (!usesRegisterOperand(*InstrMetaPtr)) 292 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; 293 294 if (!Graph.OrphanedNodes.empty()) 295 return CFIProtectionStatus::FAIL_ORPHANS; 296 297 for (const auto &BranchNode : Graph.ConditionalBranchNodes) { 298 if (!BranchNode.CFIProtection) 299 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH; 300 } 301 302 if (indirectCFOperandClobber(Graph) != Graph.BaseAddress) 303 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED; 304 305 return CFIProtectionStatus::PROTECTED; 306} 307 308uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const { 309 assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty."); 310 311 // Get the set of registers we must check to ensure they're not clobbered. 312 const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress); 313 DenseSet<unsigned> RegisterNumbers; 314 for (const auto &Operand : IndirectCF.Instruction) { 315 if (Operand.isReg()) 316 RegisterNumbers.insert(Operand.getReg()); 317 } 318 assert(RegisterNumbers.size() && "Zero register operands on indirect CF."); 319 320 // Now check all branches to indirect CFs and ensure no clobbering happens. 321 for (const auto &Branch : Graph.ConditionalBranchNodes) { 322 uint64_t Node; 323 if (Branch.IndirectCFIsOnTargetPath) 324 Node = Branch.Target; 325 else 326 Node = Branch.Fallthrough; 327 328 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so 329 // we allow them one load. 330 bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad(); 331 332 // We walk backwards from the indirect CF. It is the last node returned by 333 // Graph.flattenAddress, so we skip it since we already handled it. 334 DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers; 335 std::vector<uint64_t> Nodes = Graph.flattenAddress(Node); 336 for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) { 337 Node = *I; 338 const Instr &NodeInstr = getInstructionOrDie(Node); 339 const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode()); 340 341 for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end(); 342 RI != RE; ++RI) { 343 unsigned RegNum = *RI; 344 if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum, 345 *RegisterInfo)) { 346 if (!canLoad || !InstrDesc.mayLoad()) 347 return Node; 348 canLoad = false; 349 CurRegisterNumbers.erase(RI); 350 // Add the registers this load reads to those we check for clobbers. 351 for (unsigned i = InstrDesc.getNumDefs(), 352 e = InstrDesc.getNumOperands(); i != e; i++) { 353 const auto Operand = NodeInstr.Instruction.getOperand(i); 354 if (Operand.isReg()) 355 CurRegisterNumbers.insert(Operand.getReg()); 356 } 357 break; 358 } 359 } 360 } 361 } 362 363 return Graph.BaseAddress; 364} 365 366void FileAnalysis::printInstruction(const Instr &InstrMeta, 367 raw_ostream &OS) const { 368 Printer->printInst(&InstrMeta.Instruction, 0, "", *SubtargetInfo.get(), OS); 369} 370 371Error FileAnalysis::initialiseDisassemblyMembers() { 372 std::string TripleName = ObjectTriple.getTriple(); 373 ArchName = ""; 374 MCPU = ""; 375 std::string ErrorString; 376 377 LLVMSymbolizer::Options Opt; 378 Opt.UseSymbolTable = false; 379 Symbolizer.reset(new LLVMSymbolizer(Opt)); 380 381 ObjectTarget = 382 TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString); 383 if (!ObjectTarget) 384 return make_error<UnsupportedDisassembly>( 385 (Twine("Couldn't find target \"") + ObjectTriple.getTriple() + 386 "\", failed with error: " + ErrorString) 387 .str()); 388 389 RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName)); 390 if (!RegisterInfo) 391 return make_error<UnsupportedDisassembly>( 392 "Failed to initialise RegisterInfo."); 393 394 MCTargetOptions MCOptions; 395 AsmInfo.reset( 396 ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName, MCOptions)); 397 if (!AsmInfo) 398 return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo."); 399 400 SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo( 401 TripleName, MCPU, Features.getString())); 402 if (!SubtargetInfo) 403 return make_error<UnsupportedDisassembly>( 404 "Failed to initialise SubtargetInfo."); 405 406 MII.reset(ObjectTarget->createMCInstrInfo()); 407 if (!MII) 408 return make_error<UnsupportedDisassembly>("Failed to initialise MII."); 409 410 Context.reset(new MCContext(Triple(TripleName), AsmInfo.get(), 411 RegisterInfo.get(), SubtargetInfo.get())); 412 413 Disassembler.reset( 414 ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context)); 415 416 if (!Disassembler) 417 return make_error<UnsupportedDisassembly>( 418 "No disassembler available for target"); 419 420 MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get())); 421 422 Printer.reset(ObjectTarget->createMCInstPrinter( 423 ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII, 424 *RegisterInfo)); 425 426 return Error::success(); 427} 428 429Error FileAnalysis::parseCodeSections() { 430 if (!IgnoreDWARFFlag) { 431 std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object); 432 if (!DWARF) 433 return make_error<StringError>("Could not create DWARF information.", 434 inconvertibleErrorCode()); 435 436 bool LineInfoValid = false; 437 438 for (auto &Unit : DWARF->compile_units()) { 439 const auto &LineTable = DWARF->getLineTableForUnit(Unit.get()); 440 if (LineTable && !LineTable->Rows.empty()) { 441 LineInfoValid = true; 442 break; 443 } 444 } 445 446 if (!LineInfoValid) 447 return make_error<StringError>( 448 "DWARF line information missing. Did you compile with '-g'?", 449 inconvertibleErrorCode()); 450 } 451 452 for (const object::SectionRef &Section : Object->sections()) { 453 // Ensure only executable sections get analysed. 454 if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR)) 455 continue; 456 457 // Avoid checking the PLT since it produces spurious failures on AArch64 458 // when ignoring DWARF data. 459 Expected<StringRef> NameOrErr = Section.getName(); 460 if (NameOrErr && *NameOrErr == ".plt") 461 continue; 462 consumeError(NameOrErr.takeError()); 463 464 Expected<StringRef> Contents = Section.getContents(); 465 if (!Contents) 466 return Contents.takeError(); 467 ArrayRef<uint8_t> SectionBytes = arrayRefFromStringRef(*Contents); 468 469 parseSectionContents(SectionBytes, 470 {Section.getAddress(), Section.getIndex()}); 471 } 472 return Error::success(); 473} 474 475void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes, 476 object::SectionedAddress Address) { 477 assert(Symbolizer && "Symbolizer is uninitialised."); 478 MCInst Instruction; 479 Instr InstrMeta; 480 uint64_t InstructionSize; 481 482 for (uint64_t Byte = 0; Byte < SectionBytes.size();) { 483 bool ValidInstruction = 484 Disassembler->getInstruction(Instruction, InstructionSize, 485 SectionBytes.drop_front(Byte), 0, 486 outs()) == MCDisassembler::Success; 487 488 Byte += InstructionSize; 489 490 uint64_t VMAddress = Address.Address + Byte - InstructionSize; 491 InstrMeta.Instruction = Instruction; 492 InstrMeta.VMAddress = VMAddress; 493 InstrMeta.InstructionSize = InstructionSize; 494 InstrMeta.Valid = ValidInstruction; 495 496 addInstruction(InstrMeta); 497 498 if (!ValidInstruction) 499 continue; 500 501 // Skip additional parsing for instructions that do not affect the control 502 // flow. 503 const auto &InstrDesc = MII->get(Instruction.getOpcode()); 504 if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) 505 continue; 506 507 uint64_t Target; 508 if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) { 509 // If the target can be evaluated, it's not indirect. 510 StaticBranchTargetings[Target].push_back(VMAddress); 511 continue; 512 } 513 514 if (!usesRegisterOperand(InstrMeta)) 515 continue; 516 517 if (InstrDesc.isReturn()) 518 continue; 519 520 // Check if this instruction exists in the range of the DWARF metadata. 521 if (!IgnoreDWARFFlag) { 522 auto LineInfo = 523 Symbolizer->symbolizeCode(std::string(Object->getFileName()), 524 {VMAddress, Address.SectionIndex}); 525 if (!LineInfo) { 526 handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) { 527 errs() << "Symbolizer failed to get line: " << E.message() << "\n"; 528 }); 529 continue; 530 } 531 532 if (LineInfo->FileName == DILineInfo::BadString) 533 continue; 534 } 535 536 IndirectInstructions.insert({VMAddress, Address.SectionIndex}); 537 } 538} 539 540void FileAnalysis::addInstruction(const Instr &Instruction) { 541 const auto &KV = 542 Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction)); 543 if (!KV.second) { 544 errs() << "Failed to add instruction at address " 545 << format_hex(Instruction.VMAddress, 2) 546 << ": Instruction at this address already exists.\n"; 547 exit(EXIT_FAILURE); 548 } 549} 550 551Error FileAnalysis::parseSymbolTable() { 552 // Functions that will trap on CFI violations. 553 SmallSet<StringRef, 4> TrapOnFailFunctions; 554 TrapOnFailFunctions.insert("__cfi_slowpath"); 555 TrapOnFailFunctions.insert("__cfi_slowpath_diag"); 556 TrapOnFailFunctions.insert("abort"); 557 558 // Look through the list of symbols for functions that will trap on CFI 559 // violations. 560 for (auto &Sym : Object->symbols()) { 561 auto SymNameOrErr = Sym.getName(); 562 if (!SymNameOrErr) 563 consumeError(SymNameOrErr.takeError()); 564 else if (TrapOnFailFunctions.contains(*SymNameOrErr)) { 565 auto AddrOrErr = Sym.getAddress(); 566 if (!AddrOrErr) 567 consumeError(AddrOrErr.takeError()); 568 else 569 TrapOnFailFunctionAddresses.insert(*AddrOrErr); 570 } 571 } 572 if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) { 573 for (const auto &Addr : ElfObject->getPltAddresses()) { 574 if (!Addr.first) 575 continue; 576 object::SymbolRef Sym(*Addr.first, Object); 577 auto SymNameOrErr = Sym.getName(); 578 if (!SymNameOrErr) 579 consumeError(SymNameOrErr.takeError()); 580 else if (TrapOnFailFunctions.contains(*SymNameOrErr)) 581 TrapOnFailFunctionAddresses.insert(Addr.second); 582 } 583 } 584 return Error::success(); 585} 586 587UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) 588 : Text(std::string(Text)) {} 589 590char UnsupportedDisassembly::ID; 591void UnsupportedDisassembly::log(raw_ostream &OS) const { 592 OS << "Could not initialise disassembler: " << Text; 593} 594 595std::error_code UnsupportedDisassembly::convertToErrorCode() const { 596 return std::error_code(); 597} 598 599} // namespace cfi_verify 600} // namespace llvm 601