1//===-- sancov.cpp --------------------------------------------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// This file is a command-line tool for reading and analyzing sanitizer 9// coverage. 10//===----------------------------------------------------------------------===// 11#include "llvm/ADT/STLExtras.h" 12#include "llvm/ADT/StringExtras.h" 13#include "llvm/ADT/Twine.h" 14#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 15#include "llvm/DebugInfo/Symbolize/Symbolize.h" 16#include "llvm/MC/MCAsmInfo.h" 17#include "llvm/MC/MCContext.h" 18#include "llvm/MC/MCDisassembler/MCDisassembler.h" 19#include "llvm/MC/MCInst.h" 20#include "llvm/MC/MCInstrAnalysis.h" 21#include "llvm/MC/MCInstrInfo.h" 22#include "llvm/MC/MCObjectFileInfo.h" 23#include "llvm/MC/MCRegisterInfo.h" 24#include "llvm/MC/MCSubtargetInfo.h" 25#include "llvm/MC/MCTargetOptions.h" 26#include "llvm/MC/TargetRegistry.h" 27#include "llvm/Object/Archive.h" 28#include "llvm/Object/Binary.h" 29#include "llvm/Object/COFF.h" 30#include "llvm/Object/MachO.h" 31#include "llvm/Object/ObjectFile.h" 32#include "llvm/Support/Casting.h" 33#include "llvm/Support/CommandLine.h" 34#include "llvm/Support/Errc.h" 35#include "llvm/Support/ErrorOr.h" 36#include "llvm/Support/FileSystem.h" 37#include "llvm/Support/InitLLVM.h" 38#include "llvm/Support/JSON.h" 39#include "llvm/Support/MD5.h" 40#include "llvm/Support/MemoryBuffer.h" 41#include "llvm/Support/Path.h" 42#include "llvm/Support/Regex.h" 43#include "llvm/Support/SHA1.h" 44#include "llvm/Support/SourceMgr.h" 45#include "llvm/Support/SpecialCaseList.h" 46#include "llvm/Support/TargetSelect.h" 47#include "llvm/Support/VirtualFileSystem.h" 48#include "llvm/Support/YAMLParser.h" 49#include "llvm/Support/raw_ostream.h" 50 51#include <set> 52#include <vector> 53 54using namespace llvm; 55 56namespace { 57 58// --------- COMMAND LINE FLAGS --------- 59 60cl::OptionCategory Cat("sancov Options"); 61 62enum ActionType { 63 CoveredFunctionsAction, 64 HtmlReportAction, 65 MergeAction, 66 NotCoveredFunctionsAction, 67 PrintAction, 68 PrintCovPointsAction, 69 StatsAction, 70 SymbolizeAction 71}; 72 73cl::opt<ActionType> Action( 74 cl::desc("Action (required)"), cl::Required, 75 cl::values( 76 clEnumValN(PrintAction, "print", "Print coverage addresses"), 77 clEnumValN(PrintCovPointsAction, "print-coverage-pcs", 78 "Print coverage instrumentation points addresses."), 79 clEnumValN(CoveredFunctionsAction, "covered-functions", 80 "Print all covered funcions."), 81 clEnumValN(NotCoveredFunctionsAction, "not-covered-functions", 82 "Print all not covered funcions."), 83 clEnumValN(StatsAction, "print-coverage-stats", 84 "Print coverage statistics."), 85 clEnumValN(HtmlReportAction, "html-report", 86 "REMOVED. Use -symbolize & coverage-report-server.py."), 87 clEnumValN(SymbolizeAction, "symbolize", 88 "Produces a symbolized JSON report from binary report."), 89 clEnumValN(MergeAction, "merge", "Merges reports.")), 90 cl::cat(Cat)); 91 92static cl::list<std::string> 93 ClInputFiles(cl::Positional, cl::OneOrMore, 94 cl::desc("<action> <binary files...> <.sancov files...> " 95 "<.symcov files...>"), 96 cl::cat(Cat)); 97 98static cl::opt<bool> ClDemangle("demangle", cl::init(true), 99 cl::desc("Print demangled function name"), 100 cl::cat(Cat)); 101 102static cl::opt<bool> 103 ClSkipDeadFiles("skip-dead-files", cl::init(true), 104 cl::desc("Do not list dead source files in reports"), 105 cl::cat(Cat)); 106 107static cl::opt<std::string> 108 ClStripPathPrefix("strip_path_prefix", cl::init(""), 109 cl::desc("Strip this prefix from file paths in reports"), 110 cl::cat(Cat)); 111 112static cl::opt<std::string> 113 ClIgnorelist("ignorelist", cl::init(""), 114 cl::desc("Ignorelist file (sanitizer ignorelist format)"), 115 cl::cat(Cat)); 116 117static cl::opt<bool> ClUseDefaultIgnorelist( 118 "use_default_ignorelist", cl::init(true), cl::Hidden, 119 cl::desc("Controls if default ignorelist should be used"), cl::cat(Cat)); 120 121static const char *const DefaultIgnorelistStr = "fun:__sanitizer_.*\n" 122 "src:/usr/include/.*\n" 123 "src:.*/libc\\+\\+/.*\n"; 124 125// --------- FORMAT SPECIFICATION --------- 126 127struct FileHeader { 128 uint32_t Bitness; 129 uint32_t Magic; 130}; 131 132static const uint32_t BinCoverageMagic = 0xC0BFFFFF; 133static const uint32_t Bitness32 = 0xFFFFFF32; 134static const uint32_t Bitness64 = 0xFFFFFF64; 135 136static const Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov"); 137static const Regex SymcovFileRegex(".*\\.symcov"); 138 139// --------- MAIN DATASTRUCTURES ---------- 140 141// Contents of .sancov file: list of coverage point addresses that were 142// executed. 143struct RawCoverage { 144 explicit RawCoverage(std::unique_ptr<std::set<uint64_t>> Addrs) 145 : Addrs(std::move(Addrs)) {} 146 147 // Read binary .sancov file. 148 static ErrorOr<std::unique_ptr<RawCoverage>> 149 read(const std::string &FileName); 150 151 std::unique_ptr<std::set<uint64_t>> Addrs; 152}; 153 154// Coverage point has an opaque Id and corresponds to multiple source locations. 155struct CoveragePoint { 156 explicit CoveragePoint(const std::string &Id) : Id(Id) {} 157 158 std::string Id; 159 SmallVector<DILineInfo, 1> Locs; 160}; 161 162// Symcov file content: set of covered Ids plus information about all available 163// coverage points. 164struct SymbolizedCoverage { 165 // Read json .symcov file. 166 static std::unique_ptr<SymbolizedCoverage> read(const std::string &InputFile); 167 168 std::set<std::string> CoveredIds; 169 std::string BinaryHash; 170 std::vector<CoveragePoint> Points; 171}; 172 173struct CoverageStats { 174 size_t AllPoints; 175 size_t CovPoints; 176 size_t AllFns; 177 size_t CovFns; 178}; 179 180// --------- ERROR HANDLING --------- 181 182static void fail(const llvm::Twine &E) { 183 errs() << "ERROR: " << E << "\n"; 184 exit(1); 185} 186 187static void failIf(bool B, const llvm::Twine &E) { 188 if (B) 189 fail(E); 190} 191 192static void failIfError(std::error_code Error) { 193 if (!Error) 194 return; 195 errs() << "ERROR: " << Error.message() << "(" << Error.value() << ")\n"; 196 exit(1); 197} 198 199template <typename T> static void failIfError(const ErrorOr<T> &E) { 200 failIfError(E.getError()); 201} 202 203static void failIfError(Error Err) { 204 if (Err) { 205 logAllUnhandledErrors(std::move(Err), errs(), "ERROR: "); 206 exit(1); 207 } 208} 209 210template <typename T> static void failIfError(Expected<T> &E) { 211 failIfError(E.takeError()); 212} 213 214static void failIfNotEmpty(const llvm::Twine &E) { 215 if (E.str().empty()) 216 return; 217 fail(E); 218} 219 220template <typename T> 221static void failIfEmpty(const std::unique_ptr<T> &Ptr, 222 const std::string &Message) { 223 if (Ptr.get()) 224 return; 225 fail(Message); 226} 227 228// ----------- Coverage I/O ---------- 229template <typename T> 230static void readInts(const char *Start, const char *End, 231 std::set<uint64_t> *Ints) { 232 const T *S = reinterpret_cast<const T *>(Start); 233 const T *E = reinterpret_cast<const T *>(End); 234 std::copy(S, E, std::inserter(*Ints, Ints->end())); 235} 236 237ErrorOr<std::unique_ptr<RawCoverage>> 238RawCoverage::read(const std::string &FileName) { 239 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = 240 MemoryBuffer::getFile(FileName); 241 if (!BufOrErr) 242 return BufOrErr.getError(); 243 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get()); 244 if (Buf->getBufferSize() < 8) { 245 errs() << "File too small (<8): " << Buf->getBufferSize() << '\n'; 246 return make_error_code(errc::illegal_byte_sequence); 247 } 248 const FileHeader *Header = 249 reinterpret_cast<const FileHeader *>(Buf->getBufferStart()); 250 251 if (Header->Magic != BinCoverageMagic) { 252 errs() << "Wrong magic: " << Header->Magic << '\n'; 253 return make_error_code(errc::illegal_byte_sequence); 254 } 255 256 auto Addrs = std::make_unique<std::set<uint64_t>>(); 257 258 switch (Header->Bitness) { 259 case Bitness64: 260 readInts<uint64_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(), 261 Addrs.get()); 262 break; 263 case Bitness32: 264 readInts<uint32_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(), 265 Addrs.get()); 266 break; 267 default: 268 errs() << "Unsupported bitness: " << Header->Bitness << '\n'; 269 return make_error_code(errc::illegal_byte_sequence); 270 } 271 272 // Ignore slots that are zero, so a runtime implementation is not required 273 // to compactify the data. 274 Addrs->erase(0); 275 276 return std::unique_ptr<RawCoverage>(new RawCoverage(std::move(Addrs))); 277} 278 279// Print coverage addresses. 280raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) { 281 for (auto Addr : *CoverageData.Addrs) { 282 OS << "0x"; 283 OS.write_hex(Addr); 284 OS << "\n"; 285 } 286 return OS; 287} 288 289static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) { 290 OS << "all-edges: " << Stats.AllPoints << "\n"; 291 OS << "cov-edges: " << Stats.CovPoints << "\n"; 292 OS << "all-functions: " << Stats.AllFns << "\n"; 293 OS << "cov-functions: " << Stats.CovFns << "\n"; 294 return OS; 295} 296 297// Output symbolized information for coverage points in JSON. 298// Format: 299// { 300// '<file_name>' : { 301// '<function_name>' : { 302// '<point_id'> : '<line_number>:'<column_number'. 303// .... 304// } 305// } 306// } 307static void operator<<(json::OStream &W, 308 const std::vector<CoveragePoint> &Points) { 309 // Group points by file. 310 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFile; 311 for (const auto &Point : Points) { 312 for (const DILineInfo &Loc : Point.Locs) { 313 PointsByFile[Loc.FileName].push_back(&Point); 314 } 315 } 316 317 for (const auto &P : PointsByFile) { 318 std::string FileName = P.first; 319 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFn; 320 for (auto PointPtr : P.second) { 321 for (const DILineInfo &Loc : PointPtr->Locs) { 322 PointsByFn[Loc.FunctionName].push_back(PointPtr); 323 } 324 } 325 326 W.attributeObject(P.first, [&] { 327 // Group points by function. 328 for (const auto &P : PointsByFn) { 329 std::string FunctionName = P.first; 330 std::set<std::string> WrittenIds; 331 332 W.attributeObject(FunctionName, [&] { 333 for (const CoveragePoint *Point : P.second) { 334 for (const auto &Loc : Point->Locs) { 335 if (Loc.FileName != FileName || Loc.FunctionName != FunctionName) 336 continue; 337 if (WrittenIds.find(Point->Id) != WrittenIds.end()) 338 continue; 339 340 // Output <point_id> : "<line>:<col>". 341 WrittenIds.insert(Point->Id); 342 W.attribute(Point->Id, 343 (utostr(Loc.Line) + ":" + utostr(Loc.Column))); 344 } 345 } 346 }); 347 } 348 }); 349 } 350} 351 352static void operator<<(json::OStream &W, const SymbolizedCoverage &C) { 353 W.object([&] { 354 W.attributeArray("covered-points", [&] { 355 for (const std::string &P : C.CoveredIds) { 356 W.value(P); 357 } 358 }); 359 W.attribute("binary-hash", C.BinaryHash); 360 W.attributeObject("point-symbol-info", [&] { W << C.Points; }); 361 }); 362} 363 364static std::string parseScalarString(yaml::Node *N) { 365 SmallString<64> StringStorage; 366 yaml::ScalarNode *S = dyn_cast<yaml::ScalarNode>(N); 367 failIf(!S, "expected string"); 368 return std::string(S->getValue(StringStorage)); 369} 370 371std::unique_ptr<SymbolizedCoverage> 372SymbolizedCoverage::read(const std::string &InputFile) { 373 auto Coverage(std::make_unique<SymbolizedCoverage>()); 374 375 std::map<std::string, CoveragePoint> Points; 376 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = 377 MemoryBuffer::getFile(InputFile); 378 failIfError(BufOrErr); 379 380 SourceMgr SM; 381 yaml::Stream S(**BufOrErr, SM); 382 383 yaml::document_iterator DI = S.begin(); 384 failIf(DI == S.end(), "empty document: " + InputFile); 385 yaml::Node *Root = DI->getRoot(); 386 failIf(!Root, "expecting root node: " + InputFile); 387 yaml::MappingNode *Top = dyn_cast<yaml::MappingNode>(Root); 388 failIf(!Top, "expecting mapping node: " + InputFile); 389 390 for (auto &KVNode : *Top) { 391 auto Key = parseScalarString(KVNode.getKey()); 392 393 if (Key == "covered-points") { 394 yaml::SequenceNode *Points = 395 dyn_cast<yaml::SequenceNode>(KVNode.getValue()); 396 failIf(!Points, "expected array: " + InputFile); 397 398 for (auto I = Points->begin(), E = Points->end(); I != E; ++I) { 399 Coverage->CoveredIds.insert(parseScalarString(&*I)); 400 } 401 } else if (Key == "binary-hash") { 402 Coverage->BinaryHash = parseScalarString(KVNode.getValue()); 403 } else if (Key == "point-symbol-info") { 404 yaml::MappingNode *PointSymbolInfo = 405 dyn_cast<yaml::MappingNode>(KVNode.getValue()); 406 failIf(!PointSymbolInfo, "expected mapping node: " + InputFile); 407 408 for (auto &FileKVNode : *PointSymbolInfo) { 409 auto Filename = parseScalarString(FileKVNode.getKey()); 410 411 yaml::MappingNode *FileInfo = 412 dyn_cast<yaml::MappingNode>(FileKVNode.getValue()); 413 failIf(!FileInfo, "expected mapping node: " + InputFile); 414 415 for (auto &FunctionKVNode : *FileInfo) { 416 auto FunctionName = parseScalarString(FunctionKVNode.getKey()); 417 418 yaml::MappingNode *FunctionInfo = 419 dyn_cast<yaml::MappingNode>(FunctionKVNode.getValue()); 420 failIf(!FunctionInfo, "expected mapping node: " + InputFile); 421 422 for (auto &PointKVNode : *FunctionInfo) { 423 auto PointId = parseScalarString(PointKVNode.getKey()); 424 auto Loc = parseScalarString(PointKVNode.getValue()); 425 426 size_t ColonPos = Loc.find(':'); 427 failIf(ColonPos == std::string::npos, "expected ':': " + InputFile); 428 429 auto LineStr = Loc.substr(0, ColonPos); 430 auto ColStr = Loc.substr(ColonPos + 1, Loc.size()); 431 432 if (Points.find(PointId) == Points.end()) 433 Points.insert(std::make_pair(PointId, CoveragePoint(PointId))); 434 435 DILineInfo LineInfo; 436 LineInfo.FileName = Filename; 437 LineInfo.FunctionName = FunctionName; 438 char *End; 439 LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10); 440 LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10); 441 442 CoveragePoint *CoveragePoint = &Points.find(PointId)->second; 443 CoveragePoint->Locs.push_back(LineInfo); 444 } 445 } 446 } 447 } else { 448 errs() << "Ignoring unknown key: " << Key << "\n"; 449 } 450 } 451 452 for (auto &KV : Points) { 453 Coverage->Points.push_back(KV.second); 454 } 455 456 return Coverage; 457} 458 459// ---------- MAIN FUNCTIONALITY ---------- 460 461std::string stripPathPrefix(std::string Path) { 462 if (ClStripPathPrefix.empty()) 463 return Path; 464 size_t Pos = Path.find(ClStripPathPrefix); 465 if (Pos == std::string::npos) 466 return Path; 467 return Path.substr(Pos + ClStripPathPrefix.size()); 468} 469 470static std::unique_ptr<symbolize::LLVMSymbolizer> createSymbolizer() { 471 symbolize::LLVMSymbolizer::Options SymbolizerOptions; 472 SymbolizerOptions.Demangle = ClDemangle; 473 SymbolizerOptions.UseSymbolTable = true; 474 return std::unique_ptr<symbolize::LLVMSymbolizer>( 475 new symbolize::LLVMSymbolizer(SymbolizerOptions)); 476} 477 478static std::string normalizeFilename(const std::string &FileName) { 479 SmallString<256> S(FileName); 480 sys::path::remove_dots(S, /* remove_dot_dot */ true); 481 return stripPathPrefix(sys::path::convert_to_slash(std::string(S))); 482} 483 484class Ignorelists { 485public: 486 Ignorelists() 487 : DefaultIgnorelist(createDefaultIgnorelist()), 488 UserIgnorelist(createUserIgnorelist()) {} 489 490 bool isIgnorelisted(const DILineInfo &I) { 491 if (DefaultIgnorelist && 492 DefaultIgnorelist->inSection("sancov", "fun", I.FunctionName)) 493 return true; 494 if (DefaultIgnorelist && 495 DefaultIgnorelist->inSection("sancov", "src", I.FileName)) 496 return true; 497 if (UserIgnorelist && 498 UserIgnorelist->inSection("sancov", "fun", I.FunctionName)) 499 return true; 500 if (UserIgnorelist && 501 UserIgnorelist->inSection("sancov", "src", I.FileName)) 502 return true; 503 return false; 504 } 505 506private: 507 static std::unique_ptr<SpecialCaseList> createDefaultIgnorelist() { 508 if (!ClUseDefaultIgnorelist) 509 return std::unique_ptr<SpecialCaseList>(); 510 std::unique_ptr<MemoryBuffer> MB = 511 MemoryBuffer::getMemBuffer(DefaultIgnorelistStr); 512 std::string Error; 513 auto Ignorelist = SpecialCaseList::create(MB.get(), Error); 514 failIfNotEmpty(Error); 515 return Ignorelist; 516 } 517 518 static std::unique_ptr<SpecialCaseList> createUserIgnorelist() { 519 if (ClIgnorelist.empty()) 520 return std::unique_ptr<SpecialCaseList>(); 521 return SpecialCaseList::createOrDie({{ClIgnorelist}}, 522 *vfs::getRealFileSystem()); 523 } 524 std::unique_ptr<SpecialCaseList> DefaultIgnorelist; 525 std::unique_ptr<SpecialCaseList> UserIgnorelist; 526}; 527 528static std::vector<CoveragePoint> 529getCoveragePoints(const std::string &ObjectFile, 530 const std::set<uint64_t> &Addrs, 531 const std::set<uint64_t> &CoveredAddrs) { 532 std::vector<CoveragePoint> Result; 533 auto Symbolizer(createSymbolizer()); 534 Ignorelists Ig; 535 536 std::set<std::string> CoveredFiles; 537 if (ClSkipDeadFiles) { 538 for (auto Addr : CoveredAddrs) { 539 // TODO: it would be neccessary to set proper section index here. 540 // object::SectionedAddress::UndefSection works for only absolute 541 // addresses. 542 object::SectionedAddress ModuleAddress = { 543 Addr, object::SectionedAddress::UndefSection}; 544 545 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress); 546 failIfError(LineInfo); 547 CoveredFiles.insert(LineInfo->FileName); 548 auto InliningInfo = 549 Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress); 550 failIfError(InliningInfo); 551 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) { 552 auto FrameInfo = InliningInfo->getFrame(I); 553 CoveredFiles.insert(FrameInfo.FileName); 554 } 555 } 556 } 557 558 for (auto Addr : Addrs) { 559 std::set<DILineInfo> Infos; // deduplicate debug info. 560 561 // TODO: it would be neccessary to set proper section index here. 562 // object::SectionedAddress::UndefSection works for only absolute addresses. 563 object::SectionedAddress ModuleAddress = { 564 Addr, object::SectionedAddress::UndefSection}; 565 566 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress); 567 failIfError(LineInfo); 568 if (ClSkipDeadFiles && 569 CoveredFiles.find(LineInfo->FileName) == CoveredFiles.end()) 570 continue; 571 LineInfo->FileName = normalizeFilename(LineInfo->FileName); 572 if (Ig.isIgnorelisted(*LineInfo)) 573 continue; 574 575 auto Id = utohexstr(Addr, true); 576 auto Point = CoveragePoint(Id); 577 Infos.insert(*LineInfo); 578 Point.Locs.push_back(*LineInfo); 579 580 auto InliningInfo = 581 Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress); 582 failIfError(InliningInfo); 583 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) { 584 auto FrameInfo = InliningInfo->getFrame(I); 585 if (ClSkipDeadFiles && 586 CoveredFiles.find(FrameInfo.FileName) == CoveredFiles.end()) 587 continue; 588 FrameInfo.FileName = normalizeFilename(FrameInfo.FileName); 589 if (Ig.isIgnorelisted(FrameInfo)) 590 continue; 591 if (Infos.find(FrameInfo) == Infos.end()) { 592 Infos.insert(FrameInfo); 593 Point.Locs.push_back(FrameInfo); 594 } 595 } 596 597 Result.push_back(Point); 598 } 599 600 return Result; 601} 602 603static bool isCoveragePointSymbol(StringRef Name) { 604 return Name == "__sanitizer_cov" || Name == "__sanitizer_cov_with_check" || 605 Name == "__sanitizer_cov_trace_func_enter" || 606 Name == "__sanitizer_cov_trace_pc_guard" || 607 // Mac has '___' prefix 608 Name == "___sanitizer_cov" || Name == "___sanitizer_cov_with_check" || 609 Name == "___sanitizer_cov_trace_func_enter" || 610 Name == "___sanitizer_cov_trace_pc_guard"; 611} 612 613// Locate __sanitizer_cov* function addresses inside the stubs table on MachO. 614static void findMachOIndirectCovFunctions(const object::MachOObjectFile &O, 615 std::set<uint64_t> *Result) { 616 MachO::dysymtab_command Dysymtab = O.getDysymtabLoadCommand(); 617 MachO::symtab_command Symtab = O.getSymtabLoadCommand(); 618 619 for (const auto &Load : O.load_commands()) { 620 if (Load.C.cmd == MachO::LC_SEGMENT_64) { 621 MachO::segment_command_64 Seg = O.getSegment64LoadCommand(Load); 622 for (unsigned J = 0; J < Seg.nsects; ++J) { 623 MachO::section_64 Sec = O.getSection64(Load, J); 624 625 uint32_t SectionType = Sec.flags & MachO::SECTION_TYPE; 626 if (SectionType == MachO::S_SYMBOL_STUBS) { 627 uint32_t Stride = Sec.reserved2; 628 uint32_t Cnt = Sec.size / Stride; 629 uint32_t N = Sec.reserved1; 630 for (uint32_t J = 0; J < Cnt && N + J < Dysymtab.nindirectsyms; J++) { 631 uint32_t IndirectSymbol = 632 O.getIndirectSymbolTableEntry(Dysymtab, N + J); 633 uint64_t Addr = Sec.addr + J * Stride; 634 if (IndirectSymbol < Symtab.nsyms) { 635 object::SymbolRef Symbol = *(O.getSymbolByIndex(IndirectSymbol)); 636 Expected<StringRef> Name = Symbol.getName(); 637 failIfError(Name); 638 if (isCoveragePointSymbol(Name.get())) { 639 Result->insert(Addr); 640 } 641 } 642 } 643 } 644 } 645 } 646 if (Load.C.cmd == MachO::LC_SEGMENT) { 647 errs() << "ERROR: 32 bit MachO binaries not supported\n"; 648 } 649 } 650} 651 652// Locate __sanitizer_cov* function addresses that are used for coverage 653// reporting. 654static std::set<uint64_t> 655findSanitizerCovFunctions(const object::ObjectFile &O) { 656 std::set<uint64_t> Result; 657 658 for (const object::SymbolRef &Symbol : O.symbols()) { 659 Expected<uint64_t> AddressOrErr = Symbol.getAddress(); 660 failIfError(AddressOrErr); 661 uint64_t Address = AddressOrErr.get(); 662 663 Expected<StringRef> NameOrErr = Symbol.getName(); 664 failIfError(NameOrErr); 665 StringRef Name = NameOrErr.get(); 666 667 Expected<uint32_t> FlagsOrErr = Symbol.getFlags(); 668 // TODO: Test this error. 669 failIfError(FlagsOrErr); 670 uint32_t Flags = FlagsOrErr.get(); 671 672 if (!(Flags & object::BasicSymbolRef::SF_Undefined) && 673 isCoveragePointSymbol(Name)) { 674 Result.insert(Address); 675 } 676 } 677 678 if (const auto *CO = dyn_cast<object::COFFObjectFile>(&O)) { 679 for (const object::ExportDirectoryEntryRef &Export : 680 CO->export_directories()) { 681 uint32_t RVA; 682 failIfError(Export.getExportRVA(RVA)); 683 684 StringRef Name; 685 failIfError(Export.getSymbolName(Name)); 686 687 if (isCoveragePointSymbol(Name)) 688 Result.insert(CO->getImageBase() + RVA); 689 } 690 } 691 692 if (const auto *MO = dyn_cast<object::MachOObjectFile>(&O)) { 693 findMachOIndirectCovFunctions(*MO, &Result); 694 } 695 696 return Result; 697} 698 699// Ported from 700// compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h:GetPreviousInstructionPc 701// GetPreviousInstructionPc. 702static uint64_t getPreviousInstructionPc(uint64_t PC, 703 Triple TheTriple) { 704 if (TheTriple.isARM()) 705 return (PC - 3) & (~1); 706 if (TheTriple.isMIPS() || TheTriple.isSPARC()) 707 return PC - 8; 708 if (TheTriple.isRISCV()) 709 return PC - 2; 710 if (TheTriple.isX86() || TheTriple.isSystemZ()) 711 return PC - 1; 712 return PC - 4; 713} 714 715// Locate addresses of all coverage points in a file. Coverage point 716// is defined as the 'address of instruction following __sanitizer_cov 717// call - 1'. 718static void getObjectCoveragePoints(const object::ObjectFile &O, 719 std::set<uint64_t> *Addrs) { 720 Triple TheTriple("unknown-unknown-unknown"); 721 TheTriple.setArch(Triple::ArchType(O.getArch())); 722 auto TripleName = TheTriple.getTriple(); 723 724 std::string Error; 725 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); 726 failIfNotEmpty(Error); 727 728 std::unique_ptr<const MCSubtargetInfo> STI( 729 TheTarget->createMCSubtargetInfo(TripleName, "", "")); 730 failIfEmpty(STI, "no subtarget info for target " + TripleName); 731 732 std::unique_ptr<const MCRegisterInfo> MRI( 733 TheTarget->createMCRegInfo(TripleName)); 734 failIfEmpty(MRI, "no register info for target " + TripleName); 735 736 MCTargetOptions MCOptions; 737 std::unique_ptr<const MCAsmInfo> AsmInfo( 738 TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); 739 failIfEmpty(AsmInfo, "no asm info for target " + TripleName); 740 741 MCContext Ctx(TheTriple, AsmInfo.get(), MRI.get(), STI.get()); 742 std::unique_ptr<MCDisassembler> DisAsm( 743 TheTarget->createMCDisassembler(*STI, Ctx)); 744 failIfEmpty(DisAsm, "no disassembler info for target " + TripleName); 745 746 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 747 failIfEmpty(MII, "no instruction info for target " + TripleName); 748 749 std::unique_ptr<const MCInstrAnalysis> MIA( 750 TheTarget->createMCInstrAnalysis(MII.get())); 751 failIfEmpty(MIA, "no instruction analysis info for target " + TripleName); 752 753 auto SanCovAddrs = findSanitizerCovFunctions(O); 754 if (SanCovAddrs.empty()) 755 fail("__sanitizer_cov* functions not found"); 756 757 for (object::SectionRef Section : O.sections()) { 758 if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same. 759 continue; 760 uint64_t SectionAddr = Section.getAddress(); 761 uint64_t SectSize = Section.getSize(); 762 if (!SectSize) 763 continue; 764 765 Expected<StringRef> BytesStr = Section.getContents(); 766 failIfError(BytesStr); 767 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*BytesStr); 768 769 for (uint64_t Index = 0, Size = 0; Index < Section.getSize(); 770 Index += Size) { 771 MCInst Inst; 772 ArrayRef<uint8_t> ThisBytes = Bytes.slice(Index); 773 uint64_t ThisAddr = SectionAddr + Index; 774 if (!DisAsm->getInstruction(Inst, Size, ThisBytes, ThisAddr, nulls())) { 775 if (Size == 0) 776 Size = std::min<uint64_t>( 777 ThisBytes.size(), 778 DisAsm->suggestBytesToSkip(ThisBytes, ThisAddr)); 779 continue; 780 } 781 uint64_t Addr = Index + SectionAddr; 782 // Sanitizer coverage uses the address of the next instruction - 1. 783 uint64_t CovPoint = getPreviousInstructionPc(Addr + Size, TheTriple); 784 uint64_t Target; 785 if (MIA->isCall(Inst) && 786 MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) && 787 SanCovAddrs.find(Target) != SanCovAddrs.end()) 788 Addrs->insert(CovPoint); 789 } 790 } 791} 792 793static void 794visitObjectFiles(const object::Archive &A, 795 function_ref<void(const object::ObjectFile &)> Fn) { 796 Error Err = Error::success(); 797 for (auto &C : A.children(Err)) { 798 Expected<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary(); 799 failIfError(ChildOrErr); 800 if (auto *O = dyn_cast<object::ObjectFile>(&*ChildOrErr.get())) 801 Fn(*O); 802 else 803 failIfError(object::object_error::invalid_file_type); 804 } 805 failIfError(std::move(Err)); 806} 807 808static void 809visitObjectFiles(const std::string &FileName, 810 function_ref<void(const object::ObjectFile &)> Fn) { 811 Expected<object::OwningBinary<object::Binary>> BinaryOrErr = 812 object::createBinary(FileName); 813 if (!BinaryOrErr) 814 failIfError(BinaryOrErr); 815 816 object::Binary &Binary = *BinaryOrErr.get().getBinary(); 817 if (object::Archive *A = dyn_cast<object::Archive>(&Binary)) 818 visitObjectFiles(*A, Fn); 819 else if (object::ObjectFile *O = dyn_cast<object::ObjectFile>(&Binary)) 820 Fn(*O); 821 else 822 failIfError(object::object_error::invalid_file_type); 823} 824 825static std::set<uint64_t> 826findSanitizerCovFunctions(const std::string &FileName) { 827 std::set<uint64_t> Result; 828 visitObjectFiles(FileName, [&](const object::ObjectFile &O) { 829 auto Addrs = findSanitizerCovFunctions(O); 830 Result.insert(Addrs.begin(), Addrs.end()); 831 }); 832 return Result; 833} 834 835// Locate addresses of all coverage points in a file. Coverage point 836// is defined as the 'address of instruction following __sanitizer_cov 837// call - 1'. 838static std::set<uint64_t> findCoveragePointAddrs(const std::string &FileName) { 839 std::set<uint64_t> Result; 840 visitObjectFiles(FileName, [&](const object::ObjectFile &O) { 841 getObjectCoveragePoints(O, &Result); 842 }); 843 return Result; 844} 845 846static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) { 847 for (uint64_t Addr : findCoveragePointAddrs(ObjFile)) { 848 OS << "0x"; 849 OS.write_hex(Addr); 850 OS << "\n"; 851 } 852} 853 854static ErrorOr<bool> isCoverageFile(const std::string &FileName) { 855 auto ShortFileName = llvm::sys::path::filename(FileName); 856 if (!SancovFileRegex.match(ShortFileName)) 857 return false; 858 859 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = 860 MemoryBuffer::getFile(FileName); 861 if (!BufOrErr) { 862 errs() << "Warning: " << BufOrErr.getError().message() << "(" 863 << BufOrErr.getError().value() 864 << "), filename: " << llvm::sys::path::filename(FileName) << "\n"; 865 return BufOrErr.getError(); 866 } 867 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get()); 868 if (Buf->getBufferSize() < 8) { 869 return false; 870 } 871 const FileHeader *Header = 872 reinterpret_cast<const FileHeader *>(Buf->getBufferStart()); 873 return Header->Magic == BinCoverageMagic; 874} 875 876static bool isSymbolizedCoverageFile(const std::string &FileName) { 877 auto ShortFileName = llvm::sys::path::filename(FileName); 878 return SymcovFileRegex.match(ShortFileName); 879} 880 881static std::unique_ptr<SymbolizedCoverage> 882symbolize(const RawCoverage &Data, const std::string ObjectFile) { 883 auto Coverage = std::make_unique<SymbolizedCoverage>(); 884 885 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = 886 MemoryBuffer::getFile(ObjectFile); 887 failIfError(BufOrErr); 888 SHA1 Hasher; 889 Hasher.update((*BufOrErr)->getBuffer()); 890 Coverage->BinaryHash = toHex(Hasher.final()); 891 892 Ignorelists Ig; 893 auto Symbolizer(createSymbolizer()); 894 895 for (uint64_t Addr : *Data.Addrs) { 896 // TODO: it would be neccessary to set proper section index here. 897 // object::SectionedAddress::UndefSection works for only absolute addresses. 898 auto LineInfo = Symbolizer->symbolizeCode( 899 ObjectFile, {Addr, object::SectionedAddress::UndefSection}); 900 failIfError(LineInfo); 901 if (Ig.isIgnorelisted(*LineInfo)) 902 continue; 903 904 Coverage->CoveredIds.insert(utohexstr(Addr, true)); 905 } 906 907 std::set<uint64_t> AllAddrs = findCoveragePointAddrs(ObjectFile); 908 if (!std::includes(AllAddrs.begin(), AllAddrs.end(), Data.Addrs->begin(), 909 Data.Addrs->end())) { 910 fail("Coverage points in binary and .sancov file do not match."); 911 } 912 Coverage->Points = getCoveragePoints(ObjectFile, AllAddrs, *Data.Addrs); 913 return Coverage; 914} 915 916struct FileFn { 917 bool operator<(const FileFn &RHS) const { 918 return std::tie(FileName, FunctionName) < 919 std::tie(RHS.FileName, RHS.FunctionName); 920 } 921 922 std::string FileName; 923 std::string FunctionName; 924}; 925 926static std::set<FileFn> 927computeFunctions(const std::vector<CoveragePoint> &Points) { 928 std::set<FileFn> Fns; 929 for (const auto &Point : Points) { 930 for (const auto &Loc : Point.Locs) { 931 Fns.insert(FileFn{Loc.FileName, Loc.FunctionName}); 932 } 933 } 934 return Fns; 935} 936 937static std::set<FileFn> 938computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) { 939 auto Fns = computeFunctions(Coverage.Points); 940 941 for (const auto &Point : Coverage.Points) { 942 if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end()) 943 continue; 944 945 for (const auto &Loc : Point.Locs) { 946 Fns.erase(FileFn{Loc.FileName, Loc.FunctionName}); 947 } 948 } 949 950 return Fns; 951} 952 953static std::set<FileFn> 954computeCoveredFunctions(const SymbolizedCoverage &Coverage) { 955 auto AllFns = computeFunctions(Coverage.Points); 956 std::set<FileFn> Result; 957 958 for (const auto &Point : Coverage.Points) { 959 if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end()) 960 continue; 961 962 for (const auto &Loc : Point.Locs) { 963 Result.insert(FileFn{Loc.FileName, Loc.FunctionName}); 964 } 965 } 966 967 return Result; 968} 969 970typedef std::map<FileFn, std::pair<uint32_t, uint32_t>> FunctionLocs; 971// finds first location in a file for each function. 972static FunctionLocs resolveFunctions(const SymbolizedCoverage &Coverage, 973 const std::set<FileFn> &Fns) { 974 FunctionLocs Result; 975 for (const auto &Point : Coverage.Points) { 976 for (const auto &Loc : Point.Locs) { 977 FileFn Fn = FileFn{Loc.FileName, Loc.FunctionName}; 978 if (Fns.find(Fn) == Fns.end()) 979 continue; 980 981 auto P = std::make_pair(Loc.Line, Loc.Column); 982 auto I = Result.find(Fn); 983 if (I == Result.end() || I->second > P) { 984 Result[Fn] = P; 985 } 986 } 987 } 988 return Result; 989} 990 991static void printFunctionLocs(const FunctionLocs &FnLocs, raw_ostream &OS) { 992 for (const auto &P : FnLocs) { 993 OS << stripPathPrefix(P.first.FileName) << ":" << P.second.first << " " 994 << P.first.FunctionName << "\n"; 995 } 996} 997CoverageStats computeStats(const SymbolizedCoverage &Coverage) { 998 CoverageStats Stats = {Coverage.Points.size(), Coverage.CoveredIds.size(), 999 computeFunctions(Coverage.Points).size(), 1000 computeCoveredFunctions(Coverage).size()}; 1001 return Stats; 1002} 1003 1004// Print list of covered functions. 1005// Line format: <file_name>:<line> <function_name> 1006static void printCoveredFunctions(const SymbolizedCoverage &CovData, 1007 raw_ostream &OS) { 1008 auto CoveredFns = computeCoveredFunctions(CovData); 1009 printFunctionLocs(resolveFunctions(CovData, CoveredFns), OS); 1010} 1011 1012// Print list of not covered functions. 1013// Line format: <file_name>:<line> <function_name> 1014static void printNotCoveredFunctions(const SymbolizedCoverage &CovData, 1015 raw_ostream &OS) { 1016 auto NotCoveredFns = computeNotCoveredFunctions(CovData); 1017 printFunctionLocs(resolveFunctions(CovData, NotCoveredFns), OS); 1018} 1019 1020// Read list of files and merges their coverage info. 1021static void readAndPrintRawCoverage(const std::vector<std::string> &FileNames, 1022 raw_ostream &OS) { 1023 std::vector<std::unique_ptr<RawCoverage>> Covs; 1024 for (const auto &FileName : FileNames) { 1025 auto Cov = RawCoverage::read(FileName); 1026 if (!Cov) 1027 continue; 1028 OS << *Cov.get(); 1029 } 1030} 1031 1032static std::unique_ptr<SymbolizedCoverage> 1033merge(const std::vector<std::unique_ptr<SymbolizedCoverage>> &Coverages) { 1034 if (Coverages.empty()) 1035 return nullptr; 1036 1037 auto Result = std::make_unique<SymbolizedCoverage>(); 1038 1039 for (size_t I = 0; I < Coverages.size(); ++I) { 1040 const SymbolizedCoverage &Coverage = *Coverages[I]; 1041 std::string Prefix; 1042 if (Coverages.size() > 1) { 1043 // prefix is not needed when there's only one file. 1044 Prefix = utostr(I); 1045 } 1046 1047 for (const auto &Id : Coverage.CoveredIds) { 1048 Result->CoveredIds.insert(Prefix + Id); 1049 } 1050 1051 for (const auto &CovPoint : Coverage.Points) { 1052 CoveragePoint NewPoint(CovPoint); 1053 NewPoint.Id = Prefix + CovPoint.Id; 1054 Result->Points.push_back(NewPoint); 1055 } 1056 } 1057 1058 if (Coverages.size() == 1) { 1059 Result->BinaryHash = Coverages[0]->BinaryHash; 1060 } 1061 1062 return Result; 1063} 1064 1065static std::unique_ptr<SymbolizedCoverage> 1066readSymbolizeAndMergeCmdArguments(std::vector<std::string> FileNames) { 1067 std::vector<std::unique_ptr<SymbolizedCoverage>> Coverages; 1068 1069 { 1070 // Short name => file name. 1071 std::map<std::string, std::string> ObjFiles; 1072 std::string FirstObjFile; 1073 std::set<std::string> CovFiles; 1074 1075 // Partition input values into coverage/object files. 1076 for (const auto &FileName : FileNames) { 1077 if (isSymbolizedCoverageFile(FileName)) { 1078 Coverages.push_back(SymbolizedCoverage::read(FileName)); 1079 } 1080 1081 auto ErrorOrIsCoverage = isCoverageFile(FileName); 1082 if (!ErrorOrIsCoverage) 1083 continue; 1084 if (ErrorOrIsCoverage.get()) { 1085 CovFiles.insert(FileName); 1086 } else { 1087 auto ShortFileName = llvm::sys::path::filename(FileName); 1088 if (ObjFiles.find(std::string(ShortFileName)) != ObjFiles.end()) { 1089 fail("Duplicate binary file with a short name: " + ShortFileName); 1090 } 1091 1092 ObjFiles[std::string(ShortFileName)] = FileName; 1093 if (FirstObjFile.empty()) 1094 FirstObjFile = FileName; 1095 } 1096 } 1097 1098 SmallVector<StringRef, 2> Components; 1099 1100 // Object file => list of corresponding coverage file names. 1101 std::map<std::string, std::vector<std::string>> CoverageByObjFile; 1102 for (const auto &FileName : CovFiles) { 1103 auto ShortFileName = llvm::sys::path::filename(FileName); 1104 auto Ok = SancovFileRegex.match(ShortFileName, &Components); 1105 if (!Ok) { 1106 fail("Can't match coverage file name against " 1107 "<module_name>.<pid>.sancov pattern: " + 1108 FileName); 1109 } 1110 1111 auto Iter = ObjFiles.find(std::string(Components[1])); 1112 if (Iter == ObjFiles.end()) { 1113 fail("Object file for coverage not found: " + FileName); 1114 } 1115 1116 CoverageByObjFile[Iter->second].push_back(FileName); 1117 }; 1118 1119 for (const auto &Pair : ObjFiles) { 1120 auto FileName = Pair.second; 1121 if (CoverageByObjFile.find(FileName) == CoverageByObjFile.end()) 1122 errs() << "WARNING: No coverage file for " << FileName << "\n"; 1123 } 1124 1125 // Read raw coverage and symbolize it. 1126 for (const auto &Pair : CoverageByObjFile) { 1127 if (findSanitizerCovFunctions(Pair.first).empty()) { 1128 errs() 1129 << "WARNING: Ignoring " << Pair.first 1130 << " and its coverage because __sanitizer_cov* functions were not " 1131 "found.\n"; 1132 continue; 1133 } 1134 1135 for (const std::string &CoverageFile : Pair.second) { 1136 auto DataOrError = RawCoverage::read(CoverageFile); 1137 failIfError(DataOrError); 1138 Coverages.push_back(symbolize(*DataOrError.get(), Pair.first)); 1139 } 1140 } 1141 } 1142 1143 return merge(Coverages); 1144} 1145 1146} // namespace 1147 1148int main(int Argc, char **Argv) { 1149 llvm::InitLLVM X(Argc, Argv); 1150 cl::HideUnrelatedOptions(Cat); 1151 1152 llvm::InitializeAllTargetInfos(); 1153 llvm::InitializeAllTargetMCs(); 1154 llvm::InitializeAllDisassemblers(); 1155 1156 cl::ParseCommandLineOptions(Argc, Argv, 1157 "Sanitizer Coverage Processing Tool (sancov)\n\n" 1158 " This tool can extract various coverage-related information from: \n" 1159 " coverage-instrumented binary files, raw .sancov files and their " 1160 "symbolized .symcov version.\n" 1161 " Depending on chosen action the tool expects different input files:\n" 1162 " -print-coverage-pcs - coverage-instrumented binary files\n" 1163 " -print-coverage - .sancov files\n" 1164 " <other actions> - .sancov files & corresponding binary " 1165 "files, .symcov files\n" 1166 ); 1167 1168 // -print doesn't need object files. 1169 if (Action == PrintAction) { 1170 readAndPrintRawCoverage(ClInputFiles, outs()); 1171 return 0; 1172 } else if (Action == PrintCovPointsAction) { 1173 // -print-coverage-points doesn't need coverage files. 1174 for (const std::string &ObjFile : ClInputFiles) { 1175 printCovPoints(ObjFile, outs()); 1176 } 1177 return 0; 1178 } 1179 1180 auto Coverage = readSymbolizeAndMergeCmdArguments(ClInputFiles); 1181 failIf(!Coverage, "No valid coverage files given."); 1182 1183 switch (Action) { 1184 case CoveredFunctionsAction: { 1185 printCoveredFunctions(*Coverage, outs()); 1186 return 0; 1187 } 1188 case NotCoveredFunctionsAction: { 1189 printNotCoveredFunctions(*Coverage, outs()); 1190 return 0; 1191 } 1192 case StatsAction: { 1193 outs() << computeStats(*Coverage); 1194 return 0; 1195 } 1196 case MergeAction: 1197 case SymbolizeAction: { // merge & symbolize are synonims. 1198 json::OStream W(outs(), 2); 1199 W << *Coverage; 1200 return 0; 1201 } 1202 case HtmlReportAction: 1203 errs() << "-html-report option is removed: " 1204 "use -symbolize & coverage-report-server.py instead\n"; 1205 return 1; 1206 case PrintAction: 1207 case PrintCovPointsAction: 1208 llvm_unreachable("unsupported action"); 1209 } 1210} 1211