1//===-- sancov.cpp --------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file is a command-line tool for reading and analyzing sanitizer
9// coverage.
10//===----------------------------------------------------------------------===//
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/StringExtras.h"
13#include "llvm/ADT/Twine.h"
14#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
15#include "llvm/DebugInfo/Symbolize/Symbolize.h"
16#include "llvm/MC/MCAsmInfo.h"
17#include "llvm/MC/MCContext.h"
18#include "llvm/MC/MCDisassembler/MCDisassembler.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/MC/MCInstrAnalysis.h"
21#include "llvm/MC/MCInstrInfo.h"
22#include "llvm/MC/MCObjectFileInfo.h"
23#include "llvm/MC/MCRegisterInfo.h"
24#include "llvm/MC/MCSubtargetInfo.h"
25#include "llvm/MC/MCTargetOptions.h"
26#include "llvm/MC/TargetRegistry.h"
27#include "llvm/Object/Archive.h"
28#include "llvm/Object/Binary.h"
29#include "llvm/Object/COFF.h"
30#include "llvm/Object/MachO.h"
31#include "llvm/Object/ObjectFile.h"
32#include "llvm/Support/Casting.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/Errc.h"
35#include "llvm/Support/ErrorOr.h"
36#include "llvm/Support/FileSystem.h"
37#include "llvm/Support/InitLLVM.h"
38#include "llvm/Support/JSON.h"
39#include "llvm/Support/MD5.h"
40#include "llvm/Support/MemoryBuffer.h"
41#include "llvm/Support/Path.h"
42#include "llvm/Support/Regex.h"
43#include "llvm/Support/SHA1.h"
44#include "llvm/Support/SourceMgr.h"
45#include "llvm/Support/SpecialCaseList.h"
46#include "llvm/Support/TargetSelect.h"
47#include "llvm/Support/VirtualFileSystem.h"
48#include "llvm/Support/YAMLParser.h"
49#include "llvm/Support/raw_ostream.h"
50
51#include <set>
52#include <vector>
53
54using namespace llvm;
55
56namespace {
57
58// --------- COMMAND LINE FLAGS ---------
59
// Option category that the sancov flags declared below attach themselves to
// via cl::cat(Cat).
cl::OptionCategory Cat("sancov Options");
61
// The operations the tool can perform. One value is selected on the command
// line through the Action option.
enum ActionType {
  CoveredFunctionsAction,    // "covered-functions"
  HtmlReportAction,          // "html-report" (described as REMOVED)
  MergeAction,               // "merge"
  NotCoveredFunctionsAction, // "not-covered-functions"
  PrintAction,               // "print"
  PrintCovPointsAction,      // "print-coverage-pcs"
  StatsAction,               // "print-coverage-stats"
  SymbolizeAction            // "symbolize"
};
72
73cl::opt<ActionType> Action(
74    cl::desc("Action (required)"), cl::Required,
75    cl::values(
76        clEnumValN(PrintAction, "print", "Print coverage addresses"),
77        clEnumValN(PrintCovPointsAction, "print-coverage-pcs",
78                   "Print coverage instrumentation points addresses."),
79        clEnumValN(CoveredFunctionsAction, "covered-functions",
80                   "Print all covered funcions."),
81        clEnumValN(NotCoveredFunctionsAction, "not-covered-functions",
82                   "Print all not covered funcions."),
83        clEnumValN(StatsAction, "print-coverage-stats",
84                   "Print coverage statistics."),
85        clEnumValN(HtmlReportAction, "html-report",
86                   "REMOVED. Use -symbolize & coverage-report-server.py."),
87        clEnumValN(SymbolizeAction, "symbolize",
88                   "Produces a symbolized JSON report from binary report."),
89        clEnumValN(MergeAction, "merge", "Merges reports.")),
90    cl::cat(Cat));
91
// Positional arguments: the list of input files. cl::OneOrMore requires at
// least one to be present.
static cl::list<std::string>
    ClInputFiles(cl::Positional, cl::OneOrMore,
                 cl::desc("<action> <binary files...> <.sancov files...> "
                          "<.symcov files...>"),
                 cl::cat(Cat));
97
// -demangle (default: true). Forwarded to the symbolizer options in
// createSymbolizer().
static cl::opt<bool> ClDemangle("demangle", cl::init(true),
                                cl::desc("Print demangled function name"),
                                cl::cat(Cat));
101
// -skip-dead-files (default: true). When set, getCoveragePoints() drops
// locations whose source file has no covered address at all.
static cl::opt<bool>
    ClSkipDeadFiles("skip-dead-files", cl::init(true),
                    cl::desc("Do not list dead source files in reports"),
                    cl::cat(Cat));
106
// -strip_path_prefix (default: empty). Consumed by stripPathPrefix(); an
// empty value disables stripping.
static cl::opt<std::string>
    ClStripPathPrefix("strip_path_prefix", cl::init(""),
                      cl::desc("Strip this prefix from file paths in reports"),
                      cl::cat(Cat));
111
// -ignorelist (default: empty). Path of a user-supplied ignorelist file; when
// empty, no user ignorelist is loaded (see Ignorelists).
static cl::opt<std::string>
    ClIgnorelist("ignorelist", cl::init(""),
                 cl::desc("Ignorelist file (sanitizer ignorelist format)"),
                 cl::cat(Cat));
116
// -use_default_ignorelist (default: true, hidden from the regular help
// output). Controls whether DefaultIgnorelistStr is applied.
static cl::opt<bool> ClUseDefaultIgnorelist(
    "use_default_ignorelist", cl::init(true), cl::Hidden,
    cl::desc("Controls if default ignorelist should be used"), cl::cat(Cat));
120
// Built-in ignorelist text, one "<kind>:<regex>" entry per line: functions
// named __sanitizer_*, sources under /usr/include/, and sources inside any
// libc++ directory.
static const char *const DefaultIgnorelistStr = "fun:__sanitizer_.*\n"
                                                "src:/usr/include/.*\n"
                                                "src:.*/libc\\+\\+/.*\n";
124
125// --------- FORMAT SPECIFICATION ---------
126
// The 8 bytes at the start of a binary .sancov file. RawCoverage::read()
// overlays this struct on the buffer start and treats everything from byte 8
// on as the address payload.
struct FileHeader {
  uint32_t Bitness; // Compared against Bitness32 / Bitness64.
  uint32_t Magic;   // Compared against BinCoverageMagic.
};
131
// Expected value of FileHeader::Magic in a binary coverage file.
static const uint32_t BinCoverageMagic = 0xC0BFFFFF;
// FileHeader::Bitness value for files whose payload is 32-bit addresses.
static const uint32_t Bitness32 = 0xFFFFFF32;
// FileHeader::Bitness value for files whose payload is 64-bit addresses.
static const uint32_t Bitness64 = 0xFFFFFF64;
135
// File-name pattern "<anything>.<digits>.sancov"; used by isCoverageFile().
static const Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov");
// File-name pattern "<anything>.symcov"; used by isSymbolizedCoverageFile().
static const Regex SymcovFileRegex(".*\\.symcov");
138
139// --------- MAIN DATASTRUCTURES ----------
140
// Contents of a .sancov file: the set of coverage point addresses that were
// executed.
struct RawCoverage {
  // Takes ownership of the given address set.
  explicit RawCoverage(std::unique_ptr<std::set<uint64_t>> Addrs)
      : Addrs(std::move(Addrs)) {}

  // Read binary .sancov file. Returns an error code instead of a RawCoverage
  // when the file cannot be read or its header is not recognized.
  static ErrorOr<std::unique_ptr<RawCoverage>>
  read(const std::string &FileName);

  // Covered addresses, ordered and de-duplicated by the set.
  std::unique_ptr<std::set<uint64_t>> Addrs;
};
153
// A coverage point has an opaque Id and corresponds to one or more source
// locations.
struct CoveragePoint {
  explicit CoveragePoint(const std::string &Id) : Id(Id) {}

  // Opaque identifier; getCoveragePoints() uses the hex form of the address.
  std::string Id;
  // Source locations attached to this point.
  SmallVector<DILineInfo, 1> Locs;
};
161
// Symcov file content: the set of covered Ids plus information about all
// available coverage points.
struct SymbolizedCoverage {
  // Read json .symcov file. Exits the process on malformed input.
  static std::unique_ptr<SymbolizedCoverage> read(const std::string &InputFile);

  // Ids of the points that were executed.
  std::set<std::string> CoveredIds;
  // Hash of the binary the report was produced for ("binary-hash" key).
  std::string BinaryHash;
  // Every known coverage point, covered or not.
  std::vector<CoveragePoint> Points;
};
172
// Summary counters; printed by operator<< under the labels noted below.
struct CoverageStats {
  size_t AllPoints; // "all-edges"
  size_t CovPoints; // "cov-edges"
  size_t AllFns;    // "all-functions"
  size_t CovFns;    // "cov-functions"
};
179
180// --------- ERROR HANDLING ---------
181
182static void fail(const llvm::Twine &E) {
183  errs() << "ERROR: " << E << "\n";
184  exit(1);
185}
186
187static void failIf(bool B, const llvm::Twine &E) {
188  if (B)
189    fail(E);
190}
191
192static void failIfError(std::error_code Error) {
193  if (!Error)
194    return;
195  errs() << "ERROR: " << Error.message() << "(" << Error.value() << ")\n";
196  exit(1);
197}
198
// ErrorOr<T> flavour: forwards the contained error code (which is zero when E
// holds a value) to failIfError(std::error_code).
template <typename T> static void failIfError(const ErrorOr<T> &E) {
  failIfError(E.getError());
}
202
203static void failIfError(Error Err) {
204  if (Err) {
205    logAllUnhandledErrors(std::move(Err), errs(), "ERROR: ");
206    exit(1);
207  }
208}
209
// Expected<T> flavour: takes the error out of E and forwards it to
// failIfError(Error). E is taken by non-const reference because takeError()
// modifies it.
template <typename T> static void failIfError(Expected<T> &E) {
  failIfError(E.takeError());
}
213
214static void failIfNotEmpty(const llvm::Twine &E) {
215  if (E.str().empty())
216    return;
217  fail(E);
218}
219
220template <typename T>
221static void failIfEmpty(const std::unique_ptr<T> &Ptr,
222                        const std::string &Message) {
223  if (Ptr.get())
224    return;
225  fail(Message);
226}
227
228// ----------- Coverage I/O ----------
// Reads the packed array of T values stored in [Start, End) and inserts each
// one, converted to uint64_t, into *Ints.
//
// Only whole elements are read: if the byte range is not a multiple of
// sizeof(T) (for example a truncated file), the trailing partial element is
// ignored rather than iterating past End. An empty or inverted range inserts
// nothing. Start is expected to be suitably aligned for T.
template <typename T>
static void readInts(const char *Start, const char *End,
                     std::set<uint64_t> *Ints) {
  if (End <= Start)
    return;
  const size_t Count = static_cast<size_t>(End - Start) / sizeof(T);
  const T *S = reinterpret_cast<const T *>(Start);
  std::copy(S, S + Count, std::inserter(*Ints, Ints->end()));
}
236
237ErrorOr<std::unique_ptr<RawCoverage>>
238RawCoverage::read(const std::string &FileName) {
239  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
240      MemoryBuffer::getFile(FileName);
241  if (!BufOrErr)
242    return BufOrErr.getError();
243  std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
244  if (Buf->getBufferSize() < 8) {
245    errs() << "File too small (<8): " << Buf->getBufferSize() << '\n';
246    return make_error_code(errc::illegal_byte_sequence);
247  }
248  const FileHeader *Header =
249      reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
250
251  if (Header->Magic != BinCoverageMagic) {
252    errs() << "Wrong magic: " << Header->Magic << '\n';
253    return make_error_code(errc::illegal_byte_sequence);
254  }
255
256  auto Addrs = std::make_unique<std::set<uint64_t>>();
257
258  switch (Header->Bitness) {
259  case Bitness64:
260    readInts<uint64_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
261                       Addrs.get());
262    break;
263  case Bitness32:
264    readInts<uint32_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
265                       Addrs.get());
266    break;
267  default:
268    errs() << "Unsupported bitness: " << Header->Bitness << '\n';
269    return make_error_code(errc::illegal_byte_sequence);
270  }
271
272  // Ignore slots that are zero, so a runtime implementation is not required
273  // to compactify the data.
274  Addrs->erase(0);
275
276  return std::unique_ptr<RawCoverage>(new RawCoverage(std::move(Addrs)));
277}
278
279// Print coverage addresses.
280raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) {
281  for (auto Addr : *CoverageData.Addrs) {
282    OS << "0x";
283    OS.write_hex(Addr);
284    OS << "\n";
285  }
286  return OS;
287}
288
289static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) {
290  OS << "all-edges: " << Stats.AllPoints << "\n";
291  OS << "cov-edges: " << Stats.CovPoints << "\n";
292  OS << "all-functions: " << Stats.AllFns << "\n";
293  OS << "cov-functions: " << Stats.CovFns << "\n";
294  return OS;
295}
296
// Output symbolized information for coverage points in JSON.
// Format:
// {
//   '<file_name>' : {
//     '<function_name>' : {
//       '<point_id>' : '<line_number>:<column_number>',
//          ....
//       }
//    }
// }
// The attributes are written into the object that is currently open on W;
// this function does not open the outer object itself.
static void operator<<(json::OStream &W,
                       const std::vector<CoveragePoint> &Points) {
  // Group points by file. A point is listed once per location, so it can
  // appear under several files (and several times under the same file).
  std::map<std::string, std::vector<const CoveragePoint *>> PointsByFile;
  for (const auto &Point : Points) {
    for (const DILineInfo &Loc : Point.Locs) {
      PointsByFile[Loc.FileName].push_back(&Point);
    }
  }

  for (const auto &P : PointsByFile) {
    std::string FileName = P.first;
    // NOTE: every location of the point contributes its function name here,
    // including locations that belong to other files. Such functions still
    // get an (empty) object under this file, because the file/function filter
    // is only applied when the individual attributes are written below.
    std::map<std::string, std::vector<const CoveragePoint *>> PointsByFn;
    for (auto PointPtr : P.second) {
      for (const DILineInfo &Loc : PointPtr->Locs) {
        PointsByFn[Loc.FunctionName].push_back(PointPtr);
      }
    }

    W.attributeObject(P.first, [&] {
      // Group points by function.
      for (const auto &P : PointsByFn) {
        std::string FunctionName = P.first;
        // Ids already emitted for this file/function pair; a JSON object must
        // not repeat a key.
        std::set<std::string> WrittenIds;

        W.attributeObject(FunctionName, [&] {
          for (const CoveragePoint *Point : P.second) {
            for (const auto &Loc : Point->Locs) {
              // Only locations that belong to exactly this file and function.
              if (Loc.FileName != FileName || Loc.FunctionName != FunctionName)
                continue;
              if (WrittenIds.find(Point->Id) != WrittenIds.end())
                continue;

              // Output <point_id> : "<line>:<col>".
              WrittenIds.insert(Point->Id);
              W.attribute(Point->Id,
                          (utostr(Loc.Line) + ":" + utostr(Loc.Column)));
            }
          }
        });
      }
    });
  }
}
351
352static void operator<<(json::OStream &W, const SymbolizedCoverage &C) {
353  W.object([&] {
354    W.attributeArray("covered-points", [&] {
355      for (const std::string &P : C.CoveredIds) {
356        W.value(P);
357      }
358    });
359    W.attribute("binary-hash", C.BinaryHash);
360    W.attributeObject("point-symbol-info", [&] { W << C.Points; });
361  });
362}
363
364static std::string parseScalarString(yaml::Node *N) {
365  SmallString<64> StringStorage;
366  yaml::ScalarNode *S = dyn_cast<yaml::ScalarNode>(N);
367  failIf(!S, "expected string");
368  return std::string(S->getValue(StringStorage));
369}
370
371std::unique_ptr<SymbolizedCoverage>
372SymbolizedCoverage::read(const std::string &InputFile) {
373  auto Coverage(std::make_unique<SymbolizedCoverage>());
374
375  std::map<std::string, CoveragePoint> Points;
376  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
377      MemoryBuffer::getFile(InputFile);
378  failIfError(BufOrErr);
379
380  SourceMgr SM;
381  yaml::Stream S(**BufOrErr, SM);
382
383  yaml::document_iterator DI = S.begin();
384  failIf(DI == S.end(), "empty document: " + InputFile);
385  yaml::Node *Root = DI->getRoot();
386  failIf(!Root, "expecting root node: " + InputFile);
387  yaml::MappingNode *Top = dyn_cast<yaml::MappingNode>(Root);
388  failIf(!Top, "expecting mapping node: " + InputFile);
389
390  for (auto &KVNode : *Top) {
391    auto Key = parseScalarString(KVNode.getKey());
392
393    if (Key == "covered-points") {
394      yaml::SequenceNode *Points =
395          dyn_cast<yaml::SequenceNode>(KVNode.getValue());
396      failIf(!Points, "expected array: " + InputFile);
397
398      for (auto I = Points->begin(), E = Points->end(); I != E; ++I) {
399        Coverage->CoveredIds.insert(parseScalarString(&*I));
400      }
401    } else if (Key == "binary-hash") {
402      Coverage->BinaryHash = parseScalarString(KVNode.getValue());
403    } else if (Key == "point-symbol-info") {
404      yaml::MappingNode *PointSymbolInfo =
405          dyn_cast<yaml::MappingNode>(KVNode.getValue());
406      failIf(!PointSymbolInfo, "expected mapping node: " + InputFile);
407
408      for (auto &FileKVNode : *PointSymbolInfo) {
409        auto Filename = parseScalarString(FileKVNode.getKey());
410
411        yaml::MappingNode *FileInfo =
412            dyn_cast<yaml::MappingNode>(FileKVNode.getValue());
413        failIf(!FileInfo, "expected mapping node: " + InputFile);
414
415        for (auto &FunctionKVNode : *FileInfo) {
416          auto FunctionName = parseScalarString(FunctionKVNode.getKey());
417
418          yaml::MappingNode *FunctionInfo =
419              dyn_cast<yaml::MappingNode>(FunctionKVNode.getValue());
420          failIf(!FunctionInfo, "expected mapping node: " + InputFile);
421
422          for (auto &PointKVNode : *FunctionInfo) {
423            auto PointId = parseScalarString(PointKVNode.getKey());
424            auto Loc = parseScalarString(PointKVNode.getValue());
425
426            size_t ColonPos = Loc.find(':');
427            failIf(ColonPos == std::string::npos, "expected ':': " + InputFile);
428
429            auto LineStr = Loc.substr(0, ColonPos);
430            auto ColStr = Loc.substr(ColonPos + 1, Loc.size());
431
432            if (Points.find(PointId) == Points.end())
433              Points.insert(std::make_pair(PointId, CoveragePoint(PointId)));
434
435            DILineInfo LineInfo;
436            LineInfo.FileName = Filename;
437            LineInfo.FunctionName = FunctionName;
438            char *End;
439            LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10);
440            LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10);
441
442            CoveragePoint *CoveragePoint = &Points.find(PointId)->second;
443            CoveragePoint->Locs.push_back(LineInfo);
444          }
445        }
446      }
447    } else {
448      errs() << "Ignoring unknown key: " << Key << "\n";
449    }
450  }
451
452  for (auto &KV : Points) {
453    Coverage->Points.push_back(KV.second);
454  }
455
456  return Coverage;
457}
458
459// ---------- MAIN FUNCTIONALITY ----------
460
461std::string stripPathPrefix(std::string Path) {
462  if (ClStripPathPrefix.empty())
463    return Path;
464  size_t Pos = Path.find(ClStripPathPrefix);
465  if (Pos == std::string::npos)
466    return Path;
467  return Path.substr(Pos + ClStripPathPrefix.size());
468}
469
470static std::unique_ptr<symbolize::LLVMSymbolizer> createSymbolizer() {
471  symbolize::LLVMSymbolizer::Options SymbolizerOptions;
472  SymbolizerOptions.Demangle = ClDemangle;
473  SymbolizerOptions.UseSymbolTable = true;
474  return std::unique_ptr<symbolize::LLVMSymbolizer>(
475      new symbolize::LLVMSymbolizer(SymbolizerOptions));
476}
477
478static std::string normalizeFilename(const std::string &FileName) {
479  SmallString<256> S(FileName);
480  sys::path::remove_dots(S, /* remove_dot_dot */ true);
481  return stripPathPrefix(sys::path::convert_to_slash(std::string(S)));
482}
483
484class Ignorelists {
485public:
486  Ignorelists()
487      : DefaultIgnorelist(createDefaultIgnorelist()),
488        UserIgnorelist(createUserIgnorelist()) {}
489
490  bool isIgnorelisted(const DILineInfo &I) {
491    if (DefaultIgnorelist &&
492        DefaultIgnorelist->inSection("sancov", "fun", I.FunctionName))
493      return true;
494    if (DefaultIgnorelist &&
495        DefaultIgnorelist->inSection("sancov", "src", I.FileName))
496      return true;
497    if (UserIgnorelist &&
498        UserIgnorelist->inSection("sancov", "fun", I.FunctionName))
499      return true;
500    if (UserIgnorelist &&
501        UserIgnorelist->inSection("sancov", "src", I.FileName))
502      return true;
503    return false;
504  }
505
506private:
507  static std::unique_ptr<SpecialCaseList> createDefaultIgnorelist() {
508    if (!ClUseDefaultIgnorelist)
509      return std::unique_ptr<SpecialCaseList>();
510    std::unique_ptr<MemoryBuffer> MB =
511        MemoryBuffer::getMemBuffer(DefaultIgnorelistStr);
512    std::string Error;
513    auto Ignorelist = SpecialCaseList::create(MB.get(), Error);
514    failIfNotEmpty(Error);
515    return Ignorelist;
516  }
517
518  static std::unique_ptr<SpecialCaseList> createUserIgnorelist() {
519    if (ClIgnorelist.empty())
520      return std::unique_ptr<SpecialCaseList>();
521    return SpecialCaseList::createOrDie({{ClIgnorelist}},
522                                        *vfs::getRealFileSystem());
523  }
524  std::unique_ptr<SpecialCaseList> DefaultIgnorelist;
525  std::unique_ptr<SpecialCaseList> UserIgnorelist;
526};
527
528static std::vector<CoveragePoint>
529getCoveragePoints(const std::string &ObjectFile,
530                  const std::set<uint64_t> &Addrs,
531                  const std::set<uint64_t> &CoveredAddrs) {
532  std::vector<CoveragePoint> Result;
533  auto Symbolizer(createSymbolizer());
534  Ignorelists Ig;
535
536  std::set<std::string> CoveredFiles;
537  if (ClSkipDeadFiles) {
538    for (auto Addr : CoveredAddrs) {
539      // TODO: it would be neccessary to set proper section index here.
540      // object::SectionedAddress::UndefSection works for only absolute
541      // addresses.
542      object::SectionedAddress ModuleAddress = {
543          Addr, object::SectionedAddress::UndefSection};
544
545      auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress);
546      failIfError(LineInfo);
547      CoveredFiles.insert(LineInfo->FileName);
548      auto InliningInfo =
549          Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress);
550      failIfError(InliningInfo);
551      for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
552        auto FrameInfo = InliningInfo->getFrame(I);
553        CoveredFiles.insert(FrameInfo.FileName);
554      }
555    }
556  }
557
558  for (auto Addr : Addrs) {
559    std::set<DILineInfo> Infos; // deduplicate debug info.
560
561    // TODO: it would be neccessary to set proper section index here.
562    // object::SectionedAddress::UndefSection works for only absolute addresses.
563    object::SectionedAddress ModuleAddress = {
564        Addr, object::SectionedAddress::UndefSection};
565
566    auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress);
567    failIfError(LineInfo);
568    if (ClSkipDeadFiles &&
569        CoveredFiles.find(LineInfo->FileName) == CoveredFiles.end())
570      continue;
571    LineInfo->FileName = normalizeFilename(LineInfo->FileName);
572    if (Ig.isIgnorelisted(*LineInfo))
573      continue;
574
575    auto Id = utohexstr(Addr, true);
576    auto Point = CoveragePoint(Id);
577    Infos.insert(*LineInfo);
578    Point.Locs.push_back(*LineInfo);
579
580    auto InliningInfo =
581        Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress);
582    failIfError(InliningInfo);
583    for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
584      auto FrameInfo = InliningInfo->getFrame(I);
585      if (ClSkipDeadFiles &&
586          CoveredFiles.find(FrameInfo.FileName) == CoveredFiles.end())
587        continue;
588      FrameInfo.FileName = normalizeFilename(FrameInfo.FileName);
589      if (Ig.isIgnorelisted(FrameInfo))
590        continue;
591      if (Infos.find(FrameInfo) == Infos.end()) {
592        Infos.insert(FrameInfo);
593        Point.Locs.push_back(FrameInfo);
594      }
595    }
596
597    Result.push_back(Point);
598  }
599
600  return Result;
601}
602
603static bool isCoveragePointSymbol(StringRef Name) {
604  return Name == "__sanitizer_cov" || Name == "__sanitizer_cov_with_check" ||
605         Name == "__sanitizer_cov_trace_func_enter" ||
606         Name == "__sanitizer_cov_trace_pc_guard" ||
607         // Mac has '___' prefix
608         Name == "___sanitizer_cov" || Name == "___sanitizer_cov_with_check" ||
609         Name == "___sanitizer_cov_trace_func_enter" ||
610         Name == "___sanitizer_cov_trace_pc_guard";
611}
612
613// Locate __sanitizer_cov* function addresses inside the stubs table on MachO.
614static void findMachOIndirectCovFunctions(const object::MachOObjectFile &O,
615                                          std::set<uint64_t> *Result) {
616  MachO::dysymtab_command Dysymtab = O.getDysymtabLoadCommand();
617  MachO::symtab_command Symtab = O.getSymtabLoadCommand();
618
619  for (const auto &Load : O.load_commands()) {
620    if (Load.C.cmd == MachO::LC_SEGMENT_64) {
621      MachO::segment_command_64 Seg = O.getSegment64LoadCommand(Load);
622      for (unsigned J = 0; J < Seg.nsects; ++J) {
623        MachO::section_64 Sec = O.getSection64(Load, J);
624
625        uint32_t SectionType = Sec.flags & MachO::SECTION_TYPE;
626        if (SectionType == MachO::S_SYMBOL_STUBS) {
627          uint32_t Stride = Sec.reserved2;
628          uint32_t Cnt = Sec.size / Stride;
629          uint32_t N = Sec.reserved1;
630          for (uint32_t J = 0; J < Cnt && N + J < Dysymtab.nindirectsyms; J++) {
631            uint32_t IndirectSymbol =
632                O.getIndirectSymbolTableEntry(Dysymtab, N + J);
633            uint64_t Addr = Sec.addr + J * Stride;
634            if (IndirectSymbol < Symtab.nsyms) {
635              object::SymbolRef Symbol = *(O.getSymbolByIndex(IndirectSymbol));
636              Expected<StringRef> Name = Symbol.getName();
637              failIfError(Name);
638              if (isCoveragePointSymbol(Name.get())) {
639                Result->insert(Addr);
640              }
641            }
642          }
643        }
644      }
645    }
646    if (Load.C.cmd == MachO::LC_SEGMENT) {
647      errs() << "ERROR: 32 bit MachO binaries not supported\n";
648    }
649  }
650}
651
652// Locate __sanitizer_cov* function addresses that are used for coverage
653// reporting.
654static std::set<uint64_t>
655findSanitizerCovFunctions(const object::ObjectFile &O) {
656  std::set<uint64_t> Result;
657
658  for (const object::SymbolRef &Symbol : O.symbols()) {
659    Expected<uint64_t> AddressOrErr = Symbol.getAddress();
660    failIfError(AddressOrErr);
661    uint64_t Address = AddressOrErr.get();
662
663    Expected<StringRef> NameOrErr = Symbol.getName();
664    failIfError(NameOrErr);
665    StringRef Name = NameOrErr.get();
666
667    Expected<uint32_t> FlagsOrErr = Symbol.getFlags();
668    // TODO: Test this error.
669    failIfError(FlagsOrErr);
670    uint32_t Flags = FlagsOrErr.get();
671
672    if (!(Flags & object::BasicSymbolRef::SF_Undefined) &&
673        isCoveragePointSymbol(Name)) {
674      Result.insert(Address);
675    }
676  }
677
678  if (const auto *CO = dyn_cast<object::COFFObjectFile>(&O)) {
679    for (const object::ExportDirectoryEntryRef &Export :
680         CO->export_directories()) {
681      uint32_t RVA;
682      failIfError(Export.getExportRVA(RVA));
683
684      StringRef Name;
685      failIfError(Export.getSymbolName(Name));
686
687      if (isCoveragePointSymbol(Name))
688        Result.insert(CO->getImageBase() + RVA);
689    }
690  }
691
692  if (const auto *MO = dyn_cast<object::MachOObjectFile>(&O)) {
693    findMachOIndirectCovFunctions(*MO, &Result);
694  }
695
696  return Result;
697}
698
699// Ported from
700// compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h:GetPreviousInstructionPc
701// GetPreviousInstructionPc.
702static uint64_t getPreviousInstructionPc(uint64_t PC,
703                                         Triple TheTriple) {
704  if (TheTriple.isARM())
705    return (PC - 3) & (~1);
706  if (TheTriple.isMIPS() || TheTriple.isSPARC())
707    return PC - 8;
708  if (TheTriple.isRISCV())
709    return PC - 2;
710  if (TheTriple.isX86() || TheTriple.isSystemZ())
711    return PC - 1;
712  return PC - 4;
713}
714
// Locate addresses of all coverage points in an object file and add them to
// *Addrs.
//
// Every non-virtual text section of O is disassembled. For each call
// instruction whose evaluated target is one of the addresses returned by
// findSanitizerCovFunctions(O), the value getPreviousInstructionPc() yields
// for the address just past the call is inserted. Terminates the process if
// any MC component for the object's architecture cannot be created or if O
// has no __sanitizer_cov* functions.
static void getObjectCoveragePoints(const object::ObjectFile &O,
                                    std::set<uint64_t> *Addrs) {
  // Only the architecture is taken from the object; vendor/OS stay unknown.
  Triple TheTriple("unknown-unknown-unknown");
  TheTriple.setArch(Triple::ArchType(O.getArch()));
  auto TripleName = TheTriple.getTriple();

  std::string Error;
  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
  failIfNotEmpty(Error);

  // Set up the MC objects needed to disassemble and analyze instructions.
  std::unique_ptr<const MCSubtargetInfo> STI(
      TheTarget->createMCSubtargetInfo(TripleName, "", ""));
  failIfEmpty(STI, "no subtarget info for target " + TripleName);

  std::unique_ptr<const MCRegisterInfo> MRI(
      TheTarget->createMCRegInfo(TripleName));
  failIfEmpty(MRI, "no register info for target " + TripleName);

  MCTargetOptions MCOptions;
  std::unique_ptr<const MCAsmInfo> AsmInfo(
      TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
  failIfEmpty(AsmInfo, "no asm info for target " + TripleName);

  MCContext Ctx(TheTriple, AsmInfo.get(), MRI.get(), STI.get());
  std::unique_ptr<MCDisassembler> DisAsm(
      TheTarget->createMCDisassembler(*STI, Ctx));
  failIfEmpty(DisAsm, "no disassembler info for target " + TripleName);

  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
  failIfEmpty(MII, "no instruction info for target " + TripleName);

  std::unique_ptr<const MCInstrAnalysis> MIA(
      TheTarget->createMCInstrAnalysis(MII.get()));
  failIfEmpty(MIA, "no instruction analysis info for target " + TripleName);

  // Call targets that count as coverage callbacks.
  auto SanCovAddrs = findSanitizerCovFunctions(O);
  if (SanCovAddrs.empty())
    fail("__sanitizer_cov* functions not found");

  for (object::SectionRef Section : O.sections()) {
    if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same.
      continue;
    uint64_t SectionAddr = Section.getAddress();
    uint64_t SectSize = Section.getSize();
    if (!SectSize)
      continue;

    Expected<StringRef> BytesStr = Section.getContents();
    failIfError(BytesStr);
    ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*BytesStr);

    // Size is written by getInstruction() and is the step applied by the
    // loop increment, so it must be set before every `continue`.
    for (uint64_t Index = 0, Size = 0; Index < Section.getSize();
         Index += Size) {
      MCInst Inst;
      ArrayRef<uint8_t> ThisBytes = Bytes.slice(Index);
      uint64_t ThisAddr = SectionAddr + Index;
      if (!DisAsm->getInstruction(Inst, Size, ThisBytes, ThisAddr, nulls())) {
        // Undecodable bytes: if the disassembler reported no size, skip the
        // number of bytes it suggests (capped at what is left) so the loop
        // keeps advancing.
        if (Size == 0)
          Size = std::min<uint64_t>(
              ThisBytes.size(),
              DisAsm->suggestBytesToSkip(ThisBytes, ThisAddr));
        continue;
      }
      uint64_t Addr = Index + SectionAddr;
      // Sanitizer coverage uses the address of the next instruction - 1.
      uint64_t CovPoint = getPreviousInstructionPc(Addr + Size, TheTriple);
      uint64_t Target;
      if (MIA->isCall(Inst) &&
          MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) &&
          SanCovAddrs.find(Target) != SanCovAddrs.end())
        Addrs->insert(CovPoint);
    }
  }
}
792
793static void
794visitObjectFiles(const object::Archive &A,
795                 function_ref<void(const object::ObjectFile &)> Fn) {
796  Error Err = Error::success();
797  for (auto &C : A.children(Err)) {
798    Expected<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary();
799    failIfError(ChildOrErr);
800    if (auto *O = dyn_cast<object::ObjectFile>(&*ChildOrErr.get()))
801      Fn(*O);
802    else
803      failIfError(object::object_error::invalid_file_type);
804  }
805  failIfError(std::move(Err));
806}
807
808static void
809visitObjectFiles(const std::string &FileName,
810                 function_ref<void(const object::ObjectFile &)> Fn) {
811  Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
812      object::createBinary(FileName);
813  if (!BinaryOrErr)
814    failIfError(BinaryOrErr);
815
816  object::Binary &Binary = *BinaryOrErr.get().getBinary();
817  if (object::Archive *A = dyn_cast<object::Archive>(&Binary))
818    visitObjectFiles(*A, Fn);
819  else if (object::ObjectFile *O = dyn_cast<object::ObjectFile>(&Binary))
820    Fn(*O);
821  else
822    failIfError(object::object_error::invalid_file_type);
823}
824
825static std::set<uint64_t>
826findSanitizerCovFunctions(const std::string &FileName) {
827  std::set<uint64_t> Result;
828  visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
829    auto Addrs = findSanitizerCovFunctions(O);
830    Result.insert(Addrs.begin(), Addrs.end());
831  });
832  return Result;
833}
834
835// Locate addresses of all coverage points in a file. Coverage point
836// is defined as the 'address of instruction following __sanitizer_cov
837// call - 1'.
838static std::set<uint64_t> findCoveragePointAddrs(const std::string &FileName) {
839  std::set<uint64_t> Result;
840  visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
841    getObjectCoveragePoints(O, &Result);
842  });
843  return Result;
844}
845
846static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) {
847  for (uint64_t Addr : findCoveragePointAddrs(ObjFile)) {
848    OS << "0x";
849    OS.write_hex(Addr);
850    OS << "\n";
851  }
852}
853
854static ErrorOr<bool> isCoverageFile(const std::string &FileName) {
855  auto ShortFileName = llvm::sys::path::filename(FileName);
856  if (!SancovFileRegex.match(ShortFileName))
857    return false;
858
859  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
860      MemoryBuffer::getFile(FileName);
861  if (!BufOrErr) {
862    errs() << "Warning: " << BufOrErr.getError().message() << "("
863           << BufOrErr.getError().value()
864           << "), filename: " << llvm::sys::path::filename(FileName) << "\n";
865    return BufOrErr.getError();
866  }
867  std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
868  if (Buf->getBufferSize() < 8) {
869    return false;
870  }
871  const FileHeader *Header =
872      reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
873  return Header->Magic == BinCoverageMagic;
874}
875
876static bool isSymbolizedCoverageFile(const std::string &FileName) {
877  auto ShortFileName = llvm::sys::path::filename(FileName);
878  return SymcovFileRegex.match(ShortFileName);
879}
880
881static std::unique_ptr<SymbolizedCoverage>
882symbolize(const RawCoverage &Data, const std::string ObjectFile) {
883  auto Coverage = std::make_unique<SymbolizedCoverage>();
884
885  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
886      MemoryBuffer::getFile(ObjectFile);
887  failIfError(BufOrErr);
888  SHA1 Hasher;
889  Hasher.update((*BufOrErr)->getBuffer());
890  Coverage->BinaryHash = toHex(Hasher.final());
891
892  Ignorelists Ig;
893  auto Symbolizer(createSymbolizer());
894
895  for (uint64_t Addr : *Data.Addrs) {
896    // TODO: it would be neccessary to set proper section index here.
897    // object::SectionedAddress::UndefSection works for only absolute addresses.
898    auto LineInfo = Symbolizer->symbolizeCode(
899        ObjectFile, {Addr, object::SectionedAddress::UndefSection});
900    failIfError(LineInfo);
901    if (Ig.isIgnorelisted(*LineInfo))
902      continue;
903
904    Coverage->CoveredIds.insert(utohexstr(Addr, true));
905  }
906
907  std::set<uint64_t> AllAddrs = findCoveragePointAddrs(ObjectFile);
908  if (!std::includes(AllAddrs.begin(), AllAddrs.end(), Data.Addrs->begin(),
909                     Data.Addrs->end())) {
910    fail("Coverage points in binary and .sancov file do not match.");
911  }
912  Coverage->Points = getCoveragePoints(ObjectFile, AllAddrs, *Data.Addrs);
913  return Coverage;
914}
915
// Identifies a function by the source file it lives in plus its name.
// Ordering is lexicographic: by file name first, then by function name.
struct FileFn {
  std::string FileName;
  std::string FunctionName;

  bool operator<(const FileFn &RHS) const {
    if (FileName != RHS.FileName)
      return FileName < RHS.FileName;
    return FunctionName < RHS.FunctionName;
  }
};
925
926static std::set<FileFn>
927computeFunctions(const std::vector<CoveragePoint> &Points) {
928  std::set<FileFn> Fns;
929  for (const auto &Point : Points) {
930    for (const auto &Loc : Point.Locs) {
931      Fns.insert(FileFn{Loc.FileName, Loc.FunctionName});
932    }
933  }
934  return Fns;
935}
936
937static std::set<FileFn>
938computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) {
939  auto Fns = computeFunctions(Coverage.Points);
940
941  for (const auto &Point : Coverage.Points) {
942    if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
943      continue;
944
945    for (const auto &Loc : Point.Locs) {
946      Fns.erase(FileFn{Loc.FileName, Loc.FunctionName});
947    }
948  }
949
950  return Fns;
951}
952
953static std::set<FileFn>
954computeCoveredFunctions(const SymbolizedCoverage &Coverage) {
955  auto AllFns = computeFunctions(Coverage.Points);
956  std::set<FileFn> Result;
957
958  for (const auto &Point : Coverage.Points) {
959    if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
960      continue;
961
962    for (const auto &Loc : Point.Locs) {
963      Result.insert(FileFn{Loc.FileName, Loc.FunctionName});
964    }
965  }
966
967  return Result;
968}
969
970typedef std::map<FileFn, std::pair<uint32_t, uint32_t>> FunctionLocs;
971// finds first location in a file for each function.
972static FunctionLocs resolveFunctions(const SymbolizedCoverage &Coverage,
973                                     const std::set<FileFn> &Fns) {
974  FunctionLocs Result;
975  for (const auto &Point : Coverage.Points) {
976    for (const auto &Loc : Point.Locs) {
977      FileFn Fn = FileFn{Loc.FileName, Loc.FunctionName};
978      if (Fns.find(Fn) == Fns.end())
979        continue;
980
981      auto P = std::make_pair(Loc.Line, Loc.Column);
982      auto I = Result.find(Fn);
983      if (I == Result.end() || I->second > P) {
984        Result[Fn] = P;
985      }
986    }
987  }
988  return Result;
989}
990
991static void printFunctionLocs(const FunctionLocs &FnLocs, raw_ostream &OS) {
992  for (const auto &P : FnLocs) {
993    OS << stripPathPrefix(P.first.FileName) << ":" << P.second.first << " "
994       << P.first.FunctionName << "\n";
995  }
996}
997CoverageStats computeStats(const SymbolizedCoverage &Coverage) {
998  CoverageStats Stats = {Coverage.Points.size(), Coverage.CoveredIds.size(),
999                         computeFunctions(Coverage.Points).size(),
1000                         computeCoveredFunctions(Coverage).size()};
1001  return Stats;
1002}
1003
1004// Print list of covered functions.
1005// Line format: <file_name>:<line> <function_name>
1006static void printCoveredFunctions(const SymbolizedCoverage &CovData,
1007                                  raw_ostream &OS) {
1008  auto CoveredFns = computeCoveredFunctions(CovData);
1009  printFunctionLocs(resolveFunctions(CovData, CoveredFns), OS);
1010}
1011
1012// Print list of not covered functions.
1013// Line format: <file_name>:<line> <function_name>
1014static void printNotCoveredFunctions(const SymbolizedCoverage &CovData,
1015                                     raw_ostream &OS) {
1016  auto NotCoveredFns = computeNotCoveredFunctions(CovData);
1017  printFunctionLocs(resolveFunctions(CovData, NotCoveredFns), OS);
1018}
1019
1020// Read list of files and merges their coverage info.
1021static void readAndPrintRawCoverage(const std::vector<std::string> &FileNames,
1022                                    raw_ostream &OS) {
1023  std::vector<std::unique_ptr<RawCoverage>> Covs;
1024  for (const auto &FileName : FileNames) {
1025    auto Cov = RawCoverage::read(FileName);
1026    if (!Cov)
1027      continue;
1028    OS << *Cov.get();
1029  }
1030}
1031
1032static std::unique_ptr<SymbolizedCoverage>
1033merge(const std::vector<std::unique_ptr<SymbolizedCoverage>> &Coverages) {
1034  if (Coverages.empty())
1035    return nullptr;
1036
1037  auto Result = std::make_unique<SymbolizedCoverage>();
1038
1039  for (size_t I = 0; I < Coverages.size(); ++I) {
1040    const SymbolizedCoverage &Coverage = *Coverages[I];
1041    std::string Prefix;
1042    if (Coverages.size() > 1) {
1043      // prefix is not needed when there's only one file.
1044      Prefix = utostr(I);
1045    }
1046
1047    for (const auto &Id : Coverage.CoveredIds) {
1048      Result->CoveredIds.insert(Prefix + Id);
1049    }
1050
1051    for (const auto &CovPoint : Coverage.Points) {
1052      CoveragePoint NewPoint(CovPoint);
1053      NewPoint.Id = Prefix + CovPoint.Id;
1054      Result->Points.push_back(NewPoint);
1055    }
1056  }
1057
1058  if (Coverages.size() == 1) {
1059    Result->BinaryHash = Coverages[0]->BinaryHash;
1060  }
1061
1062  return Result;
1063}
1064
1065static std::unique_ptr<SymbolizedCoverage>
1066readSymbolizeAndMergeCmdArguments(std::vector<std::string> FileNames) {
1067  std::vector<std::unique_ptr<SymbolizedCoverage>> Coverages;
1068
1069  {
1070    // Short name => file name.
1071    std::map<std::string, std::string> ObjFiles;
1072    std::string FirstObjFile;
1073    std::set<std::string> CovFiles;
1074
1075    // Partition input values into coverage/object files.
1076    for (const auto &FileName : FileNames) {
1077      if (isSymbolizedCoverageFile(FileName)) {
1078        Coverages.push_back(SymbolizedCoverage::read(FileName));
1079      }
1080
1081      auto ErrorOrIsCoverage = isCoverageFile(FileName);
1082      if (!ErrorOrIsCoverage)
1083        continue;
1084      if (ErrorOrIsCoverage.get()) {
1085        CovFiles.insert(FileName);
1086      } else {
1087        auto ShortFileName = llvm::sys::path::filename(FileName);
1088        if (ObjFiles.find(std::string(ShortFileName)) != ObjFiles.end()) {
1089          fail("Duplicate binary file with a short name: " + ShortFileName);
1090        }
1091
1092        ObjFiles[std::string(ShortFileName)] = FileName;
1093        if (FirstObjFile.empty())
1094          FirstObjFile = FileName;
1095      }
1096    }
1097
1098    SmallVector<StringRef, 2> Components;
1099
1100    // Object file => list of corresponding coverage file names.
1101    std::map<std::string, std::vector<std::string>> CoverageByObjFile;
1102    for (const auto &FileName : CovFiles) {
1103      auto ShortFileName = llvm::sys::path::filename(FileName);
1104      auto Ok = SancovFileRegex.match(ShortFileName, &Components);
1105      if (!Ok) {
1106        fail("Can't match coverage file name against "
1107             "<module_name>.<pid>.sancov pattern: " +
1108             FileName);
1109      }
1110
1111      auto Iter = ObjFiles.find(std::string(Components[1]));
1112      if (Iter == ObjFiles.end()) {
1113        fail("Object file for coverage not found: " + FileName);
1114      }
1115
1116      CoverageByObjFile[Iter->second].push_back(FileName);
1117    };
1118
1119    for (const auto &Pair : ObjFiles) {
1120      auto FileName = Pair.second;
1121      if (CoverageByObjFile.find(FileName) == CoverageByObjFile.end())
1122        errs() << "WARNING: No coverage file for " << FileName << "\n";
1123    }
1124
1125    // Read raw coverage and symbolize it.
1126    for (const auto &Pair : CoverageByObjFile) {
1127      if (findSanitizerCovFunctions(Pair.first).empty()) {
1128        errs()
1129            << "WARNING: Ignoring " << Pair.first
1130            << " and its coverage because  __sanitizer_cov* functions were not "
1131               "found.\n";
1132        continue;
1133      }
1134
1135      for (const std::string &CoverageFile : Pair.second) {
1136        auto DataOrError = RawCoverage::read(CoverageFile);
1137        failIfError(DataOrError);
1138        Coverages.push_back(symbolize(*DataOrError.get(), Pair.first));
1139      }
1140    }
1141  }
1142
1143  return merge(Coverages);
1144}
1145
1146} // namespace
1147
1148int main(int Argc, char **Argv) {
1149  llvm::InitLLVM X(Argc, Argv);
1150  cl::HideUnrelatedOptions(Cat);
1151
1152  llvm::InitializeAllTargetInfos();
1153  llvm::InitializeAllTargetMCs();
1154  llvm::InitializeAllDisassemblers();
1155
1156  cl::ParseCommandLineOptions(Argc, Argv,
1157      "Sanitizer Coverage Processing Tool (sancov)\n\n"
1158      "  This tool can extract various coverage-related information from: \n"
1159      "  coverage-instrumented binary files, raw .sancov files and their "
1160      "symbolized .symcov version.\n"
1161      "  Depending on chosen action the tool expects different input files:\n"
1162      "    -print-coverage-pcs     - coverage-instrumented binary files\n"
1163      "    -print-coverage         - .sancov files\n"
1164      "    <other actions>         - .sancov files & corresponding binary "
1165      "files, .symcov files\n"
1166      );
1167
1168  // -print doesn't need object files.
1169  if (Action == PrintAction) {
1170    readAndPrintRawCoverage(ClInputFiles, outs());
1171    return 0;
1172  } else if (Action == PrintCovPointsAction) {
1173    // -print-coverage-points doesn't need coverage files.
1174    for (const std::string &ObjFile : ClInputFiles) {
1175      printCovPoints(ObjFile, outs());
1176    }
1177    return 0;
1178  }
1179
1180  auto Coverage = readSymbolizeAndMergeCmdArguments(ClInputFiles);
1181  failIf(!Coverage, "No valid coverage files given.");
1182
1183  switch (Action) {
1184  case CoveredFunctionsAction: {
1185    printCoveredFunctions(*Coverage, outs());
1186    return 0;
1187  }
1188  case NotCoveredFunctionsAction: {
1189    printNotCoveredFunctions(*Coverage, outs());
1190    return 0;
1191  }
1192  case StatsAction: {
1193    outs() << computeStats(*Coverage);
1194    return 0;
1195  }
1196  case MergeAction:
1197  case SymbolizeAction: { // merge & symbolize are synonims.
1198    json::OStream W(outs(), 2);
1199    W << *Coverage;
1200    return 0;
1201  }
1202  case HtmlReportAction:
1203    errs() << "-html-report option is removed: "
1204              "use -symbolize & coverage-report-server.py instead\n";
1205    return 1;
1206  case PrintAction:
1207  case PrintCovPointsAction:
1208    llvm_unreachable("unsupported action");
1209  }
1210}
1211