1//===-------------- llvm-remark-size-diff/RemarkSizeDiff.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Diffs instruction count and stack size remarks between two remark files.
11///
12/// This is intended for use by compiler developers who want to see how their
13/// changes impact program code size.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm-c/Remarks.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallSet.h"
20#include "llvm/Remarks/Remark.h"
21#include "llvm/Remarks/RemarkParser.h"
22#include "llvm/Remarks/RemarkSerializer.h"
23#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/Compiler.h"
25#include "llvm/Support/Error.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/FormatVariadic.h"
28#include "llvm/Support/InitLLVM.h"
29#include "llvm/Support/JSON.h"
30#include "llvm/Support/MemoryBuffer.h"
31#include "llvm/Support/ToolOutputFile.h"
32#include "llvm/Support/WithColor.h"
33#include "llvm/Support/raw_ostream.h"
34
35using namespace llvm;
36
37enum ParserFormatOptions { yaml, bitstream };
38enum ReportStyleOptions { human_output, json_output };
39static cl::OptionCategory SizeDiffCategory("llvm-remark-size-diff options");
40static cl::opt<std::string> InputFileNameA(cl::Positional, cl::Required,
41                                           cl::cat(SizeDiffCategory),
42                                           cl::desc("remarks_a"));
43static cl::opt<std::string> InputFileNameB(cl::Positional, cl::Required,
44                                           cl::cat(SizeDiffCategory),
45                                           cl::desc("remarks_b"));
46static cl::opt<std::string> OutputFilename("o", cl::init("-"),
47                                           cl::cat(SizeDiffCategory),
48                                           cl::desc("Output"),
49                                           cl::value_desc("file"));
50static cl::opt<ParserFormatOptions>
51    ParserFormat("parser", cl::cat(SizeDiffCategory), cl::init(bitstream),
52                 cl::desc("Set the remark parser format:"),
53                 cl::values(clEnumVal(yaml, "YAML format"),
54                            clEnumVal(bitstream, "Bitstream format")));
55static cl::opt<ReportStyleOptions> ReportStyle(
56    "report_style", cl::cat(SizeDiffCategory),
57    cl::init(ReportStyleOptions::human_output),
58    cl::desc("Choose the report output format:"),
59    cl::values(clEnumValN(human_output, "human", "Human-readable format"),
60               clEnumValN(json_output, "json", "JSON format")));
61static cl::opt<bool> PrettyPrint("pretty", cl::cat(SizeDiffCategory),
62                                 cl::init(false),
63                                 cl::desc("Pretty-print JSON"));
64
/// Size information gathered from the size remarks of a single function.
// A named struct is easier to read at use sites than a std::pair.
struct InstCountAndStackSize {
  /// Instruction count; 0 until a value is stored.
  int64_t InstCount{0};
  /// Stack size; 0 until a value is stored.
  int64_t StackSize{0};
};
71
/// Identifies the remark file(s) a function appeared in.
enum FilesPresent {
  A,   // The first file only.
  B,   // The second file only.
  BOTH // Both files.
};
74
75/// Contains the data from the remarks in file A and file B for some function.
76/// E.g. instruction count, stack size...
77struct FunctionDiff {
78  /// Function name from the remark.
79  std::string FuncName;
80  // Idx 0 = A, Idx 1 = B.
81  int64_t InstCount[2] = {0, 0};
82  int64_t StackSize[2] = {0, 0};
83
84  // Calculate diffs between the first and second files.
85  int64_t getInstDiff() const { return InstCount[1] - InstCount[0]; }
86  int64_t getStackDiff() const { return StackSize[1] - StackSize[0]; }
87
88  // Accessors for the remarks from the first file.
89  int64_t getInstCountA() const { return InstCount[0]; }
90  int64_t getStackSizeA() const { return StackSize[0]; }
91
92  // Accessors for the remarks from the second file.
93  int64_t getInstCountB() const { return InstCount[1]; }
94  int64_t getStackSizeB() const { return StackSize[1]; }
95
96  /// \returns which files this function was present in.
97  FilesPresent getFilesPresent() const {
98    if (getInstCountA() == 0)
99      return B;
100    if (getInstCountB() == 0)
101      return A;
102    return BOTH;
103  }
104
105  FunctionDiff(StringRef FuncName, const InstCountAndStackSize &A,
106               const InstCountAndStackSize &B)
107      : FuncName(FuncName) {
108    InstCount[0] = A.InstCount;
109    InstCount[1] = B.InstCount;
110    StackSize[0] = A.StackSize;
111    StackSize[1] = B.StackSize;
112  }
113};
114
115/// Organizes the diffs into 3 categories:
116/// - Functions which only appeared in the first file
117/// - Functions which only appeared in the second file
118/// - Functions which appeared in both files
119struct DiffsCategorizedByFilesPresent {
120  /// Diffs for functions which only appeared in the first file.
121  SmallVector<FunctionDiff> OnlyInA;
122
123  /// Diffs for functions which only appeared in the second file.
124  SmallVector<FunctionDiff> OnlyInB;
125
126  /// Diffs for functions which appeared in both files.
127  SmallVector<FunctionDiff> InBoth;
128
129  /// Add a diff to the appropriate list.
130  void addDiff(FunctionDiff &FD) {
131    switch (FD.getFilesPresent()) {
132    case A:
133      OnlyInA.push_back(FD);
134      break;
135    case B:
136      OnlyInB.push_back(FD);
137      break;
138    case BOTH:
139      InBoth.push_back(FD);
140      break;
141    }
142  }
143};
144
145static void printFunctionDiff(const FunctionDiff &FD, llvm::raw_ostream &OS) {
146  // Describe which files the function had remarks in.
147  FilesPresent FP = FD.getFilesPresent();
148  const std::string &FuncName = FD.FuncName;
149  const int64_t InstDiff = FD.getInstDiff();
150  assert(InstDiff && "Shouldn't get functions with no size change?");
151  const int64_t StackDiff = FD.getStackDiff();
152  // Output an indicator denoting which files the function was present in.
153  switch (FP) {
154  case FilesPresent::A:
155    OS << "-- ";
156    break;
157  case FilesPresent::B:
158    OS << "++ ";
159    break;
160  case FilesPresent::BOTH:
161    OS << "== ";
162    break;
163  }
164  // Output an indicator denoting if a function changed in size.
165  if (InstDiff > 0)
166    OS << "> ";
167  else
168    OS << "< ";
169  OS << FuncName << ", ";
170  OS << InstDiff << " instrs, ";
171  OS << StackDiff << " stack B";
172  OS << "\n";
173}
174
175/// Print an item in the summary section.
176///
177/// \p TotalA - Total count of the metric in file A.
178/// \p TotalB - Total count of the metric in file B.
179/// \p Metric - Name of the metric we want to print (e.g. instruction
180/// count).
181/// \p OS - The output stream.
182static void printSummaryItem(int64_t TotalA, int64_t TotalB, StringRef Metric,
183                             llvm::raw_ostream &OS) {
184  OS << "  " << Metric << ": ";
185  int64_t TotalDiff = TotalB - TotalA;
186  if (TotalDiff == 0) {
187    OS << "None\n";
188    return;
189  }
190  OS << TotalDiff << " (" << formatv("{0:p}", TotalDiff / (double)TotalA)
191     << ")\n";
192}
193
194/// Print all contents of \p Diff and a high-level summary of the differences.
195static void printDiffsCategorizedByFilesPresent(
196    DiffsCategorizedByFilesPresent &DiffsByFilesPresent,
197    llvm::raw_ostream &OS) {
198  int64_t InstrsA = 0;
199  int64_t InstrsB = 0;
200  int64_t StackA = 0;
201  int64_t StackB = 0;
202  // Helper lambda to sort + print a list of diffs.
203  auto PrintDiffList = [&](SmallVector<FunctionDiff> &FunctionDiffList) {
204    if (FunctionDiffList.empty())
205      return;
206    stable_sort(FunctionDiffList,
207                [](const FunctionDiff &LHS, const FunctionDiff &RHS) {
208                  return LHS.getInstDiff() < RHS.getInstDiff();
209                });
210    for (const auto &FuncDiff : FunctionDiffList) {
211      // If there is a difference in instruction count, then print out info for
212      // the function.
213      if (FuncDiff.getInstDiff())
214        printFunctionDiff(FuncDiff, OS);
215      InstrsA += FuncDiff.getInstCountA();
216      InstrsB += FuncDiff.getInstCountB();
217      StackA += FuncDiff.getStackSizeA();
218      StackB += FuncDiff.getStackSizeB();
219    }
220  };
221  PrintDiffList(DiffsByFilesPresent.OnlyInA);
222  PrintDiffList(DiffsByFilesPresent.OnlyInB);
223  PrintDiffList(DiffsByFilesPresent.InBoth);
224  OS << "\n### Summary ###\n";
225  OS << "Total change: \n";
226  printSummaryItem(InstrsA, InstrsB, "instruction count", OS);
227  printSummaryItem(StackA, StackB, "stack byte usage", OS);
228}
229
230/// Collects an expected integer value from a given argument index in a remark.
231///
232/// \p Remark - The remark.
233/// \p ArgIdx - The index where the integer value should be found.
234/// \p ExpectedKeyName - The expected key name for the index
235/// (e.g. "InstructionCount")
236///
237/// \returns the integer value at the index if it exists, and the key-value pair
238/// is what is expected. Otherwise, returns an Error.
239static Expected<int64_t> getIntValFromKey(const remarks::Remark &Remark,
240                                          unsigned ArgIdx,
241                                          StringRef ExpectedKeyName) {
242  auto KeyName = Remark.Args[ArgIdx].Key;
243  if (KeyName != ExpectedKeyName)
244    return createStringError(
245        inconvertibleErrorCode(),
246        Twine("Unexpected key at argument index " + std::to_string(ArgIdx) +
247              ": Expected '" + ExpectedKeyName + "', got '" + KeyName + "'"));
248  long long Val;
249  auto ValStr = Remark.Args[ArgIdx].Val;
250  if (getAsSignedInteger(ValStr, 0, Val))
251    return createStringError(
252        inconvertibleErrorCode(),
253        Twine("Could not convert string to signed integer: " + ValStr));
254  return static_cast<int64_t>(Val);
255}
256
257/// Collects relevant size information from \p Remark if it is an size-related
258/// remark of some kind (e.g. instruction count). Otherwise records nothing.
259///
260/// \p Remark - The remark.
261/// \p FuncNameToSizeInfo - Maps function names to relevant size info.
262/// \p NumInstCountRemarksParsed - Keeps track of the number of instruction
263/// count remarks parsed. We need at least 1 in both files to produce a diff.
264static Error processRemark(const remarks::Remark &Remark,
265                           StringMap<InstCountAndStackSize> &FuncNameToSizeInfo,
266                           unsigned &NumInstCountRemarksParsed) {
267  const auto &RemarkName = Remark.RemarkName;
268  const auto &PassName = Remark.PassName;
269  // Collect remarks which contain the number of instructions in a function.
270  if (PassName == "asm-printer" && RemarkName == "InstructionCount") {
271    // Expecting the 0-th argument to have the key "NumInstructions" and an
272    // integer value.
273    auto MaybeInstCount =
274        getIntValFromKey(Remark, /*ArgIdx = */ 0, "NumInstructions");
275    if (!MaybeInstCount)
276      return MaybeInstCount.takeError();
277    FuncNameToSizeInfo[Remark.FunctionName].InstCount = *MaybeInstCount;
278    ++NumInstCountRemarksParsed;
279  }
280  // Collect remarks which contain the stack size of a function.
281  else if (PassName == "prologepilog" && RemarkName == "StackSize") {
282    // Expecting the 0-th argument to have the key "NumStackBytes" and an
283    // integer value.
284    auto MaybeStackSize =
285        getIntValFromKey(Remark, /*ArgIdx = */ 0, "NumStackBytes");
286    if (!MaybeStackSize)
287      return MaybeStackSize.takeError();
288    FuncNameToSizeInfo[Remark.FunctionName].StackSize = *MaybeStackSize;
289  }
290  // Either we collected a remark, or it's something we don't care about. In
291  // both cases, this is a success.
292  return Error::success();
293}
294
295/// Process all of the size-related remarks in a file.
296///
297/// \param[in] InputFileName - Name of file to read from.
298/// \param[in, out] FuncNameToSizeInfo - Maps function names to relevant
299/// size info.
300static Error readFileAndProcessRemarks(
301    StringRef InputFileName,
302    StringMap<InstCountAndStackSize> &FuncNameToSizeInfo) {
303  auto Buf = MemoryBuffer::getFile(InputFileName);
304  if (auto EC = Buf.getError())
305    return createStringError(
306        EC, Twine("Cannot open file '" + InputFileName + "': " + EC.message()));
307  auto MaybeParser = remarks::createRemarkParserFromMeta(
308      ParserFormat == bitstream ? remarks::Format::Bitstream
309                                : remarks::Format::YAML,
310      (*Buf)->getBuffer());
311  if (!MaybeParser)
312    return MaybeParser.takeError();
313  auto &Parser = **MaybeParser;
314  auto MaybeRemark = Parser.next();
315  unsigned NumInstCountRemarksParsed = 0;
316  for (; MaybeRemark; MaybeRemark = Parser.next()) {
317    if (auto E = processRemark(**MaybeRemark, FuncNameToSizeInfo,
318                               NumInstCountRemarksParsed))
319      return E;
320  }
321  auto E = MaybeRemark.takeError();
322  if (!E.isA<remarks::EndOfFileError>())
323    return E;
324  consumeError(std::move(E));
325  // We need at least one instruction count remark in each file to produce a
326  // meaningful diff.
327  if (NumInstCountRemarksParsed == 0)
328    return createStringError(
329        inconvertibleErrorCode(),
330        "File '" + InputFileName +
331            "' did not contain any instruction-count remarks!");
332  return Error::success();
333}
334
335/// Wrapper function for readFileAndProcessRemarks which handles errors.
336///
337/// \param[in] InputFileName - Name of file to read from.
338/// \param[out] FuncNameToSizeInfo - Populated with information from size
339/// remarks in the input file.
340///
341/// \returns true if readFileAndProcessRemarks returned no errors. False
342/// otherwise.
343static bool tryReadFileAndProcessRemarks(
344    StringRef InputFileName,
345    StringMap<InstCountAndStackSize> &FuncNameToSizeInfo) {
346  if (Error E = readFileAndProcessRemarks(InputFileName, FuncNameToSizeInfo)) {
347    handleAllErrors(std::move(E), [&](const ErrorInfoBase &PE) {
348      PE.log(WithColor::error());
349      errs() << '\n';
350    });
351    return false;
352  }
353  return true;
354}
355
356/// Populates \p FuncDiffs with the difference between \p
357/// FuncNameToSizeInfoA and \p FuncNameToSizeInfoB.
358///
359/// \param[in] FuncNameToSizeInfoA - Size info collected from the first
360/// remarks file.
361/// \param[in] FuncNameToSizeInfoB - Size info collected from
362/// the second remarks file.
363/// \param[out] DiffsByFilesPresent - Filled with the diff between \p
364/// FuncNameToSizeInfoA and \p FuncNameToSizeInfoB.
365static void
366computeDiff(const StringMap<InstCountAndStackSize> &FuncNameToSizeInfoA,
367            const StringMap<InstCountAndStackSize> &FuncNameToSizeInfoB,
368            DiffsCategorizedByFilesPresent &DiffsByFilesPresent) {
369  SmallSet<std::string, 10> FuncNames;
370  for (const auto &FuncName : FuncNameToSizeInfoA.keys())
371    FuncNames.insert(FuncName.str());
372  for (const auto &FuncName : FuncNameToSizeInfoB.keys())
373    FuncNames.insert(FuncName.str());
374  for (const std::string &FuncName : FuncNames) {
375    const auto &SizeInfoA = FuncNameToSizeInfoA.lookup(FuncName);
376    const auto &SizeInfoB = FuncNameToSizeInfoB.lookup(FuncName);
377    FunctionDiff FuncDiff(FuncName, SizeInfoA, SizeInfoB);
378    DiffsByFilesPresent.addDiff(FuncDiff);
379  }
380}
381
382/// Attempt to get the output stream for writing the diff.
383static ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() {
384  if (OutputFilename == "")
385    OutputFilename = "-";
386  std::error_code EC;
387  auto Out = std::make_unique<ToolOutputFile>(OutputFilename, EC,
388                                              sys::fs::OF_TextWithCRLF);
389  if (!EC)
390    return std::move(Out);
391  return EC;
392}
393
394/// \return a json::Array representing all FunctionDiffs in \p FunctionDiffs.
395/// \p WhichFiles represents which files the functions in \p FunctionDiffs
396/// appeared in (A, B, or both).
397json::Array
398getFunctionDiffListAsJSON(const SmallVector<FunctionDiff> &FunctionDiffs,
399                          const FilesPresent &WhichFiles) {
400  json::Array FunctionDiffsAsJSON;
401  int64_t InstCountA, InstCountB, StackSizeA, StackSizeB;
402  for (auto &Diff : FunctionDiffs) {
403    InstCountA = InstCountB = StackSizeA = StackSizeB = 0;
404    switch (WhichFiles) {
405    case BOTH:
406      [[fallthrough]];
407    case A:
408      InstCountA = Diff.getInstCountA();
409      StackSizeA = Diff.getStackSizeA();
410      if (WhichFiles != BOTH)
411        break;
412      [[fallthrough]];
413    case B:
414      InstCountB = Diff.getInstCountB();
415      StackSizeB = Diff.getStackSizeB();
416      break;
417    }
418    // Each metric we care about is represented like:
419    //   "Val": [A, B]
420    // This allows any consumer of the JSON to calculate the diff using B - A.
421    // This is somewhat wasteful for OnlyInA and OnlyInB (we only need A or B).
422    // However, this should make writing consuming tools easier, since the tool
423    // writer doesn't need to think about slightly different formats in each
424    // section.
425    json::Object FunctionObject({{"FunctionName", Diff.FuncName},
426                                 {"InstCount", {InstCountA, InstCountB}},
427                                 {"StackSize", {StackSizeA, StackSizeB}}});
428    FunctionDiffsAsJSON.push_back(std::move(FunctionObject));
429  }
430  return FunctionDiffsAsJSON;
431}
432
433/// Output all diffs in \p DiffsByFilesPresent as a JSON report. This is
434/// intended for consumption by external tools.
435///
436/// \p InputFileNameA - File A used to produce the report.
437/// \p InputFileNameB - File B used ot produce the report.
438/// \p OS - Output stream.
439///
440/// JSON output includes:
441///  - \p InputFileNameA and \p InputFileNameB under "Files".
442///  - Functions present in both files under "InBoth".
443///  - Functions present only in A in "OnlyInA".
444///  - Functions present only in B in "OnlyInB".
445///  - Instruction count and stack size differences for each function.
446///
447/// Differences are represented using [count_a, count_b]. The actual difference
448/// can be computed via count_b - count_a.
449static void
450outputJSONForAllDiffs(StringRef InputFileNameA, StringRef InputFileNameB,
451                      const DiffsCategorizedByFilesPresent &DiffsByFilesPresent,
452                      llvm::raw_ostream &OS) {
453  json::Object Output;
454  // Include file names in the report.
455  json::Object Files(
456      {{"A", InputFileNameA.str()}, {"B", InputFileNameB.str()}});
457  Output["Files"] = std::move(Files);
458  Output["OnlyInA"] = getFunctionDiffListAsJSON(DiffsByFilesPresent.OnlyInA, A);
459  Output["OnlyInB"] = getFunctionDiffListAsJSON(DiffsByFilesPresent.OnlyInB, B);
460  Output["InBoth"] =
461      getFunctionDiffListAsJSON(DiffsByFilesPresent.InBoth, BOTH);
462  json::OStream JOS(OS, PrettyPrint ? 2 : 0);
463  JOS.value(std::move(Output));
464  OS << '\n';
465}
466
467/// Output all diffs in \p DiffsByFilesPresent using the desired output style.
468/// \returns Error::success() on success, and an Error otherwise.
469/// \p InputFileNameA - Name of input file A; may be used in the report.
470/// \p InputFileNameB - Name of input file B; may be used in the report.
471static Error
472outputAllDiffs(StringRef InputFileNameA, StringRef InputFileNameB,
473               DiffsCategorizedByFilesPresent &DiffsByFilesPresent) {
474  auto MaybeOF = getOutputStream();
475  if (std::error_code EC = MaybeOF.getError())
476    return errorCodeToError(EC);
477  std::unique_ptr<ToolOutputFile> OF = std::move(*MaybeOF);
478  switch (ReportStyle) {
479  case human_output:
480    printDiffsCategorizedByFilesPresent(DiffsByFilesPresent, OF->os());
481    break;
482  case json_output:
483    outputJSONForAllDiffs(InputFileNameA, InputFileNameB, DiffsByFilesPresent,
484                          OF->os());
485    break;
486  }
487  OF->keep();
488  return Error::success();
489}
490
491/// Boolean wrapper for outputDiff which handles errors.
492static bool
493tryOutputAllDiffs(StringRef InputFileNameA, StringRef InputFileNameB,
494                  DiffsCategorizedByFilesPresent &DiffsByFilesPresent) {
495  if (Error E =
496          outputAllDiffs(InputFileNameA, InputFileNameB, DiffsByFilesPresent)) {
497    handleAllErrors(std::move(E), [&](const ErrorInfoBase &PE) {
498      PE.log(WithColor::error());
499      errs() << '\n';
500    });
501    return false;
502  }
503  return true;
504}
505
506int main(int argc, const char **argv) {
507  InitLLVM X(argc, argv);
508  cl::HideUnrelatedOptions(SizeDiffCategory);
509  cl::ParseCommandLineOptions(argc, argv,
510                              "Diff instruction count and stack size remarks "
511                              "between two remark files.\n");
512  StringMap<InstCountAndStackSize> FuncNameToSizeInfoA;
513  StringMap<InstCountAndStackSize> FuncNameToSizeInfoB;
514  if (!tryReadFileAndProcessRemarks(InputFileNameA, FuncNameToSizeInfoA) ||
515      !tryReadFileAndProcessRemarks(InputFileNameB, FuncNameToSizeInfoB))
516    return 1;
517  DiffsCategorizedByFilesPresent DiffsByFilesPresent;
518  computeDiff(FuncNameToSizeInfoA, FuncNameToSizeInfoB, DiffsByFilesPresent);
519  if (!tryOutputAllDiffs(InputFileNameA, InputFileNameB, DiffsByFilesPresent))
520    return 1;
521}
522