1//===-------------- llvm-remark-size-diff/RemarkSizeDiff.cpp --------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8/// 9/// \file 10/// Diffs instruction count and stack size remarks between two remark files. 11/// 12/// This is intended for use by compiler developers who want to see how their 13/// changes impact program code size. 14/// 15//===----------------------------------------------------------------------===// 16 17#include "llvm-c/Remarks.h" 18#include "llvm/ADT/STLExtras.h" 19#include "llvm/ADT/SmallSet.h" 20#include "llvm/Remarks/Remark.h" 21#include "llvm/Remarks/RemarkParser.h" 22#include "llvm/Remarks/RemarkSerializer.h" 23#include "llvm/Support/CommandLine.h" 24#include "llvm/Support/Compiler.h" 25#include "llvm/Support/Error.h" 26#include "llvm/Support/FileSystem.h" 27#include "llvm/Support/FormatVariadic.h" 28#include "llvm/Support/InitLLVM.h" 29#include "llvm/Support/JSON.h" 30#include "llvm/Support/MemoryBuffer.h" 31#include "llvm/Support/ToolOutputFile.h" 32#include "llvm/Support/WithColor.h" 33#include "llvm/Support/raw_ostream.h" 34 35using namespace llvm; 36 37enum ParserFormatOptions { yaml, bitstream }; 38enum ReportStyleOptions { human_output, json_output }; 39static cl::OptionCategory SizeDiffCategory("llvm-remark-size-diff options"); 40static cl::opt<std::string> InputFileNameA(cl::Positional, cl::Required, 41 cl::cat(SizeDiffCategory), 42 cl::desc("remarks_a")); 43static cl::opt<std::string> InputFileNameB(cl::Positional, cl::Required, 44 cl::cat(SizeDiffCategory), 45 cl::desc("remarks_b")); 46static cl::opt<std::string> OutputFilename("o", cl::init("-"), 47 cl::cat(SizeDiffCategory), 48 cl::desc("Output"), 49 cl::value_desc("file")); 50static cl::opt<ParserFormatOptions> 51 ParserFormat("parser", cl::cat(SizeDiffCategory), cl::init(bitstream), 52 cl::desc("Set the remark parser format:"), 53 cl::values(clEnumVal(yaml, "YAML format"), 54 clEnumVal(bitstream, "Bitstream format"))); 55static cl::opt<ReportStyleOptions> ReportStyle( 56 "report_style", cl::cat(SizeDiffCategory), 57 cl::init(ReportStyleOptions::human_output), 58 cl::desc("Choose the report output format:"), 59 cl::values(clEnumValN(human_output, "human", "Human-readable format"), 60 clEnumValN(json_output, "json", "JSON format"))); 61static cl::opt<bool> PrettyPrint("pretty", cl::cat(SizeDiffCategory), 62 cl::init(false), 63 cl::desc("Pretty-print JSON")); 64 65/// Contains information from size remarks. 66// This is a little nicer to read than a std::pair. 67struct InstCountAndStackSize { 68 int64_t InstCount = 0; 69 int64_t StackSize = 0; 70}; 71 72/// Represents which files a function appeared in. 73enum FilesPresent { A, B, BOTH }; 74 75/// Contains the data from the remarks in file A and file B for some function. 76/// E.g. instruction count, stack size... 77struct FunctionDiff { 78 /// Function name from the remark. 79 std::string FuncName; 80 // Idx 0 = A, Idx 1 = B. 81 int64_t InstCount[2] = {0, 0}; 82 int64_t StackSize[2] = {0, 0}; 83 84 // Calculate diffs between the first and second files. 85 int64_t getInstDiff() const { return InstCount[1] - InstCount[0]; } 86 int64_t getStackDiff() const { return StackSize[1] - StackSize[0]; } 87 88 // Accessors for the remarks from the first file. 89 int64_t getInstCountA() const { return InstCount[0]; } 90 int64_t getStackSizeA() const { return StackSize[0]; } 91 92 // Accessors for the remarks from the second file. 93 int64_t getInstCountB() const { return InstCount[1]; } 94 int64_t getStackSizeB() const { return StackSize[1]; } 95 96 /// \returns which files this function was present in. 97 FilesPresent getFilesPresent() const { 98 if (getInstCountA() == 0) 99 return B; 100 if (getInstCountB() == 0) 101 return A; 102 return BOTH; 103 } 104 105 FunctionDiff(StringRef FuncName, const InstCountAndStackSize &A, 106 const InstCountAndStackSize &B) 107 : FuncName(FuncName) { 108 InstCount[0] = A.InstCount; 109 InstCount[1] = B.InstCount; 110 StackSize[0] = A.StackSize; 111 StackSize[1] = B.StackSize; 112 } 113}; 114 115/// Organizes the diffs into 3 categories: 116/// - Functions which only appeared in the first file 117/// - Functions which only appeared in the second file 118/// - Functions which appeared in both files 119struct DiffsCategorizedByFilesPresent { 120 /// Diffs for functions which only appeared in the first file. 121 SmallVector<FunctionDiff> OnlyInA; 122 123 /// Diffs for functions which only appeared in the second file. 124 SmallVector<FunctionDiff> OnlyInB; 125 126 /// Diffs for functions which appeared in both files. 127 SmallVector<FunctionDiff> InBoth; 128 129 /// Add a diff to the appropriate list. 130 void addDiff(FunctionDiff &FD) { 131 switch (FD.getFilesPresent()) { 132 case A: 133 OnlyInA.push_back(FD); 134 break; 135 case B: 136 OnlyInB.push_back(FD); 137 break; 138 case BOTH: 139 InBoth.push_back(FD); 140 break; 141 } 142 } 143}; 144 145static void printFunctionDiff(const FunctionDiff &FD, llvm::raw_ostream &OS) { 146 // Describe which files the function had remarks in. 147 FilesPresent FP = FD.getFilesPresent(); 148 const std::string &FuncName = FD.FuncName; 149 const int64_t InstDiff = FD.getInstDiff(); 150 assert(InstDiff && "Shouldn't get functions with no size change?"); 151 const int64_t StackDiff = FD.getStackDiff(); 152 // Output an indicator denoting which files the function was present in. 153 switch (FP) { 154 case FilesPresent::A: 155 OS << "-- "; 156 break; 157 case FilesPresent::B: 158 OS << "++ "; 159 break; 160 case FilesPresent::BOTH: 161 OS << "== "; 162 break; 163 } 164 // Output an indicator denoting if a function changed in size. 165 if (InstDiff > 0) 166 OS << "> "; 167 else 168 OS << "< "; 169 OS << FuncName << ", "; 170 OS << InstDiff << " instrs, "; 171 OS << StackDiff << " stack B"; 172 OS << "\n"; 173} 174 175/// Print an item in the summary section. 176/// 177/// \p TotalA - Total count of the metric in file A. 178/// \p TotalB - Total count of the metric in file B. 179/// \p Metric - Name of the metric we want to print (e.g. instruction 180/// count). 181/// \p OS - The output stream. 182static void printSummaryItem(int64_t TotalA, int64_t TotalB, StringRef Metric, 183 llvm::raw_ostream &OS) { 184 OS << " " << Metric << ": "; 185 int64_t TotalDiff = TotalB - TotalA; 186 if (TotalDiff == 0) { 187 OS << "None\n"; 188 return; 189 } 190 OS << TotalDiff << " (" << formatv("{0:p}", TotalDiff / (double)TotalA) 191 << ")\n"; 192} 193 194/// Print all contents of \p Diff and a high-level summary of the differences. 195static void printDiffsCategorizedByFilesPresent( 196 DiffsCategorizedByFilesPresent &DiffsByFilesPresent, 197 llvm::raw_ostream &OS) { 198 int64_t InstrsA = 0; 199 int64_t InstrsB = 0; 200 int64_t StackA = 0; 201 int64_t StackB = 0; 202 // Helper lambda to sort + print a list of diffs. 203 auto PrintDiffList = [&](SmallVector<FunctionDiff> &FunctionDiffList) { 204 if (FunctionDiffList.empty()) 205 return; 206 stable_sort(FunctionDiffList, 207 [](const FunctionDiff &LHS, const FunctionDiff &RHS) { 208 return LHS.getInstDiff() < RHS.getInstDiff(); 209 }); 210 for (const auto &FuncDiff : FunctionDiffList) { 211 // If there is a difference in instruction count, then print out info for 212 // the function. 213 if (FuncDiff.getInstDiff()) 214 printFunctionDiff(FuncDiff, OS); 215 InstrsA += FuncDiff.getInstCountA(); 216 InstrsB += FuncDiff.getInstCountB(); 217 StackA += FuncDiff.getStackSizeA(); 218 StackB += FuncDiff.getStackSizeB(); 219 } 220 }; 221 PrintDiffList(DiffsByFilesPresent.OnlyInA); 222 PrintDiffList(DiffsByFilesPresent.OnlyInB); 223 PrintDiffList(DiffsByFilesPresent.InBoth); 224 OS << "\n### Summary ###\n"; 225 OS << "Total change: \n"; 226 printSummaryItem(InstrsA, InstrsB, "instruction count", OS); 227 printSummaryItem(StackA, StackB, "stack byte usage", OS); 228} 229 230/// Collects an expected integer value from a given argument index in a remark. 231/// 232/// \p Remark - The remark. 233/// \p ArgIdx - The index where the integer value should be found. 234/// \p ExpectedKeyName - The expected key name for the index 235/// (e.g. "InstructionCount") 236/// 237/// \returns the integer value at the index if it exists, and the key-value pair 238/// is what is expected. Otherwise, returns an Error. 239static Expected<int64_t> getIntValFromKey(const remarks::Remark &Remark, 240 unsigned ArgIdx, 241 StringRef ExpectedKeyName) { 242 auto KeyName = Remark.Args[ArgIdx].Key; 243 if (KeyName != ExpectedKeyName) 244 return createStringError( 245 inconvertibleErrorCode(), 246 Twine("Unexpected key at argument index " + std::to_string(ArgIdx) + 247 ": Expected '" + ExpectedKeyName + "', got '" + KeyName + "'")); 248 long long Val; 249 auto ValStr = Remark.Args[ArgIdx].Val; 250 if (getAsSignedInteger(ValStr, 0, Val)) 251 return createStringError( 252 inconvertibleErrorCode(), 253 Twine("Could not convert string to signed integer: " + ValStr)); 254 return static_cast<int64_t>(Val); 255} 256 257/// Collects relevant size information from \p Remark if it is an size-related 258/// remark of some kind (e.g. instruction count). Otherwise records nothing. 259/// 260/// \p Remark - The remark. 261/// \p FuncNameToSizeInfo - Maps function names to relevant size info. 262/// \p NumInstCountRemarksParsed - Keeps track of the number of instruction 263/// count remarks parsed. We need at least 1 in both files to produce a diff. 264static Error processRemark(const remarks::Remark &Remark, 265 StringMap<InstCountAndStackSize> &FuncNameToSizeInfo, 266 unsigned &NumInstCountRemarksParsed) { 267 const auto &RemarkName = Remark.RemarkName; 268 const auto &PassName = Remark.PassName; 269 // Collect remarks which contain the number of instructions in a function. 270 if (PassName == "asm-printer" && RemarkName == "InstructionCount") { 271 // Expecting the 0-th argument to have the key "NumInstructions" and an 272 // integer value. 273 auto MaybeInstCount = 274 getIntValFromKey(Remark, /*ArgIdx = */ 0, "NumInstructions"); 275 if (!MaybeInstCount) 276 return MaybeInstCount.takeError(); 277 FuncNameToSizeInfo[Remark.FunctionName].InstCount = *MaybeInstCount; 278 ++NumInstCountRemarksParsed; 279 } 280 // Collect remarks which contain the stack size of a function. 281 else if (PassName == "prologepilog" && RemarkName == "StackSize") { 282 // Expecting the 0-th argument to have the key "NumStackBytes" and an 283 // integer value. 284 auto MaybeStackSize = 285 getIntValFromKey(Remark, /*ArgIdx = */ 0, "NumStackBytes"); 286 if (!MaybeStackSize) 287 return MaybeStackSize.takeError(); 288 FuncNameToSizeInfo[Remark.FunctionName].StackSize = *MaybeStackSize; 289 } 290 // Either we collected a remark, or it's something we don't care about. In 291 // both cases, this is a success. 292 return Error::success(); 293} 294 295/// Process all of the size-related remarks in a file. 296/// 297/// \param[in] InputFileName - Name of file to read from. 298/// \param[in, out] FuncNameToSizeInfo - Maps function names to relevant 299/// size info. 300static Error readFileAndProcessRemarks( 301 StringRef InputFileName, 302 StringMap<InstCountAndStackSize> &FuncNameToSizeInfo) { 303 auto Buf = MemoryBuffer::getFile(InputFileName); 304 if (auto EC = Buf.getError()) 305 return createStringError( 306 EC, Twine("Cannot open file '" + InputFileName + "': " + EC.message())); 307 auto MaybeParser = remarks::createRemarkParserFromMeta( 308 ParserFormat == bitstream ? remarks::Format::Bitstream 309 : remarks::Format::YAML, 310 (*Buf)->getBuffer()); 311 if (!MaybeParser) 312 return MaybeParser.takeError(); 313 auto &Parser = **MaybeParser; 314 auto MaybeRemark = Parser.next(); 315 unsigned NumInstCountRemarksParsed = 0; 316 for (; MaybeRemark; MaybeRemark = Parser.next()) { 317 if (auto E = processRemark(**MaybeRemark, FuncNameToSizeInfo, 318 NumInstCountRemarksParsed)) 319 return E; 320 } 321 auto E = MaybeRemark.takeError(); 322 if (!E.isA<remarks::EndOfFileError>()) 323 return E; 324 consumeError(std::move(E)); 325 // We need at least one instruction count remark in each file to produce a 326 // meaningful diff. 327 if (NumInstCountRemarksParsed == 0) 328 return createStringError( 329 inconvertibleErrorCode(), 330 "File '" + InputFileName + 331 "' did not contain any instruction-count remarks!"); 332 return Error::success(); 333} 334 335/// Wrapper function for readFileAndProcessRemarks which handles errors. 336/// 337/// \param[in] InputFileName - Name of file to read from. 338/// \param[out] FuncNameToSizeInfo - Populated with information from size 339/// remarks in the input file. 340/// 341/// \returns true if readFileAndProcessRemarks returned no errors. False 342/// otherwise. 343static bool tryReadFileAndProcessRemarks( 344 StringRef InputFileName, 345 StringMap<InstCountAndStackSize> &FuncNameToSizeInfo) { 346 if (Error E = readFileAndProcessRemarks(InputFileName, FuncNameToSizeInfo)) { 347 handleAllErrors(std::move(E), [&](const ErrorInfoBase &PE) { 348 PE.log(WithColor::error()); 349 errs() << '\n'; 350 }); 351 return false; 352 } 353 return true; 354} 355 356/// Populates \p FuncDiffs with the difference between \p 357/// FuncNameToSizeInfoA and \p FuncNameToSizeInfoB. 358/// 359/// \param[in] FuncNameToSizeInfoA - Size info collected from the first 360/// remarks file. 361/// \param[in] FuncNameToSizeInfoB - Size info collected from 362/// the second remarks file. 363/// \param[out] DiffsByFilesPresent - Filled with the diff between \p 364/// FuncNameToSizeInfoA and \p FuncNameToSizeInfoB. 365static void 366computeDiff(const StringMap<InstCountAndStackSize> &FuncNameToSizeInfoA, 367 const StringMap<InstCountAndStackSize> &FuncNameToSizeInfoB, 368 DiffsCategorizedByFilesPresent &DiffsByFilesPresent) { 369 SmallSet<std::string, 10> FuncNames; 370 for (const auto &FuncName : FuncNameToSizeInfoA.keys()) 371 FuncNames.insert(FuncName.str()); 372 for (const auto &FuncName : FuncNameToSizeInfoB.keys()) 373 FuncNames.insert(FuncName.str()); 374 for (const std::string &FuncName : FuncNames) { 375 const auto &SizeInfoA = FuncNameToSizeInfoA.lookup(FuncName); 376 const auto &SizeInfoB = FuncNameToSizeInfoB.lookup(FuncName); 377 FunctionDiff FuncDiff(FuncName, SizeInfoA, SizeInfoB); 378 DiffsByFilesPresent.addDiff(FuncDiff); 379 } 380} 381 382/// Attempt to get the output stream for writing the diff. 383static ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() { 384 if (OutputFilename == "") 385 OutputFilename = "-"; 386 std::error_code EC; 387 auto Out = std::make_unique<ToolOutputFile>(OutputFilename, EC, 388 sys::fs::OF_TextWithCRLF); 389 if (!EC) 390 return std::move(Out); 391 return EC; 392} 393 394/// \return a json::Array representing all FunctionDiffs in \p FunctionDiffs. 395/// \p WhichFiles represents which files the functions in \p FunctionDiffs 396/// appeared in (A, B, or both). 397json::Array 398getFunctionDiffListAsJSON(const SmallVector<FunctionDiff> &FunctionDiffs, 399 const FilesPresent &WhichFiles) { 400 json::Array FunctionDiffsAsJSON; 401 int64_t InstCountA, InstCountB, StackSizeA, StackSizeB; 402 for (auto &Diff : FunctionDiffs) { 403 InstCountA = InstCountB = StackSizeA = StackSizeB = 0; 404 switch (WhichFiles) { 405 case BOTH: 406 [[fallthrough]]; 407 case A: 408 InstCountA = Diff.getInstCountA(); 409 StackSizeA = Diff.getStackSizeA(); 410 if (WhichFiles != BOTH) 411 break; 412 [[fallthrough]]; 413 case B: 414 InstCountB = Diff.getInstCountB(); 415 StackSizeB = Diff.getStackSizeB(); 416 break; 417 } 418 // Each metric we care about is represented like: 419 // "Val": [A, B] 420 // This allows any consumer of the JSON to calculate the diff using B - A. 421 // This is somewhat wasteful for OnlyInA and OnlyInB (we only need A or B). 422 // However, this should make writing consuming tools easier, since the tool 423 // writer doesn't need to think about slightly different formats in each 424 // section. 425 json::Object FunctionObject({{"FunctionName", Diff.FuncName}, 426 {"InstCount", {InstCountA, InstCountB}}, 427 {"StackSize", {StackSizeA, StackSizeB}}}); 428 FunctionDiffsAsJSON.push_back(std::move(FunctionObject)); 429 } 430 return FunctionDiffsAsJSON; 431} 432 433/// Output all diffs in \p DiffsByFilesPresent as a JSON report. This is 434/// intended for consumption by external tools. 435/// 436/// \p InputFileNameA - File A used to produce the report. 437/// \p InputFileNameB - File B used ot produce the report. 438/// \p OS - Output stream. 439/// 440/// JSON output includes: 441/// - \p InputFileNameA and \p InputFileNameB under "Files". 442/// - Functions present in both files under "InBoth". 443/// - Functions present only in A in "OnlyInA". 444/// - Functions present only in B in "OnlyInB". 445/// - Instruction count and stack size differences for each function. 446/// 447/// Differences are represented using [count_a, count_b]. The actual difference 448/// can be computed via count_b - count_a. 449static void 450outputJSONForAllDiffs(StringRef InputFileNameA, StringRef InputFileNameB, 451 const DiffsCategorizedByFilesPresent &DiffsByFilesPresent, 452 llvm::raw_ostream &OS) { 453 json::Object Output; 454 // Include file names in the report. 455 json::Object Files( 456 {{"A", InputFileNameA.str()}, {"B", InputFileNameB.str()}}); 457 Output["Files"] = std::move(Files); 458 Output["OnlyInA"] = getFunctionDiffListAsJSON(DiffsByFilesPresent.OnlyInA, A); 459 Output["OnlyInB"] = getFunctionDiffListAsJSON(DiffsByFilesPresent.OnlyInB, B); 460 Output["InBoth"] = 461 getFunctionDiffListAsJSON(DiffsByFilesPresent.InBoth, BOTH); 462 json::OStream JOS(OS, PrettyPrint ? 2 : 0); 463 JOS.value(std::move(Output)); 464 OS << '\n'; 465} 466 467/// Output all diffs in \p DiffsByFilesPresent using the desired output style. 468/// \returns Error::success() on success, and an Error otherwise. 469/// \p InputFileNameA - Name of input file A; may be used in the report. 470/// \p InputFileNameB - Name of input file B; may be used in the report. 471static Error 472outputAllDiffs(StringRef InputFileNameA, StringRef InputFileNameB, 473 DiffsCategorizedByFilesPresent &DiffsByFilesPresent) { 474 auto MaybeOF = getOutputStream(); 475 if (std::error_code EC = MaybeOF.getError()) 476 return errorCodeToError(EC); 477 std::unique_ptr<ToolOutputFile> OF = std::move(*MaybeOF); 478 switch (ReportStyle) { 479 case human_output: 480 printDiffsCategorizedByFilesPresent(DiffsByFilesPresent, OF->os()); 481 break; 482 case json_output: 483 outputJSONForAllDiffs(InputFileNameA, InputFileNameB, DiffsByFilesPresent, 484 OF->os()); 485 break; 486 } 487 OF->keep(); 488 return Error::success(); 489} 490 491/// Boolean wrapper for outputDiff which handles errors. 492static bool 493tryOutputAllDiffs(StringRef InputFileNameA, StringRef InputFileNameB, 494 DiffsCategorizedByFilesPresent &DiffsByFilesPresent) { 495 if (Error E = 496 outputAllDiffs(InputFileNameA, InputFileNameB, DiffsByFilesPresent)) { 497 handleAllErrors(std::move(E), [&](const ErrorInfoBase &PE) { 498 PE.log(WithColor::error()); 499 errs() << '\n'; 500 }); 501 return false; 502 } 503 return true; 504} 505 506int main(int argc, const char **argv) { 507 InitLLVM X(argc, argv); 508 cl::HideUnrelatedOptions(SizeDiffCategory); 509 cl::ParseCommandLineOptions(argc, argv, 510 "Diff instruction count and stack size remarks " 511 "between two remark files.\n"); 512 StringMap<InstCountAndStackSize> FuncNameToSizeInfoA; 513 StringMap<InstCountAndStackSize> FuncNameToSizeInfoB; 514 if (!tryReadFileAndProcessRemarks(InputFileNameA, FuncNameToSizeInfoA) || 515 !tryReadFileAndProcessRemarks(InputFileNameB, FuncNameToSizeInfoB)) 516 return 1; 517 DiffsCategorizedByFilesPresent DiffsByFilesPresent; 518 computeDiff(FuncNameToSizeInfoA, FuncNameToSizeInfoB, DiffsByFilesPresent); 519 if (!tryOutputAllDiffs(InputFileNameA, InputFileNameB, DiffsByFilesPresent)) 520 return 1; 521} 522