1//===- YAMLRemarkParser.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides utility methods used by clients that want to use the
10// parser for remark diagnostics in LLVM.
11//
12//===----------------------------------------------------------------------===//
13
14#include "YAMLRemarkParser.h"
15#include "llvm/ADT/StringSwitch.h"
16#include "llvm/Remarks/RemarkParser.h"
17#include "llvm/Support/Endian.h"
18#include "llvm/Support/Path.h"
19
20using namespace llvm;
21using namespace llvm::remarks;
22
23char YAMLParseError::ID = 0;
24
25static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
26  assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
27  std::string &Message = *static_cast<std::string *>(Ctx);
28  assert(Message.empty() && "Expected an empty string.");
29  raw_string_ostream OS(Message);
30  Diag.print(/*ProgName=*/nullptr, OS, /*ShowColors*/ false,
31             /*ShowKindLabels*/ true);
32  OS << '\n';
33  OS.flush();
34}
35
36YAMLParseError::YAMLParseError(StringRef Msg, SourceMgr &SM,
37                               yaml::Stream &Stream, yaml::Node &Node) {
38  // 1) Set up a diagnostic handler to avoid errors being printed out to
39  // stderr.
40  // 2) Use the stream to print the error with the associated node.
41  // 3) The stream will use the source manager to print the error, which will
42  // call the diagnostic handler.
43  // 4) The diagnostic handler will stream the error directly into this object's
44  // Message member, which is used when logging is asked for.
45  auto OldDiagHandler = SM.getDiagHandler();
46  auto OldDiagCtx = SM.getDiagContext();
47  SM.setDiagHandler(handleDiagnostic, &Message);
48  Stream.printError(&Node, Twine(Msg) + Twine('\n'));
49  // Restore the old handlers.
50  SM.setDiagHandler(OldDiagHandler, OldDiagCtx);
51}
52
53static SourceMgr setupSM(std::string &LastErrorMessage) {
54  SourceMgr SM;
55  SM.setDiagHandler(handleDiagnostic, &LastErrorMessage);
56  return SM;
57}
58
59// Parse the magic number. This function returns true if this represents remark
60// metadata, false otherwise.
61static Expected<bool> parseMagic(StringRef &Buf) {
62  if (!Buf.consume_front(remarks::Magic))
63    return false;
64
65  if (Buf.size() < 1 || !Buf.consume_front(StringRef("\0", 1)))
66    return createStringError(std::errc::illegal_byte_sequence,
67                             "Expecting \\0 after magic number.");
68  return true;
69}
70
71static Expected<uint64_t> parseVersion(StringRef &Buf) {
72  if (Buf.size() < sizeof(uint64_t))
73    return createStringError(std::errc::illegal_byte_sequence,
74                             "Expecting version number.");
75
76  uint64_t Version =
77      support::endian::read<uint64_t, support::little, support::unaligned>(
78          Buf.data());
79  if (Version != remarks::CurrentRemarkVersion)
80    return createStringError(std::errc::illegal_byte_sequence,
81                             "Mismatching remark version. Got %" PRId64
82                             ", expected %" PRId64 ".",
83                             Version, remarks::CurrentRemarkVersion);
84  Buf = Buf.drop_front(sizeof(uint64_t));
85  return Version;
86}
87
88static Expected<uint64_t> parseStrTabSize(StringRef &Buf) {
89  if (Buf.size() < sizeof(uint64_t))
90    return createStringError(std::errc::illegal_byte_sequence,
91                             "Expecting string table size.");
92  uint64_t StrTabSize =
93      support::endian::read<uint64_t, support::little, support::unaligned>(
94          Buf.data());
95  Buf = Buf.drop_front(sizeof(uint64_t));
96  return StrTabSize;
97}
98
99static Expected<ParsedStringTable> parseStrTab(StringRef &Buf,
100                                               uint64_t StrTabSize) {
101  if (Buf.size() < StrTabSize)
102    return createStringError(std::errc::illegal_byte_sequence,
103                             "Expecting string table.");
104
105  // Attach the string table to the parser.
106  ParsedStringTable Result(StringRef(Buf.data(), StrTabSize));
107  Buf = Buf.drop_front(StrTabSize);
108  return Expected<ParsedStringTable>(std::move(Result));
109}
110
111Expected<std::unique_ptr<YAMLRemarkParser>>
112remarks::createYAMLParserFromMeta(StringRef Buf,
113                                  Optional<ParsedStringTable> StrTab,
114                                  Optional<StringRef> ExternalFilePrependPath) {
115  // We now have a magic number. The metadata has to be correct.
116  Expected<bool> isMeta = parseMagic(Buf);
117  if (!isMeta)
118    return isMeta.takeError();
119  // If it's not recognized as metadata, roll back.
120  std::unique_ptr<MemoryBuffer> SeparateBuf;
121  if (*isMeta) {
122    Expected<uint64_t> Version = parseVersion(Buf);
123    if (!Version)
124      return Version.takeError();
125
126    Expected<uint64_t> StrTabSize = parseStrTabSize(Buf);
127    if (!StrTabSize)
128      return StrTabSize.takeError();
129
130    // If the size of string table is not 0, try to build one.
131    if (*StrTabSize != 0) {
132      if (StrTab)
133        return createStringError(std::errc::illegal_byte_sequence,
134                                 "String table already provided.");
135      Expected<ParsedStringTable> MaybeStrTab = parseStrTab(Buf, *StrTabSize);
136      if (!MaybeStrTab)
137        return MaybeStrTab.takeError();
138      StrTab = std::move(*MaybeStrTab);
139    }
140    // If it starts with "---", there is no external file.
141    if (!Buf.startswith("---")) {
142      // At this point, we expect Buf to contain the external file path.
143      StringRef ExternalFilePath = Buf;
144      SmallString<80> FullPath;
145      if (ExternalFilePrependPath)
146        FullPath = *ExternalFilePrependPath;
147      sys::path::append(FullPath, ExternalFilePath);
148
149      // Try to open the file and start parsing from there.
150      ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
151          MemoryBuffer::getFile(FullPath);
152      if (std::error_code EC = BufferOrErr.getError())
153        return createFileError(FullPath, EC);
154
155      // Keep the buffer alive.
156      SeparateBuf = std::move(*BufferOrErr);
157      Buf = SeparateBuf->getBuffer();
158    }
159  }
160
161  std::unique_ptr<YAMLRemarkParser> Result =
162      StrTab
163          ? std::make_unique<YAMLStrTabRemarkParser>(Buf, std::move(*StrTab))
164          : std::make_unique<YAMLRemarkParser>(Buf);
165  if (SeparateBuf)
166    Result->SeparateBuf = std::move(SeparateBuf);
167  return std::move(Result);
168}
169
170YAMLRemarkParser::YAMLRemarkParser(StringRef Buf)
171    : YAMLRemarkParser(Buf, None) {}
172
173YAMLRemarkParser::YAMLRemarkParser(StringRef Buf,
174                                   Optional<ParsedStringTable> StrTab)
175    : RemarkParser{Format::YAML}, StrTab(std::move(StrTab)), LastErrorMessage(),
176      SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {}
177
178Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) {
179  return make_error<YAMLParseError>(Message, SM, Stream, Node);
180}
181
182Error YAMLRemarkParser::error() {
183  if (LastErrorMessage.empty())
184    return Error::success();
185  Error E = make_error<YAMLParseError>(LastErrorMessage);
186  LastErrorMessage.clear();
187  return E;
188}
189
190Expected<std::unique_ptr<Remark>>
191YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
192  if (Error E = error())
193    return std::move(E);
194
195  yaml::Node *YAMLRoot = RemarkEntry.getRoot();
196  if (!YAMLRoot) {
197    return createStringError(std::make_error_code(std::errc::invalid_argument),
198                             "not a valid YAML file.");
199  }
200
201  auto *Root = dyn_cast<yaml::MappingNode>(YAMLRoot);
202  if (!Root)
203    return error("document root is not of mapping type.", *YAMLRoot);
204
205  std::unique_ptr<Remark> Result = std::make_unique<Remark>();
206  Remark &TheRemark = *Result;
207
208  // First, the type. It needs special handling since is not part of the
209  // key-value stream.
210  Expected<Type> T = parseType(*Root);
211  if (!T)
212    return T.takeError();
213  else
214    TheRemark.RemarkType = *T;
215
216  // Then, parse the fields, one by one.
217  for (yaml::KeyValueNode &RemarkField : *Root) {
218    Expected<StringRef> MaybeKey = parseKey(RemarkField);
219    if (!MaybeKey)
220      return MaybeKey.takeError();
221    StringRef KeyName = *MaybeKey;
222
223    if (KeyName == "Pass") {
224      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
225        TheRemark.PassName = *MaybeStr;
226      else
227        return MaybeStr.takeError();
228    } else if (KeyName == "Name") {
229      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
230        TheRemark.RemarkName = *MaybeStr;
231      else
232        return MaybeStr.takeError();
233    } else if (KeyName == "Function") {
234      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
235        TheRemark.FunctionName = *MaybeStr;
236      else
237        return MaybeStr.takeError();
238    } else if (KeyName == "Hotness") {
239      if (Expected<unsigned> MaybeU = parseUnsigned(RemarkField))
240        TheRemark.Hotness = *MaybeU;
241      else
242        return MaybeU.takeError();
243    } else if (KeyName == "DebugLoc") {
244      if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(RemarkField))
245        TheRemark.Loc = *MaybeLoc;
246      else
247        return MaybeLoc.takeError();
248    } else if (KeyName == "Args") {
249      auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
250      if (!Args)
251        return error("wrong value type for key.", RemarkField);
252
253      for (yaml::Node &Arg : *Args) {
254        if (Expected<Argument> MaybeArg = parseArg(Arg))
255          TheRemark.Args.push_back(*MaybeArg);
256        else
257          return MaybeArg.takeError();
258      }
259    } else {
260      return error("unknown key.", RemarkField);
261    }
262  }
263
264  // Check if any of the mandatory fields are missing.
265  if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() ||
266      TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty())
267    return error("Type, Pass, Name or Function missing.",
268                 *RemarkEntry.getRoot());
269
270  return std::move(Result);
271}
272
273Expected<Type> YAMLRemarkParser::parseType(yaml::MappingNode &Node) {
274  auto Type = StringSwitch<remarks::Type>(Node.getRawTag())
275                  .Case("!Passed", remarks::Type::Passed)
276                  .Case("!Missed", remarks::Type::Missed)
277                  .Case("!Analysis", remarks::Type::Analysis)
278                  .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute)
279                  .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing)
280                  .Case("!Failure", remarks::Type::Failure)
281                  .Default(remarks::Type::Unknown);
282  if (Type == remarks::Type::Unknown)
283    return error("expected a remark tag.", Node);
284  return Type;
285}
286
287Expected<StringRef> YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) {
288  if (auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey()))
289    return Key->getRawValue();
290
291  return error("key is not a string.", Node);
292}
293
294Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
295  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
296  if (!Value)
297    return error("expected a value of scalar type.", Node);
298  StringRef Result = Value->getRawValue();
299
300  if (Result.front() == '\'')
301    Result = Result.drop_front();
302
303  if (Result.back() == '\'')
304    Result = Result.drop_back();
305
306  return Result;
307}
308
309Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
310  SmallVector<char, 4> Tmp;
311  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
312  if (!Value)
313    return error("expected a value of scalar type.", Node);
314  unsigned UnsignedValue = 0;
315  if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
316    return error("expected a value of integer type.", *Value);
317  return UnsignedValue;
318}
319
320Expected<RemarkLocation>
321YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) {
322  auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
323  if (!DebugLoc)
324    return error("expected a value of mapping type.", Node);
325
326  Optional<StringRef> File;
327  Optional<unsigned> Line;
328  Optional<unsigned> Column;
329
330  for (yaml::KeyValueNode &DLNode : *DebugLoc) {
331    Expected<StringRef> MaybeKey = parseKey(DLNode);
332    if (!MaybeKey)
333      return MaybeKey.takeError();
334    StringRef KeyName = *MaybeKey;
335
336    if (KeyName == "File") {
337      if (Expected<StringRef> MaybeStr = parseStr(DLNode))
338        File = *MaybeStr;
339      else
340        return MaybeStr.takeError();
341    } else if (KeyName == "Column") {
342      if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
343        Column = *MaybeU;
344      else
345        return MaybeU.takeError();
346    } else if (KeyName == "Line") {
347      if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
348        Line = *MaybeU;
349      else
350        return MaybeU.takeError();
351    } else {
352      return error("unknown entry in DebugLoc map.", DLNode);
353    }
354  }
355
356  // If any of the debug loc fields is missing, return an error.
357  if (!File || !Line || !Column)
358    return error("DebugLoc node incomplete.", Node);
359
360  return RemarkLocation{*File, *Line, *Column};
361}
362
363Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) {
364  auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
365  if (!ArgMap)
366    return error("expected a value of mapping type.", Node);
367
368  Optional<StringRef> KeyStr;
369  Optional<StringRef> ValueStr;
370  Optional<RemarkLocation> Loc;
371
372  for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
373    Expected<StringRef> MaybeKey = parseKey(ArgEntry);
374    if (!MaybeKey)
375      return MaybeKey.takeError();
376    StringRef KeyName = *MaybeKey;
377
378    // Try to parse debug locs.
379    if (KeyName == "DebugLoc") {
380      // Can't have multiple DebugLoc entries per argument.
381      if (Loc)
382        return error("only one DebugLoc entry is allowed per argument.",
383                     ArgEntry);
384
385      if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(ArgEntry)) {
386        Loc = *MaybeLoc;
387        continue;
388      } else
389        return MaybeLoc.takeError();
390    }
391
392    // If we already have a string, error out.
393    if (ValueStr)
394      return error("only one string entry is allowed per argument.", ArgEntry);
395
396    // Try to parse the value.
397    if (Expected<StringRef> MaybeStr = parseStr(ArgEntry))
398      ValueStr = *MaybeStr;
399    else
400      return MaybeStr.takeError();
401
402    // Keep the key from the string.
403    KeyStr = KeyName;
404  }
405
406  if (!KeyStr)
407    return error("argument key is missing.", *ArgMap);
408  if (!ValueStr)
409    return error("argument value is missing.", *ArgMap);
410
411  return Argument{*KeyStr, *ValueStr, Loc};
412}
413
414Expected<std::unique_ptr<Remark>> YAMLRemarkParser::next() {
415  if (YAMLIt == Stream.end())
416    return make_error<EndOfFileError>();
417
418  Expected<std::unique_ptr<Remark>> MaybeResult = parseRemark(*YAMLIt);
419  if (!MaybeResult) {
420    // Avoid garbage input, set the iterator to the end.
421    YAMLIt = Stream.end();
422    return MaybeResult.takeError();
423  }
424
425  ++YAMLIt;
426
427  return std::move(*MaybeResult);
428}
429
430Expected<StringRef> YAMLStrTabRemarkParser::parseStr(yaml::KeyValueNode &Node) {
431  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
432  if (!Value)
433    return error("expected a value of scalar type.", Node);
434  StringRef Result;
435  // If we have a string table, parse it as an unsigned.
436  unsigned StrID = 0;
437  if (Expected<unsigned> MaybeStrID = parseUnsigned(Node))
438    StrID = *MaybeStrID;
439  else
440    return MaybeStrID.takeError();
441
442  if (Expected<StringRef> Str = (*StrTab)[StrID])
443    Result = *Str;
444  else
445    return Str.takeError();
446
447  if (Result.front() == '\'')
448    Result = Result.drop_front();
449
450  if (Result.back() == '\'')
451    Result = Result.drop_back();
452
453  return Result;
454}
455