1351278Sdim//===- YAMLRemarkParser.cpp -----------------------------------------------===//
2351278Sdim//
3351278Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4351278Sdim// See https://llvm.org/LICENSE.txt for license information.
5351278Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6351278Sdim//
7351278Sdim//===----------------------------------------------------------------------===//
8351278Sdim//
9351278Sdim// This file provides utility methods used by clients that want to use the
10351278Sdim// parser for remark diagnostics in LLVM.
11351278Sdim//
12351278Sdim//===----------------------------------------------------------------------===//
13351278Sdim
14351278Sdim#include "YAMLRemarkParser.h"
15351278Sdim#include "llvm/ADT/StringSwitch.h"
16351278Sdim#include "llvm/Remarks/RemarkParser.h"
17360784Sdim#include "llvm/Support/Endian.h"
18360784Sdim#include "llvm/Support/Path.h"
19351278Sdim
20351278Sdimusing namespace llvm;
21351278Sdimusing namespace llvm::remarks;
22351278Sdim
23351278Sdimchar YAMLParseError::ID = 0;
24351278Sdim
25351278Sdimstatic void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
26351278Sdim  assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
27351278Sdim  std::string &Message = *static_cast<std::string *>(Ctx);
28351278Sdim  assert(Message.empty() && "Expected an empty string.");
29351278Sdim  raw_string_ostream OS(Message);
30351278Sdim  Diag.print(/*ProgName=*/nullptr, OS, /*ShowColors*/ false,
31351278Sdim             /*ShowKindLabels*/ true);
32351278Sdim  OS << '\n';
33351278Sdim  OS.flush();
34351278Sdim}
35351278Sdim
36351278SdimYAMLParseError::YAMLParseError(StringRef Msg, SourceMgr &SM,
37351278Sdim                               yaml::Stream &Stream, yaml::Node &Node) {
38351278Sdim  // 1) Set up a diagnostic handler to avoid errors being printed out to
39351278Sdim  // stderr.
40351278Sdim  // 2) Use the stream to print the error with the associated node.
41351278Sdim  // 3) The stream will use the source manager to print the error, which will
42351278Sdim  // call the diagnostic handler.
43351278Sdim  // 4) The diagnostic handler will stream the error directly into this object's
44351278Sdim  // Message member, which is used when logging is asked for.
45351278Sdim  auto OldDiagHandler = SM.getDiagHandler();
46351278Sdim  auto OldDiagCtx = SM.getDiagContext();
47351278Sdim  SM.setDiagHandler(handleDiagnostic, &Message);
48351278Sdim  Stream.printError(&Node, Twine(Msg) + Twine('\n'));
49351278Sdim  // Restore the old handlers.
50351278Sdim  SM.setDiagHandler(OldDiagHandler, OldDiagCtx);
51351278Sdim}
52351278Sdim
53351278Sdimstatic SourceMgr setupSM(std::string &LastErrorMessage) {
54351278Sdim  SourceMgr SM;
55351278Sdim  SM.setDiagHandler(handleDiagnostic, &LastErrorMessage);
56351278Sdim  return SM;
57351278Sdim}
58351278Sdim
59360784Sdim// Parse the magic number. This function returns true if this represents remark
60360784Sdim// metadata, false otherwise.
61360784Sdimstatic Expected<bool> parseMagic(StringRef &Buf) {
62360784Sdim  if (!Buf.consume_front(remarks::Magic))
63360784Sdim    return false;
64360784Sdim
65360784Sdim  if (Buf.size() < 1 || !Buf.consume_front(StringRef("\0", 1)))
66360784Sdim    return createStringError(std::errc::illegal_byte_sequence,
67360784Sdim                             "Expecting \\0 after magic number.");
68360784Sdim  return true;
69360784Sdim}
70360784Sdim
71360784Sdimstatic Expected<uint64_t> parseVersion(StringRef &Buf) {
72360784Sdim  if (Buf.size() < sizeof(uint64_t))
73360784Sdim    return createStringError(std::errc::illegal_byte_sequence,
74360784Sdim                             "Expecting version number.");
75360784Sdim
76360784Sdim  uint64_t Version =
77360784Sdim      support::endian::read<uint64_t, support::little, support::unaligned>(
78360784Sdim          Buf.data());
79360784Sdim  if (Version != remarks::CurrentRemarkVersion)
80360784Sdim    return createStringError(std::errc::illegal_byte_sequence,
81360784Sdim                             "Mismatching remark version. Got %" PRId64
82360784Sdim                             ", expected %" PRId64 ".",
83360784Sdim                             Version, remarks::CurrentRemarkVersion);
84360784Sdim  Buf = Buf.drop_front(sizeof(uint64_t));
85360784Sdim  return Version;
86360784Sdim}
87360784Sdim
88360784Sdimstatic Expected<uint64_t> parseStrTabSize(StringRef &Buf) {
89360784Sdim  if (Buf.size() < sizeof(uint64_t))
90360784Sdim    return createStringError(std::errc::illegal_byte_sequence,
91360784Sdim                             "Expecting string table size.");
92360784Sdim  uint64_t StrTabSize =
93360784Sdim      support::endian::read<uint64_t, support::little, support::unaligned>(
94360784Sdim          Buf.data());
95360784Sdim  Buf = Buf.drop_front(sizeof(uint64_t));
96360784Sdim  return StrTabSize;
97360784Sdim}
98360784Sdim
99360784Sdimstatic Expected<ParsedStringTable> parseStrTab(StringRef &Buf,
100360784Sdim                                               uint64_t StrTabSize) {
101360784Sdim  if (Buf.size() < StrTabSize)
102360784Sdim    return createStringError(std::errc::illegal_byte_sequence,
103360784Sdim                             "Expecting string table.");
104360784Sdim
105360784Sdim  // Attach the string table to the parser.
106360784Sdim  ParsedStringTable Result(StringRef(Buf.data(), StrTabSize));
107360784Sdim  Buf = Buf.drop_front(StrTabSize);
108360784Sdim  return Expected<ParsedStringTable>(std::move(Result));
109360784Sdim}
110360784Sdim
111360784SdimExpected<std::unique_ptr<YAMLRemarkParser>>
112360784Sdimremarks::createYAMLParserFromMeta(StringRef Buf,
113360784Sdim                                  Optional<ParsedStringTable> StrTab,
114360784Sdim                                  Optional<StringRef> ExternalFilePrependPath) {
115360784Sdim  // We now have a magic number. The metadata has to be correct.
116360784Sdim  Expected<bool> isMeta = parseMagic(Buf);
117360784Sdim  if (!isMeta)
118360784Sdim    return isMeta.takeError();
119360784Sdim  // If it's not recognized as metadata, roll back.
120360784Sdim  std::unique_ptr<MemoryBuffer> SeparateBuf;
121360784Sdim  if (*isMeta) {
122360784Sdim    Expected<uint64_t> Version = parseVersion(Buf);
123360784Sdim    if (!Version)
124360784Sdim      return Version.takeError();
125360784Sdim
126360784Sdim    Expected<uint64_t> StrTabSize = parseStrTabSize(Buf);
127360784Sdim    if (!StrTabSize)
128360784Sdim      return StrTabSize.takeError();
129360784Sdim
130360784Sdim    // If the size of string table is not 0, try to build one.
131360784Sdim    if (*StrTabSize != 0) {
132360784Sdim      if (StrTab)
133360784Sdim        return createStringError(std::errc::illegal_byte_sequence,
134360784Sdim                                 "String table already provided.");
135360784Sdim      Expected<ParsedStringTable> MaybeStrTab = parseStrTab(Buf, *StrTabSize);
136360784Sdim      if (!MaybeStrTab)
137360784Sdim        return MaybeStrTab.takeError();
138360784Sdim      StrTab = std::move(*MaybeStrTab);
139360784Sdim    }
140360784Sdim    // If it starts with "---", there is no external file.
141360784Sdim    if (!Buf.startswith("---")) {
142360784Sdim      // At this point, we expect Buf to contain the external file path.
143360784Sdim      StringRef ExternalFilePath = Buf;
144360784Sdim      SmallString<80> FullPath;
145360784Sdim      if (ExternalFilePrependPath)
146360784Sdim        FullPath = *ExternalFilePrependPath;
147360784Sdim      sys::path::append(FullPath, ExternalFilePath);
148360784Sdim
149360784Sdim      // Try to open the file and start parsing from there.
150360784Sdim      ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
151360784Sdim          MemoryBuffer::getFile(FullPath);
152360784Sdim      if (std::error_code EC = BufferOrErr.getError())
153360784Sdim        return createFileError(FullPath, EC);
154360784Sdim
155360784Sdim      // Keep the buffer alive.
156360784Sdim      SeparateBuf = std::move(*BufferOrErr);
157360784Sdim      Buf = SeparateBuf->getBuffer();
158360784Sdim    }
159360784Sdim  }
160360784Sdim
161360784Sdim  std::unique_ptr<YAMLRemarkParser> Result =
162360784Sdim      StrTab
163360784Sdim          ? std::make_unique<YAMLStrTabRemarkParser>(Buf, std::move(*StrTab))
164360784Sdim          : std::make_unique<YAMLRemarkParser>(Buf);
165360784Sdim  if (SeparateBuf)
166360784Sdim    Result->SeparateBuf = std::move(SeparateBuf);
167360784Sdim  return std::move(Result);
168360784Sdim}
169360784Sdim
170360784SdimYAMLRemarkParser::YAMLRemarkParser(StringRef Buf)
171360784Sdim    : YAMLRemarkParser(Buf, None) {}
172360784Sdim
173351278SdimYAMLRemarkParser::YAMLRemarkParser(StringRef Buf,
174360784Sdim                                   Optional<ParsedStringTable> StrTab)
175360784Sdim    : RemarkParser{Format::YAML}, StrTab(std::move(StrTab)), LastErrorMessage(),
176351278Sdim      SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {}
177351278Sdim
178351278SdimError YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) {
179351278Sdim  return make_error<YAMLParseError>(Message, SM, Stream, Node);
180351278Sdim}
181351278Sdim
182351278SdimError YAMLRemarkParser::error() {
183351278Sdim  if (LastErrorMessage.empty())
184351278Sdim    return Error::success();
185351278Sdim  Error E = make_error<YAMLParseError>(LastErrorMessage);
186351278Sdim  LastErrorMessage.clear();
187351278Sdim  return E;
188351278Sdim}
189351278Sdim
190351278SdimExpected<std::unique_ptr<Remark>>
191351278SdimYAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
192351278Sdim  if (Error E = error())
193351278Sdim    return std::move(E);
194351278Sdim
195351278Sdim  yaml::Node *YAMLRoot = RemarkEntry.getRoot();
196351278Sdim  if (!YAMLRoot) {
197351278Sdim    return createStringError(std::make_error_code(std::errc::invalid_argument),
198351278Sdim                             "not a valid YAML file.");
199351278Sdim  }
200351278Sdim
201351278Sdim  auto *Root = dyn_cast<yaml::MappingNode>(YAMLRoot);
202351278Sdim  if (!Root)
203351278Sdim    return error("document root is not of mapping type.", *YAMLRoot);
204351278Sdim
205360784Sdim  std::unique_ptr<Remark> Result = std::make_unique<Remark>();
206351278Sdim  Remark &TheRemark = *Result;
207351278Sdim
208351278Sdim  // First, the type. It needs special handling since is not part of the
209351278Sdim  // key-value stream.
210351278Sdim  Expected<Type> T = parseType(*Root);
211351278Sdim  if (!T)
212351278Sdim    return T.takeError();
213351278Sdim  else
214351278Sdim    TheRemark.RemarkType = *T;
215351278Sdim
216351278Sdim  // Then, parse the fields, one by one.
217351278Sdim  for (yaml::KeyValueNode &RemarkField : *Root) {
218351278Sdim    Expected<StringRef> MaybeKey = parseKey(RemarkField);
219351278Sdim    if (!MaybeKey)
220351278Sdim      return MaybeKey.takeError();
221351278Sdim    StringRef KeyName = *MaybeKey;
222351278Sdim
223351278Sdim    if (KeyName == "Pass") {
224351278Sdim      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
225351278Sdim        TheRemark.PassName = *MaybeStr;
226351278Sdim      else
227351278Sdim        return MaybeStr.takeError();
228351278Sdim    } else if (KeyName == "Name") {
229351278Sdim      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
230351278Sdim        TheRemark.RemarkName = *MaybeStr;
231351278Sdim      else
232351278Sdim        return MaybeStr.takeError();
233351278Sdim    } else if (KeyName == "Function") {
234351278Sdim      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
235351278Sdim        TheRemark.FunctionName = *MaybeStr;
236351278Sdim      else
237351278Sdim        return MaybeStr.takeError();
238351278Sdim    } else if (KeyName == "Hotness") {
239351278Sdim      if (Expected<unsigned> MaybeU = parseUnsigned(RemarkField))
240351278Sdim        TheRemark.Hotness = *MaybeU;
241351278Sdim      else
242351278Sdim        return MaybeU.takeError();
243351278Sdim    } else if (KeyName == "DebugLoc") {
244351278Sdim      if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(RemarkField))
245351278Sdim        TheRemark.Loc = *MaybeLoc;
246351278Sdim      else
247351278Sdim        return MaybeLoc.takeError();
248351278Sdim    } else if (KeyName == "Args") {
249351278Sdim      auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
250351278Sdim      if (!Args)
251351278Sdim        return error("wrong value type for key.", RemarkField);
252351278Sdim
253351278Sdim      for (yaml::Node &Arg : *Args) {
254351278Sdim        if (Expected<Argument> MaybeArg = parseArg(Arg))
255351278Sdim          TheRemark.Args.push_back(*MaybeArg);
256351278Sdim        else
257351278Sdim          return MaybeArg.takeError();
258351278Sdim      }
259351278Sdim    } else {
260351278Sdim      return error("unknown key.", RemarkField);
261351278Sdim    }
262351278Sdim  }
263351278Sdim
264351278Sdim  // Check if any of the mandatory fields are missing.
265351278Sdim  if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() ||
266351278Sdim      TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty())
267351278Sdim    return error("Type, Pass, Name or Function missing.",
268351278Sdim                 *RemarkEntry.getRoot());
269351278Sdim
270351278Sdim  return std::move(Result);
271351278Sdim}
272351278Sdim
273351278SdimExpected<Type> YAMLRemarkParser::parseType(yaml::MappingNode &Node) {
274351278Sdim  auto Type = StringSwitch<remarks::Type>(Node.getRawTag())
275351278Sdim                  .Case("!Passed", remarks::Type::Passed)
276351278Sdim                  .Case("!Missed", remarks::Type::Missed)
277351278Sdim                  .Case("!Analysis", remarks::Type::Analysis)
278351278Sdim                  .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute)
279351278Sdim                  .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing)
280351278Sdim                  .Case("!Failure", remarks::Type::Failure)
281351278Sdim                  .Default(remarks::Type::Unknown);
282351278Sdim  if (Type == remarks::Type::Unknown)
283351278Sdim    return error("expected a remark tag.", Node);
284351278Sdim  return Type;
285351278Sdim}
286351278Sdim
287351278SdimExpected<StringRef> YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) {
288351278Sdim  if (auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey()))
289351278Sdim    return Key->getRawValue();
290351278Sdim
291351278Sdim  return error("key is not a string.", Node);
292351278Sdim}
293351278Sdim
294351278SdimExpected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
295351278Sdim  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
296351278Sdim  if (!Value)
297351278Sdim    return error("expected a value of scalar type.", Node);
298360784Sdim  StringRef Result = Value->getRawValue();
299351278Sdim
300351278Sdim  if (Result.front() == '\'')
301351278Sdim    Result = Result.drop_front();
302351278Sdim
303351278Sdim  if (Result.back() == '\'')
304351278Sdim    Result = Result.drop_back();
305351278Sdim
306351278Sdim  return Result;
307351278Sdim}
308351278Sdim
309351278SdimExpected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
310351278Sdim  SmallVector<char, 4> Tmp;
311351278Sdim  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
312351278Sdim  if (!Value)
313351278Sdim    return error("expected a value of scalar type.", Node);
314351278Sdim  unsigned UnsignedValue = 0;
315351278Sdim  if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
316351278Sdim    return error("expected a value of integer type.", *Value);
317351278Sdim  return UnsignedValue;
318351278Sdim}
319351278Sdim
320351278SdimExpected<RemarkLocation>
321351278SdimYAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) {
322351278Sdim  auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
323351278Sdim  if (!DebugLoc)
324351278Sdim    return error("expected a value of mapping type.", Node);
325351278Sdim
326351278Sdim  Optional<StringRef> File;
327351278Sdim  Optional<unsigned> Line;
328351278Sdim  Optional<unsigned> Column;
329351278Sdim
330351278Sdim  for (yaml::KeyValueNode &DLNode : *DebugLoc) {
331351278Sdim    Expected<StringRef> MaybeKey = parseKey(DLNode);
332351278Sdim    if (!MaybeKey)
333351278Sdim      return MaybeKey.takeError();
334351278Sdim    StringRef KeyName = *MaybeKey;
335351278Sdim
336351278Sdim    if (KeyName == "File") {
337351278Sdim      if (Expected<StringRef> MaybeStr = parseStr(DLNode))
338351278Sdim        File = *MaybeStr;
339351278Sdim      else
340351278Sdim        return MaybeStr.takeError();
341351278Sdim    } else if (KeyName == "Column") {
342351278Sdim      if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
343351278Sdim        Column = *MaybeU;
344351278Sdim      else
345351278Sdim        return MaybeU.takeError();
346351278Sdim    } else if (KeyName == "Line") {
347351278Sdim      if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
348351278Sdim        Line = *MaybeU;
349351278Sdim      else
350351278Sdim        return MaybeU.takeError();
351351278Sdim    } else {
352351278Sdim      return error("unknown entry in DebugLoc map.", DLNode);
353351278Sdim    }
354351278Sdim  }
355351278Sdim
356351278Sdim  // If any of the debug loc fields is missing, return an error.
357351278Sdim  if (!File || !Line || !Column)
358351278Sdim    return error("DebugLoc node incomplete.", Node);
359351278Sdim
360351278Sdim  return RemarkLocation{*File, *Line, *Column};
361351278Sdim}
362351278Sdim
363351278SdimExpected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) {
364351278Sdim  auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
365351278Sdim  if (!ArgMap)
366351278Sdim    return error("expected a value of mapping type.", Node);
367351278Sdim
368351278Sdim  Optional<StringRef> KeyStr;
369351278Sdim  Optional<StringRef> ValueStr;
370351278Sdim  Optional<RemarkLocation> Loc;
371351278Sdim
372351278Sdim  for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
373351278Sdim    Expected<StringRef> MaybeKey = parseKey(ArgEntry);
374351278Sdim    if (!MaybeKey)
375351278Sdim      return MaybeKey.takeError();
376351278Sdim    StringRef KeyName = *MaybeKey;
377351278Sdim
378351278Sdim    // Try to parse debug locs.
379351278Sdim    if (KeyName == "DebugLoc") {
380351278Sdim      // Can't have multiple DebugLoc entries per argument.
381351278Sdim      if (Loc)
382351278Sdim        return error("only one DebugLoc entry is allowed per argument.",
383351278Sdim                     ArgEntry);
384351278Sdim
385351278Sdim      if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(ArgEntry)) {
386351278Sdim        Loc = *MaybeLoc;
387351278Sdim        continue;
388351278Sdim      } else
389351278Sdim        return MaybeLoc.takeError();
390351278Sdim    }
391351278Sdim
392351278Sdim    // If we already have a string, error out.
393351278Sdim    if (ValueStr)
394351278Sdim      return error("only one string entry is allowed per argument.", ArgEntry);
395351278Sdim
396351278Sdim    // Try to parse the value.
397351278Sdim    if (Expected<StringRef> MaybeStr = parseStr(ArgEntry))
398351278Sdim      ValueStr = *MaybeStr;
399351278Sdim    else
400351278Sdim      return MaybeStr.takeError();
401351278Sdim
402351278Sdim    // Keep the key from the string.
403351278Sdim    KeyStr = KeyName;
404351278Sdim  }
405351278Sdim
406351278Sdim  if (!KeyStr)
407351278Sdim    return error("argument key is missing.", *ArgMap);
408351278Sdim  if (!ValueStr)
409351278Sdim    return error("argument value is missing.", *ArgMap);
410351278Sdim
411351278Sdim  return Argument{*KeyStr, *ValueStr, Loc};
412351278Sdim}
413351278Sdim
414351278SdimExpected<std::unique_ptr<Remark>> YAMLRemarkParser::next() {
415351278Sdim  if (YAMLIt == Stream.end())
416351278Sdim    return make_error<EndOfFileError>();
417351278Sdim
418351278Sdim  Expected<std::unique_ptr<Remark>> MaybeResult = parseRemark(*YAMLIt);
419351278Sdim  if (!MaybeResult) {
420351278Sdim    // Avoid garbage input, set the iterator to the end.
421351278Sdim    YAMLIt = Stream.end();
422351278Sdim    return MaybeResult.takeError();
423351278Sdim  }
424351278Sdim
425351278Sdim  ++YAMLIt;
426351278Sdim
427351278Sdim  return std::move(*MaybeResult);
428351278Sdim}
429360784Sdim
430360784SdimExpected<StringRef> YAMLStrTabRemarkParser::parseStr(yaml::KeyValueNode &Node) {
431360784Sdim  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
432360784Sdim  if (!Value)
433360784Sdim    return error("expected a value of scalar type.", Node);
434360784Sdim  StringRef Result;
435360784Sdim  // If we have a string table, parse it as an unsigned.
436360784Sdim  unsigned StrID = 0;
437360784Sdim  if (Expected<unsigned> MaybeStrID = parseUnsigned(Node))
438360784Sdim    StrID = *MaybeStrID;
439360784Sdim  else
440360784Sdim    return MaybeStrID.takeError();
441360784Sdim
442360784Sdim  if (Expected<StringRef> Str = (*StrTab)[StrID])
443360784Sdim    Result = *Str;
444360784Sdim  else
445360784Sdim    return Str.takeError();
446360784Sdim
447360784Sdim  if (Result.front() == '\'')
448360784Sdim    Result = Result.drop_front();
449360784Sdim
450360784Sdim  if (Result.back() == '\'')
451360784Sdim    Result = Result.drop_back();
452360784Sdim
453360784Sdim  return Result;
454360784Sdim}
455