1//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
/// This file contains the definition of the SarifDocumentWriter class, and
/// the helpers used with associated builders such as:
12/// - \ref SarifArtifact
13/// - \ref SarifArtifactLocation
14/// - \ref SarifRule
15/// - \ref SarifResult
16//===----------------------------------------------------------------------===//
#include "clang/Basic/Sarif.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Path.h"

#include <optional>
#include <string>
#include <utility>
31
32using namespace clang;
33using namespace llvm;
34
35using clang::detail::SarifArtifact;
36using clang::detail::SarifArtifactLocation;
37
38static StringRef getFileName(const FileEntry &FE) {
39  StringRef Filename = FE.tryGetRealPathName();
40  if (Filename.empty())
41    Filename = FE.getName();
42  return Filename;
43}
44/// \name URI
45/// @{
46
47/// \internal
48/// \brief
49/// Return the RFC3986 encoding of the input character.
50///
51/// \param C Character to encode to RFC3986.
52///
53/// \return The RFC3986 representation of \c C.
54static std::string percentEncodeURICharacter(char C) {
55  // RFC 3986 claims alpha, numeric, and this handful of
56  // characters are not reserved for the path component and
57  // should be written out directly. Otherwise, percent
58  // encode the character and write that out instead of the
59  // reserved character.
60  if (llvm::isAlnum(C) ||
61      StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
62    return std::string(&C, 1);
63  return "%" + llvm::toHex(StringRef(&C, 1));
64}
65
66/// \internal
67/// \brief Return a URI representing the given file name.
68///
69/// \param Filename The filename to be represented as URI.
70///
71/// \return RFC3986 URI representing the input file name.
72static std::string fileNameToURI(StringRef Filename) {
73  SmallString<32> Ret = StringRef("file://");
74
75  // Get the root name to see if it has a URI authority.
76  StringRef Root = sys::path::root_name(Filename);
77  if (Root.startswith("//")) {
78    // There is an authority, so add it to the URI.
79    Ret += Root.drop_front(2).str();
80  } else if (!Root.empty()) {
81    // There is no authority, so end the component and add the root to the URI.
82    Ret += Twine("/" + Root).str();
83  }
84
85  auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
86  assert(Iter != End && "Expected there to be a non-root path component.");
87  // Add the rest of the path components, encoding any reserved characters;
88  // we skip past the first path component, as it was handled it above.
89  std::for_each(++Iter, End, [&Ret](StringRef Component) {
90    // For reasons unknown to me, we may get a backslash with Windows native
91    // paths for the initial backslash following the drive component, which
92    // we need to ignore as a URI path part.
93    if (Component == "\\")
94      return;
95
96    // Add the separator between the previous path part and the one being
97    // currently processed.
98    Ret += "/";
99
100    // URI encode the part.
101    for (char C : Component) {
102      Ret += percentEncodeURICharacter(C);
103    }
104  });
105
106  return std::string(Ret);
107}
108///  @}
109
110/// \brief Calculate the column position expressed in the number of UTF-8 code
111/// points from column start to the source location
112///
113/// \param Loc The source location whose column needs to be calculated.
114/// \param TokenLen Optional hint for when the token is multiple bytes long.
115///
116/// \return The column number as a UTF-8 aware byte offset from column start to
117/// the effective source location.
118static unsigned int adjustColumnPos(FullSourceLoc Loc,
119                                    unsigned int TokenLen = 0) {
120  assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
121
122  std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc();
123  std::optional<MemoryBufferRef> Buf =
124      Loc.getManager().getBufferOrNone(LocInfo.first);
125  assert(Buf && "got an invalid buffer for the location's file");
126  assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
127         "token extends past end of buffer?");
128
129  // Adjust the offset to be the start of the line, since we'll be counting
130  // Unicode characters from there until our column offset.
131  unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
132  unsigned int Ret = 1;
133  while (Off < (LocInfo.second + TokenLen)) {
134    Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
135    Ret++;
136  }
137
138  return Ret;
139}
140
141/// \name SARIF Utilities
142/// @{
143
144/// \internal
145json::Object createMessage(StringRef Text) {
146  return json::Object{{"text", Text.str()}};
147}
148
149/// \internal
150/// \pre CharSourceRange must be a token range
151static json::Object createTextRegion(const SourceManager &SM,
152                                     const CharSourceRange &R) {
153  FullSourceLoc BeginCharLoc{R.getBegin(), SM};
154  FullSourceLoc EndCharLoc{R.getEnd(), SM};
155  json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()},
156                      {"startColumn", adjustColumnPos(BeginCharLoc)}};
157
158  if (BeginCharLoc == EndCharLoc) {
159    Region["endColumn"] = adjustColumnPos(BeginCharLoc);
160  } else {
161    Region["endLine"] = EndCharLoc.getExpansionLineNumber();
162    Region["endColumn"] = adjustColumnPos(EndCharLoc);
163  }
164  return Region;
165}
166
167static json::Object createLocation(json::Object &&PhysicalLocation,
168                                   StringRef Message = "") {
169  json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
170  if (!Message.empty())
171    Ret.insert({"message", createMessage(Message)});
172  return Ret;
173}
174
175static StringRef importanceToStr(ThreadFlowImportance I) {
176  switch (I) {
177  case ThreadFlowImportance::Important:
178    return "important";
179  case ThreadFlowImportance::Essential:
180    return "essential";
181  case ThreadFlowImportance::Unimportant:
182    return "unimportant";
183  }
184  llvm_unreachable("Fully covered switch is not so fully covered");
185}
186
187static StringRef resultLevelToStr(SarifResultLevel R) {
188  switch (R) {
189  case SarifResultLevel::None:
190    return "none";
191  case SarifResultLevel::Note:
192    return "note";
193  case SarifResultLevel::Warning:
194    return "warning";
195  case SarifResultLevel::Error:
196    return "error";
197  }
198  llvm_unreachable("Potentially un-handled SarifResultLevel. "
199                   "Is the switch not fully covered?");
200}
201
202static json::Object
203createThreadFlowLocation(json::Object &&Location,
204                         const ThreadFlowImportance &Importance) {
205  return json::Object{{"location", std::move(Location)},
206                      {"importance", importanceToStr(Importance)}};
207}
208///  @}
209
210json::Object
211SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
212  assert(R.isValid() &&
213         "Cannot create a physicalLocation from invalid SourceRange!");
214  assert(R.isCharRange() &&
215         "Cannot create a physicalLocation from a token range!");
216  FullSourceLoc Start{R.getBegin(), SourceMgr};
217  const FileEntry *FE = Start.getExpansionLoc().getFileEntry();
218  assert(FE != nullptr && "Diagnostic does not exist within a valid file!");
219
220  const std::string &FileURI = fileNameToURI(getFileName(*FE));
221  auto I = CurrentArtifacts.find(FileURI);
222
223  if (I == CurrentArtifacts.end()) {
224    uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
225    const SarifArtifactLocation &Location =
226        SarifArtifactLocation::create(FileURI).setIndex(Idx);
227    const SarifArtifact &Artifact = SarifArtifact::create(Location)
228                                        .setRoles({"resultFile"})
229                                        .setLength(FE->getSize())
230                                        .setMimeType("text/plain");
231    auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
232    // If inserted, ensure the original iterator points to the newly inserted
233    // element, so it can be used downstream.
234    if (StatusIter.second)
235      I = StatusIter.first;
236  }
237  assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
238  const SarifArtifactLocation &Location = I->second.Location;
239  json::Object ArtifactLocationObject{{"uri", Location.URI}};
240  if (Location.Index.has_value())
241    ArtifactLocationObject["index"] = *Location.Index;
242  return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)},
243                       {"region", createTextRegion(SourceMgr, R)}}};
244}
245
246json::Object &SarifDocumentWriter::getCurrentTool() {
247  assert(!Closed && "SARIF Document is closed. "
248                    "Need to call createRun() before using getcurrentTool!");
249
250  // Since Closed = false here, expect there to be at least 1 Run, anything
251  // else is an invalid state.
252  assert(!Runs.empty() && "There are no runs associated with the document!");
253
254  return *Runs.back().getAsObject()->get("tool")->getAsObject();
255}
256
257void SarifDocumentWriter::reset() {
258  CurrentRules.clear();
259  CurrentArtifacts.clear();
260}
261
262void SarifDocumentWriter::endRun() {
263  // Exit early if trying to close a closed Document.
264  if (Closed) {
265    reset();
266    return;
267  }
268
269  // Since Closed = false here, expect there to be at least 1 Run, anything
270  // else is an invalid state.
271  assert(!Runs.empty() && "There are no runs associated with the document!");
272
273  // Flush all the rules.
274  json::Object &Tool = getCurrentTool();
275  json::Array Rules;
276  for (const SarifRule &R : CurrentRules) {
277    json::Object Config{
278        {"enabled", R.DefaultConfiguration.Enabled},
279        {"level", resultLevelToStr(R.DefaultConfiguration.Level)},
280        {"rank", R.DefaultConfiguration.Rank}};
281    json::Object Rule{
282        {"name", R.Name},
283        {"id", R.Id},
284        {"fullDescription", json::Object{{"text", R.Description}}},
285        {"defaultConfiguration", std::move(Config)}};
286    if (!R.HelpURI.empty())
287      Rule["helpUri"] = R.HelpURI;
288    Rules.emplace_back(std::move(Rule));
289  }
290  json::Object &Driver = *Tool.getObject("driver");
291  Driver["rules"] = std::move(Rules);
292
293  // Flush all the artifacts.
294  json::Object &Run = getCurrentRun();
295  json::Array *Artifacts = Run.getArray("artifacts");
296  for (const auto &Pair : CurrentArtifacts) {
297    const SarifArtifact &A = Pair.getValue();
298    json::Object Loc{{"uri", A.Location.URI}};
299    if (A.Location.Index.has_value()) {
300      Loc["index"] = static_cast<int64_t>(*A.Location.Index);
301    }
302    json::Object Artifact;
303    Artifact["location"] = std::move(Loc);
304    if (A.Length.has_value())
305      Artifact["length"] = static_cast<int64_t>(*A.Length);
306    if (!A.Roles.empty())
307      Artifact["roles"] = json::Array(A.Roles);
308    if (!A.MimeType.empty())
309      Artifact["mimeType"] = A.MimeType;
310    if (A.Offset.has_value())
311      Artifact["offset"] = *A.Offset;
312    Artifacts->push_back(json::Value(std::move(Artifact)));
313  }
314
315  // Clear, reset temporaries before next run.
316  reset();
317
318  // Mark the document as closed.
319  Closed = true;
320}
321
322json::Array
323SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
324  json::Object Ret{{"locations", json::Array{}}};
325  json::Array Locs;
326  for (const auto &ThreadFlow : ThreadFlows) {
327    json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);
328    json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);
329    Locs.emplace_back(
330        createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));
331  }
332  Ret["locations"] = std::move(Locs);
333  return json::Array{std::move(Ret)};
334}
335
336json::Object
337SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
338  return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
339}
340
341void SarifDocumentWriter::createRun(StringRef ShortToolName,
342                                    StringRef LongToolName,
343                                    StringRef ToolVersion) {
344  // Clear resources associated with a previous run.
345  endRun();
346
347  // Signify a new run has begun.
348  Closed = false;
349
350  json::Object Tool{
351      {"driver",
352       json::Object{{"name", ShortToolName},
353                    {"fullName", LongToolName},
354                    {"language", "en-US"},
355                    {"version", ToolVersion},
356                    {"informationUri",
357                     "https://clang.llvm.org/docs/UsersManual.html"}}}};
358  json::Object TheRun{{"tool", std::move(Tool)},
359                      {"results", {}},
360                      {"artifacts", {}},
361                      {"columnKind", "unicodeCodePoints"}};
362  Runs.emplace_back(std::move(TheRun));
363}
364
365json::Object &SarifDocumentWriter::getCurrentRun() {
366  assert(!Closed &&
367         "SARIF Document is closed. "
368         "Can only getCurrentRun() if document is opened via createRun(), "
369         "create a run first");
370
371  // Since Closed = false here, expect there to be at least 1 Run, anything
372  // else is an invalid state.
373  assert(!Runs.empty() && "There are no runs associated with the document!");
374  return *Runs.back().getAsObject();
375}
376
377size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
378  size_t Ret = CurrentRules.size();
379  CurrentRules.emplace_back(Rule);
380  return Ret;
381}
382
383void SarifDocumentWriter::appendResult(const SarifResult &Result) {
384  size_t RuleIdx = Result.RuleIdx;
385  assert(RuleIdx < CurrentRules.size() &&
386         "Trying to reference a rule that doesn't exist");
387  const SarifRule &Rule = CurrentRules[RuleIdx];
388  assert(Rule.DefaultConfiguration.Enabled &&
389         "Cannot add a result referencing a disabled Rule");
390  json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
391                   {"ruleIndex", static_cast<int64_t>(RuleIdx)},
392                   {"ruleId", Rule.Id}};
393  if (!Result.Locations.empty()) {
394    json::Array Locs;
395    for (auto &Range : Result.Locations) {
396      Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
397    }
398    Ret["locations"] = std::move(Locs);
399  }
400  if (!Result.ThreadFlows.empty())
401    Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
402
403  Ret["level"] = resultLevelToStr(
404      Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level));
405
406  json::Object &Run = getCurrentRun();
407  json::Array *Results = Run.getArray("results");
408  Results->emplace_back(std::move(Ret));
409}
410
411json::Object SarifDocumentWriter::createDocument() {
412  // Flush all temporaries to their destinations if needed.
413  endRun();
414
415  json::Object Doc{
416      {"$schema", SchemaURI},
417      {"version", SchemaVersion},
418  };
419  if (!Runs.empty())
420    Doc["runs"] = json::Array(Runs);
421  return Doc;
422}
423