1//===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file defines the SarifDiagnostics object.
10//
11//===----------------------------------------------------------------------===//
12
13#include "clang/Analysis/MacroExpansionContext.h"
14#include "clang/Analysis/PathDiagnostic.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/Sarif.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Basic/Version.h"
19#include "clang/Lex/Preprocessor.h"
20#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringMap.h"
23#include "llvm/Support/ConvertUTF.h"
24#include "llvm/Support/JSON.h"
25#include "llvm/Support/Path.h"
26
27using namespace llvm;
28using namespace clang;
29using namespace ento;
30
31namespace {
32class SarifDiagnostics : public PathDiagnosticConsumer {
33  std::string OutputFile;
34  const LangOptions &LO;
35  SarifDocumentWriter SarifWriter;
36
37public:
38  SarifDiagnostics(const std::string &Output, const LangOptions &LO,
39                   const SourceManager &SM)
40      : OutputFile(Output), LO(LO), SarifWriter(SM) {}
41  ~SarifDiagnostics() override = default;
42
43  void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
44                            FilesMade *FM) override;
45
46  StringRef getName() const override { return "SarifDiagnostics"; }
47  PathGenerationScheme getGenerationScheme() const override { return Minimal; }
48  bool supportsLogicalOpControlFlow() const override { return true; }
49  bool supportsCrossFileDiagnostics() const override { return true; }
50};
51} // end anonymous namespace
52
53void ento::createSarifDiagnosticConsumer(
54    PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C,
55    const std::string &Output, const Preprocessor &PP,
56    const cross_tu::CrossTranslationUnitContext &CTU,
57    const MacroExpansionContext &MacroExpansions) {
58
59  // TODO: Emit an error here.
60  if (Output.empty())
61    return;
62
63  C.push_back(
64      new SarifDiagnostics(Output, PP.getLangOpts(), PP.getSourceManager()));
65  createTextMinimalPathDiagnosticConsumer(std::move(DiagOpts), C, Output, PP,
66                                          CTU, MacroExpansions);
67}
68
69static StringRef getRuleDescription(StringRef CheckName) {
70  return llvm::StringSwitch<StringRef>(CheckName)
71#define GET_CHECKERS
72#define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN)                 \
73  .Case(FULLNAME, HELPTEXT)
74#include "clang/StaticAnalyzer/Checkers/Checkers.inc"
75#undef CHECKER
76#undef GET_CHECKERS
77      ;
78}
79
80static StringRef getRuleHelpURIStr(StringRef CheckName) {
81  return llvm::StringSwitch<StringRef>(CheckName)
82#define GET_CHECKERS
83#define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN)                 \
84  .Case(FULLNAME, DOC_URI)
85#include "clang/StaticAnalyzer/Checkers/Checkers.inc"
86#undef CHECKER
87#undef GET_CHECKERS
88      ;
89}
90
91static ThreadFlowImportance
92calculateImportance(const PathDiagnosticPiece &Piece) {
93  switch (Piece.getKind()) {
94  case PathDiagnosticPiece::Call:
95  case PathDiagnosticPiece::Macro:
96  case PathDiagnosticPiece::Note:
97  case PathDiagnosticPiece::PopUp:
98    // FIXME: What should be reported here?
99    break;
100  case PathDiagnosticPiece::Event:
101    return Piece.getTagStr() == "ConditionBRVisitor"
102               ? ThreadFlowImportance::Important
103               : ThreadFlowImportance::Essential;
104  case PathDiagnosticPiece::ControlFlow:
105    return ThreadFlowImportance::Unimportant;
106  }
107  return ThreadFlowImportance::Unimportant;
108}
109
110/// Accepts a SourceRange corresponding to a pair of the first and last tokens
111/// and converts to a Character granular CharSourceRange.
112static CharSourceRange convertTokenRangeToCharRange(const SourceRange &R,
113                                                    const SourceManager &SM,
114                                                    const LangOptions &LO) {
115  // Caret diagnostics have the first and last locations pointed at the same
116  // location, return these as-is.
117  if (R.getBegin() == R.getEnd())
118    return CharSourceRange::getCharRange(R);
119
120  SourceLocation BeginCharLoc = R.getBegin();
121  // For token ranges, the raw end SLoc points at the first character of the
122  // last token in the range. This must be moved to one past the end of the
123  // last character using the lexer.
124  SourceLocation EndCharLoc =
125      Lexer::getLocForEndOfToken(R.getEnd(), /* Offset = */ 0, SM, LO);
126  return CharSourceRange::getCharRange(BeginCharLoc, EndCharLoc);
127}
128
129static SmallVector<ThreadFlow, 8> createThreadFlows(const PathDiagnostic *Diag,
130                                                    const LangOptions &LO) {
131  SmallVector<ThreadFlow, 8> Flows;
132  const PathPieces &Pieces = Diag->path.flatten(false);
133  for (const auto &Piece : Pieces) {
134    auto Range = convertTokenRangeToCharRange(
135        Piece->getLocation().asRange(), Piece->getLocation().getManager(), LO);
136    auto Flow = ThreadFlow::create()
137                    .setImportance(calculateImportance(*Piece))
138                    .setRange(Range)
139                    .setMessage(Piece->getString());
140    Flows.push_back(Flow);
141  }
142  return Flows;
143}
144
145static StringMap<uint32_t>
146createRuleMapping(const std::vector<const PathDiagnostic *> &Diags,
147                  SarifDocumentWriter &SarifWriter) {
148  StringMap<uint32_t> RuleMapping;
149  llvm::StringSet<> Seen;
150
151  for (const PathDiagnostic *D : Diags) {
152    StringRef CheckName = D->getCheckerName();
153    std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(CheckName);
154    if (P.second) {
155      auto Rule = SarifRule::create()
156                      .setName(CheckName)
157                      .setRuleId(CheckName)
158                      .setDescription(getRuleDescription(CheckName))
159                      .setHelpURI(getRuleHelpURIStr(CheckName));
160      size_t RuleIdx = SarifWriter.createRule(Rule);
161      RuleMapping[CheckName] = RuleIdx;
162    }
163  }
164  return RuleMapping;
165}
166
167static SarifResult createResult(const PathDiagnostic *Diag,
168                                const StringMap<uint32_t> &RuleMapping,
169                                const LangOptions &LO) {
170
171  StringRef CheckName = Diag->getCheckerName();
172  uint32_t RuleIdx = RuleMapping.lookup(CheckName);
173  auto Range = convertTokenRangeToCharRange(
174      Diag->getLocation().asRange(), Diag->getLocation().getManager(), LO);
175
176  SmallVector<ThreadFlow, 8> Flows = createThreadFlows(Diag, LO);
177  auto Result = SarifResult::create(RuleIdx)
178                    .setRuleId(CheckName)
179                    .setDiagnosticMessage(Diag->getVerboseDescription())
180                    .setDiagnosticLevel(SarifResultLevel::Warning)
181                    .setLocations({Range})
182                    .setThreadFlows(Flows);
183  return Result;
184}
185
186void SarifDiagnostics::FlushDiagnosticsImpl(
187    std::vector<const PathDiagnostic *> &Diags, FilesMade *) {
188  // We currently overwrite the file if it already exists. However, it may be
189  // useful to add a feature someday that allows the user to append a run to an
190  // existing SARIF file. One danger from that approach is that the size of the
191  // file can become large very quickly, so decoding into JSON to append a run
192  // may be an expensive operation.
193  std::error_code EC;
194  llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_TextWithCRLF);
195  if (EC) {
196    llvm::errs() << "warning: could not create file: " << EC.message() << '\n';
197    return;
198  }
199
200  std::string ToolVersion = getClangFullVersion();
201  SarifWriter.createRun("clang", "clang static analyzer", ToolVersion);
202  StringMap<uint32_t> RuleMapping = createRuleMapping(Diags, SarifWriter);
203  for (const PathDiagnostic *D : Diags) {
204    SarifResult Result = createResult(D, RuleMapping, LO);
205    SarifWriter.appendResult(Result);
206  }
207  auto Document = SarifWriter.createDocument();
208  OS << llvm::formatv("{0:2}\n", json::Value(std::move(Document)));
209}
210