1//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8/// 9/// \file 10/// This file contains the declaration of the SARIFDocumentWriter class, and 11/// associated builders such as: 12/// - \ref SarifArtifact 13/// - \ref SarifArtifactLocation 14/// - \ref SarifRule 15/// - \ref SarifResult 16//===----------------------------------------------------------------------===// 17#include "clang/Basic/Sarif.h" 18#include "clang/Basic/SourceLocation.h" 19#include "clang/Basic/SourceManager.h" 20#include "llvm/ADT/ArrayRef.h" 21#include "llvm/ADT/STLExtras.h" 22#include "llvm/ADT/StringMap.h" 23#include "llvm/ADT/StringRef.h" 24#include "llvm/Support/ConvertUTF.h" 25#include "llvm/Support/JSON.h" 26#include "llvm/Support/Path.h" 27 28#include <optional> 29#include <string> 30#include <utility> 31 32using namespace clang; 33using namespace llvm; 34 35using clang::detail::SarifArtifact; 36using clang::detail::SarifArtifactLocation; 37 38static StringRef getFileName(const FileEntry &FE) { 39 StringRef Filename = FE.tryGetRealPathName(); 40 if (Filename.empty()) 41 Filename = FE.getName(); 42 return Filename; 43} 44/// \name URI 45/// @{ 46 47/// \internal 48/// \brief 49/// Return the RFC3986 encoding of the input character. 50/// 51/// \param C Character to encode to RFC3986. 52/// 53/// \return The RFC3986 representation of \c C. 54static std::string percentEncodeURICharacter(char C) { 55 // RFC 3986 claims alpha, numeric, and this handful of 56 // characters are not reserved for the path component and 57 // should be written out directly. Otherwise, percent 58 // encode the character and write that out instead of the 59 // reserved character. 60 if (llvm::isAlnum(C) || 61 StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) 62 return std::string(&C, 1); 63 return "%" + llvm::toHex(StringRef(&C, 1)); 64} 65 66/// \internal 67/// \brief Return a URI representing the given file name. 68/// 69/// \param Filename The filename to be represented as URI. 70/// 71/// \return RFC3986 URI representing the input file name. 72static std::string fileNameToURI(StringRef Filename) { 73 SmallString<32> Ret = StringRef("file://"); 74 75 // Get the root name to see if it has a URI authority. 76 StringRef Root = sys::path::root_name(Filename); 77 if (Root.startswith("//")) { 78 // There is an authority, so add it to the URI. 79 Ret += Root.drop_front(2).str(); 80 } else if (!Root.empty()) { 81 // There is no authority, so end the component and add the root to the URI. 82 Ret += Twine("/" + Root).str(); 83 } 84 85 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); 86 assert(Iter != End && "Expected there to be a non-root path component."); 87 // Add the rest of the path components, encoding any reserved characters; 88 // we skip past the first path component, as it was handled it above. 89 std::for_each(++Iter, End, [&Ret](StringRef Component) { 90 // For reasons unknown to me, we may get a backslash with Windows native 91 // paths for the initial backslash following the drive component, which 92 // we need to ignore as a URI path part. 93 if (Component == "\\") 94 return; 95 96 // Add the separator between the previous path part and the one being 97 // currently processed. 98 Ret += "/"; 99 100 // URI encode the part. 101 for (char C : Component) { 102 Ret += percentEncodeURICharacter(C); 103 } 104 }); 105 106 return std::string(Ret); 107} 108/// @} 109 110/// \brief Calculate the column position expressed in the number of UTF-8 code 111/// points from column start to the source location 112/// 113/// \param Loc The source location whose column needs to be calculated. 114/// \param TokenLen Optional hint for when the token is multiple bytes long. 115/// 116/// \return The column number as a UTF-8 aware byte offset from column start to 117/// the effective source location. 118static unsigned int adjustColumnPos(FullSourceLoc Loc, 119 unsigned int TokenLen = 0) { 120 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); 121 122 std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc(); 123 std::optional<MemoryBufferRef> Buf = 124 Loc.getManager().getBufferOrNone(LocInfo.first); 125 assert(Buf && "got an invalid buffer for the location's file"); 126 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && 127 "token extends past end of buffer?"); 128 129 // Adjust the offset to be the start of the line, since we'll be counting 130 // Unicode characters from there until our column offset. 131 unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); 132 unsigned int Ret = 1; 133 while (Off < (LocInfo.second + TokenLen)) { 134 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); 135 Ret++; 136 } 137 138 return Ret; 139} 140 141/// \name SARIF Utilities 142/// @{ 143 144/// \internal 145json::Object createMessage(StringRef Text) { 146 return json::Object{{"text", Text.str()}}; 147} 148 149/// \internal 150/// \pre CharSourceRange must be a token range 151static json::Object createTextRegion(const SourceManager &SM, 152 const CharSourceRange &R) { 153 FullSourceLoc BeginCharLoc{R.getBegin(), SM}; 154 FullSourceLoc EndCharLoc{R.getEnd(), SM}; 155 json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()}, 156 {"startColumn", adjustColumnPos(BeginCharLoc)}}; 157 158 if (BeginCharLoc == EndCharLoc) { 159 Region["endColumn"] = adjustColumnPos(BeginCharLoc); 160 } else { 161 Region["endLine"] = EndCharLoc.getExpansionLineNumber(); 162 Region["endColumn"] = adjustColumnPos(EndCharLoc); 163 } 164 return Region; 165} 166 167static json::Object createLocation(json::Object &&PhysicalLocation, 168 StringRef Message = "") { 169 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; 170 if (!Message.empty()) 171 Ret.insert({"message", createMessage(Message)}); 172 return Ret; 173} 174 175static StringRef importanceToStr(ThreadFlowImportance I) { 176 switch (I) { 177 case ThreadFlowImportance::Important: 178 return "important"; 179 case ThreadFlowImportance::Essential: 180 return "essential"; 181 case ThreadFlowImportance::Unimportant: 182 return "unimportant"; 183 } 184 llvm_unreachable("Fully covered switch is not so fully covered"); 185} 186 187static StringRef resultLevelToStr(SarifResultLevel R) { 188 switch (R) { 189 case SarifResultLevel::None: 190 return "none"; 191 case SarifResultLevel::Note: 192 return "note"; 193 case SarifResultLevel::Warning: 194 return "warning"; 195 case SarifResultLevel::Error: 196 return "error"; 197 } 198 llvm_unreachable("Potentially un-handled SarifResultLevel. " 199 "Is the switch not fully covered?"); 200} 201 202static json::Object 203createThreadFlowLocation(json::Object &&Location, 204 const ThreadFlowImportance &Importance) { 205 return json::Object{{"location", std::move(Location)}, 206 {"importance", importanceToStr(Importance)}}; 207} 208/// @} 209 210json::Object 211SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { 212 assert(R.isValid() && 213 "Cannot create a physicalLocation from invalid SourceRange!"); 214 assert(R.isCharRange() && 215 "Cannot create a physicalLocation from a token range!"); 216 FullSourceLoc Start{R.getBegin(), SourceMgr}; 217 const FileEntry *FE = Start.getExpansionLoc().getFileEntry(); 218 assert(FE != nullptr && "Diagnostic does not exist within a valid file!"); 219 220 const std::string &FileURI = fileNameToURI(getFileName(*FE)); 221 auto I = CurrentArtifacts.find(FileURI); 222 223 if (I == CurrentArtifacts.end()) { 224 uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); 225 const SarifArtifactLocation &Location = 226 SarifArtifactLocation::create(FileURI).setIndex(Idx); 227 const SarifArtifact &Artifact = SarifArtifact::create(Location) 228 .setRoles({"resultFile"}) 229 .setLength(FE->getSize()) 230 .setMimeType("text/plain"); 231 auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); 232 // If inserted, ensure the original iterator points to the newly inserted 233 // element, so it can be used downstream. 234 if (StatusIter.second) 235 I = StatusIter.first; 236 } 237 assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); 238 const SarifArtifactLocation &Location = I->second.Location; 239 json::Object ArtifactLocationObject{{"uri", Location.URI}}; 240 if (Location.Index.has_value()) 241 ArtifactLocationObject["index"] = *Location.Index; 242 return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)}, 243 {"region", createTextRegion(SourceMgr, R)}}}; 244} 245 246json::Object &SarifDocumentWriter::getCurrentTool() { 247 assert(!Closed && "SARIF Document is closed. " 248 "Need to call createRun() before using getcurrentTool!"); 249 250 // Since Closed = false here, expect there to be at least 1 Run, anything 251 // else is an invalid state. 252 assert(!Runs.empty() && "There are no runs associated with the document!"); 253 254 return *Runs.back().getAsObject()->get("tool")->getAsObject(); 255} 256 257void SarifDocumentWriter::reset() { 258 CurrentRules.clear(); 259 CurrentArtifacts.clear(); 260} 261 262void SarifDocumentWriter::endRun() { 263 // Exit early if trying to close a closed Document. 264 if (Closed) { 265 reset(); 266 return; 267 } 268 269 // Since Closed = false here, expect there to be at least 1 Run, anything 270 // else is an invalid state. 271 assert(!Runs.empty() && "There are no runs associated with the document!"); 272 273 // Flush all the rules. 274 json::Object &Tool = getCurrentTool(); 275 json::Array Rules; 276 for (const SarifRule &R : CurrentRules) { 277 json::Object Config{ 278 {"enabled", R.DefaultConfiguration.Enabled}, 279 {"level", resultLevelToStr(R.DefaultConfiguration.Level)}, 280 {"rank", R.DefaultConfiguration.Rank}}; 281 json::Object Rule{ 282 {"name", R.Name}, 283 {"id", R.Id}, 284 {"fullDescription", json::Object{{"text", R.Description}}}, 285 {"defaultConfiguration", std::move(Config)}}; 286 if (!R.HelpURI.empty()) 287 Rule["helpUri"] = R.HelpURI; 288 Rules.emplace_back(std::move(Rule)); 289 } 290 json::Object &Driver = *Tool.getObject("driver"); 291 Driver["rules"] = std::move(Rules); 292 293 // Flush all the artifacts. 294 json::Object &Run = getCurrentRun(); 295 json::Array *Artifacts = Run.getArray("artifacts"); 296 for (const auto &Pair : CurrentArtifacts) { 297 const SarifArtifact &A = Pair.getValue(); 298 json::Object Loc{{"uri", A.Location.URI}}; 299 if (A.Location.Index.has_value()) { 300 Loc["index"] = static_cast<int64_t>(*A.Location.Index); 301 } 302 json::Object Artifact; 303 Artifact["location"] = std::move(Loc); 304 if (A.Length.has_value()) 305 Artifact["length"] = static_cast<int64_t>(*A.Length); 306 if (!A.Roles.empty()) 307 Artifact["roles"] = json::Array(A.Roles); 308 if (!A.MimeType.empty()) 309 Artifact["mimeType"] = A.MimeType; 310 if (A.Offset.has_value()) 311 Artifact["offset"] = *A.Offset; 312 Artifacts->push_back(json::Value(std::move(Artifact))); 313 } 314 315 // Clear, reset temporaries before next run. 316 reset(); 317 318 // Mark the document as closed. 319 Closed = true; 320} 321 322json::Array 323SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { 324 json::Object Ret{{"locations", json::Array{}}}; 325 json::Array Locs; 326 for (const auto &ThreadFlow : ThreadFlows) { 327 json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); 328 json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); 329 Locs.emplace_back( 330 createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); 331 } 332 Ret["locations"] = std::move(Locs); 333 return json::Array{std::move(Ret)}; 334} 335 336json::Object 337SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { 338 return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; 339} 340 341void SarifDocumentWriter::createRun(StringRef ShortToolName, 342 StringRef LongToolName, 343 StringRef ToolVersion) { 344 // Clear resources associated with a previous run. 345 endRun(); 346 347 // Signify a new run has begun. 348 Closed = false; 349 350 json::Object Tool{ 351 {"driver", 352 json::Object{{"name", ShortToolName}, 353 {"fullName", LongToolName}, 354 {"language", "en-US"}, 355 {"version", ToolVersion}, 356 {"informationUri", 357 "https://clang.llvm.org/docs/UsersManual.html"}}}}; 358 json::Object TheRun{{"tool", std::move(Tool)}, 359 {"results", {}}, 360 {"artifacts", {}}, 361 {"columnKind", "unicodeCodePoints"}}; 362 Runs.emplace_back(std::move(TheRun)); 363} 364 365json::Object &SarifDocumentWriter::getCurrentRun() { 366 assert(!Closed && 367 "SARIF Document is closed. " 368 "Can only getCurrentRun() if document is opened via createRun(), " 369 "create a run first"); 370 371 // Since Closed = false here, expect there to be at least 1 Run, anything 372 // else is an invalid state. 373 assert(!Runs.empty() && "There are no runs associated with the document!"); 374 return *Runs.back().getAsObject(); 375} 376 377size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { 378 size_t Ret = CurrentRules.size(); 379 CurrentRules.emplace_back(Rule); 380 return Ret; 381} 382 383void SarifDocumentWriter::appendResult(const SarifResult &Result) { 384 size_t RuleIdx = Result.RuleIdx; 385 assert(RuleIdx < CurrentRules.size() && 386 "Trying to reference a rule that doesn't exist"); 387 const SarifRule &Rule = CurrentRules[RuleIdx]; 388 assert(Rule.DefaultConfiguration.Enabled && 389 "Cannot add a result referencing a disabled Rule"); 390 json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, 391 {"ruleIndex", static_cast<int64_t>(RuleIdx)}, 392 {"ruleId", Rule.Id}}; 393 if (!Result.Locations.empty()) { 394 json::Array Locs; 395 for (auto &Range : Result.Locations) { 396 Locs.emplace_back(createLocation(createPhysicalLocation(Range))); 397 } 398 Ret["locations"] = std::move(Locs); 399 } 400 if (!Result.ThreadFlows.empty()) 401 Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; 402 403 Ret["level"] = resultLevelToStr( 404 Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level)); 405 406 json::Object &Run = getCurrentRun(); 407 json::Array *Results = Run.getArray("results"); 408 Results->emplace_back(std::move(Ret)); 409} 410 411json::Object SarifDocumentWriter::createDocument() { 412 // Flush all temporaries to their destinations if needed. 413 endRun(); 414 415 json::Object Doc{ 416 {"$schema", SchemaURI}, 417 {"version", SchemaVersion}, 418 }; 419 if (!Runs.empty()) 420 Doc["runs"] = json::Array(Runs); 421 return Doc; 422} 423