1341825Sdim//===- JSONCompilationDatabase.cpp ----------------------------------------===//
2243791Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6243791Sdim//
7243791Sdim//===----------------------------------------------------------------------===//
8243791Sdim//
9243791Sdim//  This file contains the implementation of the JSONCompilationDatabase.
10243791Sdim//
11243791Sdim//===----------------------------------------------------------------------===//
12243791Sdim
13243791Sdim#include "clang/Tooling/JSONCompilationDatabase.h"
14341825Sdim#include "clang/Basic/LLVM.h"
15243791Sdim#include "clang/Tooling/CompilationDatabase.h"
16243791Sdim#include "clang/Tooling/CompilationDatabasePluginRegistry.h"
17353358Sdim#include "clang/Tooling/Tooling.h"
18341825Sdim#include "llvm/ADT/Optional.h"
19353358Sdim#include "llvm/ADT/STLExtras.h"
20243791Sdim#include "llvm/ADT/SmallString.h"
21341825Sdim#include "llvm/ADT/SmallVector.h"
22341825Sdim#include "llvm/ADT/StringRef.h"
23341825Sdim#include "llvm/ADT/Triple.h"
24314564Sdim#include "llvm/Support/Allocator.h"
25341825Sdim#include "llvm/Support/Casting.h"
26314564Sdim#include "llvm/Support/CommandLine.h"
27341825Sdim#include "llvm/Support/ErrorOr.h"
28341825Sdim#include "llvm/Support/Host.h"
29341825Sdim#include "llvm/Support/MemoryBuffer.h"
30243791Sdim#include "llvm/Support/Path.h"
31314564Sdim#include "llvm/Support/StringSaver.h"
32360784Sdim#include "llvm/Support/VirtualFileSystem.h"
33341825Sdim#include "llvm/Support/YAMLParser.h"
34341825Sdim#include "llvm/Support/raw_ostream.h"
35341825Sdim#include <cassert>
36341825Sdim#include <memory>
37341825Sdim#include <string>
38276479Sdim#include <system_error>
39341825Sdim#include <tuple>
40341825Sdim#include <utility>
41341825Sdim#include <vector>
42243791Sdim
43341825Sdimusing namespace clang;
44341825Sdimusing namespace tooling;
45243791Sdim
46243791Sdimnamespace {
47243791Sdim
48341825Sdim/// A parser for escaped strings of command line arguments.
49243791Sdim///
50243791Sdim/// Assumes \-escaping for quoted arguments (see the documentation of
51243791Sdim/// unescapeCommandLine(...)).
52243791Sdimclass CommandLineArgumentParser {
53243791Sdim public:
54243791Sdim  CommandLineArgumentParser(StringRef CommandLine)
55243791Sdim      : Input(CommandLine), Position(Input.begin()-1) {}
56243791Sdim
57243791Sdim  std::vector<std::string> parse() {
58243791Sdim    bool HasMoreInput = true;
59243791Sdim    while (HasMoreInput && nextNonWhitespace()) {
60243791Sdim      std::string Argument;
61243791Sdim      HasMoreInput = parseStringInto(Argument);
62243791Sdim      CommandLine.push_back(Argument);
63243791Sdim    }
64243791Sdim    return CommandLine;
65243791Sdim  }
66243791Sdim
67243791Sdim private:
68243791Sdim  // All private methods return true if there is more input available.
69243791Sdim
70243791Sdim  bool parseStringInto(std::string &String) {
71243791Sdim    do {
72243791Sdim      if (*Position == '"') {
73249423Sdim        if (!parseDoubleQuotedStringInto(String)) return false;
74249423Sdim      } else if (*Position == '\'') {
75249423Sdim        if (!parseSingleQuotedStringInto(String)) return false;
76243791Sdim      } else {
77243791Sdim        if (!parseFreeStringInto(String)) return false;
78243791Sdim      }
79243791Sdim    } while (*Position != ' ');
80243791Sdim    return true;
81243791Sdim  }
82243791Sdim
83249423Sdim  bool parseDoubleQuotedStringInto(std::string &String) {
84243791Sdim    if (!next()) return false;
85243791Sdim    while (*Position != '"') {
86243791Sdim      if (!skipEscapeCharacter()) return false;
87243791Sdim      String.push_back(*Position);
88243791Sdim      if (!next()) return false;
89243791Sdim    }
90243791Sdim    return next();
91243791Sdim  }
92243791Sdim
93249423Sdim  bool parseSingleQuotedStringInto(std::string &String) {
94249423Sdim    if (!next()) return false;
95249423Sdim    while (*Position != '\'') {
96249423Sdim      String.push_back(*Position);
97249423Sdim      if (!next()) return false;
98249423Sdim    }
99249423Sdim    return next();
100249423Sdim  }
101249423Sdim
102243791Sdim  bool parseFreeStringInto(std::string &String) {
103243791Sdim    do {
104243791Sdim      if (!skipEscapeCharacter()) return false;
105243791Sdim      String.push_back(*Position);
106243791Sdim      if (!next()) return false;
107249423Sdim    } while (*Position != ' ' && *Position != '"' && *Position != '\'');
108243791Sdim    return true;
109243791Sdim  }
110243791Sdim
111243791Sdim  bool skipEscapeCharacter() {
112243791Sdim    if (*Position == '\\') {
113243791Sdim      return next();
114243791Sdim    }
115243791Sdim    return true;
116243791Sdim  }
117243791Sdim
118243791Sdim  bool nextNonWhitespace() {
119243791Sdim    do {
120243791Sdim      if (!next()) return false;
121243791Sdim    } while (*Position == ' ');
122243791Sdim    return true;
123243791Sdim  }
124243791Sdim
125243791Sdim  bool next() {
126243791Sdim    ++Position;
127243791Sdim    return Position != Input.end();
128243791Sdim  }
129243791Sdim
130243791Sdim  const StringRef Input;
131243791Sdim  StringRef::iterator Position;
132243791Sdim  std::vector<std::string> CommandLine;
133243791Sdim};
134243791Sdim
135314564Sdimstd::vector<std::string> unescapeCommandLine(JSONCommandLineSyntax Syntax,
136314564Sdim                                             StringRef EscapedCommandLine) {
137314564Sdim  if (Syntax == JSONCommandLineSyntax::AutoDetect) {
138314564Sdim    Syntax = JSONCommandLineSyntax::Gnu;
139314564Sdim    llvm::Triple Triple(llvm::sys::getProcessTriple());
140314564Sdim    if (Triple.getOS() == llvm::Triple::OSType::Win32) {
141314564Sdim      // Assume Windows command line parsing on Win32 unless the triple
142314564Sdim      // explicitly tells us otherwise.
143314564Sdim      if (!Triple.hasEnvironment() ||
144314564Sdim          Triple.getEnvironment() == llvm::Triple::EnvironmentType::MSVC)
145314564Sdim        Syntax = JSONCommandLineSyntax::Windows;
146314564Sdim    }
147314564Sdim  }
148314564Sdim
149314564Sdim  if (Syntax == JSONCommandLineSyntax::Windows) {
150314564Sdim    llvm::BumpPtrAllocator Alloc;
151314564Sdim    llvm::StringSaver Saver(Alloc);
152314564Sdim    llvm::SmallVector<const char *, 64> T;
153314564Sdim    llvm::cl::TokenizeWindowsCommandLine(EscapedCommandLine, Saver, T);
154314564Sdim    std::vector<std::string> Result(T.begin(), T.end());
155314564Sdim    return Result;
156314564Sdim  }
157314564Sdim  assert(Syntax == JSONCommandLineSyntax::Gnu);
158243791Sdim  CommandLineArgumentParser parser(EscapedCommandLine);
159243791Sdim  return parser.parse();
160243791Sdim}
161243791Sdim
162344779Sdim// This plugin locates a nearby compile_command.json file, and also infers
163344779Sdim// compile commands for files not present in the database.
164243791Sdimclass JSONCompilationDatabasePlugin : public CompilationDatabasePlugin {
165280031Sdim  std::unique_ptr<CompilationDatabase>
166280031Sdim  loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override {
167249423Sdim    SmallString<1024> JSONDatabasePath(Directory);
168243791Sdim    llvm::sys::path::append(JSONDatabasePath, "compile_commands.json");
169344779Sdim    auto Base = JSONCompilationDatabase::loadFromFile(
170321369Sdim        JSONDatabasePath, ErrorMessage, JSONCommandLineSyntax::AutoDetect);
171353358Sdim    return Base ? inferTargetAndDriverMode(
172360784Sdim                      inferMissingCompileCommands(expandResponseFiles(
173360784Sdim                          std::move(Base), llvm::vfs::getRealFileSystem())))
174353358Sdim                : nullptr;
175243791Sdim  }
176243791Sdim};
177243791Sdim
178341825Sdim} // namespace
179261991Sdim
// Register the JSONCompilationDatabasePlugin with the
// CompilationDatabasePluginRegistry using this statically initialized variable.
// The variable itself is never read; constructing it is what performs the
// registration.
static CompilationDatabasePluginRegistry::Add<JSONCompilationDatabasePlugin>
X("json-compilation-database", "Reads JSON formatted compilation databases");
184243791Sdim
namespace clang {
namespace tooling {

// This anchor is used to force the linker to link in the generated object file
// and thus register the JSONCompilationDatabasePlugin.
// NOTE(review): presumably a header seen by clients reads this variable so
// that the reference keeps this object file alive -- confirm against
// CompilationDatabasePluginRegistry.h.
volatile int JSONAnchorSource = 0;

} // namespace tooling
} // namespace clang
194341825Sdim
195280031Sdimstd::unique_ptr<JSONCompilationDatabase>
196243791SdimJSONCompilationDatabase::loadFromFile(StringRef FilePath,
197314564Sdim                                      std::string &ErrorMessage,
198314564Sdim                                      JSONCommandLineSyntax Syntax) {
199353358Sdim  // Don't mmap: if we're a long-lived process, the build system may overwrite.
200276479Sdim  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> DatabaseBuffer =
201353358Sdim      llvm::MemoryBuffer::getFile(FilePath, /*FileSize=*/-1,
202353358Sdim                                  /*RequiresNullTerminator=*/true,
203353358Sdim                                  /*IsVolatile=*/true);
204276479Sdim  if (std::error_code Result = DatabaseBuffer.getError()) {
205243791Sdim    ErrorMessage = "Error while opening JSON database: " + Result.message();
206276479Sdim    return nullptr;
207243791Sdim  }
208276479Sdim  std::unique_ptr<JSONCompilationDatabase> Database(
209314564Sdim      new JSONCompilationDatabase(std::move(*DatabaseBuffer), Syntax));
210243791Sdim  if (!Database->parse(ErrorMessage))
211276479Sdim    return nullptr;
212280031Sdim  return Database;
213243791Sdim}
214243791Sdim
215280031Sdimstd::unique_ptr<JSONCompilationDatabase>
216243791SdimJSONCompilationDatabase::loadFromBuffer(StringRef DatabaseString,
217314564Sdim                                        std::string &ErrorMessage,
218314564Sdim                                        JSONCommandLineSyntax Syntax) {
219276479Sdim  std::unique_ptr<llvm::MemoryBuffer> DatabaseBuffer(
220243791Sdim      llvm::MemoryBuffer::getMemBuffer(DatabaseString));
221276479Sdim  std::unique_ptr<JSONCompilationDatabase> Database(
222314564Sdim      new JSONCompilationDatabase(std::move(DatabaseBuffer), Syntax));
223243791Sdim  if (!Database->parse(ErrorMessage))
224276479Sdim    return nullptr;
225280031Sdim  return Database;
226243791Sdim}
227243791Sdim
228243791Sdimstd::vector<CompileCommand>
229243791SdimJSONCompilationDatabase::getCompileCommands(StringRef FilePath) const {
230249423Sdim  SmallString<128> NativeFilePath;
231243791Sdim  llvm::sys::path::native(FilePath, NativeFilePath);
232276479Sdim
233243791Sdim  std::string Error;
234243791Sdim  llvm::raw_string_ostream ES(Error);
235288943Sdim  StringRef Match = MatchTrie.findEquivalent(NativeFilePath, ES);
236249423Sdim  if (Match.empty())
237341825Sdim    return {};
238341825Sdim  const auto CommandsRefI = IndexByFile.find(Match);
239243791Sdim  if (CommandsRefI == IndexByFile.end())
240341825Sdim    return {};
241243791Sdim  std::vector<CompileCommand> Commands;
242249423Sdim  getCommands(CommandsRefI->getValue(), Commands);
243243791Sdim  return Commands;
244243791Sdim}
245243791Sdim
246243791Sdimstd::vector<std::string>
247243791SdimJSONCompilationDatabase::getAllFiles() const {
248243791Sdim  std::vector<std::string> Result;
249341825Sdim  for (const auto &CommandRef : IndexByFile)
250341825Sdim    Result.push_back(CommandRef.first().str());
251243791Sdim  return Result;
252243791Sdim}
253243791Sdim
254249423Sdimstd::vector<CompileCommand>
255249423SdimJSONCompilationDatabase::getAllCompileCommands() const {
256249423Sdim  std::vector<CompileCommand> Commands;
257296417Sdim  getCommands(AllCommands, Commands);
258249423Sdim  return Commands;
259249423Sdim}
260249423Sdim
261353358Sdimstatic llvm::StringRef stripExecutableExtension(llvm::StringRef Name) {
262353358Sdim  Name.consume_back(".exe");
263353358Sdim  return Name;
264353358Sdim}
265353358Sdim
266353358Sdim// There are compiler-wrappers (ccache, distcc, gomacc) that take the "real"
267353358Sdim// compiler as an argument, e.g. distcc gcc -O3 foo.c.
268353358Sdim// These end up in compile_commands.json when people set CC="distcc gcc".
269353358Sdim// Clang's driver doesn't understand this, so we need to unwrap.
270353358Sdimstatic bool unwrapCommand(std::vector<std::string> &Args) {
271353358Sdim  if (Args.size() < 2)
272353358Sdim    return false;
273353358Sdim  StringRef Wrapper =
274353358Sdim      stripExecutableExtension(llvm::sys::path::filename(Args.front()));
275353358Sdim  if (Wrapper == "distcc" || Wrapper == "gomacc" || Wrapper == "ccache") {
276353358Sdim    // Most of these wrappers support being invoked 3 ways:
277353358Sdim    // `distcc g++ file.c` This is the mode we're trying to match.
278353358Sdim    //                     We need to drop `distcc`.
279353358Sdim    // `distcc file.c`     This acts like compiler is cc or similar.
280353358Sdim    //                     Clang's driver can handle this, no change needed.
281353358Sdim    // `g++ file.c`        g++ is a symlink to distcc.
282353358Sdim    //                     We don't even notice this case, and all is well.
283353358Sdim    //
284353358Sdim    // We need to distinguish between the first and second case.
285353358Sdim    // The wrappers themselves don't take flags, so Args[1] is a compiler flag,
286353358Sdim    // an input file, or a compiler. Inputs have extensions, compilers don't.
287353358Sdim    bool HasCompiler =
288353358Sdim        (Args[1][0] != '-') &&
289353358Sdim        !llvm::sys::path::has_extension(stripExecutableExtension(Args[1]));
290353358Sdim    if (HasCompiler) {
291353358Sdim      Args.erase(Args.begin());
292353358Sdim      return true;
293353358Sdim    }
294353358Sdim    // If !HasCompiler, wrappers act like GCC. Fine: so do we.
295353358Sdim  }
296353358Sdim  return false;
297353358Sdim}
298353358Sdim
299296417Sdimstatic std::vector<std::string>
300314564SdimnodeToCommandLine(JSONCommandLineSyntax Syntax,
301314564Sdim                  const std::vector<llvm::yaml::ScalarNode *> &Nodes) {
302296417Sdim  SmallString<1024> Storage;
303353358Sdim  std::vector<std::string> Arguments;
304341825Sdim  if (Nodes.size() == 1)
305353358Sdim    Arguments = unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage));
306353358Sdim  else
307353358Sdim    for (const auto *Node : Nodes)
308353358Sdim      Arguments.push_back(Node->getValue(Storage));
309353358Sdim  // There may be multiple wrappers: using distcc and ccache together is common.
310353358Sdim  while (unwrapCommand(Arguments))
311353358Sdim    ;
312296417Sdim  return Arguments;
313296417Sdim}
314296417Sdim
315249423Sdimvoid JSONCompilationDatabase::getCommands(
316296417Sdim    ArrayRef<CompileCommandRef> CommandsRef,
317296417Sdim    std::vector<CompileCommand> &Commands) const {
318341825Sdim  for (const auto &CommandRef : CommandsRef) {
319249423Sdim    SmallString<8> DirectoryStorage;
320296417Sdim    SmallString<32> FilenameStorage;
321314564Sdim    SmallString<32> OutputStorage;
322341825Sdim    auto Output = std::get<3>(CommandRef);
323288943Sdim    Commands.emplace_back(
324341825Sdim        std::get<0>(CommandRef)->getValue(DirectoryStorage),
325341825Sdim        std::get<1>(CommandRef)->getValue(FilenameStorage),
326341825Sdim        nodeToCommandLine(Syntax, std::get<2>(CommandRef)),
327314564Sdim        Output ? Output->getValue(OutputStorage) : "");
328249423Sdim  }
329249423Sdim}
330249423Sdim
331243791Sdimbool JSONCompilationDatabase::parse(std::string &ErrorMessage) {
332243791Sdim  llvm::yaml::document_iterator I = YAMLStream.begin();
333243791Sdim  if (I == YAMLStream.end()) {
334243791Sdim    ErrorMessage = "Error while parsing YAML.";
335243791Sdim    return false;
336243791Sdim  }
337243791Sdim  llvm::yaml::Node *Root = I->getRoot();
338276479Sdim  if (!Root) {
339243791Sdim    ErrorMessage = "Error while parsing YAML.";
340243791Sdim    return false;
341243791Sdim  }
342341825Sdim  auto *Array = dyn_cast<llvm::yaml::SequenceNode>(Root);
343276479Sdim  if (!Array) {
344243791Sdim    ErrorMessage = "Expected array.";
345243791Sdim    return false;
346243791Sdim  }
347341825Sdim  for (auto &NextObject : *Array) {
348341825Sdim    auto *Object = dyn_cast<llvm::yaml::MappingNode>(&NextObject);
349276479Sdim    if (!Object) {
350243791Sdim      ErrorMessage = "Expected object.";
351243791Sdim      return false;
352243791Sdim    }
353276479Sdim    llvm::yaml::ScalarNode *Directory = nullptr;
354296417Sdim    llvm::Optional<std::vector<llvm::yaml::ScalarNode *>> Command;
355276479Sdim    llvm::yaml::ScalarNode *File = nullptr;
356314564Sdim    llvm::yaml::ScalarNode *Output = nullptr;
357296417Sdim    for (auto& NextKeyValue : *Object) {
358341825Sdim      auto *KeyString = dyn_cast<llvm::yaml::ScalarNode>(NextKeyValue.getKey());
359296417Sdim      if (!KeyString) {
360296417Sdim        ErrorMessage = "Expected strings as key.";
361296417Sdim        return false;
362296417Sdim      }
363296417Sdim      SmallString<10> KeyStorage;
364296417Sdim      StringRef KeyValue = KeyString->getValue(KeyStorage);
365296417Sdim      llvm::yaml::Node *Value = NextKeyValue.getValue();
366276479Sdim      if (!Value) {
367243791Sdim        ErrorMessage = "Expected value.";
368243791Sdim        return false;
369243791Sdim      }
370341825Sdim      auto *ValueString = dyn_cast<llvm::yaml::ScalarNode>(Value);
371341825Sdim      auto *SequenceString = dyn_cast<llvm::yaml::SequenceNode>(Value);
372296417Sdim      if (KeyValue == "arguments" && !SequenceString) {
373296417Sdim        ErrorMessage = "Expected sequence as value.";
374296417Sdim        return false;
375296417Sdim      } else if (KeyValue != "arguments" && !ValueString) {
376243791Sdim        ErrorMessage = "Expected string as value.";
377243791Sdim        return false;
378243791Sdim      }
379296417Sdim      if (KeyValue == "directory") {
380243791Sdim        Directory = ValueString;
381296417Sdim      } else if (KeyValue == "arguments") {
382296417Sdim        Command = std::vector<llvm::yaml::ScalarNode *>();
383296417Sdim        for (auto &Argument : *SequenceString) {
384341825Sdim          auto *Scalar = dyn_cast<llvm::yaml::ScalarNode>(&Argument);
385296417Sdim          if (!Scalar) {
386296417Sdim            ErrorMessage = "Only strings are allowed in 'arguments'.";
387296417Sdim            return false;
388296417Sdim          }
389296417Sdim          Command->push_back(Scalar);
390296417Sdim        }
391296417Sdim      } else if (KeyValue == "command") {
392296417Sdim        if (!Command)
393296417Sdim          Command = std::vector<llvm::yaml::ScalarNode *>(1, ValueString);
394296417Sdim      } else if (KeyValue == "file") {
395243791Sdim        File = ValueString;
396314564Sdim      } else if (KeyValue == "output") {
397314564Sdim        Output = ValueString;
398243791Sdim      } else {
399243791Sdim        ErrorMessage = ("Unknown key: \"" +
400243791Sdim                        KeyString->getRawValue() + "\"").str();
401243791Sdim        return false;
402243791Sdim      }
403243791Sdim    }
404243791Sdim    if (!File) {
405243791Sdim      ErrorMessage = "Missing key: \"file\".";
406243791Sdim      return false;
407243791Sdim    }
408243791Sdim    if (!Command) {
409296417Sdim      ErrorMessage = "Missing key: \"command\" or \"arguments\".";
410243791Sdim      return false;
411243791Sdim    }
412243791Sdim    if (!Directory) {
413243791Sdim      ErrorMessage = "Missing key: \"directory\".";
414243791Sdim      return false;
415243791Sdim    }
416249423Sdim    SmallString<8> FileStorage;
417243791Sdim    StringRef FileName = File->getValue(FileStorage);
418249423Sdim    SmallString<128> NativeFilePath;
419243791Sdim    if (llvm::sys::path::is_relative(FileName)) {
420249423Sdim      SmallString<8> DirectoryStorage;
421249423Sdim      SmallString<128> AbsolutePath(
422243791Sdim          Directory->getValue(DirectoryStorage));
423243791Sdim      llvm::sys::path::append(AbsolutePath, FileName);
424353358Sdim      llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/ true);
425288943Sdim      llvm::sys::path::native(AbsolutePath, NativeFilePath);
426243791Sdim    } else {
427243791Sdim      llvm::sys::path::native(FileName, NativeFilePath);
428243791Sdim    }
429314564Sdim    auto Cmd = CompileCommandRef(Directory, File, *Command, Output);
430296417Sdim    IndexByFile[NativeFilePath].push_back(Cmd);
431296417Sdim    AllCommands.push_back(Cmd);
432288943Sdim    MatchTrie.insert(NativeFilePath);
433243791Sdim  }
434243791Sdim  return true;
435243791Sdim}
436