1//===- CompilationDatabase.cpp --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file contains implementations of the CompilationDatabase base class
10//  and the FixedCompilationDatabase.
11//
12//  FIXME: Various functions that take a string &ErrorMessage should be upgraded
13//  to Expected.
14//
15//===----------------------------------------------------------------------===//
16
17#include "clang/Tooling/CompilationDatabase.h"
18#include "clang/Basic/Diagnostic.h"
19#include "clang/Basic/DiagnosticIDs.h"
20#include "clang/Basic/DiagnosticOptions.h"
21#include "clang/Basic/LLVM.h"
22#include "clang/Driver/Action.h"
23#include "clang/Driver/Compilation.h"
24#include "clang/Driver/Driver.h"
25#include "clang/Driver/DriverDiagnostic.h"
26#include "clang/Driver/Job.h"
27#include "clang/Frontend/TextDiagnosticPrinter.h"
28#include "clang/Tooling/CompilationDatabasePluginRegistry.h"
29#include "clang/Tooling/Tooling.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/IntrusiveRefCntPtr.h"
32#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallString.h"
34#include "llvm/ADT/SmallVector.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/Option/Arg.h"
37#include "llvm/Support/Casting.h"
38#include "llvm/Support/Compiler.h"
39#include "llvm/Support/ErrorOr.h"
40#include "llvm/Support/LineIterator.h"
41#include "llvm/Support/MemoryBuffer.h"
42#include "llvm/Support/Path.h"
43#include "llvm/Support/raw_ostream.h"
44#include "llvm/TargetParser/Host.h"
45#include <algorithm>
46#include <cassert>
47#include <cstring>
48#include <iterator>
49#include <memory>
50#include <sstream>
51#include <string>
52#include <system_error>
53#include <utility>
54#include <vector>
55
56using namespace clang;
57using namespace tooling;
58
59LLVM_INSTANTIATE_REGISTRY(CompilationDatabasePluginRegistry)
60
61CompilationDatabase::~CompilationDatabase() = default;
62
63std::unique_ptr<CompilationDatabase>
64CompilationDatabase::loadFromDirectory(StringRef BuildDirectory,
65                                       std::string &ErrorMessage) {
66  llvm::raw_string_ostream ErrorStream(ErrorMessage);
67  for (const CompilationDatabasePluginRegistry::entry &Database :
68       CompilationDatabasePluginRegistry::entries()) {
69    std::string DatabaseErrorMessage;
70    std::unique_ptr<CompilationDatabasePlugin> Plugin(Database.instantiate());
71    if (std::unique_ptr<CompilationDatabase> DB =
72            Plugin->loadFromDirectory(BuildDirectory, DatabaseErrorMessage))
73      return DB;
74    ErrorStream << Database.getName() << ": " << DatabaseErrorMessage << "\n";
75  }
76  return nullptr;
77}
78
79static std::unique_ptr<CompilationDatabase>
80findCompilationDatabaseFromDirectory(StringRef Directory,
81                                     std::string &ErrorMessage) {
82  std::stringstream ErrorStream;
83  bool HasErrorMessage = false;
84  while (!Directory.empty()) {
85    std::string LoadErrorMessage;
86
87    if (std::unique_ptr<CompilationDatabase> DB =
88            CompilationDatabase::loadFromDirectory(Directory, LoadErrorMessage))
89      return DB;
90
91    if (!HasErrorMessage) {
92      ErrorStream << "No compilation database found in " << Directory.str()
93                  << " or any parent directory\n" << LoadErrorMessage;
94      HasErrorMessage = true;
95    }
96
97    Directory = llvm::sys::path::parent_path(Directory);
98  }
99  ErrorMessage = ErrorStream.str();
100  return nullptr;
101}
102
103std::unique_ptr<CompilationDatabase>
104CompilationDatabase::autoDetectFromSource(StringRef SourceFile,
105                                          std::string &ErrorMessage) {
106  SmallString<1024> AbsolutePath(getAbsolutePath(SourceFile));
107  StringRef Directory = llvm::sys::path::parent_path(AbsolutePath);
108
109  std::unique_ptr<CompilationDatabase> DB =
110      findCompilationDatabaseFromDirectory(Directory, ErrorMessage);
111
112  if (!DB)
113    ErrorMessage = ("Could not auto-detect compilation database for file \"" +
114                   SourceFile + "\"\n" + ErrorMessage).str();
115  return DB;
116}
117
118std::unique_ptr<CompilationDatabase>
119CompilationDatabase::autoDetectFromDirectory(StringRef SourceDir,
120                                             std::string &ErrorMessage) {
121  SmallString<1024> AbsolutePath(getAbsolutePath(SourceDir));
122
123  std::unique_ptr<CompilationDatabase> DB =
124      findCompilationDatabaseFromDirectory(AbsolutePath, ErrorMessage);
125
126  if (!DB)
127    ErrorMessage = ("Could not auto-detect compilation database from directory \"" +
128                   SourceDir + "\"\n" + ErrorMessage).str();
129  return DB;
130}
131
132std::vector<CompileCommand> CompilationDatabase::getAllCompileCommands() const {
133  std::vector<CompileCommand> Result;
134  for (const auto &File : getAllFiles()) {
135    auto C = getCompileCommands(File);
136    std::move(C.begin(), C.end(), std::back_inserter(Result));
137  }
138  return Result;
139}
140
141CompilationDatabasePlugin::~CompilationDatabasePlugin() = default;
142
143namespace {
144
145// Helper for recursively searching through a chain of actions and collecting
146// all inputs, direct and indirect, of compile jobs.
147struct CompileJobAnalyzer {
148  SmallVector<std::string, 2> Inputs;
149
150  void run(const driver::Action *A) {
151    runImpl(A, false);
152  }
153
154private:
155  void runImpl(const driver::Action *A, bool Collect) {
156    bool CollectChildren = Collect;
157    switch (A->getKind()) {
158    case driver::Action::CompileJobClass:
159    case driver::Action::PrecompileJobClass:
160      CollectChildren = true;
161      break;
162
163    case driver::Action::InputClass:
164      if (Collect) {
165        const auto *IA = cast<driver::InputAction>(A);
166        Inputs.push_back(std::string(IA->getInputArg().getSpelling()));
167      }
168      break;
169
170    default:
171      // Don't care about others
172      break;
173    }
174
175    for (const driver::Action *AI : A->inputs())
176      runImpl(AI, CollectChildren);
177  }
178};
179
180// Special DiagnosticConsumer that looks for warn_drv_input_file_unused
181// diagnostics from the driver and collects the option strings for those unused
182// options.
183class UnusedInputDiagConsumer : public DiagnosticConsumer {
184public:
185  UnusedInputDiagConsumer(DiagnosticConsumer &Other) : Other(Other) {}
186
187  void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
188                        const Diagnostic &Info) override {
189    if (Info.getID() == diag::warn_drv_input_file_unused) {
190      // Arg 1 for this diagnostic is the option that didn't get used.
191      UnusedInputs.push_back(Info.getArgStdStr(0));
192    } else if (DiagLevel >= DiagnosticsEngine::Error) {
193      // If driver failed to create compilation object, show the diagnostics
194      // to user.
195      Other.HandleDiagnostic(DiagLevel, Info);
196    }
197  }
198
199  DiagnosticConsumer &Other;
200  SmallVector<std::string, 2> UnusedInputs;
201};
202
203// Filter of tools unused flags such as -no-integrated-as and -Wa,*.
204// They are not used for syntax checking, and could confuse targets
205// which don't support these options.
206struct FilterUnusedFlags {
207  bool operator() (StringRef S) {
208    return (S == "-no-integrated-as") || S.starts_with("-Wa,");
209  }
210};
211
212std::string GetClangToolCommand() {
213  static int Dummy;
214  std::string ClangExecutable =
215      llvm::sys::fs::getMainExecutable("clang", (void *)&Dummy);
216  SmallString<128> ClangToolPath;
217  ClangToolPath = llvm::sys::path::parent_path(ClangExecutable);
218  llvm::sys::path::append(ClangToolPath, "clang-tool");
219  return std::string(ClangToolPath);
220}
221
222} // namespace
223
224/// Strips any positional args and possible argv[0] from a command-line
225/// provided by the user to construct a FixedCompilationDatabase.
226///
227/// FixedCompilationDatabase requires a command line to be in this format as it
228/// constructs the command line for each file by appending the name of the file
229/// to be compiled. FixedCompilationDatabase also adds its own argv[0] to the
230/// start of the command line although its value is not important as it's just
231/// ignored by the Driver invoked by the ClangTool using the
232/// FixedCompilationDatabase.
233///
234/// FIXME: This functionality should probably be made available by
235/// clang::driver::Driver although what the interface should look like is not
236/// clear.
237///
238/// \param[in] Args Args as provided by the user.
239/// \return Resulting stripped command line.
240///          \li true if successful.
241///          \li false if \c Args cannot be used for compilation jobs (e.g.
242///          contains an option like -E or -version).
243static bool stripPositionalArgs(std::vector<const char *> Args,
244                                std::vector<std::string> &Result,
245                                std::string &ErrorMsg) {
246  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
247  llvm::raw_string_ostream Output(ErrorMsg);
248  TextDiagnosticPrinter DiagnosticPrinter(Output, &*DiagOpts);
249  UnusedInputDiagConsumer DiagClient(DiagnosticPrinter);
250  DiagnosticsEngine Diagnostics(
251      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()),
252      &*DiagOpts, &DiagClient, false);
253
254  // The clang executable path isn't required since the jobs the driver builds
255  // will not be executed.
256  std::unique_ptr<driver::Driver> NewDriver(new driver::Driver(
257      /* ClangExecutable= */ "", llvm::sys::getDefaultTargetTriple(),
258      Diagnostics));
259  NewDriver->setCheckInputsExist(false);
260
261  // This becomes the new argv[0]. The value is used to detect libc++ include
262  // dirs on Mac, it isn't used for other platforms.
263  std::string Argv0 = GetClangToolCommand();
264  Args.insert(Args.begin(), Argv0.c_str());
265
266  // By adding -c, we force the driver to treat compilation as the last phase.
267  // It will then issue warnings via Diagnostics about un-used options that
268  // would have been used for linking. If the user provided a compiler name as
269  // the original argv[0], this will be treated as a linker input thanks to
270  // insertng a new argv[0] above. All un-used options get collected by
271  // UnusedInputdiagConsumer and get stripped out later.
272  Args.push_back("-c");
273
274  // Put a dummy C++ file on to ensure there's at least one compile job for the
275  // driver to construct. If the user specified some other argument that
276  // prevents compilation, e.g. -E or something like -version, we may still end
277  // up with no jobs but then this is the user's fault.
278  Args.push_back("placeholder.cpp");
279
280  llvm::erase_if(Args, FilterUnusedFlags());
281
282  const std::unique_ptr<driver::Compilation> Compilation(
283      NewDriver->BuildCompilation(Args));
284  if (!Compilation)
285    return false;
286
287  const driver::JobList &Jobs = Compilation->getJobs();
288
289  CompileJobAnalyzer CompileAnalyzer;
290
291  for (const auto &Cmd : Jobs) {
292    // Collect only for Assemble, Backend, and Compile jobs. If we do all jobs
293    // we get duplicates since Link jobs point to Assemble jobs as inputs.
294    // -flto* flags make the BackendJobClass, which still needs analyzer.
295    if (Cmd.getSource().getKind() == driver::Action::AssembleJobClass ||
296        Cmd.getSource().getKind() == driver::Action::BackendJobClass ||
297        Cmd.getSource().getKind() == driver::Action::CompileJobClass ||
298        Cmd.getSource().getKind() == driver::Action::PrecompileJobClass) {
299      CompileAnalyzer.run(&Cmd.getSource());
300    }
301  }
302
303  if (CompileAnalyzer.Inputs.empty()) {
304    ErrorMsg = "warning: no compile jobs found\n";
305    return false;
306  }
307
308  // Remove all compilation input files from the command line and inputs deemed
309  // unused for compilation. This is necessary so that getCompileCommands() can
310  // construct a command line for each file.
311  std::vector<const char *>::iterator End =
312      llvm::remove_if(Args, [&](StringRef S) {
313        return llvm::is_contained(CompileAnalyzer.Inputs, S) ||
314               llvm::is_contained(DiagClient.UnusedInputs, S);
315      });
316  // Remove the -c add above as well. It will be at the end right now.
317  assert(strcmp(*(End - 1), "-c") == 0);
318  --End;
319
320  Result = std::vector<std::string>(Args.begin() + 1, End);
321  return true;
322}
323
324std::unique_ptr<FixedCompilationDatabase>
325FixedCompilationDatabase::loadFromCommandLine(int &Argc,
326                                              const char *const *Argv,
327                                              std::string &ErrorMsg,
328                                              const Twine &Directory) {
329  ErrorMsg.clear();
330  if (Argc == 0)
331    return nullptr;
332  const char *const *DoubleDash = std::find(Argv, Argv + Argc, StringRef("--"));
333  if (DoubleDash == Argv + Argc)
334    return nullptr;
335  std::vector<const char *> CommandLine(DoubleDash + 1, Argv + Argc);
336  Argc = DoubleDash - Argv;
337
338  std::vector<std::string> StrippedArgs;
339  if (!stripPositionalArgs(CommandLine, StrippedArgs, ErrorMsg))
340    return nullptr;
341  return std::make_unique<FixedCompilationDatabase>(Directory, StrippedArgs);
342}
343
344std::unique_ptr<FixedCompilationDatabase>
345FixedCompilationDatabase::loadFromFile(StringRef Path, std::string &ErrorMsg) {
346  ErrorMsg.clear();
347  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File =
348      llvm::MemoryBuffer::getFile(Path);
349  if (std::error_code Result = File.getError()) {
350    ErrorMsg = "Error while opening fixed database: " + Result.message();
351    return nullptr;
352  }
353  return loadFromBuffer(llvm::sys::path::parent_path(Path),
354                        (*File)->getBuffer(), ErrorMsg);
355}
356
357std::unique_ptr<FixedCompilationDatabase>
358FixedCompilationDatabase::loadFromBuffer(StringRef Directory, StringRef Data,
359                                         std::string &ErrorMsg) {
360  ErrorMsg.clear();
361  std::vector<std::string> Args;
362  StringRef Line;
363  while (!Data.empty()) {
364    std::tie(Line, Data) = Data.split('\n');
365    // Stray whitespace is almost certainly unintended.
366    Line = Line.trim();
367    if (!Line.empty())
368      Args.push_back(Line.str());
369  }
370  return std::make_unique<FixedCompilationDatabase>(Directory, std::move(Args));
371}
372
373FixedCompilationDatabase::FixedCompilationDatabase(
374    const Twine &Directory, ArrayRef<std::string> CommandLine) {
375  std::vector<std::string> ToolCommandLine(1, GetClangToolCommand());
376  ToolCommandLine.insert(ToolCommandLine.end(),
377                         CommandLine.begin(), CommandLine.end());
378  CompileCommands.emplace_back(Directory, StringRef(),
379                               std::move(ToolCommandLine),
380                               StringRef());
381}
382
383std::vector<CompileCommand>
384FixedCompilationDatabase::getCompileCommands(StringRef FilePath) const {
385  std::vector<CompileCommand> Result(CompileCommands);
386  Result[0].CommandLine.push_back(std::string(FilePath));
387  Result[0].Filename = std::string(FilePath);
388  return Result;
389}
390
391namespace {
392
393class FixedCompilationDatabasePlugin : public CompilationDatabasePlugin {
394  std::unique_ptr<CompilationDatabase>
395  loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override {
396    SmallString<1024> DatabasePath(Directory);
397    llvm::sys::path::append(DatabasePath, "compile_flags.txt");
398    return FixedCompilationDatabase::loadFromFile(DatabasePath, ErrorMessage);
399  }
400};
401
402} // namespace
403
// Adds FixedCompilationDatabasePlugin to CompilationDatabasePluginRegistry
// under the name "fixed-compilation-database", so that
// CompilationDatabase::loadFromDirectory() instantiates it while iterating
// over the registry entries.
static CompilationDatabasePluginRegistry::Add<FixedCompilationDatabasePlugin>
X("fixed-compilation-database", "Reads plain-text flags file");
406
407namespace clang {
408namespace tooling {
409
410// This anchor is used to force the linker to link in the generated object file
411// and thus register the JSONCompilationDatabasePlugin.
412extern volatile int JSONAnchorSource;
413static int LLVM_ATTRIBUTE_UNUSED JSONAnchorDest = JSONAnchorSource;
414
415} // namespace tooling
416} // namespace clang
417