1341825Sdim//===- JSONCompilationDatabase.cpp ----------------------------------------===// 2243791Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6243791Sdim// 7243791Sdim//===----------------------------------------------------------------------===// 8243791Sdim// 9243791Sdim// This file contains the implementation of the JSONCompilationDatabase. 10243791Sdim// 11243791Sdim//===----------------------------------------------------------------------===// 12243791Sdim 13243791Sdim#include "clang/Tooling/JSONCompilationDatabase.h" 14341825Sdim#include "clang/Basic/LLVM.h" 15243791Sdim#include "clang/Tooling/CompilationDatabase.h" 16243791Sdim#include "clang/Tooling/CompilationDatabasePluginRegistry.h" 17353358Sdim#include "clang/Tooling/Tooling.h" 18341825Sdim#include "llvm/ADT/Optional.h" 19353358Sdim#include "llvm/ADT/STLExtras.h" 20243791Sdim#include "llvm/ADT/SmallString.h" 21341825Sdim#include "llvm/ADT/SmallVector.h" 22341825Sdim#include "llvm/ADT/StringRef.h" 23341825Sdim#include "llvm/ADT/Triple.h" 24314564Sdim#include "llvm/Support/Allocator.h" 25341825Sdim#include "llvm/Support/Casting.h" 26314564Sdim#include "llvm/Support/CommandLine.h" 27341825Sdim#include "llvm/Support/ErrorOr.h" 28341825Sdim#include "llvm/Support/Host.h" 29341825Sdim#include "llvm/Support/MemoryBuffer.h" 30243791Sdim#include "llvm/Support/Path.h" 31314564Sdim#include "llvm/Support/StringSaver.h" 32360784Sdim#include "llvm/Support/VirtualFileSystem.h" 33341825Sdim#include "llvm/Support/YAMLParser.h" 34341825Sdim#include "llvm/Support/raw_ostream.h" 35341825Sdim#include <cassert> 36341825Sdim#include <memory> 37341825Sdim#include <string> 38276479Sdim#include <system_error> 39341825Sdim#include <tuple> 40341825Sdim#include <utility> 41341825Sdim#include <vector> 42243791Sdim 43341825Sdimusing namespace clang; 44341825Sdimusing namespace tooling; 45243791Sdim 46243791Sdimnamespace { 47243791Sdim 48341825Sdim/// A parser for escaped strings of command line arguments. 49243791Sdim/// 50243791Sdim/// Assumes \-escaping for quoted arguments (see the documentation of 51243791Sdim/// unescapeCommandLine(...)). 52243791Sdimclass CommandLineArgumentParser { 53243791Sdim public: 54243791Sdim CommandLineArgumentParser(StringRef CommandLine) 55243791Sdim : Input(CommandLine), Position(Input.begin()-1) {} 56243791Sdim 57243791Sdim std::vector<std::string> parse() { 58243791Sdim bool HasMoreInput = true; 59243791Sdim while (HasMoreInput && nextNonWhitespace()) { 60243791Sdim std::string Argument; 61243791Sdim HasMoreInput = parseStringInto(Argument); 62243791Sdim CommandLine.push_back(Argument); 63243791Sdim } 64243791Sdim return CommandLine; 65243791Sdim } 66243791Sdim 67243791Sdim private: 68243791Sdim // All private methods return true if there is more input available. 69243791Sdim 70243791Sdim bool parseStringInto(std::string &String) { 71243791Sdim do { 72243791Sdim if (*Position == '"') { 73249423Sdim if (!parseDoubleQuotedStringInto(String)) return false; 74249423Sdim } else if (*Position == '\'') { 75249423Sdim if (!parseSingleQuotedStringInto(String)) return false; 76243791Sdim } else { 77243791Sdim if (!parseFreeStringInto(String)) return false; 78243791Sdim } 79243791Sdim } while (*Position != ' '); 80243791Sdim return true; 81243791Sdim } 82243791Sdim 83249423Sdim bool parseDoubleQuotedStringInto(std::string &String) { 84243791Sdim if (!next()) return false; 85243791Sdim while (*Position != '"') { 86243791Sdim if (!skipEscapeCharacter()) return false; 87243791Sdim String.push_back(*Position); 88243791Sdim if (!next()) return false; 89243791Sdim } 90243791Sdim return next(); 91243791Sdim } 92243791Sdim 93249423Sdim bool parseSingleQuotedStringInto(std::string &String) { 94249423Sdim if (!next()) return false; 95249423Sdim while (*Position != '\'') { 96249423Sdim String.push_back(*Position); 97249423Sdim if (!next()) return false; 98249423Sdim } 99249423Sdim return next(); 100249423Sdim } 101249423Sdim 102243791Sdim bool parseFreeStringInto(std::string &String) { 103243791Sdim do { 104243791Sdim if (!skipEscapeCharacter()) return false; 105243791Sdim String.push_back(*Position); 106243791Sdim if (!next()) return false; 107249423Sdim } while (*Position != ' ' && *Position != '"' && *Position != '\''); 108243791Sdim return true; 109243791Sdim } 110243791Sdim 111243791Sdim bool skipEscapeCharacter() { 112243791Sdim if (*Position == '\\') { 113243791Sdim return next(); 114243791Sdim } 115243791Sdim return true; 116243791Sdim } 117243791Sdim 118243791Sdim bool nextNonWhitespace() { 119243791Sdim do { 120243791Sdim if (!next()) return false; 121243791Sdim } while (*Position == ' '); 122243791Sdim return true; 123243791Sdim } 124243791Sdim 125243791Sdim bool next() { 126243791Sdim ++Position; 127243791Sdim return Position != Input.end(); 128243791Sdim } 129243791Sdim 130243791Sdim const StringRef Input; 131243791Sdim StringRef::iterator Position; 132243791Sdim std::vector<std::string> CommandLine; 133243791Sdim}; 134243791Sdim 135314564Sdimstd::vector<std::string> unescapeCommandLine(JSONCommandLineSyntax Syntax, 136314564Sdim StringRef EscapedCommandLine) { 137314564Sdim if (Syntax == JSONCommandLineSyntax::AutoDetect) { 138314564Sdim Syntax = JSONCommandLineSyntax::Gnu; 139314564Sdim llvm::Triple Triple(llvm::sys::getProcessTriple()); 140314564Sdim if (Triple.getOS() == llvm::Triple::OSType::Win32) { 141314564Sdim // Assume Windows command line parsing on Win32 unless the triple 142314564Sdim // explicitly tells us otherwise. 143314564Sdim if (!Triple.hasEnvironment() || 144314564Sdim Triple.getEnvironment() == llvm::Triple::EnvironmentType::MSVC) 145314564Sdim Syntax = JSONCommandLineSyntax::Windows; 146314564Sdim } 147314564Sdim } 148314564Sdim 149314564Sdim if (Syntax == JSONCommandLineSyntax::Windows) { 150314564Sdim llvm::BumpPtrAllocator Alloc; 151314564Sdim llvm::StringSaver Saver(Alloc); 152314564Sdim llvm::SmallVector<const char *, 64> T; 153314564Sdim llvm::cl::TokenizeWindowsCommandLine(EscapedCommandLine, Saver, T); 154314564Sdim std::vector<std::string> Result(T.begin(), T.end()); 155314564Sdim return Result; 156314564Sdim } 157314564Sdim assert(Syntax == JSONCommandLineSyntax::Gnu); 158243791Sdim CommandLineArgumentParser parser(EscapedCommandLine); 159243791Sdim return parser.parse(); 160243791Sdim} 161243791Sdim 162344779Sdim// This plugin locates a nearby compile_command.json file, and also infers 163344779Sdim// compile commands for files not present in the database. 164243791Sdimclass JSONCompilationDatabasePlugin : public CompilationDatabasePlugin { 165280031Sdim std::unique_ptr<CompilationDatabase> 166280031Sdim loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override { 167249423Sdim SmallString<1024> JSONDatabasePath(Directory); 168243791Sdim llvm::sys::path::append(JSONDatabasePath, "compile_commands.json"); 169344779Sdim auto Base = JSONCompilationDatabase::loadFromFile( 170321369Sdim JSONDatabasePath, ErrorMessage, JSONCommandLineSyntax::AutoDetect); 171353358Sdim return Base ? inferTargetAndDriverMode( 172360784Sdim inferMissingCompileCommands(expandResponseFiles( 173360784Sdim std::move(Base), llvm::vfs::getRealFileSystem()))) 174353358Sdim : nullptr; 175243791Sdim } 176243791Sdim}; 177243791Sdim 178341825Sdim} // namespace 179261991Sdim 180243791Sdim// Register the JSONCompilationDatabasePlugin with the 181243791Sdim// CompilationDatabasePluginRegistry using this statically initialized variable. 182243791Sdimstatic CompilationDatabasePluginRegistry::Add<JSONCompilationDatabasePlugin> 183243791SdimX("json-compilation-database", "Reads JSON formatted compilation databases"); 184243791Sdim 185341825Sdimnamespace clang { 186341825Sdimnamespace tooling { 187341825Sdim 188243791Sdim// This anchor is used to force the linker to link in the generated object file 189243791Sdim// and thus register the JSONCompilationDatabasePlugin. 190243791Sdimvolatile int JSONAnchorSource = 0; 191243791Sdim 192341825Sdim} // namespace tooling 193341825Sdim} // namespace clang 194341825Sdim 195280031Sdimstd::unique_ptr<JSONCompilationDatabase> 196243791SdimJSONCompilationDatabase::loadFromFile(StringRef FilePath, 197314564Sdim std::string &ErrorMessage, 198314564Sdim JSONCommandLineSyntax Syntax) { 199353358Sdim // Don't mmap: if we're a long-lived process, the build system may overwrite. 200276479Sdim llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> DatabaseBuffer = 201353358Sdim llvm::MemoryBuffer::getFile(FilePath, /*FileSize=*/-1, 202353358Sdim /*RequiresNullTerminator=*/true, 203353358Sdim /*IsVolatile=*/true); 204276479Sdim if (std::error_code Result = DatabaseBuffer.getError()) { 205243791Sdim ErrorMessage = "Error while opening JSON database: " + Result.message(); 206276479Sdim return nullptr; 207243791Sdim } 208276479Sdim std::unique_ptr<JSONCompilationDatabase> Database( 209314564Sdim new JSONCompilationDatabase(std::move(*DatabaseBuffer), Syntax)); 210243791Sdim if (!Database->parse(ErrorMessage)) 211276479Sdim return nullptr; 212280031Sdim return Database; 213243791Sdim} 214243791Sdim 215280031Sdimstd::unique_ptr<JSONCompilationDatabase> 216243791SdimJSONCompilationDatabase::loadFromBuffer(StringRef DatabaseString, 217314564Sdim std::string &ErrorMessage, 218314564Sdim JSONCommandLineSyntax Syntax) { 219276479Sdim std::unique_ptr<llvm::MemoryBuffer> DatabaseBuffer( 220243791Sdim llvm::MemoryBuffer::getMemBuffer(DatabaseString)); 221276479Sdim std::unique_ptr<JSONCompilationDatabase> Database( 222314564Sdim new JSONCompilationDatabase(std::move(DatabaseBuffer), Syntax)); 223243791Sdim if (!Database->parse(ErrorMessage)) 224276479Sdim return nullptr; 225280031Sdim return Database; 226243791Sdim} 227243791Sdim 228243791Sdimstd::vector<CompileCommand> 229243791SdimJSONCompilationDatabase::getCompileCommands(StringRef FilePath) const { 230249423Sdim SmallString<128> NativeFilePath; 231243791Sdim llvm::sys::path::native(FilePath, NativeFilePath); 232276479Sdim 233243791Sdim std::string Error; 234243791Sdim llvm::raw_string_ostream ES(Error); 235288943Sdim StringRef Match = MatchTrie.findEquivalent(NativeFilePath, ES); 236249423Sdim if (Match.empty()) 237341825Sdim return {}; 238341825Sdim const auto CommandsRefI = IndexByFile.find(Match); 239243791Sdim if (CommandsRefI == IndexByFile.end()) 240341825Sdim return {}; 241243791Sdim std::vector<CompileCommand> Commands; 242249423Sdim getCommands(CommandsRefI->getValue(), Commands); 243243791Sdim return Commands; 244243791Sdim} 245243791Sdim 246243791Sdimstd::vector<std::string> 247243791SdimJSONCompilationDatabase::getAllFiles() const { 248243791Sdim std::vector<std::string> Result; 249341825Sdim for (const auto &CommandRef : IndexByFile) 250341825Sdim Result.push_back(CommandRef.first().str()); 251243791Sdim return Result; 252243791Sdim} 253243791Sdim 254249423Sdimstd::vector<CompileCommand> 255249423SdimJSONCompilationDatabase::getAllCompileCommands() const { 256249423Sdim std::vector<CompileCommand> Commands; 257296417Sdim getCommands(AllCommands, Commands); 258249423Sdim return Commands; 259249423Sdim} 260249423Sdim 261353358Sdimstatic llvm::StringRef stripExecutableExtension(llvm::StringRef Name) { 262353358Sdim Name.consume_back(".exe"); 263353358Sdim return Name; 264353358Sdim} 265353358Sdim 266353358Sdim// There are compiler-wrappers (ccache, distcc, gomacc) that take the "real" 267353358Sdim// compiler as an argument, e.g. distcc gcc -O3 foo.c. 268353358Sdim// These end up in compile_commands.json when people set CC="distcc gcc". 269353358Sdim// Clang's driver doesn't understand this, so we need to unwrap. 270353358Sdimstatic bool unwrapCommand(std::vector<std::string> &Args) { 271353358Sdim if (Args.size() < 2) 272353358Sdim return false; 273353358Sdim StringRef Wrapper = 274353358Sdim stripExecutableExtension(llvm::sys::path::filename(Args.front())); 275353358Sdim if (Wrapper == "distcc" || Wrapper == "gomacc" || Wrapper == "ccache") { 276353358Sdim // Most of these wrappers support being invoked 3 ways: 277353358Sdim // `distcc g++ file.c` This is the mode we're trying to match. 278353358Sdim // We need to drop `distcc`. 279353358Sdim // `distcc file.c` This acts like compiler is cc or similar. 280353358Sdim // Clang's driver can handle this, no change needed. 281353358Sdim // `g++ file.c` g++ is a symlink to distcc. 282353358Sdim // We don't even notice this case, and all is well. 283353358Sdim // 284353358Sdim // We need to distinguish between the first and second case. 285353358Sdim // The wrappers themselves don't take flags, so Args[1] is a compiler flag, 286353358Sdim // an input file, or a compiler. Inputs have extensions, compilers don't. 287353358Sdim bool HasCompiler = 288353358Sdim (Args[1][0] != '-') && 289353358Sdim !llvm::sys::path::has_extension(stripExecutableExtension(Args[1])); 290353358Sdim if (HasCompiler) { 291353358Sdim Args.erase(Args.begin()); 292353358Sdim return true; 293353358Sdim } 294353358Sdim // If !HasCompiler, wrappers act like GCC. Fine: so do we. 295353358Sdim } 296353358Sdim return false; 297353358Sdim} 298353358Sdim 299296417Sdimstatic std::vector<std::string> 300314564SdimnodeToCommandLine(JSONCommandLineSyntax Syntax, 301314564Sdim const std::vector<llvm::yaml::ScalarNode *> &Nodes) { 302296417Sdim SmallString<1024> Storage; 303353358Sdim std::vector<std::string> Arguments; 304341825Sdim if (Nodes.size() == 1) 305353358Sdim Arguments = unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage)); 306353358Sdim else 307353358Sdim for (const auto *Node : Nodes) 308353358Sdim Arguments.push_back(Node->getValue(Storage)); 309353358Sdim // There may be multiple wrappers: using distcc and ccache together is common. 310353358Sdim while (unwrapCommand(Arguments)) 311353358Sdim ; 312296417Sdim return Arguments; 313296417Sdim} 314296417Sdim 315249423Sdimvoid JSONCompilationDatabase::getCommands( 316296417Sdim ArrayRef<CompileCommandRef> CommandsRef, 317296417Sdim std::vector<CompileCommand> &Commands) const { 318341825Sdim for (const auto &CommandRef : CommandsRef) { 319249423Sdim SmallString<8> DirectoryStorage; 320296417Sdim SmallString<32> FilenameStorage; 321314564Sdim SmallString<32> OutputStorage; 322341825Sdim auto Output = std::get<3>(CommandRef); 323288943Sdim Commands.emplace_back( 324341825Sdim std::get<0>(CommandRef)->getValue(DirectoryStorage), 325341825Sdim std::get<1>(CommandRef)->getValue(FilenameStorage), 326341825Sdim nodeToCommandLine(Syntax, std::get<2>(CommandRef)), 327314564Sdim Output ? Output->getValue(OutputStorage) : ""); 328249423Sdim } 329249423Sdim} 330249423Sdim 331243791Sdimbool JSONCompilationDatabase::parse(std::string &ErrorMessage) { 332243791Sdim llvm::yaml::document_iterator I = YAMLStream.begin(); 333243791Sdim if (I == YAMLStream.end()) { 334243791Sdim ErrorMessage = "Error while parsing YAML."; 335243791Sdim return false; 336243791Sdim } 337243791Sdim llvm::yaml::Node *Root = I->getRoot(); 338276479Sdim if (!Root) { 339243791Sdim ErrorMessage = "Error while parsing YAML."; 340243791Sdim return false; 341243791Sdim } 342341825Sdim auto *Array = dyn_cast<llvm::yaml::SequenceNode>(Root); 343276479Sdim if (!Array) { 344243791Sdim ErrorMessage = "Expected array."; 345243791Sdim return false; 346243791Sdim } 347341825Sdim for (auto &NextObject : *Array) { 348341825Sdim auto *Object = dyn_cast<llvm::yaml::MappingNode>(&NextObject); 349276479Sdim if (!Object) { 350243791Sdim ErrorMessage = "Expected object."; 351243791Sdim return false; 352243791Sdim } 353276479Sdim llvm::yaml::ScalarNode *Directory = nullptr; 354296417Sdim llvm::Optional<std::vector<llvm::yaml::ScalarNode *>> Command; 355276479Sdim llvm::yaml::ScalarNode *File = nullptr; 356314564Sdim llvm::yaml::ScalarNode *Output = nullptr; 357296417Sdim for (auto& NextKeyValue : *Object) { 358341825Sdim auto *KeyString = dyn_cast<llvm::yaml::ScalarNode>(NextKeyValue.getKey()); 359296417Sdim if (!KeyString) { 360296417Sdim ErrorMessage = "Expected strings as key."; 361296417Sdim return false; 362296417Sdim } 363296417Sdim SmallString<10> KeyStorage; 364296417Sdim StringRef KeyValue = KeyString->getValue(KeyStorage); 365296417Sdim llvm::yaml::Node *Value = NextKeyValue.getValue(); 366276479Sdim if (!Value) { 367243791Sdim ErrorMessage = "Expected value."; 368243791Sdim return false; 369243791Sdim } 370341825Sdim auto *ValueString = dyn_cast<llvm::yaml::ScalarNode>(Value); 371341825Sdim auto *SequenceString = dyn_cast<llvm::yaml::SequenceNode>(Value); 372296417Sdim if (KeyValue == "arguments" && !SequenceString) { 373296417Sdim ErrorMessage = "Expected sequence as value."; 374296417Sdim return false; 375296417Sdim } else if (KeyValue != "arguments" && !ValueString) { 376243791Sdim ErrorMessage = "Expected string as value."; 377243791Sdim return false; 378243791Sdim } 379296417Sdim if (KeyValue == "directory") { 380243791Sdim Directory = ValueString; 381296417Sdim } else if (KeyValue == "arguments") { 382296417Sdim Command = std::vector<llvm::yaml::ScalarNode *>(); 383296417Sdim for (auto &Argument : *SequenceString) { 384341825Sdim auto *Scalar = dyn_cast<llvm::yaml::ScalarNode>(&Argument); 385296417Sdim if (!Scalar) { 386296417Sdim ErrorMessage = "Only strings are allowed in 'arguments'."; 387296417Sdim return false; 388296417Sdim } 389296417Sdim Command->push_back(Scalar); 390296417Sdim } 391296417Sdim } else if (KeyValue == "command") { 392296417Sdim if (!Command) 393296417Sdim Command = std::vector<llvm::yaml::ScalarNode *>(1, ValueString); 394296417Sdim } else if (KeyValue == "file") { 395243791Sdim File = ValueString; 396314564Sdim } else if (KeyValue == "output") { 397314564Sdim Output = ValueString; 398243791Sdim } else { 399243791Sdim ErrorMessage = ("Unknown key: \"" + 400243791Sdim KeyString->getRawValue() + "\"").str(); 401243791Sdim return false; 402243791Sdim } 403243791Sdim } 404243791Sdim if (!File) { 405243791Sdim ErrorMessage = "Missing key: \"file\"."; 406243791Sdim return false; 407243791Sdim } 408243791Sdim if (!Command) { 409296417Sdim ErrorMessage = "Missing key: \"command\" or \"arguments\"."; 410243791Sdim return false; 411243791Sdim } 412243791Sdim if (!Directory) { 413243791Sdim ErrorMessage = "Missing key: \"directory\"."; 414243791Sdim return false; 415243791Sdim } 416249423Sdim SmallString<8> FileStorage; 417243791Sdim StringRef FileName = File->getValue(FileStorage); 418249423Sdim SmallString<128> NativeFilePath; 419243791Sdim if (llvm::sys::path::is_relative(FileName)) { 420249423Sdim SmallString<8> DirectoryStorage; 421249423Sdim SmallString<128> AbsolutePath( 422243791Sdim Directory->getValue(DirectoryStorage)); 423243791Sdim llvm::sys::path::append(AbsolutePath, FileName); 424353358Sdim llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/ true); 425288943Sdim llvm::sys::path::native(AbsolutePath, NativeFilePath); 426243791Sdim } else { 427243791Sdim llvm::sys::path::native(FileName, NativeFilePath); 428243791Sdim } 429314564Sdim auto Cmd = CompileCommandRef(Directory, File, *Command, Output); 430296417Sdim IndexByFile[NativeFilePath].push_back(Cmd); 431296417Sdim AllCommands.push_back(Cmd); 432288943Sdim MatchTrie.insert(NativeFilePath); 433243791Sdim } 434243791Sdim return true; 435243791Sdim} 436