1//===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownload* family of
/// functions. For the server, this file defines the DebuginfodLogEntry and
/// DebuginfodServer structs, as well as the DebuginfodLog and
/// DebuginfodCollection classes. The client functions retrieve any of the
/// three supported artifact types (executable, debuginfo, source file)
/// associated with a build-id from debuginfod servers. If a source file is to
/// be fetched, its absolute path must be specified in the SourceFilePath
/// argument to getCachedOrDownloadSource. The DebuginfodLogEntry,
/// DebuginfodLog, and DebuginfodCollection are used by the DebuginfodServer to
/// scan the local filesystem for binaries and serve the debuginfod protocol.
21///
22//===----------------------------------------------------------------------===//
23
24#include "llvm/Debuginfod/Debuginfod.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/BinaryFormat/Magic.h"
28#include "llvm/DebugInfo/DWARF/DWARFContext.h"
29#include "llvm/DebugInfo/Symbolize/Symbolize.h"
30#include "llvm/Debuginfod/HTTPClient.h"
31#include "llvm/Object/BuildID.h"
32#include "llvm/Object/ELFObjectFile.h"
33#include "llvm/Support/CachePruning.h"
34#include "llvm/Support/Caching.h"
35#include "llvm/Support/Errc.h"
36#include "llvm/Support/Error.h"
37#include "llvm/Support/FileUtilities.h"
38#include "llvm/Support/MemoryBuffer.h"
39#include "llvm/Support/Path.h"
40#include "llvm/Support/ThreadPool.h"
41#include "llvm/Support/xxhash.h"
42
43#include <atomic>
44#include <optional>
45#include <thread>
46
47namespace llvm {
48
49using llvm::object::BuildIDRef;
50
namespace {
// Process-wide list of default debuginfod server URLs. It stays unset
// (std::nullopt) until it is either populated from the DEBUGINFOD_URLS
// environment variable by getDefaultDebuginfodUrls() or replaced through
// setDefaultDebuginfodUrls().
// NOTE(review): the elements are non-owning StringRefs, so presumably the
// backing strings must outlive this list -- confirm against callers.
std::optional<SmallVector<StringRef>> DebuginfodUrls;
// Many Readers/Single Writer lock protecting the global debuginfod URL list.
llvm::sys::RWMutex UrlsMutex;
} // namespace
56
57std::string getDebuginfodCacheKey(llvm::StringRef S) {
58  return utostr(xxh3_64bits(S));
59}
60
61// Returns a binary BuildID as a normalized hex string.
62// Uses lowercase for compatibility with common debuginfod servers.
63static std::string buildIDToString(BuildIDRef ID) {
64  return llvm::toHex(ID, /*LowerCase=*/true);
65}
66
67bool canUseDebuginfod() {
68  return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty();
69}
70
71SmallVector<StringRef> getDefaultDebuginfodUrls() {
72  std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex);
73  if (!DebuginfodUrls) {
74    // Only read from the environment variable if the user hasn't already
75    // set the value.
76    ReadGuard.unlock();
77    std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
78    DebuginfodUrls = SmallVector<StringRef>();
79    if (const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS")) {
80      StringRef(DebuginfodUrlsEnv)
81          .split(DebuginfodUrls.value(), " ", -1, false);
82    }
83    WriteGuard.unlock();
84    ReadGuard.lock();
85  }
86  return DebuginfodUrls.value();
87}
88
89// Set the default debuginfod URL list, override the environment variable.
90void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) {
91  std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
92  DebuginfodUrls = URLs;
93}
94
95/// Finds a default local file caching directory for the debuginfod client,
96/// first checking DEBUGINFOD_CACHE_PATH.
97Expected<std::string> getDefaultDebuginfodCacheDirectory() {
98  if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
99    return CacheDirectoryEnv;
100
101  SmallString<64> CacheDirectory;
102  if (!sys::path::cache_directory(CacheDirectory))
103    return createStringError(
104        errc::io_error, "Unable to determine appropriate cache directory.");
105  sys::path::append(CacheDirectory, "llvm-debuginfod", "client");
106  return std::string(CacheDirectory);
107}
108
109std::chrono::milliseconds getDefaultDebuginfodTimeout() {
110  long Timeout;
111  const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");
112  if (DebuginfodTimeoutEnv &&
113      to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))
114    return std::chrono::milliseconds(Timeout * 1000);
115
116  return std::chrono::milliseconds(90 * 1000);
117}
118
119/// The following functions fetch a debuginfod artifact to a file in a local
120/// cache and return the cached file path. They first search the local cache,
121/// followed by the debuginfod servers.
122
123std::string getDebuginfodSourceUrlPath(BuildIDRef ID,
124                                       StringRef SourceFilePath) {
125  SmallString<64> UrlPath;
126  sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
127                    buildIDToString(ID), "source",
128                    sys::path::convert_to_slash(SourceFilePath));
129  return std::string(UrlPath);
130}
131
132Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
133                                                StringRef SourceFilePath) {
134  std::string UrlPath = getDebuginfodSourceUrlPath(ID, SourceFilePath);
135  return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);
136}
137
138std::string getDebuginfodExecutableUrlPath(BuildIDRef ID) {
139  SmallString<64> UrlPath;
140  sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
141                    buildIDToString(ID), "executable");
142  return std::string(UrlPath);
143}
144
145Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
146  std::string UrlPath = getDebuginfodExecutableUrlPath(ID);
147  return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);
148}
149
150std::string getDebuginfodDebuginfoUrlPath(BuildIDRef ID) {
151  SmallString<64> UrlPath;
152  sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
153                    buildIDToString(ID), "debuginfo");
154  return std::string(UrlPath);
155}
156
157Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
158  std::string UrlPath = getDebuginfodDebuginfoUrlPath(ID);
159  return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);
160}
161
162// General fetching function.
163Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
164                                                  StringRef UrlPath) {
165  SmallString<10> CacheDir;
166
167  Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
168  if (!CacheDirOrErr)
169    return CacheDirOrErr.takeError();
170  CacheDir = *CacheDirOrErr;
171
172  return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
173                                     getDefaultDebuginfodUrls(),
174                                     getDefaultDebuginfodTimeout());
175}
176
177namespace {
178
179/// A simple handler which streams the returned data to a cache file. The cache
180/// file is only created if a 200 OK status is observed.
181class StreamedHTTPResponseHandler : public HTTPResponseHandler {
182  using CreateStreamFn =
183      std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
184  CreateStreamFn CreateStream;
185  HTTPClient &Client;
186  std::unique_ptr<CachedFileStream> FileStream;
187
188public:
189  StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
190      : CreateStream(CreateStream), Client(Client) {}
191  virtual ~StreamedHTTPResponseHandler() = default;
192
193  Error handleBodyChunk(StringRef BodyChunk) override;
194};
195
196} // namespace
197
198Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
199  if (!FileStream) {
200    unsigned Code = Client.responseCode();
201    if (Code && Code != 200)
202      return Error::success();
203    Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
204        CreateStream();
205    if (!FileStreamOrError)
206      return FileStreamOrError.takeError();
207    FileStream = std::move(*FileStreamOrError);
208  }
209  *FileStream->OS << BodyChunk;
210  return Error::success();
211}
212
213// An over-accepting simplification of the HTTP RFC 7230 spec.
214static bool isHeader(StringRef S) {
215  StringRef Name;
216  StringRef Value;
217  std::tie(Name, Value) = S.split(':');
218  if (Name.empty() || Value.empty())
219    return false;
220  return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) &&
221         all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; });
222}
223
224static SmallVector<std::string, 0> getHeaders() {
225  const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE");
226  if (!Filename)
227    return {};
228  ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile =
229      MemoryBuffer::getFile(Filename, /*IsText=*/true);
230  if (!HeadersFile)
231    return {};
232
233  SmallVector<std::string, 0> Headers;
234  uint64_t LineNumber = 0;
235  for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) {
236    LineNumber++;
237    if (!Line.empty() && Line.back() == '\r')
238      Line = Line.drop_back();
239    if (!isHeader(Line)) {
240      if (!all_of(Line, llvm::isSpace))
241        WithColor::warning()
242            << "could not parse debuginfod header: " << Filename << ':'
243            << LineNumber << '\n';
244      continue;
245    }
246    Headers.emplace_back(Line);
247  }
248  return Headers;
249}
250
251Expected<std::string> getCachedOrDownloadArtifact(
252    StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
253    ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
254  SmallString<64> AbsCachedArtifactPath;
255  sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,
256                    "llvmcache-" + UniqueKey);
257
258  Expected<FileCache> CacheOrErr =
259      localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);
260  if (!CacheOrErr)
261    return CacheOrErr.takeError();
262
263  FileCache Cache = *CacheOrErr;
264  // We choose an arbitrary Task parameter as we do not make use of it.
265  unsigned Task = 0;
266  Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, "");
267  if (!CacheAddStreamOrErr)
268    return CacheAddStreamOrErr.takeError();
269  AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
270  if (!CacheAddStream)
271    return std::string(AbsCachedArtifactPath);
272  // The artifact was not found in the local cache, query the debuginfod
273  // servers.
274  if (!HTTPClient::isAvailable())
275    return createStringError(errc::io_error,
276                             "No working HTTP client is available.");
277
278  if (!HTTPClient::IsInitialized)
279    return createStringError(
280        errc::io_error,
281        "A working HTTP client is available, but it is not initialized. To "
282        "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
283        "at the beginning of main.");
284
285  HTTPClient Client;
286  Client.setTimeout(Timeout);
287  for (StringRef ServerUrl : DebuginfodUrls) {
288    SmallString<64> ArtifactUrl;
289    sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
290
291    // Perform the HTTP request and if successful, write the response body to
292    // the cache.
293    {
294      StreamedHTTPResponseHandler Handler(
295          [&]() { return CacheAddStream(Task, ""); }, Client);
296      HTTPRequest Request(ArtifactUrl);
297      Request.Headers = getHeaders();
298      Error Err = Client.perform(Request, Handler);
299      if (Err)
300        return std::move(Err);
301
302      unsigned Code = Client.responseCode();
303      if (Code && Code != 200)
304        continue;
305    }
306
307    Expected<CachePruningPolicy> PruningPolicyOrErr =
308        parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY"));
309    if (!PruningPolicyOrErr)
310      return PruningPolicyOrErr.takeError();
311    pruneCache(CacheDirectoryPath, *PruningPolicyOrErr);
312
313    // Return the path to the artifact on disk.
314    return std::string(AbsCachedArtifactPath);
315  }
316
317  return createStringError(errc::argument_out_of_domain, "build id not found");
318}
319
320DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
321    : Message(Message.str()) {}
322
323void DebuginfodLog::push(const Twine &Message) {
324  push(DebuginfodLogEntry(Message));
325}
326
327void DebuginfodLog::push(DebuginfodLogEntry Entry) {
328  {
329    std::lock_guard<std::mutex> Guard(QueueMutex);
330    LogEntryQueue.push(Entry);
331  }
332  QueueCondition.notify_one();
333}
334
335DebuginfodLogEntry DebuginfodLog::pop() {
336  {
337    std::unique_lock<std::mutex> Guard(QueueMutex);
338    // Wait for messages to be pushed into the queue.
339    QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
340  }
341  std::lock_guard<std::mutex> Guard(QueueMutex);
342  if (!LogEntryQueue.size())
343    llvm_unreachable("Expected message in the queue.");
344
345  DebuginfodLogEntry Entry = LogEntryQueue.front();
346  LogEntryQueue.pop();
347  return Entry;
348}
349
350DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
351                                           DebuginfodLog &Log, ThreadPool &Pool,
352                                           double MinInterval)
353    : Log(Log), Pool(Pool), MinInterval(MinInterval) {
354  for (StringRef Path : PathsRef)
355    Paths.push_back(Path.str());
356}
357
358Error DebuginfodCollection::update() {
359  std::lock_guard<sys::Mutex> Guard(UpdateMutex);
360  if (UpdateTimer.isRunning())
361    UpdateTimer.stopTimer();
362  UpdateTimer.clear();
363  for (const std::string &Path : Paths) {
364    Log.push("Updating binaries at path " + Path);
365    if (Error Err = findBinaries(Path))
366      return Err;
367  }
368  Log.push("Updated collection");
369  UpdateTimer.startTimer();
370  return Error::success();
371}
372
373Expected<bool> DebuginfodCollection::updateIfStale() {
374  if (!UpdateTimer.isRunning())
375    return false;
376  UpdateTimer.stopTimer();
377  double Time = UpdateTimer.getTotalTime().getWallTime();
378  UpdateTimer.startTimer();
379  if (Time < MinInterval)
380    return false;
381  if (Error Err = update())
382    return std::move(Err);
383  return true;
384}
385
386Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
387  while (true) {
388    if (Error Err = update())
389      return Err;
390    std::this_thread::sleep_for(Interval);
391  }
392  llvm_unreachable("updateForever loop should never end");
393}
394
395static bool hasELFMagic(StringRef FilePath) {
396  file_magic Type;
397  std::error_code EC = identify_magic(FilePath, Type);
398  if (EC)
399    return false;
400  switch (Type) {
401  case file_magic::elf:
402  case file_magic::elf_relocatable:
403  case file_magic::elf_executable:
404  case file_magic::elf_shared_object:
405  case file_magic::elf_core:
406    return true;
407  default:
408    return false;
409  }
410}
411
412Error DebuginfodCollection::findBinaries(StringRef Path) {
413  std::error_code EC;
414  sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
415  std::mutex IteratorMutex;
416  ThreadPoolTaskGroup IteratorGroup(Pool);
417  for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
418       WorkerIndex++) {
419    IteratorGroup.async([&, this]() -> void {
420      std::string FilePath;
421      while (true) {
422        {
423          // Check if iteration is over or there is an error during iteration
424          std::lock_guard<std::mutex> Guard(IteratorMutex);
425          if (I == E || EC)
426            return;
427          // Grab a file path from the directory iterator and advance the
428          // iterator.
429          FilePath = I->path();
430          I.increment(EC);
431        }
432
433        // Inspect the file at this path to determine if it is debuginfo.
434        if (!hasELFMagic(FilePath))
435          continue;
436
437        Expected<object::OwningBinary<object::Binary>> BinOrErr =
438            object::createBinary(FilePath);
439
440        if (!BinOrErr) {
441          consumeError(BinOrErr.takeError());
442          continue;
443        }
444        object::Binary *Bin = std::move(BinOrErr.get().getBinary());
445        if (!Bin->isObject())
446          continue;
447
448        // TODO: Support non-ELF binaries
449        object::ELFObjectFileBase *Object =
450            dyn_cast<object::ELFObjectFileBase>(Bin);
451        if (!Object)
452          continue;
453
454        BuildIDRef ID = getBuildID(Object);
455        if (ID.empty())
456          continue;
457
458        std::string IDString = buildIDToString(ID);
459        if (Object->hasDebugInfo()) {
460          std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
461          (void)DebugBinaries.try_emplace(IDString, std::move(FilePath));
462        } else {
463          std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
464          (void)Binaries.try_emplace(IDString, std::move(FilePath));
465        }
466      }
467    });
468  }
469  IteratorGroup.wait();
470  std::unique_lock<std::mutex> Guard(IteratorMutex);
471  if (EC)
472    return errorCodeToError(EC);
473  return Error::success();
474}
475
476Expected<std::optional<std::string>>
477DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
478  Log.push("getting binary path of ID " + buildIDToString(ID));
479  std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
480  auto Loc = Binaries.find(buildIDToString(ID));
481  if (Loc != Binaries.end()) {
482    std::string Path = Loc->getValue();
483    return Path;
484  }
485  return std::nullopt;
486}
487
488Expected<std::optional<std::string>>
489DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
490  Log.push("getting debug binary path of ID " + buildIDToString(ID));
491  std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
492  auto Loc = DebugBinaries.find(buildIDToString(ID));
493  if (Loc != DebugBinaries.end()) {
494    std::string Path = Loc->getValue();
495    return Path;
496  }
497  return std::nullopt;
498}
499
500Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
501  {
502    // Check collection; perform on-demand update if stale.
503    Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID);
504    if (!PathOrErr)
505      return PathOrErr.takeError();
506    std::optional<std::string> Path = *PathOrErr;
507    if (!Path) {
508      Expected<bool> UpdatedOrErr = updateIfStale();
509      if (!UpdatedOrErr)
510        return UpdatedOrErr.takeError();
511      if (*UpdatedOrErr) {
512        // Try once more.
513        PathOrErr = getBinaryPath(ID);
514        if (!PathOrErr)
515          return PathOrErr.takeError();
516        Path = *PathOrErr;
517      }
518    }
519    if (Path)
520      return *Path;
521  }
522
523  // Try federation.
524  Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
525  if (!PathOrErr)
526    consumeError(PathOrErr.takeError());
527
528  // Fall back to debug binary.
529  return findDebugBinaryPath(ID);
530}
531
532Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
533  // Check collection; perform on-demand update if stale.
534  Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
535  if (!PathOrErr)
536    return PathOrErr.takeError();
537  std::optional<std::string> Path = *PathOrErr;
538  if (!Path) {
539    Expected<bool> UpdatedOrErr = updateIfStale();
540    if (!UpdatedOrErr)
541      return UpdatedOrErr.takeError();
542    if (*UpdatedOrErr) {
543      // Try once more.
544      PathOrErr = getBinaryPath(ID);
545      if (!PathOrErr)
546        return PathOrErr.takeError();
547      Path = *PathOrErr;
548    }
549  }
550  if (Path)
551    return *Path;
552
553  // Try federation.
554  return getCachedOrDownloadDebuginfo(ID);
555}
556
557DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
558                                   DebuginfodCollection &Collection)
559    : Log(Log), Collection(Collection) {
560  cantFail(
561      Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
562        Log.push("GET " + Request.UrlPath);
563        std::string IDString;
564        if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
565          Request.setResponse(
566              {404, "text/plain", "Build ID is not a hex string\n"});
567          return;
568        }
569        object::BuildID ID(IDString.begin(), IDString.end());
570        Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
571        if (Error Err = PathOrErr.takeError()) {
572          consumeError(std::move(Err));
573          Request.setResponse({404, "text/plain", "Build ID not found\n"});
574          return;
575        }
576        streamFile(Request, *PathOrErr);
577      }));
578  cantFail(
579      Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
580        Log.push("GET " + Request.UrlPath);
581        std::string IDString;
582        if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
583          Request.setResponse(
584              {404, "text/plain", "Build ID is not a hex string\n"});
585          return;
586        }
587        object::BuildID ID(IDString.begin(), IDString.end());
588        Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
589        if (Error Err = PathOrErr.takeError()) {
590          consumeError(std::move(Err));
591          Request.setResponse({404, "text/plain", "Build ID not found\n"});
592          return;
593        }
594        streamFile(Request, *PathOrErr);
595      }));
596}
597
598} // namespace llvm
599