1//===-- ObjectContainerBSDArchive.cpp ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ObjectContainerBSDArchive.h"
10
11#if defined(_WIN32) || defined(__ANDROID__)
12// Defines from ar, missing on Windows
13#define ARMAG "!<arch>\n"
14#define SARMAG 8
15#define ARFMAG "`\n"
16
// Fixed-size header that precedes every archive member. Every field is ASCII
// text; this local declaration is used when <ar.h> is not included.
struct ar_hdr {
  char ar_name[16]; // member name
  char ar_date[12]; // modification time
  char ar_uid[6];   // owner id
  char ar_gid[6];   // group id
  char ar_mode[8];  // file mode
  char ar_size[10]; // member byte size
  char ar_fmag[2];  // header terminator, compared against ARFMAG
};
25#else
26#include <ar.h>
27#endif
28
29#include "lldb/Core/Module.h"
30#include "lldb/Core/ModuleSpec.h"
31#include "lldb/Core/PluginManager.h"
32#include "lldb/Host/FileSystem.h"
33#include "lldb/Symbol/ObjectFile.h"
34#include "lldb/Utility/ArchSpec.h"
35#include "lldb/Utility/Stream.h"
36#include "lldb/Utility/Timer.h"
37
38#include "llvm/Support/MemoryBuffer.h"
39
40using namespace lldb;
41using namespace lldb_private;
42
43ObjectContainerBSDArchive::Object::Object()
44    : ar_name(), modification_time(0), uid(0), gid(0), mode(0), size(0),
45      file_offset(0), file_size(0) {}
46
47void ObjectContainerBSDArchive::Object::Clear() {
48  ar_name.Clear();
49  modification_time = 0;
50  uid = 0;
51  gid = 0;
52  mode = 0;
53  size = 0;
54  file_offset = 0;
55  file_size = 0;
56}
57
58lldb::offset_t
59ObjectContainerBSDArchive::Object::Extract(const DataExtractor &data,
60                                           lldb::offset_t offset) {
61  size_t ar_name_len = 0;
62  std::string str;
63  char *err;
64
65  // File header
66  //
67  // The common format is as follows.
68  //
69  //  Offset  Length	Name            Format
70  //  0       16      File name       ASCII right padded with spaces (no spaces
71  //  allowed in file name)
72  //  16      12      File mod        Decimal as cstring right padded with
73  //  spaces
74  //  28      6       Owner ID        Decimal as cstring right padded with
75  //  spaces
76  //  34      6       Group ID        Decimal as cstring right padded with
77  //  spaces
78  //  40      8       File mode       Octal   as cstring right padded with
79  //  spaces
80  //  48      10      File byte size  Decimal as cstring right padded with
81  //  spaces
82  //  58      2       File magic      0x60 0x0A
83
84  // Make sure there is enough data for the file header and bail if not
85  if (!data.ValidOffsetForDataOfSize(offset, 60))
86    return LLDB_INVALID_OFFSET;
87
88  str.assign((const char *)data.GetData(&offset, 16), 16);
89  if (str.find("#1/") == 0) {
90    // If the name is longer than 16 bytes, or contains an embedded space then
91    // it will use this format where the length of the name is here and the
92    // name characters are after this header.
93    ar_name_len = strtoul(str.c_str() + 3, &err, 10);
94  } else {
95    // Strip off any trailing spaces.
96    const size_t last_pos = str.find_last_not_of(' ');
97    if (last_pos != std::string::npos) {
98      if (last_pos + 1 < 16)
99        str.erase(last_pos + 1);
100    }
101    ar_name.SetCString(str.c_str());
102  }
103
104  str.assign((const char *)data.GetData(&offset, 12), 12);
105  modification_time = strtoul(str.c_str(), &err, 10);
106
107  str.assign((const char *)data.GetData(&offset, 6), 6);
108  uid = strtoul(str.c_str(), &err, 10);
109
110  str.assign((const char *)data.GetData(&offset, 6), 6);
111  gid = strtoul(str.c_str(), &err, 10);
112
113  str.assign((const char *)data.GetData(&offset, 8), 8);
114  mode = strtoul(str.c_str(), &err, 8);
115
116  str.assign((const char *)data.GetData(&offset, 10), 10);
117  size = strtoul(str.c_str(), &err, 10);
118
119  str.assign((const char *)data.GetData(&offset, 2), 2);
120  if (str == ARFMAG) {
121    if (ar_name_len > 0) {
122      const void *ar_name_ptr = data.GetData(&offset, ar_name_len);
123      // Make sure there was enough data for the string value and bail if not
124      if (ar_name_ptr == nullptr)
125        return LLDB_INVALID_OFFSET;
126      str.assign((const char *)ar_name_ptr, ar_name_len);
127      ar_name.SetCString(str.c_str());
128    }
129    file_offset = offset;
130    file_size = size - ar_name_len;
131    return offset;
132  }
133  return LLDB_INVALID_OFFSET;
134}
135
136ObjectContainerBSDArchive::Archive::Archive(const lldb_private::ArchSpec &arch,
137                                            const llvm::sys::TimePoint<> &time,
138                                            lldb::offset_t file_offset,
139                                            lldb_private::DataExtractor &data)
140    : m_arch(arch), m_modification_time(time), m_file_offset(file_offset),
141      m_objects(), m_data(data) {}
142
143ObjectContainerBSDArchive::Archive::~Archive() {}
144
145size_t ObjectContainerBSDArchive::Archive::ParseObjects() {
146  DataExtractor &data = m_data;
147  std::string str;
148  lldb::offset_t offset = 0;
149  str.assign((const char *)data.GetData(&offset, SARMAG), SARMAG);
150  if (str == ARMAG) {
151    Object obj;
152    do {
153      offset = obj.Extract(data, offset);
154      if (offset == LLDB_INVALID_OFFSET)
155        break;
156      size_t obj_idx = m_objects.size();
157      m_objects.push_back(obj);
158      // Insert all of the C strings out of order for now...
159      m_object_name_to_index_map.Append(obj.ar_name, obj_idx);
160      offset += obj.file_size;
161      obj.Clear();
162    } while (data.ValidOffset(offset));
163
164    // Now sort all of the object name pointers
165    m_object_name_to_index_map.Sort();
166  }
167  return m_objects.size();
168}
169
170ObjectContainerBSDArchive::Object *
171ObjectContainerBSDArchive::Archive::FindObject(
172    ConstString object_name, const llvm::sys::TimePoint<> &object_mod_time) {
173  const ObjectNameToIndexMap::Entry *match =
174      m_object_name_to_index_map.FindFirstValueForName(object_name);
175  if (!match)
176    return nullptr;
177  if (object_mod_time == llvm::sys::TimePoint<>())
178    return &m_objects[match->value];
179
180  const uint64_t object_modification_date = llvm::sys::toTimeT(object_mod_time);
181  if (m_objects[match->value].modification_time == object_modification_date)
182    return &m_objects[match->value];
183
184  const ObjectNameToIndexMap::Entry *next_match =
185      m_object_name_to_index_map.FindNextValueForName(match);
186  while (next_match) {
187    if (m_objects[next_match->value].modification_time ==
188        object_modification_date)
189      return &m_objects[next_match->value];
190    next_match = m_object_name_to_index_map.FindNextValueForName(next_match);
191  }
192
193  return nullptr;
194}
195
196ObjectContainerBSDArchive::Archive::shared_ptr
197ObjectContainerBSDArchive::Archive::FindCachedArchive(
198    const FileSpec &file, const ArchSpec &arch,
199    const llvm::sys::TimePoint<> &time, lldb::offset_t file_offset) {
200  std::lock_guard<std::recursive_mutex> guard(Archive::GetArchiveCacheMutex());
201  shared_ptr archive_sp;
202  Archive::Map &archive_map = Archive::GetArchiveCache();
203  Archive::Map::iterator pos = archive_map.find(file);
204  // Don't cache a value for "archive_map.end()" below since we might delete an
205  // archive entry...
206  while (pos != archive_map.end() && pos->first == file) {
207    bool match = true;
208    if (arch.IsValid() &&
209        !pos->second->GetArchitecture().IsCompatibleMatch(arch))
210      match = false;
211    else if (file_offset != LLDB_INVALID_OFFSET &&
212             pos->second->GetFileOffset() != file_offset)
213      match = false;
214    if (match) {
215      if (pos->second->GetModificationTime() == time) {
216        return pos->second;
217      } else {
218        // We have a file at the same path with the same architecture whose
219        // modification time doesn't match. It doesn't make sense for us to
220        // continue to use this BSD archive since we cache only the object info
221        // which consists of file time info and also the file offset and file
222        // size of any contained objects. Since this information is now out of
223        // date, we won't get the correct information if we go and extract the
224        // file data, so we should remove the old and outdated entry.
225        archive_map.erase(pos);
226        pos = archive_map.find(file);
227        continue; // Continue to next iteration so we don't increment pos
228                  // below...
229      }
230    }
231    ++pos;
232  }
233  return archive_sp;
234}
235
236ObjectContainerBSDArchive::Archive::shared_ptr
237ObjectContainerBSDArchive::Archive::ParseAndCacheArchiveForFile(
238    const FileSpec &file, const ArchSpec &arch,
239    const llvm::sys::TimePoint<> &time, lldb::offset_t file_offset,
240    DataExtractor &data) {
241  shared_ptr archive_sp(new Archive(arch, time, file_offset, data));
242  if (archive_sp) {
243    const size_t num_objects = archive_sp->ParseObjects();
244    if (num_objects > 0) {
245      std::lock_guard<std::recursive_mutex> guard(
246          Archive::GetArchiveCacheMutex());
247      Archive::GetArchiveCache().insert(std::make_pair(file, archive_sp));
248    } else {
249      archive_sp.reset();
250    }
251  }
252  return archive_sp;
253}
254
255ObjectContainerBSDArchive::Archive::Map &
256ObjectContainerBSDArchive::Archive::GetArchiveCache() {
257  static Archive::Map g_archive_map;
258  return g_archive_map;
259}
260
261std::recursive_mutex &
262ObjectContainerBSDArchive::Archive::GetArchiveCacheMutex() {
263  static std::recursive_mutex g_archive_map_mutex;
264  return g_archive_map_mutex;
265}
266
267void ObjectContainerBSDArchive::Initialize() {
268  PluginManager::RegisterPlugin(GetPluginNameStatic(),
269                                GetPluginDescriptionStatic(), CreateInstance,
270                                GetModuleSpecifications);
271}
272
273void ObjectContainerBSDArchive::Terminate() {
274  PluginManager::UnregisterPlugin(CreateInstance);
275}
276
277lldb_private::ConstString ObjectContainerBSDArchive::GetPluginNameStatic() {
278  static ConstString g_name("bsd-archive");
279  return g_name;
280}
281
282const char *ObjectContainerBSDArchive::GetPluginDescriptionStatic() {
283  return "BSD Archive object container reader.";
284}
285
286ObjectContainer *ObjectContainerBSDArchive::CreateInstance(
287    const lldb::ModuleSP &module_sp, DataBufferSP &data_sp,
288    lldb::offset_t data_offset, const FileSpec *file,
289    lldb::offset_t file_offset, lldb::offset_t length) {
290  ConstString object_name(module_sp->GetObjectName());
291  if (!object_name)
292    return nullptr;
293
294  if (data_sp) {
295    // We have data, which means this is the first 512 bytes of the file Check
296    // to see if the magic bytes match and if they do, read the entire table of
297    // contents for the archive and cache it
298    DataExtractor data;
299    data.SetData(data_sp, data_offset, length);
300    if (file && data_sp && ObjectContainerBSDArchive::MagicBytesMatch(data)) {
301      static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
302      Timer scoped_timer(
303          func_cat,
304          "ObjectContainerBSDArchive::CreateInstance (module = %s, file = "
305          "%p, file_offset = 0x%8.8" PRIx64 ", file_size = 0x%8.8" PRIx64 ")",
306          module_sp->GetFileSpec().GetPath().c_str(),
307          static_cast<const void *>(file), static_cast<uint64_t>(file_offset),
308          static_cast<uint64_t>(length));
309
310      // Map the entire .a file to be sure that we don't lose any data if the
311      // file gets updated by a new build while this .a file is being used for
312      // debugging
313      DataBufferSP archive_data_sp =
314          FileSystem::Instance().CreateDataBuffer(*file, length, file_offset);
315      if (!archive_data_sp)
316        return nullptr;
317
318      lldb::offset_t archive_data_offset = 0;
319
320      Archive::shared_ptr archive_sp(Archive::FindCachedArchive(
321          *file, module_sp->GetArchitecture(), module_sp->GetModificationTime(),
322          file_offset));
323      std::unique_ptr<ObjectContainerBSDArchive> container_up(
324          new ObjectContainerBSDArchive(module_sp, archive_data_sp,
325                                        archive_data_offset, file, file_offset,
326                                        length));
327
328      if (container_up) {
329        if (archive_sp) {
330          // We already have this archive in our cache, use it
331          container_up->SetArchive(archive_sp);
332          return container_up.release();
333        } else if (container_up->ParseHeader())
334          return container_up.release();
335      }
336    }
337  } else {
338    // No data, just check for a cached archive
339    Archive::shared_ptr archive_sp(Archive::FindCachedArchive(
340        *file, module_sp->GetArchitecture(), module_sp->GetModificationTime(),
341        file_offset));
342    if (archive_sp) {
343      std::unique_ptr<ObjectContainerBSDArchive> container_up(
344          new ObjectContainerBSDArchive(module_sp, data_sp, data_offset, file,
345                                        file_offset, length));
346
347      if (container_up) {
348        // We already have this archive in our cache, use it
349        container_up->SetArchive(archive_sp);
350        return container_up.release();
351      }
352    }
353  }
354  return nullptr;
355}
356
357bool ObjectContainerBSDArchive::MagicBytesMatch(const DataExtractor &data) {
358  uint32_t offset = 0;
359  const char *armag = (const char *)data.PeekData(offset, sizeof(ar_hdr));
360  if (armag && ::strncmp(armag, ARMAG, SARMAG) == 0) {
361    armag += offsetof(struct ar_hdr, ar_fmag) + SARMAG;
362    if (strncmp(armag, ARFMAG, 2) == 0)
363      return true;
364  }
365  return false;
366}
367
368ObjectContainerBSDArchive::ObjectContainerBSDArchive(
369    const lldb::ModuleSP &module_sp, DataBufferSP &data_sp,
370    lldb::offset_t data_offset, const lldb_private::FileSpec *file,
371    lldb::offset_t file_offset, lldb::offset_t size)
372    : ObjectContainer(module_sp, file, file_offset, size, data_sp, data_offset),
373      m_archive_sp() {}
374void ObjectContainerBSDArchive::SetArchive(Archive::shared_ptr &archive_sp) {
375  m_archive_sp = archive_sp;
376}
377
378ObjectContainerBSDArchive::~ObjectContainerBSDArchive() {}
379
380bool ObjectContainerBSDArchive::ParseHeader() {
381  if (m_archive_sp.get() == nullptr) {
382    if (m_data.GetByteSize() > 0) {
383      ModuleSP module_sp(GetModule());
384      if (module_sp) {
385        m_archive_sp = Archive::ParseAndCacheArchiveForFile(
386            m_file, module_sp->GetArchitecture(),
387            module_sp->GetModificationTime(), m_offset, m_data);
388      }
389      // Clear the m_data that contains the entire archive data and let our
390      // m_archive_sp hold onto the data.
391      m_data.Clear();
392    }
393  }
394  return m_archive_sp.get() != nullptr;
395}
396
397void ObjectContainerBSDArchive::Dump(Stream *s) const {
398  s->Printf("%p: ", static_cast<const void *>(this));
399  s->Indent();
400  const size_t num_archs = GetNumArchitectures();
401  const size_t num_objects = GetNumObjects();
402  s->Printf("ObjectContainerBSDArchive, num_archs = %" PRIu64
403            ", num_objects = %" PRIu64 "",
404            (uint64_t)num_archs, (uint64_t)num_objects);
405  uint32_t i;
406  ArchSpec arch;
407  s->IndentMore();
408  for (i = 0; i < num_archs; i++) {
409    s->Indent();
410    GetArchitectureAtIndex(i, arch);
411    s->Printf("arch[%u] = %s\n", i, arch.GetArchitectureName());
412  }
413  for (i = 0; i < num_objects; i++) {
414    s->Indent();
415    s->Printf("object[%u] = %s\n", i, GetObjectNameAtIndex(i));
416  }
417  s->IndentLess();
418  s->EOL();
419}
420
421ObjectFileSP ObjectContainerBSDArchive::GetObjectFile(const FileSpec *file) {
422  ModuleSP module_sp(GetModule());
423  if (module_sp) {
424    if (module_sp->GetObjectName() && m_archive_sp) {
425      Object *object = m_archive_sp->FindObject(
426          module_sp->GetObjectName(), module_sp->GetObjectModificationTime());
427      if (object) {
428        lldb::offset_t data_offset = object->file_offset;
429        return ObjectFile::FindPlugin(
430            module_sp, file, m_offset + object->file_offset, object->file_size,
431            m_archive_sp->GetData().GetSharedDataBuffer(), data_offset);
432      }
433    }
434  }
435  return ObjectFileSP();
436}
437
438// PluginInterface protocol
439lldb_private::ConstString ObjectContainerBSDArchive::GetPluginName() {
440  return GetPluginNameStatic();
441}
442
443uint32_t ObjectContainerBSDArchive::GetPluginVersion() { return 1; }
444
445size_t ObjectContainerBSDArchive::GetModuleSpecifications(
446    const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
447    lldb::offset_t data_offset, lldb::offset_t file_offset,
448    lldb::offset_t file_size, lldb_private::ModuleSpecList &specs) {
449
450  // We have data, which means this is the first 512 bytes of the file Check to
451  // see if the magic bytes match and if they do, read the entire table of
452  // contents for the archive and cache it
453  DataExtractor data;
454  data.SetData(data_sp, data_offset, data_sp->GetByteSize());
455  if (!file || !data_sp || !ObjectContainerBSDArchive::MagicBytesMatch(data))
456    return 0;
457
458  const size_t initial_count = specs.GetSize();
459  llvm::sys::TimePoint<> file_mod_time = FileSystem::Instance().GetModificationTime(file);
460  Archive::shared_ptr archive_sp(
461      Archive::FindCachedArchive(file, ArchSpec(), file_mod_time, file_offset));
462  bool set_archive_arch = false;
463  if (!archive_sp) {
464    set_archive_arch = true;
465    data_sp =
466        FileSystem::Instance().CreateDataBuffer(file, file_size, file_offset);
467    if (data_sp) {
468      data.SetData(data_sp, 0, data_sp->GetByteSize());
469      archive_sp = Archive::ParseAndCacheArchiveForFile(
470          file, ArchSpec(), file_mod_time, file_offset, data);
471    }
472  }
473
474  if (archive_sp) {
475    const size_t num_objects = archive_sp->GetNumObjects();
476    for (size_t idx = 0; idx < num_objects; ++idx) {
477      const Object *object = archive_sp->GetObjectAtIndex(idx);
478      if (object) {
479        const lldb::offset_t object_file_offset =
480            file_offset + object->file_offset;
481        if (object->file_offset < file_size && file_size > object_file_offset) {
482          if (ObjectFile::GetModuleSpecifications(
483                  file, object_file_offset, file_size - object_file_offset,
484                  specs)) {
485            ModuleSpec &spec =
486                specs.GetModuleSpecRefAtIndex(specs.GetSize() - 1);
487            llvm::sys::TimePoint<> object_mod_time(
488                std::chrono::seconds(object->modification_time));
489            spec.GetObjectName() = object->ar_name;
490            spec.SetObjectOffset(object_file_offset);
491            spec.SetObjectSize(file_size - object_file_offset);
492            spec.GetObjectModificationTime() = object_mod_time;
493          }
494        }
495      }
496    }
497  }
498  const size_t end_count = specs.GetSize();
499  size_t num_specs_added = end_count - initial_count;
500  if (set_archive_arch && num_specs_added > 0) {
501    // The archive was created but we didn't have an architecture so we need to
502    // set it
503    for (size_t i = initial_count; i < end_count; ++i) {
504      ModuleSpec module_spec;
505      if (specs.GetModuleSpecAtIndex(i, module_spec)) {
506        if (module_spec.GetArchitecture().IsValid()) {
507          archive_sp->SetArchitecture(module_spec.GetArchitecture());
508          break;
509        }
510      }
511    }
512  }
513  return num_specs_added;
514}
515