1//===-- Symtab.cpp --------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include <map>
10#include <set>
11
12#include "lldb/Core/DataFileCache.h"
13#include "lldb/Core/Module.h"
14#include "lldb/Core/RichManglingContext.h"
15#include "lldb/Core/Section.h"
16#include "lldb/Symbol/ObjectFile.h"
17#include "lldb/Symbol/Symbol.h"
18#include "lldb/Symbol/SymbolContext.h"
19#include "lldb/Symbol/Symtab.h"
20#include "lldb/Target/Language.h"
21#include "lldb/Utility/DataEncoder.h"
22#include "lldb/Utility/Endian.h"
23#include "lldb/Utility/RegularExpression.h"
24#include "lldb/Utility/Stream.h"
25#include "lldb/Utility/Timer.h"
26
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/Support/DJB.h"
30
31using namespace lldb;
32using namespace lldb_private;
33
34Symtab::Symtab(ObjectFile *objfile)
35    : m_objfile(objfile), m_symbols(), m_file_addr_to_index(*this),
36      m_name_to_symbol_indices(), m_mutex(),
37      m_file_addr_to_index_computed(false), m_name_indexes_computed(false),
38      m_loaded_from_cache(false), m_saved_to_cache(false) {
39  m_name_to_symbol_indices.emplace(std::make_pair(
40      lldb::eFunctionNameTypeNone, UniqueCStringMap<uint32_t>()));
41  m_name_to_symbol_indices.emplace(std::make_pair(
42      lldb::eFunctionNameTypeBase, UniqueCStringMap<uint32_t>()));
43  m_name_to_symbol_indices.emplace(std::make_pair(
44      lldb::eFunctionNameTypeMethod, UniqueCStringMap<uint32_t>()));
45  m_name_to_symbol_indices.emplace(std::make_pair(
46      lldb::eFunctionNameTypeSelector, UniqueCStringMap<uint32_t>()));
47}
48
// Nothing to release by hand; the compiler-generated destructor is used.
Symtab::~Symtab() = default;
50
// Pre-allocates storage for `count` symbols without changing the number of
// symbols currently in the table.
void Symtab::Reserve(size_t count) {
  // Clients should grab the mutex from this symbol table and lock it manually
  // when calling this function to avoid performance issues.
  // (This function does not take m_mutex itself.)
  m_symbols.reserve(count);
}
56
57Symbol *Symtab::Resize(size_t count) {
58  // Clients should grab the mutex from this symbol table and lock it manually
59  // when calling this function to avoid performance issues.
60  m_symbols.resize(count);
61  return m_symbols.empty() ? nullptr : &m_symbols[0];
62}
63
64uint32_t Symtab::AddSymbol(const Symbol &symbol) {
65  // Clients should grab the mutex from this symbol table and lock it manually
66  // when calling this function to avoid performance issues.
67  uint32_t symbol_idx = m_symbols.size();
68  auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
69  name_to_index.Clear();
70  m_file_addr_to_index.Clear();
71  m_symbols.push_back(symbol);
72  m_file_addr_to_index_computed = false;
73  m_name_indexes_computed = false;
74  return symbol_idx;
75}
76
// Returns the number of symbols currently stored, reading the size while
// holding m_mutex.
size_t Symtab::GetNumSymbols() const {
  std::lock_guard<std::recursive_mutex> guard(m_mutex);
  return m_symbols.size();
}
81
// Discards the file-address-to-symbol index and marks it as not computed so
// that it is rebuilt the next time it is needed. Note that m_mutex is not
// taken here.
void Symtab::SectionFileAddressesChanged() {
  m_file_addr_to_index.Clear();
  m_file_addr_to_index_computed = false;
}
86
// Writes a human-readable listing of the whole symbol table to `s`.
//
// A summary (object file path and optional object name when a file spec is
// available, plus the symbol count) is printed first. If the table is
// non-empty it is followed by the column header and one line per symbol,
// ordered according to `sort_order`; otherwise only a newline is emitted.
// `target` and `name_preference` are forwarded unchanged to Symbol::Dump.
// The table's mutex is held for the duration of the dump.
void Symtab::Dump(Stream *s, Target *target, SortOrder sort_order,
                  Mangled::NamePreference name_preference) {
  std::lock_guard<std::recursive_mutex> guard(m_mutex);

  //    s->Printf("%.*p: ", (int)sizeof(void*) * 2, this);
  s->Indent();
  const FileSpec &file_spec = m_objfile->GetFileSpec();
  // The object name is only available when the object file has a module.
  const char *object_name = nullptr;
  if (m_objfile->GetModule())
    object_name = m_objfile->GetModule()->GetObjectName().GetCString();

  // Summary text; the object name, when present, is wrapped in parentheses
  // right after the path.
  if (file_spec)
    s->Printf("Symtab, file = %s%s%s%s, num_symbols = %" PRIu64,
              file_spec.GetPath().c_str(), object_name ? "(" : "",
              object_name ? object_name : "", object_name ? ")" : "",
              (uint64_t)m_symbols.size());
  else
    s->Printf("Symtab, num_symbols = %" PRIu64 "", (uint64_t)m_symbols.size());

  if (!m_symbols.empty()) {
    switch (sort_order) {
    case eSortOrderNone: {
      // Emit the symbols in storage order; the index passed to Symbol::Dump
      // is the symbol's position in m_symbols.
      s->PutCString(":\n");
      DumpSymbolHeader(s);
      const_iterator begin = m_symbols.begin();
      const_iterator end = m_symbols.end();
      for (const_iterator pos = m_symbols.begin(); pos != end; ++pos) {
        s->Indent();
        pos->Dump(s, target, std::distance(begin, pos), name_preference);
      }
    }
    break;

    case eSortOrderByName: {
      // Although we maintain a lookup by exact name map, the table isn't
      // sorted by name. So we must make the ordered symbol list up ourselves.
      s->PutCString(" (sorted by name):\n");
      DumpSymbolHeader(s);

      // Symbols with a null or empty name are left out of this listing.
      std::multimap<llvm::StringRef, const Symbol *> name_map;
      for (const_iterator pos = m_symbols.begin(), end = m_symbols.end();
           pos != end; ++pos) {
        const char *name = pos->GetName().AsCString();
        if (name && name[0])
          name_map.insert(std::make_pair(name, &(*pos)));
      }

      for (const auto &name_to_symbol : name_map) {
        const Symbol *symbol = name_to_symbol.second;
        s->Indent();
        // Recover the symbol's index from its position inside m_symbols.
        symbol->Dump(s, target, symbol - &m_symbols[0], name_preference);
      }
    } break;

    case eSortOrderByAddress:
      s->PutCString(" (sorted by address):\n");
      DumpSymbolHeader(s);
      // Build the file-address index on demand, then walk it in order; only
      // symbols that have an entry in that index are listed.
      if (!m_file_addr_to_index_computed)
        InitAddressIndexes();
      const size_t num_entries = m_file_addr_to_index.GetSize();
      for (size_t i = 0; i < num_entries; ++i) {
        s->Indent();
        const uint32_t symbol_idx = m_file_addr_to_index.GetEntryRef(i).data;
        m_symbols[symbol_idx].Dump(s, target, symbol_idx, name_preference);
      }
      break;
    }
  } else {
    s->PutCString("\n");
  }
}
158
159void Symtab::Dump(Stream *s, Target *target, std::vector<uint32_t> &indexes,
160                  Mangled::NamePreference name_preference) const {
161  std::lock_guard<std::recursive_mutex> guard(m_mutex);
162
163  const size_t num_symbols = GetNumSymbols();
164  // s->Printf("%.*p: ", (int)sizeof(void*) * 2, this);
165  s->Indent();
166  s->Printf("Symtab %" PRIu64 " symbol indexes (%" PRIu64 " symbols total):\n",
167            (uint64_t)indexes.size(), (uint64_t)m_symbols.size());
168  s->IndentMore();
169
170  if (!indexes.empty()) {
171    std::vector<uint32_t>::const_iterator pos;
172    std::vector<uint32_t>::const_iterator end = indexes.end();
173    DumpSymbolHeader(s);
174    for (pos = indexes.begin(); pos != end; ++pos) {
175      size_t idx = *pos;
176      if (idx < num_symbols) {
177        s->Indent();
178        m_symbols[idx].Dump(s, target, idx, name_preference);
179      }
180    }
181  }
182  s->IndentLess();
183}
184
185void Symtab::DumpSymbolHeader(Stream *s) {
186  s->Indent("               Debug symbol\n");
187  s->Indent("               |Synthetic symbol\n");
188  s->Indent("               ||Externally Visible\n");
189  s->Indent("               |||\n");
190  s->Indent("Index   UserID DSX Type            File Address/Value Load "
191            "Address       Size               Flags      Name\n");
192  s->Indent("------- ------ --- --------------- ------------------ "
193            "------------------ ------------------ ---------- "
194            "----------------------------------\n");
195}
196
197static int CompareSymbolID(const void *key, const void *p) {
198  const user_id_t match_uid = *(const user_id_t *)key;
199  const user_id_t symbol_uid = ((const Symbol *)p)->GetID();
200  if (match_uid < symbol_uid)
201    return -1;
202  if (match_uid > symbol_uid)
203    return 1;
204  return 0;
205}
206
207Symbol *Symtab::FindSymbolByID(lldb::user_id_t symbol_uid) const {
208  std::lock_guard<std::recursive_mutex> guard(m_mutex);
209
210  Symbol *symbol =
211      (Symbol *)::bsearch(&symbol_uid, &m_symbols[0], m_symbols.size(),
212                          sizeof(m_symbols[0]), CompareSymbolID);
213  return symbol;
214}
215
216Symbol *Symtab::SymbolAtIndex(size_t idx) {
217  // Clients should grab the mutex from this symbol table and lock it manually
218  // when calling this function to avoid performance issues.
219  if (idx < m_symbols.size())
220    return &m_symbols[idx];
221  return nullptr;
222}
223
224const Symbol *Symtab::SymbolAtIndex(size_t idx) const {
225  // Clients should grab the mutex from this symbol table and lock it manually
226  // when calling this function to avoid performance issues.
227  if (idx < m_symbols.size())
228    return &m_symbols[idx];
229  return nullptr;
230}
231
232static bool lldb_skip_name(llvm::StringRef mangled,
233                           Mangled::ManglingScheme scheme) {
234  switch (scheme) {
235  case Mangled::eManglingSchemeItanium: {
236    if (mangled.size() < 3 || !mangled.starts_with("_Z"))
237      return true;
238
239    // Avoid the following types of symbols in the index.
240    switch (mangled[2]) {
241    case 'G': // guard variables
242    case 'T': // virtual tables, VTT structures, typeinfo structures + names
243    case 'Z': // named local entities (if we eventually handle
244              // eSymbolTypeData, we will want this back)
245      return true;
246
247    default:
248      break;
249    }
250
251    // Include this name in the index.
252    return false;
253  }
254
255  // No filters for this scheme yet. Include all names in indexing.
256  case Mangled::eManglingSchemeMSVC:
257  case Mangled::eManglingSchemeRustV0:
258  case Mangled::eManglingSchemeD:
259  case Mangled::eManglingSchemeSwift:
260    return false;
261
262  // Don't try and demangle things we can't categorize.
263  case Mangled::eManglingSchemeNone:
264    return true;
265  }
266  llvm_unreachable("unknown scheme!");
267}
268
// Builds the four name lookup maps (full name, base name, method name and
// selector) from the current contents of m_symbols.
//
// The work is done at most once per invalidation: it is skipped when
// m_name_indexes_computed is already set, and the flag is set before the
// maps are populated. Trampolines and synthetic symbols with auto-generated
// names are never indexed. All four maps are sorted and shrunk to fit at the
// end.
void Symtab::InitNameIndexes() {
  // Protected function, no need to lock mutex...
  if (!m_name_indexes_computed) {
    m_name_indexes_computed = true;
    // Account the time spent here against the module's symtab index time.
    ElapsedTime elapsed(m_objfile->GetModule()->GetSymtabIndexTime());
    LLDB_SCOPED_TIMER();

    // Collect all loaded language plugins.
    std::vector<Language *> languages;
    Language::ForEach([&languages](Language *l) {
      languages.push_back(l);
      return true;
    });

    auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
    auto &basename_to_index =
        GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
    auto &method_to_index =
        GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
    auto &selector_to_index =
        GetNameToSymbolIndexMap(lldb::eFunctionNameTypeSelector);
    // Create the name index vector to be able to quickly search by name
    const size_t num_symbols = m_symbols.size();
    name_to_index.Reserve(num_symbols);

    // The "const char *" in "class_contexts" and backlog::value_type::second
    // must come from a ConstString::GetCString()
    std::set<const char *> class_contexts;
    std::vector<std::pair<NameToIndexMap::Entry, const char *>> backlog;
    backlog.reserve(num_symbols / 2);

    // Instantiation of the demangler is expensive, so better use a single one
    // for all entries during batch processing.
    RichManglingContext rmc;
    // "value" is the symbol's index in m_symbols; it is what every map entry
    // created below points back to.
    for (uint32_t value = 0; value < num_symbols; ++value) {
      Symbol *symbol = &m_symbols[value];

      // Don't let trampolines get into the lookup by name map. If we ever need
      // the trampoline symbols to be searchable by name we can remove this and
      // then possibly add a new bool to any of the Symtab functions that
      // lookup symbols by name to indicate if they want trampolines. We also
      // don't want any synthetic symbols with auto generated names in the
      // name lookups.
      if (symbol->IsTrampoline() || symbol->IsSyntheticWithAutoGeneratedName())
        continue;

      // If the symbol's name string matched a Mangled::ManglingScheme, it is
      // stored in the mangled field.
      Mangled &mangled = symbol->GetMangled();
      if (ConstString name = mangled.GetMangledName()) {
        name_to_index.Append(name, value);

        if (symbol->ContainsLinkerAnnotations()) {
          // If the symbol has linker annotations, also add the version without
          // the annotations.
          ConstString stripped = ConstString(
              m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
          name_to_index.Append(stripped, value);
        }

        // Only code and resolver symbols go through the rich demangling
        // path. When it succeeds, the demangled-name handling below is
        // skipped for this symbol.
        const SymbolType type = symbol->GetType();
        if (type == eSymbolTypeCode || type == eSymbolTypeResolver) {
          if (mangled.GetRichManglingInfo(rmc, lldb_skip_name)) {
            RegisterMangledNameEntry(value, class_contexts, backlog, rmc);
            continue;
          }
        }
      }

      // Symbol name strings that didn't match a Mangled::ManglingScheme, are
      // stored in the demangled field.
      if (ConstString name = mangled.GetDemangledName()) {
        name_to_index.Append(name, value);

        if (symbol->ContainsLinkerAnnotations()) {
          // If the symbol has linker annotations, also add the version without
          // the annotations.
          // Note that "name" itself is replaced here, so the language
          // variant lookup below sees the stripped name.
          name = ConstString(
              m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
          name_to_index.Append(name, value);
        }

        // If the demangled name turns out to be an ObjC name, and is a category
        // name, add the version without categories to the index too.
        // Each variant reported by a language plugin lands in exactly one
        // map; the checks are made in the order selector, full, method, base.
        for (Language *lang : languages) {
          for (auto variant : lang->GetMethodNameVariants(name)) {
            if (variant.GetType() & lldb::eFunctionNameTypeSelector)
              selector_to_index.Append(variant.GetName(), value);
            else if (variant.GetType() & lldb::eFunctionNameTypeFull)
              name_to_index.Append(variant.GetName(), value);
            else if (variant.GetType() & lldb::eFunctionNameTypeMethod)
              method_to_index.Append(variant.GetName(), value);
            else if (variant.GetType() & lldb::eFunctionNameTypeBase)
              basename_to_index.Append(variant.GetName(), value);
          }
        }
      }
    }

    // Now that every symbol has been seen, classify the entries whose
    // declaration context was not yet known when they were encountered.
    for (const auto &record : backlog) {
      RegisterBacklogEntry(record.first, record.second, class_contexts);
    }

    // Entries were appended unsorted; sort each map and release any spare
    // capacity.
    name_to_index.Sort();
    name_to_index.SizeToFit();
    selector_to_index.Sort();
    selector_to_index.SizeToFit();
    basename_to_index.Sort();
    basename_to_index.SizeToFit();
    method_to_index.Sort();
    method_to_index.SizeToFit();
  }
}
382
383void Symtab::RegisterMangledNameEntry(
384    uint32_t value, std::set<const char *> &class_contexts,
385    std::vector<std::pair<NameToIndexMap::Entry, const char *>> &backlog,
386    RichManglingContext &rmc) {
387  // Only register functions that have a base name.
388  llvm::StringRef base_name = rmc.ParseFunctionBaseName();
389  if (base_name.empty())
390    return;
391
392  // The base name will be our entry's name.
393  NameToIndexMap::Entry entry(ConstString(base_name), value);
394  llvm::StringRef decl_context = rmc.ParseFunctionDeclContextName();
395
396  // Register functions with no context.
397  if (decl_context.empty()) {
398    // This has to be a basename
399    auto &basename_to_index =
400        GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
401    basename_to_index.Append(entry);
402    // If there is no context (no namespaces or class scopes that come before
403    // the function name) then this also could be a fullname.
404    auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
405    name_to_index.Append(entry);
406    return;
407  }
408
409  // Make sure we have a pool-string pointer and see if we already know the
410  // context name.
411  const char *decl_context_ccstr = ConstString(decl_context).GetCString();
412  auto it = class_contexts.find(decl_context_ccstr);
413
414  auto &method_to_index =
415      GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
416  // Register constructors and destructors. They are methods and create
417  // declaration contexts.
418  if (rmc.IsCtorOrDtor()) {
419    method_to_index.Append(entry);
420    if (it == class_contexts.end())
421      class_contexts.insert(it, decl_context_ccstr);
422    return;
423  }
424
425  // Register regular methods with a known declaration context.
426  if (it != class_contexts.end()) {
427    method_to_index.Append(entry);
428    return;
429  }
430
431  // Regular methods in unknown declaration contexts are put to the backlog. We
432  // will revisit them once we processed all remaining symbols.
433  backlog.push_back(std::make_pair(entry, decl_context_ccstr));
434}
435
436void Symtab::RegisterBacklogEntry(
437    const NameToIndexMap::Entry &entry, const char *decl_context,
438    const std::set<const char *> &class_contexts) {
439  auto &method_to_index =
440      GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
441  auto it = class_contexts.find(decl_context);
442  if (it != class_contexts.end()) {
443    method_to_index.Append(entry);
444  } else {
445    // If we got here, we have something that had a context (was inside
446    // a namespace or class) yet we don't know the entry
447    method_to_index.Append(entry);
448    auto &basename_to_index =
449        GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
450    basename_to_index.Append(entry);
451  }
452}
453
// Builds the name lookup indexes now rather than on the first lookup.
// InitNameIndexes() itself does no locking, so the mutex is taken here.
void Symtab::PreloadSymbols() {
  std::lock_guard<std::recursive_mutex> guard(m_mutex);
  InitNameIndexes();
}
458
459void Symtab::AppendSymbolNamesToMap(const IndexCollection &indexes,
460                                    bool add_demangled, bool add_mangled,
461                                    NameToIndexMap &name_to_index_map) const {
462  LLDB_SCOPED_TIMER();
463  if (add_demangled || add_mangled) {
464    std::lock_guard<std::recursive_mutex> guard(m_mutex);
465
466    // Create the name index vector to be able to quickly search by name
467    const size_t num_indexes = indexes.size();
468    for (size_t i = 0; i < num_indexes; ++i) {
469      uint32_t value = indexes[i];
470      assert(i < m_symbols.size());
471      const Symbol *symbol = &m_symbols[value];
472
473      const Mangled &mangled = symbol->GetMangled();
474      if (add_demangled) {
475        if (ConstString name = mangled.GetDemangledName())
476          name_to_index_map.Append(name, value);
477      }
478
479      if (add_mangled) {
480        if (ConstString name = mangled.GetMangledName())
481          name_to_index_map.Append(name, value);
482      }
483    }
484  }
485}
486
487uint32_t Symtab::AppendSymbolIndexesWithType(SymbolType symbol_type,
488                                             std::vector<uint32_t> &indexes,
489                                             uint32_t start_idx,
490                                             uint32_t end_index) const {
491  std::lock_guard<std::recursive_mutex> guard(m_mutex);
492
493  uint32_t prev_size = indexes.size();
494
495  const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
496
497  for (uint32_t i = start_idx; i < count; ++i) {
498    if (symbol_type == eSymbolTypeAny || m_symbols[i].GetType() == symbol_type)
499      indexes.push_back(i);
500  }
501
502  return indexes.size() - prev_size;
503}
504
505uint32_t Symtab::AppendSymbolIndexesWithTypeAndFlagsValue(
506    SymbolType symbol_type, uint32_t flags_value,
507    std::vector<uint32_t> &indexes, uint32_t start_idx,
508    uint32_t end_index) const {
509  std::lock_guard<std::recursive_mutex> guard(m_mutex);
510
511  uint32_t prev_size = indexes.size();
512
513  const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
514
515  for (uint32_t i = start_idx; i < count; ++i) {
516    if ((symbol_type == eSymbolTypeAny ||
517         m_symbols[i].GetType() == symbol_type) &&
518        m_symbols[i].GetFlags() == flags_value)
519      indexes.push_back(i);
520  }
521
522  return indexes.size() - prev_size;
523}
524
525uint32_t Symtab::AppendSymbolIndexesWithType(SymbolType symbol_type,
526                                             Debug symbol_debug_type,
527                                             Visibility symbol_visibility,
528                                             std::vector<uint32_t> &indexes,
529                                             uint32_t start_idx,
530                                             uint32_t end_index) const {
531  std::lock_guard<std::recursive_mutex> guard(m_mutex);
532
533  uint32_t prev_size = indexes.size();
534
535  const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
536
537  for (uint32_t i = start_idx; i < count; ++i) {
538    if (symbol_type == eSymbolTypeAny ||
539        m_symbols[i].GetType() == symbol_type) {
540      if (CheckSymbolAtIndex(i, symbol_debug_type, symbol_visibility))
541        indexes.push_back(i);
542    }
543  }
544
545  return indexes.size() - prev_size;
546}
547
548uint32_t Symtab::GetIndexForSymbol(const Symbol *symbol) const {
549  if (!m_symbols.empty()) {
550    const Symbol *first_symbol = &m_symbols[0];
551    if (symbol >= first_symbol && symbol < first_symbol + m_symbols.size())
552      return symbol - first_symbol;
553  }
554  return UINT32_MAX;
555}
556
// Bundles a sort-mode flag with a pointer to a symbol array.
// NOTE(review): nothing in the visible part of this file refers to this
// struct -- confirm whether it is still needed.
struct SymbolSortInfo {
  // Presumably selects load-address ordering over file-address ordering;
  // verify against any remaining users.
  const bool sort_by_load_addr;
  // Symbols the sort operates on (not owned, as far as this struct shows).
  const Symbol *symbols;
};
561
562namespace {
563struct SymbolIndexComparator {
564  const std::vector<Symbol> &symbols;
565  std::vector<lldb::addr_t> &addr_cache;
566
567  // Getting from the symbol to the Address to the File Address involves some
568  // work. Since there are potentially many symbols here, and we're using this
569  // for sorting so we're going to be computing the address many times, cache
570  // that in addr_cache. The array passed in has to be the same size as the
571  // symbols array passed into the member variable symbols, and should be
572  // initialized with LLDB_INVALID_ADDRESS.
573  // NOTE: You have to make addr_cache externally and pass it in because
574  // std::stable_sort
575  // makes copies of the comparator it is initially passed in, and you end up
576  // spending huge amounts of time copying this array...
577
578  SymbolIndexComparator(const std::vector<Symbol> &s,
579                        std::vector<lldb::addr_t> &a)
580      : symbols(s), addr_cache(a) {
581    assert(symbols.size() == addr_cache.size());
582  }
583  bool operator()(uint32_t index_a, uint32_t index_b) {
584    addr_t value_a = addr_cache[index_a];
585    if (value_a == LLDB_INVALID_ADDRESS) {
586      value_a = symbols[index_a].GetAddressRef().GetFileAddress();
587      addr_cache[index_a] = value_a;
588    }
589
590    addr_t value_b = addr_cache[index_b];
591    if (value_b == LLDB_INVALID_ADDRESS) {
592      value_b = symbols[index_b].GetAddressRef().GetFileAddress();
593      addr_cache[index_b] = value_b;
594    }
595
596    if (value_a == value_b) {
597      // The if the values are equal, use the original symbol user ID
598      lldb::user_id_t uid_a = symbols[index_a].GetID();
599      lldb::user_id_t uid_b = symbols[index_b].GetID();
600      if (uid_a < uid_b)
601        return true;
602      if (uid_a > uid_b)
603        return false;
604      return false;
605    } else if (value_a < value_b)
606      return true;
607
608    return false;
609  }
610};
611}
612
613void Symtab::SortSymbolIndexesByValue(std::vector<uint32_t> &indexes,
614                                      bool remove_duplicates) const {
615  std::lock_guard<std::recursive_mutex> guard(m_mutex);
616  LLDB_SCOPED_TIMER();
617  // No need to sort if we have zero or one items...
618  if (indexes.size() <= 1)
619    return;
620
621  // Sort the indexes in place using std::stable_sort.
622  // NOTE: The use of std::stable_sort instead of llvm::sort here is strictly
623  // for performance, not correctness.  The indexes vector tends to be "close"
624  // to sorted, which the stable sort handles better.
625
626  std::vector<lldb::addr_t> addr_cache(m_symbols.size(), LLDB_INVALID_ADDRESS);
627
628  SymbolIndexComparator comparator(m_symbols, addr_cache);
629  std::stable_sort(indexes.begin(), indexes.end(), comparator);
630
631  // Remove any duplicates if requested
632  if (remove_duplicates) {
633    auto last = std::unique(indexes.begin(), indexes.end());
634    indexes.erase(last, indexes.end());
635  }
636}
637
638uint32_t Symtab::GetNameIndexes(ConstString symbol_name,
639                                std::vector<uint32_t> &indexes) {
640  auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
641  const uint32_t count = name_to_index.GetValues(symbol_name, indexes);
642  if (count)
643    return count;
644  // Synthetic symbol names are not added to the name indexes, but they start
645  // with a prefix and end with a the symbol UserID. This allows users to find
646  // these symbols without having to add them to the name indexes. These
647  // queries will not happen very often since the names don't mean anything, so
648  // performance is not paramount in this case.
649  llvm::StringRef name = symbol_name.GetStringRef();
650  // String the synthetic prefix if the name starts with it.
651  if (!name.consume_front(Symbol::GetSyntheticSymbolPrefix()))
652    return 0; // Not a synthetic symbol name
653
654  // Extract the user ID from the symbol name
655  unsigned long long uid = 0;
656  if (getAsUnsignedInteger(name, /*Radix=*/10, uid))
657    return 0; // Failed to extract the user ID as an integer
658  Symbol *symbol = FindSymbolByID(uid);
659  if (symbol == nullptr)
660    return 0;
661  const uint32_t symbol_idx = GetIndexForSymbol(symbol);
662  if (symbol_idx == UINT32_MAX)
663    return 0;
664  indexes.push_back(symbol_idx);
665  return 1;
666}
667
668uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
669                                             std::vector<uint32_t> &indexes) {
670  std::lock_guard<std::recursive_mutex> guard(m_mutex);
671
672  if (symbol_name) {
673    if (!m_name_indexes_computed)
674      InitNameIndexes();
675
676    return GetNameIndexes(symbol_name, indexes);
677  }
678  return 0;
679}
680
681uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
682                                             Debug symbol_debug_type,
683                                             Visibility symbol_visibility,
684                                             std::vector<uint32_t> &indexes) {
685  std::lock_guard<std::recursive_mutex> guard(m_mutex);
686
687  LLDB_SCOPED_TIMER();
688  if (symbol_name) {
689    const size_t old_size = indexes.size();
690    if (!m_name_indexes_computed)
691      InitNameIndexes();
692
693    std::vector<uint32_t> all_name_indexes;
694    const size_t name_match_count =
695        GetNameIndexes(symbol_name, all_name_indexes);
696    for (size_t i = 0; i < name_match_count; ++i) {
697      if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type,
698                             symbol_visibility))
699        indexes.push_back(all_name_indexes[i]);
700    }
701    return indexes.size() - old_size;
702  }
703  return 0;
704}
705
706uint32_t
707Symtab::AppendSymbolIndexesWithNameAndType(ConstString symbol_name,
708                                           SymbolType symbol_type,
709                                           std::vector<uint32_t> &indexes) {
710  std::lock_guard<std::recursive_mutex> guard(m_mutex);
711
712  if (AppendSymbolIndexesWithName(symbol_name, indexes) > 0) {
713    std::vector<uint32_t>::iterator pos = indexes.begin();
714    while (pos != indexes.end()) {
715      if (symbol_type == eSymbolTypeAny ||
716          m_symbols[*pos].GetType() == symbol_type)
717        ++pos;
718      else
719        pos = indexes.erase(pos);
720    }
721  }
722  return indexes.size();
723}
724
725uint32_t Symtab::AppendSymbolIndexesWithNameAndType(
726    ConstString symbol_name, SymbolType symbol_type,
727    Debug symbol_debug_type, Visibility symbol_visibility,
728    std::vector<uint32_t> &indexes) {
729  std::lock_guard<std::recursive_mutex> guard(m_mutex);
730
731  if (AppendSymbolIndexesWithName(symbol_name, symbol_debug_type,
732                                  symbol_visibility, indexes) > 0) {
733    std::vector<uint32_t>::iterator pos = indexes.begin();
734    while (pos != indexes.end()) {
735      if (symbol_type == eSymbolTypeAny ||
736          m_symbols[*pos].GetType() == symbol_type)
737        ++pos;
738      else
739        pos = indexes.erase(pos);
740    }
741  }
742  return indexes.size();
743}
744
745uint32_t Symtab::AppendSymbolIndexesMatchingRegExAndType(
746    const RegularExpression &regexp, SymbolType symbol_type,
747    std::vector<uint32_t> &indexes, Mangled::NamePreference name_preference) {
748  std::lock_guard<std::recursive_mutex> guard(m_mutex);
749
750  uint32_t prev_size = indexes.size();
751  uint32_t sym_end = m_symbols.size();
752
753  for (uint32_t i = 0; i < sym_end; i++) {
754    if (symbol_type == eSymbolTypeAny ||
755        m_symbols[i].GetType() == symbol_type) {
756      const char *name =
757          m_symbols[i].GetMangled().GetName(name_preference).AsCString();
758      if (name) {
759        if (regexp.Execute(name))
760          indexes.push_back(i);
761      }
762    }
763  }
764  return indexes.size() - prev_size;
765}
766
767uint32_t Symtab::AppendSymbolIndexesMatchingRegExAndType(
768    const RegularExpression &regexp, SymbolType symbol_type,
769    Debug symbol_debug_type, Visibility symbol_visibility,
770    std::vector<uint32_t> &indexes, Mangled::NamePreference name_preference) {
771  std::lock_guard<std::recursive_mutex> guard(m_mutex);
772
773  uint32_t prev_size = indexes.size();
774  uint32_t sym_end = m_symbols.size();
775
776  for (uint32_t i = 0; i < sym_end; i++) {
777    if (symbol_type == eSymbolTypeAny ||
778        m_symbols[i].GetType() == symbol_type) {
779      if (!CheckSymbolAtIndex(i, symbol_debug_type, symbol_visibility))
780        continue;
781
782      const char *name =
783          m_symbols[i].GetMangled().GetName(name_preference).AsCString();
784      if (name) {
785        if (regexp.Execute(name))
786          indexes.push_back(i);
787      }
788    }
789  }
790  return indexes.size() - prev_size;
791}
792
793Symbol *Symtab::FindSymbolWithType(SymbolType symbol_type,
794                                   Debug symbol_debug_type,
795                                   Visibility symbol_visibility,
796                                   uint32_t &start_idx) {
797  std::lock_guard<std::recursive_mutex> guard(m_mutex);
798
799  const size_t count = m_symbols.size();
800  for (size_t idx = start_idx; idx < count; ++idx) {
801    if (symbol_type == eSymbolTypeAny ||
802        m_symbols[idx].GetType() == symbol_type) {
803      if (CheckSymbolAtIndex(idx, symbol_debug_type, symbol_visibility)) {
804        start_idx = idx;
805        return &m_symbols[idx];
806      }
807    }
808  }
809  return nullptr;
810}
811
812void
813Symtab::FindAllSymbolsWithNameAndType(ConstString name,
814                                      SymbolType symbol_type,
815                                      std::vector<uint32_t> &symbol_indexes) {
816  std::lock_guard<std::recursive_mutex> guard(m_mutex);
817
818  // Initialize all of the lookup by name indexes before converting NAME to a
819  // uniqued string NAME_STR below.
820  if (!m_name_indexes_computed)
821    InitNameIndexes();
822
823  if (name) {
824    // The string table did have a string that matched, but we need to check
825    // the symbols and match the symbol_type if any was given.
826    AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_indexes);
827  }
828}
829
830void Symtab::FindAllSymbolsWithNameAndType(
831    ConstString name, SymbolType symbol_type, Debug symbol_debug_type,
832    Visibility symbol_visibility, std::vector<uint32_t> &symbol_indexes) {
833  std::lock_guard<std::recursive_mutex> guard(m_mutex);
834
835  LLDB_SCOPED_TIMER();
836  // Initialize all of the lookup by name indexes before converting NAME to a
837  // uniqued string NAME_STR below.
838  if (!m_name_indexes_computed)
839    InitNameIndexes();
840
841  if (name) {
842    // The string table did have a string that matched, but we need to check
843    // the symbols and match the symbol_type if any was given.
844    AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_debug_type,
845                                       symbol_visibility, symbol_indexes);
846  }
847}
848
// Thread-safe entry point for regular-expression symbol lookup: takes m_mutex
// and forwards every argument unchanged to
// AppendSymbolIndexesMatchingRegExAndType(), which fills in SYMBOL_INDEXES.
// The helper's return value is discarded.
void Symtab::FindAllSymbolsMatchingRexExAndType(
    const RegularExpression &regex, SymbolType symbol_type,
    Debug symbol_debug_type, Visibility symbol_visibility,
    std::vector<uint32_t> &symbol_indexes,
    Mangled::NamePreference name_preference) {
  std::lock_guard<std::recursive_mutex> guard(m_mutex);

  AppendSymbolIndexesMatchingRegExAndType(regex, symbol_type, symbol_debug_type,
                                          symbol_visibility, symbol_indexes,
                                          name_preference);
}
860
861Symbol *Symtab::FindFirstSymbolWithNameAndType(ConstString name,
862                                               SymbolType symbol_type,
863                                               Debug symbol_debug_type,
864                                               Visibility symbol_visibility) {
865  std::lock_guard<std::recursive_mutex> guard(m_mutex);
866  LLDB_SCOPED_TIMER();
867  if (!m_name_indexes_computed)
868    InitNameIndexes();
869
870  if (name) {
871    std::vector<uint32_t> matching_indexes;
872    // The string table did have a string that matched, but we need to check
873    // the symbols and match the symbol_type if any was given.
874    if (AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_debug_type,
875                                           symbol_visibility,
876                                           matching_indexes)) {
877      std::vector<uint32_t>::const_iterator pos, end = matching_indexes.end();
878      for (pos = matching_indexes.begin(); pos != end; ++pos) {
879        Symbol *symbol = SymbolAtIndex(*pos);
880
881        if (symbol->Compare(name, symbol_type))
882          return symbol;
883      }
884    }
885  }
886  return nullptr;
887}
888
889typedef struct {
890  const Symtab *symtab;
891  const addr_t file_addr;
892  Symbol *match_symbol;
893  const uint32_t *match_index_ptr;
894  addr_t match_offset;
895} SymbolSearchInfo;
896
897// Add all the section file start address & size to the RangeVector, recusively
898// adding any children sections.
899static void AddSectionsToRangeMap(SectionList *sectlist,
900                                  RangeVector<addr_t, addr_t> &section_ranges) {
901  const int num_sections = sectlist->GetNumSections(0);
902  for (int i = 0; i < num_sections; i++) {
903    SectionSP sect_sp = sectlist->GetSectionAtIndex(i);
904    if (sect_sp) {
905      SectionList &child_sectlist = sect_sp->GetChildren();
906
907      // If this section has children, add the children to the RangeVector.
908      // Else add this section to the RangeVector.
909      if (child_sectlist.GetNumSections(0) > 0) {
910        AddSectionsToRangeMap(&child_sectlist, section_ranges);
911      } else {
912        size_t size = sect_sp->GetByteSize();
913        if (size > 0) {
914          addr_t base_addr = sect_sp->GetFileAddress();
915          RangeVector<addr_t, addr_t>::Entry entry;
916          entry.SetRangeBase(base_addr);
917          entry.SetByteSize(size);
918          section_ranges.Append(entry);
919        }
920      }
921    }
922  }
923}
924
// Build m_file_addr_to_index, the map from file address ranges to symbol
// indexes, and synthesize a size for every address symbol whose size is zero.
//
// The work is done at most once (guarded by m_file_addr_to_index_computed) and
// is skipped entirely while the symbol table is empty. A synthesized size is
// written back to both the range entry and the Symbol itself, and the symbol
// is flagged via SetSizeIsSynthesized(true).
void Symtab::InitAddressIndexes() {
  // Protected function, no need to lock mutex...
  if (!m_file_addr_to_index_computed && !m_symbols.empty()) {
    // Mark the index as computed up front, before any of the work below.
    m_file_addr_to_index_computed = true;

    // Pass 1: add one range entry per symbol whose value is an address. The
    // entry's data is the symbol's index in m_symbols.
    FileRangeToIndexMap::Entry entry;
    const_iterator begin = m_symbols.begin();
    const_iterator end = m_symbols.end();
    for (const_iterator pos = m_symbols.begin(); pos != end; ++pos) {
      if (pos->ValueIsAddress()) {
        entry.SetRangeBase(pos->GetAddressRef().GetFileAddress());
        entry.SetByteSize(pos->GetByteSize());
        entry.data = std::distance(begin, pos);
        m_file_addr_to_index.Append(entry);
      }
    }
    const size_t num_entries = m_file_addr_to_index.GetSize();
    if (num_entries > 0) {
      // Sort so that the forward scan below can find the next symbol with a
      // higher address.
      m_file_addr_to_index.Sort();

      // Create a RangeVector with the start & size of all the sections for
      // this objfile.  We'll need to check this for any FileRangeToIndexMap
      // entries with an uninitialized size, which could potentially be a large
      // number so reconstituting the weak pointer is busywork when it is
      // invariant information.
      SectionList *sectlist = m_objfile->GetSectionList();
      RangeVector<addr_t, addr_t> section_ranges;
      if (sectlist) {
        AddSectionsToRangeMap(sectlist, section_ranges);
        section_ranges.Sort();
      }

      // Iterate through the FileRangeToIndexMap and fill in the size for any
      // entries that didn't already have a size from the Symbol (e.g. if we
      // have a plain linker symbol with an address only, instead of debug info
      // where we get an address and a size and a type, etc.)
      for (size_t i = 0; i < num_entries; i++) {
        // Note: this pointer shadows the by-value "entry" declared above.
        FileRangeToIndexMap::Entry *entry =
            m_file_addr_to_index.GetMutableEntryAtIndex(i);
        if (entry->GetByteSize() == 0) {
          addr_t curr_base_addr = entry->GetRangeBase();
          const RangeVector<addr_t, addr_t>::Entry *containing_section =
              section_ranges.FindEntryThatContains(curr_base_addr);

          // Use the end of the section as the default max size of the symbol
          addr_t sym_size = 0;
          if (containing_section) {
            // Bytes remaining from the symbol's address to the section's end.
            sym_size =
                containing_section->GetByteSize() -
                (entry->GetRangeBase() - containing_section->GetRangeBase());
          }

          // Scan forward for the first entry with a strictly greater base
          // address. j starts at i, so the first comparison is the entry
          // against itself and never passes the test.
          for (size_t j = i; j < num_entries; j++) {
            FileRangeToIndexMap::Entry *next_entry =
                m_file_addr_to_index.GetMutableEntryAtIndex(j);
            addr_t next_base_addr = next_entry->GetRangeBase();
            if (next_base_addr > curr_base_addr) {
              addr_t size_to_next_symbol = next_base_addr - curr_base_addr;

              // Take the difference between this symbol and the next one as
              // its size, if it is less than the size of the section.
              if (sym_size == 0 || size_to_next_symbol < sym_size) {
                sym_size = size_to_next_symbol;
              }
              break;
            }
          }

          // With no containing section and no later symbol, sym_size is still
          // zero and the entry keeps its zero size.
          if (sym_size > 0) {
            entry->SetByteSize(sym_size);
            Symbol &symbol = m_symbols[entry->data];
            symbol.SetByteSize(sym_size);
            symbol.SetSizeIsSynthesized(true);
          }
        }
      }

      // Sort again in case the range size changes the ordering
      m_file_addr_to_index.Sort();
    }
  }
}
1007
// Finish building the symbol table while holding m_mutex: build the address
// index (which also calculates symbol sizes), release unused capacity in
// m_symbols, and then hand the table to SaveToCache(), which returns early
// when caching is not enabled.
void Symtab::Finalize() {
  std::lock_guard<std::recursive_mutex> guard(m_mutex);
  // Calculate the size of symbols inside InitAddressIndexes.
  InitAddressIndexes();
  // Shrink to fit the symbols so we don't waste memory
  m_symbols.shrink_to_fit();
  // Done last, after the address index has been built and storage trimmed.
  SaveToCache();
}
1016
1017Symbol *Symtab::FindSymbolAtFileAddress(addr_t file_addr) {
1018  std::lock_guard<std::recursive_mutex> guard(m_mutex);
1019  if (!m_file_addr_to_index_computed)
1020    InitAddressIndexes();
1021
1022  const FileRangeToIndexMap::Entry *entry =
1023      m_file_addr_to_index.FindEntryStartsAt(file_addr);
1024  if (entry) {
1025    Symbol *symbol = SymbolAtIndex(entry->data);
1026    if (symbol->GetFileAddress() == file_addr)
1027      return symbol;
1028  }
1029  return nullptr;
1030}
1031
1032Symbol *Symtab::FindSymbolContainingFileAddress(addr_t file_addr) {
1033  std::lock_guard<std::recursive_mutex> guard(m_mutex);
1034
1035  if (!m_file_addr_to_index_computed)
1036    InitAddressIndexes();
1037
1038  const FileRangeToIndexMap::Entry *entry =
1039      m_file_addr_to_index.FindEntryThatContains(file_addr);
1040  if (entry) {
1041    Symbol *symbol = SymbolAtIndex(entry->data);
1042    if (symbol->ContainsFileAddress(file_addr))
1043      return symbol;
1044  }
1045  return nullptr;
1046}
1047
1048void Symtab::ForEachSymbolContainingFileAddress(
1049    addr_t file_addr, std::function<bool(Symbol *)> const &callback) {
1050  std::lock_guard<std::recursive_mutex> guard(m_mutex);
1051
1052  if (!m_file_addr_to_index_computed)
1053    InitAddressIndexes();
1054
1055  std::vector<uint32_t> all_addr_indexes;
1056
1057  // Get all symbols with file_addr
1058  const size_t addr_match_count =
1059      m_file_addr_to_index.FindEntryIndexesThatContain(file_addr,
1060                                                       all_addr_indexes);
1061
1062  for (size_t i = 0; i < addr_match_count; ++i) {
1063    Symbol *symbol = SymbolAtIndex(all_addr_indexes[i]);
1064    if (symbol->ContainsFileAddress(file_addr)) {
1065      if (!callback(symbol))
1066        break;
1067    }
1068  }
1069}
1070
1071void Symtab::SymbolIndicesToSymbolContextList(
1072    std::vector<uint32_t> &symbol_indexes, SymbolContextList &sc_list) {
1073  // No need to protect this call using m_mutex all other method calls are
1074  // already thread safe.
1075
1076  const bool merge_symbol_into_function = true;
1077  size_t num_indices = symbol_indexes.size();
1078  if (num_indices > 0) {
1079    SymbolContext sc;
1080    sc.module_sp = m_objfile->GetModule();
1081    for (size_t i = 0; i < num_indices; i++) {
1082      sc.symbol = SymbolAtIndex(symbol_indexes[i]);
1083      if (sc.symbol)
1084        sc_list.AppendIfUnique(sc, merge_symbol_into_function);
1085    }
1086  }
1087}
1088
1089void Symtab::FindFunctionSymbols(ConstString name, uint32_t name_type_mask,
1090                                 SymbolContextList &sc_list) {
1091  std::vector<uint32_t> symbol_indexes;
1092
1093  // eFunctionNameTypeAuto should be pre-resolved by a call to
1094  // Module::LookupInfo::LookupInfo()
1095  assert((name_type_mask & eFunctionNameTypeAuto) == 0);
1096
1097  if (name_type_mask & (eFunctionNameTypeBase | eFunctionNameTypeFull)) {
1098    std::vector<uint32_t> temp_symbol_indexes;
1099    FindAllSymbolsWithNameAndType(name, eSymbolTypeAny, temp_symbol_indexes);
1100
1101    unsigned temp_symbol_indexes_size = temp_symbol_indexes.size();
1102    if (temp_symbol_indexes_size > 0) {
1103      std::lock_guard<std::recursive_mutex> guard(m_mutex);
1104      for (unsigned i = 0; i < temp_symbol_indexes_size; i++) {
1105        SymbolContext sym_ctx;
1106        sym_ctx.symbol = SymbolAtIndex(temp_symbol_indexes[i]);
1107        if (sym_ctx.symbol) {
1108          switch (sym_ctx.symbol->GetType()) {
1109          case eSymbolTypeCode:
1110          case eSymbolTypeResolver:
1111          case eSymbolTypeReExported:
1112          case eSymbolTypeAbsolute:
1113            symbol_indexes.push_back(temp_symbol_indexes[i]);
1114            break;
1115          default:
1116            break;
1117          }
1118        }
1119      }
1120    }
1121  }
1122
1123  if (!m_name_indexes_computed)
1124    InitNameIndexes();
1125
1126  for (lldb::FunctionNameType type :
1127       {lldb::eFunctionNameTypeBase, lldb::eFunctionNameTypeMethod,
1128        lldb::eFunctionNameTypeSelector}) {
1129    if (name_type_mask & type) {
1130      auto map = GetNameToSymbolIndexMap(type);
1131
1132      const UniqueCStringMap<uint32_t>::Entry *match;
1133      for (match = map.FindFirstValueForName(name); match != nullptr;
1134           match = map.FindNextValueForName(match)) {
1135        symbol_indexes.push_back(match->value);
1136      }
1137    }
1138  }
1139
1140  if (!symbol_indexes.empty()) {
1141    llvm::sort(symbol_indexes);
1142    symbol_indexes.erase(
1143        std::unique(symbol_indexes.begin(), symbol_indexes.end()),
1144        symbol_indexes.end());
1145    SymbolIndicesToSymbolContextList(symbol_indexes, sc_list);
1146  }
1147}
1148
1149const Symbol *Symtab::GetParent(Symbol *child_symbol) const {
1150  uint32_t child_idx = GetIndexForSymbol(child_symbol);
1151  if (child_idx != UINT32_MAX && child_idx > 0) {
1152    for (uint32_t idx = child_idx - 1; idx != UINT32_MAX; --idx) {
1153      const Symbol *symbol = SymbolAtIndex(idx);
1154      const uint32_t sibling_idx = symbol->GetSiblingIndex();
1155      if (sibling_idx != UINT32_MAX && sibling_idx > child_idx)
1156        return symbol;
1157    }
1158  }
1159  return nullptr;
1160}
1161
1162std::string Symtab::GetCacheKey() {
1163  std::string key;
1164  llvm::raw_string_ostream strm(key);
1165  // Symbol table can come from different object files for the same module. A
1166  // module can have one object file as the main executable and might have
1167  // another object file in a separate symbol file.
1168  strm << m_objfile->GetModule()->GetCacheKey() << "-symtab-"
1169      << llvm::format_hex(m_objfile->GetCacheHash(), 10);
1170  return strm.str();
1171}
1172
1173void Symtab::SaveToCache() {
1174  DataFileCache *cache = Module::GetIndexCache();
1175  if (!cache)
1176    return; // Caching is not enabled.
1177  InitNameIndexes(); // Init the name indexes so we can cache them as well.
1178  const auto byte_order = endian::InlHostByteOrder();
1179  DataEncoder file(byte_order, /*addr_size=*/8);
1180  // Encode will return false if the symbol table's object file doesn't have
1181  // anything to make a signature from.
1182  if (Encode(file))
1183    if (cache->SetCachedData(GetCacheKey(), file.GetData()))
1184      SetWasSavedToCache();
1185}
1186
// Four-character tag that EncodeCStrMap() writes at the start of each encoded
// UniqueCStringMap and that DecodeCStrMap() checks before reading one.
constexpr llvm::StringLiteral kIdentifierCStrMap("CMAP");
1188
1189static void EncodeCStrMap(DataEncoder &encoder, ConstStringTable &strtab,
1190                          const UniqueCStringMap<uint32_t> &cstr_map) {
1191  encoder.AppendData(kIdentifierCStrMap);
1192  encoder.AppendU32(cstr_map.GetSize());
1193  for (const auto &entry: cstr_map) {
1194    // Make sure there are no empty strings.
1195    assert((bool)entry.cstring);
1196    encoder.AppendU32(strtab.Add(entry.cstring));
1197    encoder.AppendU32(entry.value);
1198  }
1199}
1200
1201bool DecodeCStrMap(const DataExtractor &data, lldb::offset_t *offset_ptr,
1202                   const StringTableReader &strtab,
1203                   UniqueCStringMap<uint32_t> &cstr_map) {
1204  llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4);
1205  if (identifier != kIdentifierCStrMap)
1206    return false;
1207  const uint32_t count = data.GetU32(offset_ptr);
1208  cstr_map.Reserve(count);
1209  for (uint32_t i=0; i<count; ++i)
1210  {
1211    llvm::StringRef str(strtab.Get(data.GetU32(offset_ptr)));
1212    uint32_t value = data.GetU32(offset_ptr);
1213    // No empty strings in the name indexes in Symtab
1214    if (str.empty())
1215      return false;
1216    cstr_map.Append(ConstString(str), value);
1217  }
1218  // We must sort the UniqueCStringMap after decoding it since it is a vector
1219  // of UniqueCStringMap::Entry objects which contain a ConstString and type T.
1220  // ConstString objects are sorted by "const char *" and then type T and
1221  // the "const char *" are point values that will depend on the order in which
1222  // ConstString objects are created and in which of the 256 string pools they
1223  // are created in. So after we decode all of the entries, we must sort the
1224  // name map to ensure name lookups succeed. If we encode and decode within
1225  // the same process we wouldn't need to sort, so unit testing didn't catch
1226  // this issue when first checked in.
1227  cstr_map.Sort();
1228  return true;
1229}
1230
// Four-character tag that marks the start of the encoded symbol table data;
// written by Symtab::Encode() and checked by Symtab::Decode().
constexpr llvm::StringLiteral kIdentifierSymbolTable("SYMB");
// Version of the encoded symbol table format. Symtab::Decode() rejects cached
// data whose version differs from this value.
constexpr uint32_t CURRENT_CACHE_VERSION = 1;
1233
1234/// The encoding format for the symbol table is as follows:
1235///
1236/// Signature signature;
1237/// ConstStringTable strtab;
1238/// Identifier four character code: 'SYMB'
1239/// uint32_t version;
1240/// uint32_t num_symbols;
1241/// Symbol symbols[num_symbols];
1242/// uint8_t num_cstr_maps;
1243/// UniqueCStringMap<uint32_t> cstr_maps[num_cstr_maps]
1244bool Symtab::Encode(DataEncoder &encoder) const {
1245  // Name indexes must be computed before calling this function.
1246  assert(m_name_indexes_computed);
1247
1248  // Encode the object file's signature
1249  CacheSignature signature(m_objfile);
1250  if (!signature.Encode(encoder))
1251    return false;
1252  ConstStringTable strtab;
1253
1254  // Encoder the symbol table into a separate encoder first. This allows us
1255  // gather all of the strings we willl need in "strtab" as we will need to
1256  // write the string table out before the symbol table.
1257  DataEncoder symtab_encoder(encoder.GetByteOrder(),
1258                              encoder.GetAddressByteSize());
1259  symtab_encoder.AppendData(kIdentifierSymbolTable);
1260  // Encode the symtab data version.
1261  symtab_encoder.AppendU32(CURRENT_CACHE_VERSION);
1262  // Encode the number of symbols.
1263  symtab_encoder.AppendU32(m_symbols.size());
1264  // Encode the symbol data for all symbols.
1265  for (const auto &symbol: m_symbols)
1266    symbol.Encode(symtab_encoder, strtab);
1267
1268  // Emit a byte for how many C string maps we emit. We will fix this up after
1269  // we emit the C string maps since we skip emitting C string maps if they are
1270  // empty.
1271  size_t num_cmaps_offset = symtab_encoder.GetByteSize();
1272  uint8_t num_cmaps = 0;
1273  symtab_encoder.AppendU8(0);
1274  for (const auto &pair: m_name_to_symbol_indices) {
1275    if (pair.second.IsEmpty())
1276      continue;
1277    ++num_cmaps;
1278    symtab_encoder.AppendU8(pair.first);
1279    EncodeCStrMap(symtab_encoder, strtab, pair.second);
1280  }
1281  if (num_cmaps > 0)
1282    symtab_encoder.PutU8(num_cmaps_offset, num_cmaps);
1283
1284  // Now that all strings have been gathered, we will emit the string table.
1285  strtab.Encode(encoder);
1286  // Followed by the symbol table data.
1287  encoder.AppendData(symtab_encoder.GetData());
1288  return true;
1289}
1290
1291bool Symtab::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
1292                    bool &signature_mismatch) {
1293  signature_mismatch = false;
1294  CacheSignature signature;
1295  StringTableReader strtab;
1296  { // Scope for "elapsed" object below so it can measure the time parse.
1297    ElapsedTime elapsed(m_objfile->GetModule()->GetSymtabParseTime());
1298    if (!signature.Decode(data, offset_ptr))
1299      return false;
1300    if (CacheSignature(m_objfile) != signature) {
1301      signature_mismatch = true;
1302      return false;
1303    }
1304    // We now decode the string table for all strings in the data cache file.
1305    if (!strtab.Decode(data, offset_ptr))
1306      return false;
1307
1308    // And now we can decode the symbol table with string table we just decoded.
1309    llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4);
1310    if (identifier != kIdentifierSymbolTable)
1311      return false;
1312    const uint32_t version = data.GetU32(offset_ptr);
1313    if (version != CURRENT_CACHE_VERSION)
1314      return false;
1315    const uint32_t num_symbols = data.GetU32(offset_ptr);
1316    if (num_symbols == 0)
1317      return true;
1318    m_symbols.resize(num_symbols);
1319    SectionList *sections = m_objfile->GetModule()->GetSectionList();
1320    for (uint32_t i=0; i<num_symbols; ++i) {
1321      if (!m_symbols[i].Decode(data, offset_ptr, sections, strtab))
1322        return false;
1323    }
1324  }
1325
1326  { // Scope for "elapsed" object below so it can measure the time to index.
1327    ElapsedTime elapsed(m_objfile->GetModule()->GetSymtabIndexTime());
1328    const uint8_t num_cstr_maps = data.GetU8(offset_ptr);
1329    for (uint8_t i=0; i<num_cstr_maps; ++i) {
1330      uint8_t type = data.GetU8(offset_ptr);
1331      UniqueCStringMap<uint32_t> &cstr_map =
1332          GetNameToSymbolIndexMap((lldb::FunctionNameType)type);
1333      if (!DecodeCStrMap(data, offset_ptr, strtab, cstr_map))
1334        return false;
1335    }
1336    m_name_indexes_computed = true;
1337  }
1338  return true;
1339}
1340
1341bool Symtab::LoadFromCache() {
1342  DataFileCache *cache = Module::GetIndexCache();
1343  if (!cache)
1344    return false;
1345
1346  std::unique_ptr<llvm::MemoryBuffer> mem_buffer_up =
1347      cache->GetCachedData(GetCacheKey());
1348  if (!mem_buffer_up)
1349    return false;
1350  DataExtractor data(mem_buffer_up->getBufferStart(),
1351                     mem_buffer_up->getBufferSize(),
1352                     m_objfile->GetByteOrder(),
1353                     m_objfile->GetAddressByteSize());
1354  bool signature_mismatch = false;
1355  lldb::offset_t offset = 0;
1356  const bool result = Decode(data, &offset, signature_mismatch);
1357  if (signature_mismatch)
1358    cache->RemoveCacheFile(GetCacheKey());
1359  if (result)
1360    SetWasLoadedFromCache();
1361  return result;
1362}
1363