1//===-- ConstString.cpp ---------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Utility/ConstString.h"
10
11#include "lldb/Utility/Stream.h"
12
13#include "llvm/ADT/StringMap.h"
14#include "llvm/ADT/iterator.h"
15#include "llvm/Support/Allocator.h"
16#include "llvm/Support/DJB.h"
17#include "llvm/Support/FormatProviders.h"
18#include "llvm/Support/RWMutex.h"
19#include "llvm/Support/Threading.h"
20
21#include <array>
22#include <utility>
23
24#include <cinttypes>
25#include <cstdint>
26#include <cstring>
27
28using namespace lldb_private;
29
30class Pool {
31public:
32  /// The default BumpPtrAllocatorImpl slab size.
33  static const size_t AllocatorSlabSize = 4096;
34  static const size_t SizeThreshold = AllocatorSlabSize;
35  /// Every Pool has its own allocator which receives an equal share of
36  /// the ConstString allocations. This means that when allocating many
37  /// ConstStrings, every allocator sees only its small share of allocations and
38  /// assumes LLDB only allocated a small amount of memory so far. In reality
39  /// LLDB allocated a total memory that is N times as large as what the
40  /// allocator sees (where N is the number of string pools). This causes that
41  /// the BumpPtrAllocator continues a long time to allocate memory in small
42  /// chunks which only makes sense when allocating a small amount of memory
43  /// (which is true from the perspective of a single allocator). On some
44  /// systems doing all these small memory allocations causes LLDB to spend
45  /// a lot of time in malloc, so we need to force all these allocators to
46  /// behave like one allocator in terms of scaling their memory allocations
47  /// with increased demand. To do this we set the growth delay for each single
48  /// allocator to a rate so that our pool of allocators scales their memory
49  /// allocations similar to a single BumpPtrAllocatorImpl.
50  ///
51  /// Currently we have 256 string pools and the normal growth delay of the
52  /// BumpPtrAllocatorImpl is 128 (i.e., the memory allocation size increases
53  /// every 128 full chunks), so by changing the delay to 1 we get a
54  /// total growth delay in our allocator collection of 256/1 = 256. This is
55  /// still only half as fast as a normal allocator but we can't go any faster
56  /// without decreasing the number of string pools.
57  static const size_t AllocatorGrowthDelay = 1;
58  typedef llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, AllocatorSlabSize,
59                                     SizeThreshold, AllocatorGrowthDelay>
60      Allocator;
61  typedef const char *StringPoolValueType;
62  typedef llvm::StringMap<StringPoolValueType, Allocator> StringPool;
63  typedef llvm::StringMapEntry<StringPoolValueType> StringPoolEntryType;
64
65  static StringPoolEntryType &
66  GetStringMapEntryFromKeyData(const char *keyData) {
67    return StringPoolEntryType::GetStringMapEntryFromKeyData(keyData);
68  }
69
70  static size_t GetConstCStringLength(const char *ccstr) {
71    if (ccstr != nullptr) {
72      // Since the entry is read only, and we derive the entry entirely from
73      // the pointer, we don't need the lock.
74      const StringPoolEntryType &entry = GetStringMapEntryFromKeyData(ccstr);
75      return entry.getKey().size();
76    }
77    return 0;
78  }
79
80  StringPoolValueType GetMangledCounterpart(const char *ccstr) const {
81    if (ccstr != nullptr) {
82      const uint8_t h = hash(llvm::StringRef(ccstr));
83      llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
84      return GetStringMapEntryFromKeyData(ccstr).getValue();
85    }
86    return nullptr;
87  }
88
89  const char *GetConstCString(const char *cstr) {
90    if (cstr != nullptr)
91      return GetConstCStringWithLength(cstr, strlen(cstr));
92    return nullptr;
93  }
94
95  const char *GetConstCStringWithLength(const char *cstr, size_t cstr_len) {
96    if (cstr != nullptr)
97      return GetConstCStringWithStringRef(llvm::StringRef(cstr, cstr_len));
98    return nullptr;
99  }
100
101  const char *GetConstCStringWithStringRef(llvm::StringRef string_ref) {
102    if (string_ref.data()) {
103      const uint8_t h = hash(string_ref);
104
105      {
106        llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
107        auto it = m_string_pools[h].m_string_map.find(string_ref);
108        if (it != m_string_pools[h].m_string_map.end())
109          return it->getKeyData();
110      }
111
112      llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
113      StringPoolEntryType &entry =
114          *m_string_pools[h]
115               .m_string_map.insert(std::make_pair(string_ref, nullptr))
116               .first;
117      return entry.getKeyData();
118    }
119    return nullptr;
120  }
121
122  const char *
123  GetConstCStringAndSetMangledCounterPart(llvm::StringRef demangled,
124                                          const char *mangled_ccstr) {
125    const char *demangled_ccstr = nullptr;
126
127    {
128      const uint8_t h = hash(demangled);
129      llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
130
131      // Make or update string pool entry with the mangled counterpart
132      StringPool &map = m_string_pools[h].m_string_map;
133      StringPoolEntryType &entry = *map.try_emplace(demangled).first;
134
135      entry.second = mangled_ccstr;
136
137      // Extract the const version of the demangled_cstr
138      demangled_ccstr = entry.getKeyData();
139    }
140
141    {
142      // Now assign the demangled const string as the counterpart of the
143      // mangled const string...
144      const uint8_t h = hash(llvm::StringRef(mangled_ccstr));
145      llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
146      GetStringMapEntryFromKeyData(mangled_ccstr).setValue(demangled_ccstr);
147    }
148
149    // Return the constant demangled C string
150    return demangled_ccstr;
151  }
152
153  const char *GetConstTrimmedCStringWithLength(const char *cstr,
154                                               size_t cstr_len) {
155    if (cstr != nullptr) {
156      const size_t trimmed_len = strnlen(cstr, cstr_len);
157      return GetConstCStringWithLength(cstr, trimmed_len);
158    }
159    return nullptr;
160  }
161
162  ConstString::MemoryStats GetMemoryStats() const {
163    ConstString::MemoryStats stats;
164    for (const auto &pool : m_string_pools) {
165      llvm::sys::SmartScopedReader<false> rlock(pool.m_mutex);
166      const Allocator &alloc = pool.m_string_map.getAllocator();
167      stats.bytes_total += alloc.getTotalMemory();
168      stats.bytes_used += alloc.getBytesAllocated();
169    }
170    return stats;
171  }
172
173protected:
174  uint8_t hash(llvm::StringRef s) const {
175    uint32_t h = llvm::djbHash(s);
176    return ((h >> 24) ^ (h >> 16) ^ (h >> 8) ^ h) & 0xff;
177  }
178
179  struct PoolEntry {
180    mutable llvm::sys::SmartRWMutex<false> m_mutex;
181    StringPool m_string_map;
182  };
183
184  std::array<PoolEntry, 256> m_string_pools;
185};
186
187// Frameworks and dylibs aren't supposed to have global C++ initializers so we
188// hide the string pool in a static function so that it will get initialized on
189// the first call to this static function.
190//
191// Note, for now we make the string pool a pointer to the pool, because we
192// can't guarantee that some objects won't get destroyed after the global
193// destructor chain is run, and trying to make sure no destructors touch
194// ConstStrings is difficult.  So we leak the pool instead.
195static Pool &StringPool() {
196  static llvm::once_flag g_pool_initialization_flag;
197  static Pool *g_string_pool = nullptr;
198
199  llvm::call_once(g_pool_initialization_flag,
200                 []() { g_string_pool = new Pool(); });
201
202  return *g_string_pool;
203}
204
205ConstString::ConstString(const char *cstr)
206    : m_string(StringPool().GetConstCString(cstr)) {}
207
208ConstString::ConstString(const char *cstr, size_t cstr_len)
209    : m_string(StringPool().GetConstCStringWithLength(cstr, cstr_len)) {}
210
211ConstString::ConstString(llvm::StringRef s)
212    : m_string(StringPool().GetConstCStringWithStringRef(s)) {}
213
214bool ConstString::operator<(ConstString rhs) const {
215  if (m_string == rhs.m_string)
216    return false;
217
218  llvm::StringRef lhs_string_ref(GetStringRef());
219  llvm::StringRef rhs_string_ref(rhs.GetStringRef());
220
221  // If both have valid C strings, then return the comparison
222  if (lhs_string_ref.data() && rhs_string_ref.data())
223    return lhs_string_ref < rhs_string_ref;
224
225  // Else one of them was nullptr, so if LHS is nullptr then it is less than
226  return lhs_string_ref.data() == nullptr;
227}
228
229Stream &lldb_private::operator<<(Stream &s, ConstString str) {
230  const char *cstr = str.GetCString();
231  if (cstr != nullptr)
232    s << cstr;
233
234  return s;
235}
236
237size_t ConstString::GetLength() const {
238  return Pool::GetConstCStringLength(m_string);
239}
240
241bool ConstString::Equals(ConstString lhs, ConstString rhs,
242                         const bool case_sensitive) {
243  if (lhs.m_string == rhs.m_string)
244    return true;
245
246  // Since the pointers weren't equal, and identical ConstStrings always have
247  // identical pointers, the result must be false for case sensitive equality
248  // test.
249  if (case_sensitive)
250    return false;
251
252  // perform case insensitive equality test
253  llvm::StringRef lhs_string_ref(lhs.GetStringRef());
254  llvm::StringRef rhs_string_ref(rhs.GetStringRef());
255  return lhs_string_ref.equals_insensitive(rhs_string_ref);
256}
257
258int ConstString::Compare(ConstString lhs, ConstString rhs,
259                         const bool case_sensitive) {
260  // If the iterators are the same, this is the same string
261  const char *lhs_cstr = lhs.m_string;
262  const char *rhs_cstr = rhs.m_string;
263  if (lhs_cstr == rhs_cstr)
264    return 0;
265  if (lhs_cstr && rhs_cstr) {
266    llvm::StringRef lhs_string_ref(lhs.GetStringRef());
267    llvm::StringRef rhs_string_ref(rhs.GetStringRef());
268
269    if (case_sensitive) {
270      return lhs_string_ref.compare(rhs_string_ref);
271    } else {
272      return lhs_string_ref.compare_insensitive(rhs_string_ref);
273    }
274  }
275
276  if (lhs_cstr)
277    return +1; // LHS isn't nullptr but RHS is
278  else
279    return -1; // LHS is nullptr but RHS isn't
280}
281
282void ConstString::Dump(Stream *s, const char *fail_value) const {
283  if (s != nullptr) {
284    const char *cstr = AsCString(fail_value);
285    if (cstr != nullptr)
286      s->PutCString(cstr);
287  }
288}
289
290void ConstString::DumpDebug(Stream *s) const {
291  const char *cstr = GetCString();
292  size_t cstr_len = GetLength();
293  // Only print the parens if we have a non-nullptr string
294  const char *parens = cstr ? "\"" : "";
295  s->Printf("%*p: ConstString, string = %s%s%s, length = %" PRIu64,
296            static_cast<int>(sizeof(void *) * 2),
297            static_cast<const void *>(this), parens, cstr, parens,
298            static_cast<uint64_t>(cstr_len));
299}
300
301void ConstString::SetCString(const char *cstr) {
302  m_string = StringPool().GetConstCString(cstr);
303}
304
305void ConstString::SetString(llvm::StringRef s) {
306  m_string = StringPool().GetConstCStringWithStringRef(s);
307}
308
309void ConstString::SetStringWithMangledCounterpart(llvm::StringRef demangled,
310                                                  ConstString mangled) {
311  m_string = StringPool().GetConstCStringAndSetMangledCounterPart(
312      demangled, mangled.m_string);
313}
314
315bool ConstString::GetMangledCounterpart(ConstString &counterpart) const {
316  counterpart.m_string = StringPool().GetMangledCounterpart(m_string);
317  return (bool)counterpart;
318}
319
320void ConstString::SetCStringWithLength(const char *cstr, size_t cstr_len) {
321  m_string = StringPool().GetConstCStringWithLength(cstr, cstr_len);
322}
323
324void ConstString::SetTrimmedCStringWithLength(const char *cstr,
325                                              size_t cstr_len) {
326  m_string = StringPool().GetConstTrimmedCStringWithLength(cstr, cstr_len);
327}
328
329ConstString::MemoryStats ConstString::GetMemoryStats() {
330  return StringPool().GetMemoryStats();
331}
332
333void llvm::format_provider<ConstString>::format(const ConstString &CS,
334                                                llvm::raw_ostream &OS,
335                                                llvm::StringRef Options) {
336  format_provider<StringRef>::format(CS.GetStringRef(), OS, Options);
337}
338