1//===-- ConstString.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef liblldb_ConstString_h_
10#define liblldb_ConstString_h_
11
12#include "llvm/ADT/StringRef.h"
13#include "llvm/ADT/DenseMapInfo.h"
14#include "llvm/Support/FormatVariadic.h"
15
16#include <stddef.h>
17
18namespace lldb_private {
19class Stream;
20}
21namespace llvm {
22class raw_ostream;
23}
24
25namespace lldb_private {
26
27/// \class ConstString ConstString.h "lldb/Utility/ConstString.h"
28/// A uniqued constant string class.
29///
30/// Provides an efficient way to store strings as uniqued strings. After the
31/// strings are uniqued, finding strings that are equal to one another is very
32/// fast as just the pointers need to be compared. It also allows for many
33/// common strings from many different sources to be shared to keep the memory
34/// footprint low.
35///
36/// No reference counting is done on strings that are added to the string
37/// pool, once strings are added they are in the string pool for the life of
38/// the program.
39class ConstString {
40public:
41  /// Default constructor
42  ///
43  /// Initializes the string to an empty string.
44  ConstString() : m_string(nullptr) {}
45
46  /// Copy constructor
47  ///
48  /// Copies the string value in \a rhs into this object.
49  ///
50  /// \param[in] rhs
51  ///     Another string object to copy.
52  ConstString(const ConstString &rhs) : m_string(rhs.m_string) {}
53
54  explicit ConstString(const llvm::StringRef &s);
55
56  /// Construct with C String value
57  ///
58  /// Constructs this object with a C string by looking to see if the
59  /// C string already exists in the global string pool. If it doesn't
60  /// exist, it is added to the string pool.
61  ///
62  /// \param[in] cstr
63  ///     A NULL terminated C string to add to the string pool.
64  explicit ConstString(const char *cstr);
65
66  /// Construct with C String value with max length
67  ///
68  /// Constructs this object with a C string with a length. If \a max_cstr_len
69  /// is greater than the actual length of the string, the string length will
70  /// be truncated. This allows substrings to be created without the need to
71  /// NULL terminate the string as it is passed into this function.
72  ///
73  /// \param[in] cstr
74  ///     A pointer to the first character in the C string. The C
75  ///     string can be NULL terminated in a buffer that contains
76  ///     more characters than the length of the string, or the
77  ///     string can be part of another string and a new substring
78  ///     can be created.
79  ///
80  /// \param[in] max_cstr_len
81  ///     The max length of \a cstr. If the string length of \a cstr
82  ///     is less than \a max_cstr_len, then the string will be
83  ///     truncated. If the string length of \a cstr is greater than
84  ///     \a max_cstr_len, then only max_cstr_len bytes will be used
85  ///     from \a cstr.
86  explicit ConstString(const char *cstr, size_t max_cstr_len);
87
88  /// Destructor
89  ///
90  /// Since constant string values are currently not reference counted, there
91  /// isn't much to do here.
92  ~ConstString() = default;
93
94  /// C string equality binary predicate function object for ConstString
95  /// objects.
96  struct StringIsEqual {
97    /// C equality test.
98    ///
99    /// Two C strings are equal when they are contained in ConstString objects
100    /// when their pointer values are equal to each other.
101    ///
102    /// \return
103    ///     Returns \b true if the C string in \a lhs is equal to
104    ///     the C string value in \a rhs, \b false otherwise.
105    bool operator()(const char *lhs, const char *rhs) const {
106      return lhs == rhs;
107    }
108  };
109
110  /// Convert to bool operator.
111  ///
112  /// This allows code to check a ConstString object to see if it contains a
113  /// valid string using code such as:
114  ///
115  /// \code
116  /// ConstString str(...);
117  /// if (str)
118  /// { ...
119  /// \endcode
120  ///
121  /// \return
122  ///     /b True this object contains a valid non-empty C string, \b
123  ///     false otherwise.
124  explicit operator bool() const { return !IsEmpty(); }
125
126  /// Assignment operator
127  ///
128  /// Assigns the string in this object with the value from \a rhs.
129  ///
130  /// \param[in] rhs
131  ///     Another string object to copy into this object.
132  ///
133  /// \return
134  ///     A const reference to this object.
135  ConstString operator=(ConstString rhs) {
136    m_string = rhs.m_string;
137    return *this;
138  }
139
140  /// Equal to operator
141  ///
142  /// Returns true if this string is equal to the string in \a rhs. This
143  /// operation is very fast as it results in a pointer comparison since all
144  /// strings are in a uniqued in a global string pool.
145  ///
146  /// \param[in] rhs
147  ///     Another string object to compare this object to.
148  ///
149  /// \return
150  ///     true if this object is equal to \a rhs.
151  ///     false if this object is not equal to \a rhs.
152  bool operator==(ConstString rhs) const {
153    // We can do a pointer compare to compare these strings since they must
154    // come from the same pool in order to be equal.
155    return m_string == rhs.m_string;
156  }
157
158  /// Equal to operator against a non-ConstString value.
159  ///
160  /// Returns true if this string is equal to the string in \a rhs. This
161  /// overload is usually slower than comparing against a ConstString value.
162  /// However, if the rhs string not already a ConstString and it is impractical
163  /// to turn it into a non-temporary variable, then this overload is faster.
164  ///
165  /// \param[in] rhs
166  ///     Another string object to compare this object to.
167  ///
168  /// \return
169  ///     \b true if this object is equal to \a rhs.
170  ///     \b false if this object is not equal to \a rhs.
171  bool operator==(const char *rhs) const {
172    // ConstString differentiates between empty strings and nullptr strings, but
173    // StringRef doesn't. Therefore we have to do this check manually now.
174    if (m_string == nullptr && rhs != nullptr)
175      return false;
176    if (m_string != nullptr && rhs == nullptr)
177      return false;
178
179    return GetStringRef() == rhs;
180  }
181
182  /// Not equal to operator
183  ///
184  /// Returns true if this string is not equal to the string in \a rhs. This
185  /// operation is very fast as it results in a pointer comparison since all
186  /// strings are in a uniqued in a global string pool.
187  ///
188  /// \param[in] rhs
189  ///     Another string object to compare this object to.
190  ///
191  /// \return
192  ///     \b true if this object is not equal to \a rhs.
193  ///     \b false if this object is equal to \a rhs.
194  bool operator!=(ConstString rhs) const {
195    return m_string != rhs.m_string;
196  }
197
198  /// Not equal to operator against a non-ConstString value.
199  ///
200  /// Returns true if this string is not equal to the string in \a rhs. This
201  /// overload is usually slower than comparing against a ConstString value.
202  /// However, if the rhs string not already a ConstString and it is impractical
203  /// to turn it into a non-temporary variable, then this overload is faster.
204  ///
205  /// \param[in] rhs
206  ///     Another string object to compare this object to.
207  ///
208  /// \return \b true if this object is not equal to \a rhs, false otherwise.
209  bool operator!=(const char *rhs) const { return !(*this == rhs); }
210
211  bool operator<(ConstString rhs) const;
212
213  /// Get the string value as a C string.
214  ///
215  /// Get the value of the contained string as a NULL terminated C string
216  /// value.
217  ///
218  /// If \a value_if_empty is nullptr, then nullptr will be returned.
219  ///
220  /// \return Returns \a value_if_empty if the string is empty, otherwise
221  ///     the C string value contained in this object.
222  const char *AsCString(const char *value_if_empty = nullptr) const {
223    return (IsEmpty() ? value_if_empty : m_string);
224  }
225
226  /// Get the string value as a llvm::StringRef
227  ///
228  /// \return
229  ///     Returns a new llvm::StringRef object filled in with the
230  ///     needed data.
231  llvm::StringRef GetStringRef() const {
232    return llvm::StringRef(m_string, GetLength());
233  }
234
235  /// Get the string value as a C string.
236  ///
237  /// Get the value of the contained string as a NULL terminated C string
238  /// value. Similar to the ConstString::AsCString() function, yet this
239  /// function will always return nullptr if the string is not valid. So this
240  /// function is a direct accessor to the string pointer value.
241  ///
242  /// \return
243  ///     Returns nullptr the string is invalid, otherwise the C string
244  ///     value contained in this object.
245  const char *GetCString() const { return m_string; }
246
247  /// Get the length in bytes of string value.
248  ///
249  /// The string pool stores the length of the string, so we can avoid calling
250  /// strlen() on the pointer value with this function.
251  ///
252  /// \return
253  ///     Returns the number of bytes that this string occupies in
254  ///     memory, not including the NULL termination byte.
255  size_t GetLength() const;
256
257  /// Clear this object's state.
258  ///
259  /// Clear any contained string and reset the value to the empty string
260  /// value.
261  void Clear() { m_string = nullptr; }
262
263  /// Equal to operator
264  ///
265  /// Returns true if this string is equal to the string in \a rhs. If case
266  /// sensitive equality is tested, this operation is very fast as it results
267  /// in a pointer comparison since all strings are in a uniqued in a global
268  /// string pool.
269  ///
270  /// \param[in] lhs
271  ///     The Left Hand Side const ConstString object reference.
272  ///
273  /// \param[in] rhs
274  ///     The Right Hand Side const ConstString object reference.
275  ///
276  /// \param[in] case_sensitive
277  ///     Case sensitivity. If true, case sensitive equality
278  ///     will be tested, otherwise character case will be ignored
279  ///
280  /// \return \b true if this object is equal to \a rhs, \b false otherwise.
281  static bool Equals(ConstString lhs, ConstString rhs,
282                     const bool case_sensitive = true);
283
284  /// Compare two string objects.
285  ///
286  /// Compares the C string values contained in \a lhs and \a rhs and returns
287  /// an integer result.
288  ///
289  /// NOTE: only call this function when you want a true string
290  /// comparison. If you want string equality use the, use the == operator as
291  /// it is much more efficient. Also if you want string inequality, use the
292  /// != operator for the same reasons.
293  ///
294  /// \param[in] lhs
295  ///     The Left Hand Side const ConstString object reference.
296  ///
297  /// \param[in] rhs
298  ///     The Right Hand Side const ConstString object reference.
299  ///
300  /// \param[in] case_sensitive
301  ///     Case sensitivity of compare. If true, case sensitive compare
302  ///     will be performed, otherwise character case will be ignored
303  ///
304  /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs
305  static int Compare(ConstString lhs, ConstString rhs,
306                     const bool case_sensitive = true);
307
308  /// Dump the object description to a stream.
309  ///
310  /// Dump the string value to the stream \a s. If the contained string is
311  /// empty, print \a value_if_empty to the stream instead. If \a
312  /// value_if_empty is nullptr, then nothing will be dumped to the stream.
313  ///
314  /// \param[in] s
315  ///     The stream that will be used to dump the object description.
316  ///
317  /// \param[in] value_if_empty
318  ///     The value to dump if the string is empty. If nullptr, nothing
319  ///     will be output to the stream.
320  void Dump(Stream *s, const char *value_if_empty = nullptr) const;
321
322  /// Dump the object debug description to a stream.
323  ///
324  /// \param[in] s
325  ///     The stream that will be used to dump the object description.
326  void DumpDebug(Stream *s) const;
327
328  /// Test for empty string.
329  ///
330  /// \return
331  ///     \b true if the contained string is empty.
332  ///     \b false if the contained string is not empty.
333  bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; }
334
335  /// Test for null string.
336  ///
337  /// \return
338  ///     \b true if there is no string associated with this instance.
339  ///     \b false if there is a string associated with this instance.
340  bool IsNull() const { return m_string == nullptr; }
341
342  /// Set the C string value.
343  ///
344  /// Set the string value in the object by uniquing the \a cstr string value
345  /// in our global string pool.
346  ///
347  /// If the C string already exists in the global string pool, it finds the
348  /// current entry and returns the existing value. If it doesn't exist, it is
349  /// added to the string pool.
350  ///
351  /// \param[in] cstr
352  ///     A NULL terminated C string to add to the string pool.
353  void SetCString(const char *cstr);
354
355  void SetString(const llvm::StringRef &s);
356
357  /// Set the C string value and its mangled counterpart.
358  ///
359  /// Object files and debug symbols often use mangled string to represent the
360  /// linkage name for a symbol, function or global. The string pool can
361  /// efficiently store these values and their counterparts so when we run
362  /// into another instance of a mangled name, we can avoid calling the name
363  /// demangler over and over on the same strings and then trying to unique
364  /// them.
365  ///
366  /// \param[in] demangled
367  ///     The demangled string to correlate with the \a mangled name.
368  ///
369  /// \param[in] mangled
370  ///     The already uniqued mangled ConstString to correlate the
371  ///     soon to be uniqued version of \a demangled.
372  void SetStringWithMangledCounterpart(llvm::StringRef demangled,
373                                       ConstString mangled);
374
375  /// Retrieve the mangled or demangled counterpart for a mangled or demangled
376  /// ConstString.
377  ///
378  /// Object files and debug symbols often use mangled string to represent the
379  /// linkage name for a symbol, function or global. The string pool can
380  /// efficiently store these values and their counterparts so when we run
381  /// into another instance of a mangled name, we can avoid calling the name
382  /// demangler over and over on the same strings and then trying to unique
383  /// them.
384  ///
385  /// \param[in] counterpart
386  ///     A reference to a ConstString object that might get filled in
387  ///     with the demangled/mangled counterpart.
388  ///
389  /// \return
390  ///     /b True if \a counterpart was filled in with the counterpart
391  ///     /b false otherwise.
392  bool GetMangledCounterpart(ConstString &counterpart) const;
393
394  /// Set the C string value with length.
395  ///
396  /// Set the string value in the object by uniquing \a cstr_len bytes
397  /// starting at the \a cstr string value in our global string pool. If trim
398  /// is true, then \a cstr_len indicates a maximum length of the CString and
399  /// if the actual length of the string is less, then it will be trimmed.
400  ///
401  /// If the C string already exists in the global string pool, it finds the
402  /// current entry and returns the existing value. If it doesn't exist, it is
403  /// added to the string pool.
404  ///
405  /// \param[in] cstr
406  ///     A NULL terminated C string to add to the string pool.
407  ///
408  /// \param[in] cstr_len
409  ///     The maximum length of the C string.
410  void SetCStringWithLength(const char *cstr, size_t cstr_len);
411
412  /// Set the C string value with the minimum length between \a fixed_cstr_len
413  /// and the actual length of the C string. This can be used for data
414  /// structures that have a fixed length to store a C string where the string
415  /// might not be NULL terminated if the string takes the entire buffer.
416  void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len);
417
418  /// Get the memory cost of this object.
419  ///
420  /// Return the size in bytes that this object takes in memory. This returns
421  /// the size in bytes of this object, which does not include any the shared
422  /// string values it may refer to.
423  ///
424  /// \return
425  ///     The number of bytes that this object occupies in memory.
426  ///
427  /// \see ConstString::StaticMemorySize ()
428  size_t MemorySize() const { return sizeof(ConstString); }
429
430  /// Get the size in bytes of the current global string pool.
431  ///
432  /// Reports the size in bytes of all shared C string values, containers and
433  /// any other values as a byte size for the entire string pool.
434  ///
435  /// \return
436  ///     The number of bytes that the global string pool occupies
437  ///     in memory.
438  static size_t StaticMemorySize();
439
440protected:
441  template <typename T> friend struct ::llvm::DenseMapInfo;
442  /// Only used by DenseMapInfo.
443  static ConstString FromStringPoolPointer(const char *ptr) {
444    ConstString s;
445    s.m_string = ptr;
446    return s;
447  };
448
449  // Member variables
450  const char *m_string;
451};
452
453/// Stream the string value \a str to the stream \a s
454Stream &operator<<(Stream &s, ConstString str);
455
456} // namespace lldb_private
457
458namespace llvm {
459template <> struct format_provider<lldb_private::ConstString> {
460  static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS,
461                     llvm::StringRef Options);
462};
463
464/// DenseMapInfo implementation.
465/// \{
466template <> struct DenseMapInfo<lldb_private::ConstString> {
467  static inline lldb_private::ConstString getEmptyKey() {
468    return lldb_private::ConstString::FromStringPoolPointer(
469        DenseMapInfo<const char *>::getEmptyKey());
470  }
471  static inline lldb_private::ConstString getTombstoneKey() {
472    return lldb_private::ConstString::FromStringPoolPointer(
473        DenseMapInfo<const char *>::getTombstoneKey());
474  }
475  static unsigned getHashValue(lldb_private::ConstString val) {
476    return DenseMapInfo<const char *>::getHashValue(val.m_string);
477  }
478  static bool isEqual(lldb_private::ConstString LHS,
479                      lldb_private::ConstString RHS) {
480    return LHS == RHS;
481  }
482};
483/// \}
484}
485
486#endif // liblldb_ConstString_h_
487