1317027Sdim//===-- ConstString.h -------------------------------------------*- C++ -*-===//
2317027Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6317027Sdim//
7317027Sdim//===----------------------------------------------------------------------===//
8317027Sdim
9317027Sdim#ifndef liblldb_ConstString_h_
10317027Sdim#define liblldb_ConstString_h_
11317027Sdim
12317027Sdim#include "llvm/ADT/StringRef.h"
13360784Sdim#include "llvm/ADT/DenseMapInfo.h"
14344779Sdim#include "llvm/Support/FormatVariadic.h"
15317027Sdim
16344779Sdim#include <stddef.h>
17317027Sdim
18317027Sdimnamespace lldb_private {
19317027Sdimclass Stream;
20317027Sdim}
21317027Sdimnamespace llvm {
22317027Sdimclass raw_ostream;
23317027Sdim}
24317027Sdim
25317027Sdimnamespace lldb_private {
26317027Sdim
27353358Sdim/// \class ConstString ConstString.h "lldb/Utility/ConstString.h"
28341825Sdim/// A uniqued constant string class.
29317027Sdim///
30341825Sdim/// Provides an efficient way to store strings as uniqued strings. After the
31341825Sdim/// strings are uniqued, finding strings that are equal to one another is very
32341825Sdim/// fast as just the pointers need to be compared. It also allows for many
33341825Sdim/// common strings from many different sources to be shared to keep the memory
34341825Sdim/// footprint low.
35317027Sdim///
36341825Sdim/// No reference counting is done on strings that are added to the string
37341825Sdim/// pool, once strings are added they are in the string pool for the life of
38341825Sdim/// the program.
39317027Sdimclass ConstString {
40317027Sdimpublic:
41317027Sdim  /// Default constructor
42317027Sdim  ///
43317027Sdim  /// Initializes the string to an empty string.
44317027Sdim  ConstString() : m_string(nullptr) {}
45317027Sdim
46317027Sdim  /// Copy constructor
47317027Sdim  ///
48317027Sdim  /// Copies the string value in \a rhs into this object.
49317027Sdim  ///
50353358Sdim  /// \param[in] rhs
51317027Sdim  ///     Another string object to copy.
52317027Sdim  ConstString(const ConstString &rhs) : m_string(rhs.m_string) {}
53317027Sdim
54317027Sdim  explicit ConstString(const llvm::StringRef &s);
55317027Sdim
56317027Sdim  /// Construct with C String value
57317027Sdim  ///
58317027Sdim  /// Constructs this object with a C string by looking to see if the
59317027Sdim  /// C string already exists in the global string pool. If it doesn't
60317027Sdim  /// exist, it is added to the string pool.
61317027Sdim  ///
62353358Sdim  /// \param[in] cstr
63317027Sdim  ///     A NULL terminated C string to add to the string pool.
64317027Sdim  explicit ConstString(const char *cstr);
65317027Sdim
66317027Sdim  /// Construct with C String value with max length
67317027Sdim  ///
68341825Sdim  /// Constructs this object with a C string with a length. If \a max_cstr_len
69341825Sdim  /// is greater than the actual length of the string, the string length will
70341825Sdim  /// be truncated. This allows substrings to be created without the need to
71341825Sdim  /// NULL terminate the string as it is passed into this function.
72317027Sdim  ///
73353358Sdim  /// \param[in] cstr
74317027Sdim  ///     A pointer to the first character in the C string. The C
75317027Sdim  ///     string can be NULL terminated in a buffer that contains
76317027Sdim  ///     more characters than the length of the string, or the
77317027Sdim  ///     string can be part of another string and a new substring
78317027Sdim  ///     can be created.
79317027Sdim  ///
80353358Sdim  /// \param[in] max_cstr_len
81317027Sdim  ///     The max length of \a cstr. If the string length of \a cstr
82317027Sdim  ///     is less than \a max_cstr_len, then the string will be
83317027Sdim  ///     truncated. If the string length of \a cstr is greater than
84317027Sdim  ///     \a max_cstr_len, then only max_cstr_len bytes will be used
85317027Sdim  ///     from \a cstr.
86317027Sdim  explicit ConstString(const char *cstr, size_t max_cstr_len);
87317027Sdim
88317027Sdim  /// Destructor
89317027Sdim  ///
90341825Sdim  /// Since constant string values are currently not reference counted, there
91341825Sdim  /// isn't much to do here.
92317027Sdim  ~ConstString() = default;
93317027Sdim
94317027Sdim  /// C string equality binary predicate function object for ConstString
95317027Sdim  /// objects.
96317027Sdim  struct StringIsEqual {
97317027Sdim    /// C equality test.
98317027Sdim    ///
99341825Sdim    /// Two C strings are equal when they are contained in ConstString objects
100341825Sdim    /// when their pointer values are equal to each other.
101317027Sdim    ///
102353358Sdim    /// \return
103317027Sdim    ///     Returns \b true if the C string in \a lhs is equal to
104317027Sdim    ///     the C string value in \a rhs, \b false otherwise.
105317027Sdim    bool operator()(const char *lhs, const char *rhs) const {
106317027Sdim      return lhs == rhs;
107317027Sdim    }
108317027Sdim  };
109317027Sdim
110317027Sdim  /// Convert to bool operator.
111317027Sdim  ///
112341825Sdim  /// This allows code to check a ConstString object to see if it contains a
113341825Sdim  /// valid string using code such as:
114317027Sdim  ///
115353358Sdim  /// \code
116317027Sdim  /// ConstString str(...);
117317027Sdim  /// if (str)
118317027Sdim  /// { ...
119353358Sdim  /// \endcode
120317027Sdim  ///
121353358Sdim  /// \return
122317027Sdim  ///     /b True this object contains a valid non-empty C string, \b
123317027Sdim  ///     false otherwise.
124341825Sdim  explicit operator bool() const { return !IsEmpty(); }
125317027Sdim
126317027Sdim  /// Assignment operator
127317027Sdim  ///
128317027Sdim  /// Assigns the string in this object with the value from \a rhs.
129317027Sdim  ///
130353358Sdim  /// \param[in] rhs
131317027Sdim  ///     Another string object to copy into this object.
132317027Sdim  ///
133353358Sdim  /// \return
134317027Sdim  ///     A const reference to this object.
135353358Sdim  ConstString operator=(ConstString rhs) {
136317027Sdim    m_string = rhs.m_string;
137317027Sdim    return *this;
138317027Sdim  }
139317027Sdim
140317027Sdim  /// Equal to operator
141317027Sdim  ///
142341825Sdim  /// Returns true if this string is equal to the string in \a rhs. This
143341825Sdim  /// operation is very fast as it results in a pointer comparison since all
144341825Sdim  /// strings are in a uniqued in a global string pool.
145317027Sdim  ///
146353358Sdim  /// \param[in] rhs
147317027Sdim  ///     Another string object to compare this object to.
148317027Sdim  ///
149353358Sdim  /// \return
150360784Sdim  ///     true if this object is equal to \a rhs.
151360784Sdim  ///     false if this object is not equal to \a rhs.
152353358Sdim  bool operator==(ConstString rhs) const {
153341825Sdim    // We can do a pointer compare to compare these strings since they must
154341825Sdim    // come from the same pool in order to be equal.
155317027Sdim    return m_string == rhs.m_string;
156317027Sdim  }
157317027Sdim
158353358Sdim  /// Equal to operator against a non-ConstString value.
159353358Sdim  ///
160353358Sdim  /// Returns true if this string is equal to the string in \a rhs. This
161353358Sdim  /// overload is usually slower than comparing against a ConstString value.
162353358Sdim  /// However, if the rhs string not already a ConstString and it is impractical
163353358Sdim  /// to turn it into a non-temporary variable, then this overload is faster.
164353358Sdim  ///
165353358Sdim  /// \param[in] rhs
166353358Sdim  ///     Another string object to compare this object to.
167353358Sdim  ///
168353358Sdim  /// \return
169360784Sdim  ///     \b true if this object is equal to \a rhs.
170360784Sdim  ///     \b false if this object is not equal to \a rhs.
171353358Sdim  bool operator==(const char *rhs) const {
172353358Sdim    // ConstString differentiates between empty strings and nullptr strings, but
173353358Sdim    // StringRef doesn't. Therefore we have to do this check manually now.
174353358Sdim    if (m_string == nullptr && rhs != nullptr)
175353358Sdim      return false;
176353358Sdim    if (m_string != nullptr && rhs == nullptr)
177353358Sdim      return false;
178353358Sdim
179353358Sdim    return GetStringRef() == rhs;
180353358Sdim  }
181353358Sdim
182317027Sdim  /// Not equal to operator
183317027Sdim  ///
184341825Sdim  /// Returns true if this string is not equal to the string in \a rhs. This
185341825Sdim  /// operation is very fast as it results in a pointer comparison since all
186341825Sdim  /// strings are in a uniqued in a global string pool.
187317027Sdim  ///
188353358Sdim  /// \param[in] rhs
189317027Sdim  ///     Another string object to compare this object to.
190317027Sdim  ///
191353358Sdim  /// \return
192360784Sdim  ///     \b true if this object is not equal to \a rhs.
193360784Sdim  ///     \b false if this object is equal to \a rhs.
194353358Sdim  bool operator!=(ConstString rhs) const {
195317027Sdim    return m_string != rhs.m_string;
196317027Sdim  }
197317027Sdim
198353358Sdim  /// Not equal to operator against a non-ConstString value.
199353358Sdim  ///
200353358Sdim  /// Returns true if this string is not equal to the string in \a rhs. This
201353358Sdim  /// overload is usually slower than comparing against a ConstString value.
202353358Sdim  /// However, if the rhs string not already a ConstString and it is impractical
203353358Sdim  /// to turn it into a non-temporary variable, then this overload is faster.
204353358Sdim  ///
205353358Sdim  /// \param[in] rhs
206353358Sdim  ///     Another string object to compare this object to.
207353358Sdim  ///
208360784Sdim  /// \return \b true if this object is not equal to \a rhs, false otherwise.
209353358Sdim  bool operator!=(const char *rhs) const { return !(*this == rhs); }
210317027Sdim
211353358Sdim  bool operator<(ConstString rhs) const;
212353358Sdim
213317027Sdim  /// Get the string value as a C string.
214317027Sdim  ///
215341825Sdim  /// Get the value of the contained string as a NULL terminated C string
216341825Sdim  /// value.
217317027Sdim  ///
218317027Sdim  /// If \a value_if_empty is nullptr, then nullptr will be returned.
219317027Sdim  ///
220360784Sdim  /// \return Returns \a value_if_empty if the string is empty, otherwise
221317027Sdim  ///     the C string value contained in this object.
222317027Sdim  const char *AsCString(const char *value_if_empty = nullptr) const {
223317027Sdim    return (IsEmpty() ? value_if_empty : m_string);
224317027Sdim  }
225317027Sdim
226317027Sdim  /// Get the string value as a llvm::StringRef
227317027Sdim  ///
228353358Sdim  /// \return
229317027Sdim  ///     Returns a new llvm::StringRef object filled in with the
230317027Sdim  ///     needed data.
231317027Sdim  llvm::StringRef GetStringRef() const {
232317027Sdim    return llvm::StringRef(m_string, GetLength());
233317027Sdim  }
234317027Sdim
235317027Sdim  /// Get the string value as a C string.
236317027Sdim  ///
237341825Sdim  /// Get the value of the contained string as a NULL terminated C string
238341825Sdim  /// value. Similar to the ConstString::AsCString() function, yet this
239341825Sdim  /// function will always return nullptr if the string is not valid. So this
240341825Sdim  /// function is a direct accessor to the string pointer value.
241317027Sdim  ///
242353358Sdim  /// \return
243317027Sdim  ///     Returns nullptr the string is invalid, otherwise the C string
244317027Sdim  ///     value contained in this object.
245317027Sdim  const char *GetCString() const { return m_string; }
246317027Sdim
247317027Sdim  /// Get the length in bytes of string value.
248317027Sdim  ///
249341825Sdim  /// The string pool stores the length of the string, so we can avoid calling
250341825Sdim  /// strlen() on the pointer value with this function.
251317027Sdim  ///
252353358Sdim  /// \return
253317027Sdim  ///     Returns the number of bytes that this string occupies in
254317027Sdim  ///     memory, not including the NULL termination byte.
255317027Sdim  size_t GetLength() const;
256317027Sdim
257317027Sdim  /// Clear this object's state.
258317027Sdim  ///
259344779Sdim  /// Clear any contained string and reset the value to the empty string
260341825Sdim  /// value.
261317027Sdim  void Clear() { m_string = nullptr; }
262317027Sdim
263317027Sdim  /// Equal to operator
264317027Sdim  ///
265341825Sdim  /// Returns true if this string is equal to the string in \a rhs. If case
266341825Sdim  /// sensitive equality is tested, this operation is very fast as it results
267341825Sdim  /// in a pointer comparison since all strings are in a uniqued in a global
268341825Sdim  /// string pool.
269317027Sdim  ///
270360784Sdim  /// \param[in] lhs
271317027Sdim  ///     The Left Hand Side const ConstString object reference.
272317027Sdim  ///
273353358Sdim  /// \param[in] rhs
274317027Sdim  ///     The Right Hand Side const ConstString object reference.
275317027Sdim  ///
276353358Sdim  /// \param[in] case_sensitive
277317027Sdim  ///     Case sensitivity. If true, case sensitive equality
278317027Sdim  ///     will be tested, otherwise character case will be ignored
279317027Sdim  ///
280360784Sdim  /// \return \b true if this object is equal to \a rhs, \b false otherwise.
281353358Sdim  static bool Equals(ConstString lhs, ConstString rhs,
282317027Sdim                     const bool case_sensitive = true);
283317027Sdim
284317027Sdim  /// Compare two string objects.
285317027Sdim  ///
286341825Sdim  /// Compares the C string values contained in \a lhs and \a rhs and returns
287341825Sdim  /// an integer result.
288317027Sdim  ///
289317027Sdim  /// NOTE: only call this function when you want a true string
290341825Sdim  /// comparison. If you want string equality use the, use the == operator as
291341825Sdim  /// it is much more efficient. Also if you want string inequality, use the
292341825Sdim  /// != operator for the same reasons.
293317027Sdim  ///
294353358Sdim  /// \param[in] lhs
295317027Sdim  ///     The Left Hand Side const ConstString object reference.
296317027Sdim  ///
297353358Sdim  /// \param[in] rhs
298317027Sdim  ///     The Right Hand Side const ConstString object reference.
299317027Sdim  ///
300353358Sdim  /// \param[in] case_sensitive
301317027Sdim  ///     Case sensitivity of compare. If true, case sensitive compare
302317027Sdim  ///     will be performed, otherwise character case will be ignored
303317027Sdim  ///
304360784Sdim  /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs
305353358Sdim  static int Compare(ConstString lhs, ConstString rhs,
306317027Sdim                     const bool case_sensitive = true);
307317027Sdim
308317027Sdim  /// Dump the object description to a stream.
309317027Sdim  ///
310341825Sdim  /// Dump the string value to the stream \a s. If the contained string is
311341825Sdim  /// empty, print \a value_if_empty to the stream instead. If \a
312341825Sdim  /// value_if_empty is nullptr, then nothing will be dumped to the stream.
313317027Sdim  ///
314353358Sdim  /// \param[in] s
315317027Sdim  ///     The stream that will be used to dump the object description.
316317027Sdim  ///
317353358Sdim  /// \param[in] value_if_empty
318317027Sdim  ///     The value to dump if the string is empty. If nullptr, nothing
319317027Sdim  ///     will be output to the stream.
320317027Sdim  void Dump(Stream *s, const char *value_if_empty = nullptr) const;
321317027Sdim
322317027Sdim  /// Dump the object debug description to a stream.
323317027Sdim  ///
324353358Sdim  /// \param[in] s
325317027Sdim  ///     The stream that will be used to dump the object description.
326317027Sdim  void DumpDebug(Stream *s) const;
327317027Sdim
328317027Sdim  /// Test for empty string.
329317027Sdim  ///
330353358Sdim  /// \return
331360784Sdim  ///     \b true if the contained string is empty.
332360784Sdim  ///     \b false if the contained string is not empty.
333317027Sdim  bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; }
334317027Sdim
335344779Sdim  /// Test for null string.
336344779Sdim  ///
337353358Sdim  /// \return
338360784Sdim  ///     \b true if there is no string associated with this instance.
339360784Sdim  ///     \b false if there is a string associated with this instance.
340344779Sdim  bool IsNull() const { return m_string == nullptr; }
341344779Sdim
342317027Sdim  /// Set the C string value.
343317027Sdim  ///
344341825Sdim  /// Set the string value in the object by uniquing the \a cstr string value
345341825Sdim  /// in our global string pool.
346317027Sdim  ///
347341825Sdim  /// If the C string already exists in the global string pool, it finds the
348341825Sdim  /// current entry and returns the existing value. If it doesn't exist, it is
349341825Sdim  /// added to the string pool.
350317027Sdim  ///
351353358Sdim  /// \param[in] cstr
352317027Sdim  ///     A NULL terminated C string to add to the string pool.
353317027Sdim  void SetCString(const char *cstr);
354317027Sdim
355317027Sdim  void SetString(const llvm::StringRef &s);
356317027Sdim
357317027Sdim  /// Set the C string value and its mangled counterpart.
358317027Sdim  ///
359341825Sdim  /// Object files and debug symbols often use mangled string to represent the
360341825Sdim  /// linkage name for a symbol, function or global. The string pool can
361341825Sdim  /// efficiently store these values and their counterparts so when we run
362341825Sdim  /// into another instance of a mangled name, we can avoid calling the name
363341825Sdim  /// demangler over and over on the same strings and then trying to unique
364341825Sdim  /// them.
365317027Sdim  ///
366353358Sdim  /// \param[in] demangled
367344779Sdim  ///     The demangled string to correlate with the \a mangled name.
368317027Sdim  ///
369353358Sdim  /// \param[in] mangled
370317027Sdim  ///     The already uniqued mangled ConstString to correlate the
371317027Sdim  ///     soon to be uniqued version of \a demangled.
372344779Sdim  void SetStringWithMangledCounterpart(llvm::StringRef demangled,
373353358Sdim                                       ConstString mangled);
374317027Sdim
375341825Sdim  /// Retrieve the mangled or demangled counterpart for a mangled or demangled
376341825Sdim  /// ConstString.
377317027Sdim  ///
378341825Sdim  /// Object files and debug symbols often use mangled string to represent the
379341825Sdim  /// linkage name for a symbol, function or global. The string pool can
380341825Sdim  /// efficiently store these values and their counterparts so when we run
381341825Sdim  /// into another instance of a mangled name, we can avoid calling the name
382341825Sdim  /// demangler over and over on the same strings and then trying to unique
383341825Sdim  /// them.
384317027Sdim  ///
385353358Sdim  /// \param[in] counterpart
386317027Sdim  ///     A reference to a ConstString object that might get filled in
387317027Sdim  ///     with the demangled/mangled counterpart.
388317027Sdim  ///
389353358Sdim  /// \return
390317027Sdim  ///     /b True if \a counterpart was filled in with the counterpart
391317027Sdim  ///     /b false otherwise.
392317027Sdim  bool GetMangledCounterpart(ConstString &counterpart) const;
393317027Sdim
394317027Sdim  /// Set the C string value with length.
395317027Sdim  ///
396317027Sdim  /// Set the string value in the object by uniquing \a cstr_len bytes
397341825Sdim  /// starting at the \a cstr string value in our global string pool. If trim
398341825Sdim  /// is true, then \a cstr_len indicates a maximum length of the CString and
399341825Sdim  /// if the actual length of the string is less, then it will be trimmed.
400317027Sdim  ///
401341825Sdim  /// If the C string already exists in the global string pool, it finds the
402341825Sdim  /// current entry and returns the existing value. If it doesn't exist, it is
403341825Sdim  /// added to the string pool.
404317027Sdim  ///
405353358Sdim  /// \param[in] cstr
406317027Sdim  ///     A NULL terminated C string to add to the string pool.
407317027Sdim  ///
408353358Sdim  /// \param[in] cstr_len
409317027Sdim  ///     The maximum length of the C string.
410317027Sdim  void SetCStringWithLength(const char *cstr, size_t cstr_len);
411317027Sdim
412341825Sdim  /// Set the C string value with the minimum length between \a fixed_cstr_len
413341825Sdim  /// and the actual length of the C string. This can be used for data
414341825Sdim  /// structures that have a fixed length to store a C string where the string
415341825Sdim  /// might not be NULL terminated if the string takes the entire buffer.
416317027Sdim  void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len);
417317027Sdim
418317027Sdim  /// Get the memory cost of this object.
419317027Sdim  ///
420341825Sdim  /// Return the size in bytes that this object takes in memory. This returns
421341825Sdim  /// the size in bytes of this object, which does not include any the shared
422341825Sdim  /// string values it may refer to.
423317027Sdim  ///
424353358Sdim  /// \return
425317027Sdim  ///     The number of bytes that this object occupies in memory.
426317027Sdim  ///
427353358Sdim  /// \see ConstString::StaticMemorySize ()
428317027Sdim  size_t MemorySize() const { return sizeof(ConstString); }
429317027Sdim
430317027Sdim  /// Get the size in bytes of the current global string pool.
431317027Sdim  ///
432341825Sdim  /// Reports the size in bytes of all shared C string values, containers and
433341825Sdim  /// any other values as a byte size for the entire string pool.
434317027Sdim  ///
435353358Sdim  /// \return
436317027Sdim  ///     The number of bytes that the global string pool occupies
437317027Sdim  ///     in memory.
438317027Sdim  static size_t StaticMemorySize();
439317027Sdim
440317027Sdimprotected:
441360784Sdim  template <typename T> friend struct ::llvm::DenseMapInfo;
442360784Sdim  /// Only used by DenseMapInfo.
443360784Sdim  static ConstString FromStringPoolPointer(const char *ptr) {
444360784Sdim    ConstString s;
445360784Sdim    s.m_string = ptr;
446360784Sdim    return s;
447360784Sdim  };
448360784Sdim
449317027Sdim  // Member variables
450317027Sdim  const char *m_string;
451317027Sdim};
452317027Sdim
453317027Sdim/// Stream the string value \a str to the stream \a s
454353358SdimStream &operator<<(Stream &s, ConstString str);
455317027Sdim
456317027Sdim} // namespace lldb_private
457317027Sdim
458317027Sdimnamespace llvm {
459317027Sdimtemplate <> struct format_provider<lldb_private::ConstString> {
460317027Sdim  static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS,
461317027Sdim                     llvm::StringRef Options);
462317027Sdim};
463360784Sdim
464360784Sdim/// DenseMapInfo implementation.
465360784Sdim/// \{
466360784Sdimtemplate <> struct DenseMapInfo<lldb_private::ConstString> {
467360784Sdim  static inline lldb_private::ConstString getEmptyKey() {
468360784Sdim    return lldb_private::ConstString::FromStringPoolPointer(
469360784Sdim        DenseMapInfo<const char *>::getEmptyKey());
470360784Sdim  }
471360784Sdim  static inline lldb_private::ConstString getTombstoneKey() {
472360784Sdim    return lldb_private::ConstString::FromStringPoolPointer(
473360784Sdim        DenseMapInfo<const char *>::getTombstoneKey());
474360784Sdim  }
475360784Sdim  static unsigned getHashValue(lldb_private::ConstString val) {
476360784Sdim    return DenseMapInfo<const char *>::getHashValue(val.m_string);
477360784Sdim  }
478360784Sdim  static bool isEqual(lldb_private::ConstString LHS,
479360784Sdim                      lldb_private::ConstString RHS) {
480360784Sdim    return LHS == RHS;
481360784Sdim  }
482360784Sdim};
483360784Sdim/// \}
484317027Sdim}
485317027Sdim
486317027Sdim#endif // liblldb_ConstString_h_
487