ConstString.h revision 360660
1//===-- ConstString.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef liblldb_ConstString_h_
10#define liblldb_ConstString_h_
11
12#include "llvm/ADT/StringRef.h"
13#include "llvm/Support/FormatVariadic.h"
14
15#include <stddef.h>
16
// Forward declaration only: this header refers to Stream solely through
// pointers and references, so the full definition is not required here.
namespace lldb_private {
class Stream;
} // namespace lldb_private
// Forward declaration only: this header refers to llvm::raw_ostream solely by
// reference, so the full definition is not required here.
namespace llvm {
class raw_ostream;
} // namespace llvm
23
24namespace lldb_private {
25
26/// \class ConstString ConstString.h "lldb/Utility/ConstString.h"
27/// A uniqued constant string class.
28///
29/// Provides an efficient way to store strings as uniqued strings. After the
30/// strings are uniqued, finding strings that are equal to one another is very
31/// fast as just the pointers need to be compared. It also allows for many
32/// common strings from many different sources to be shared to keep the memory
33/// footprint low.
34///
35/// No reference counting is done on strings that are added to the string
36/// pool, once strings are added they are in the string pool for the life of
37/// the program.
38class ConstString {
39public:
40  /// Default constructor
41  ///
42  /// Initializes the string to an empty string.
43  ConstString() : m_string(nullptr) {}
44
45  /// Copy constructor
46  ///
47  /// Copies the string value in \a rhs into this object.
48  ///
49  /// \param[in] rhs
50  ///     Another string object to copy.
51  ConstString(const ConstString &rhs) : m_string(rhs.m_string) {}
52
53  explicit ConstString(const llvm::StringRef &s);
54
55  /// Construct with C String value
56  ///
57  /// Constructs this object with a C string by looking to see if the
58  /// C string already exists in the global string pool. If it doesn't
59  /// exist, it is added to the string pool.
60  ///
61  /// \param[in] cstr
62  ///     A NULL terminated C string to add to the string pool.
63  explicit ConstString(const char *cstr);
64
65  /// Construct with C String value with max length
66  ///
67  /// Constructs this object with a C string with a length. If \a max_cstr_len
68  /// is greater than the actual length of the string, the string length will
69  /// be truncated. This allows substrings to be created without the need to
70  /// NULL terminate the string as it is passed into this function.
71  ///
72  /// \param[in] cstr
73  ///     A pointer to the first character in the C string. The C
74  ///     string can be NULL terminated in a buffer that contains
75  ///     more characters than the length of the string, or the
76  ///     string can be part of another string and a new substring
77  ///     can be created.
78  ///
79  /// \param[in] max_cstr_len
80  ///     The max length of \a cstr. If the string length of \a cstr
81  ///     is less than \a max_cstr_len, then the string will be
82  ///     truncated. If the string length of \a cstr is greater than
83  ///     \a max_cstr_len, then only max_cstr_len bytes will be used
84  ///     from \a cstr.
85  explicit ConstString(const char *cstr, size_t max_cstr_len);
86
87  /// Destructor
88  ///
89  /// Since constant string values are currently not reference counted, there
90  /// isn't much to do here.
91  ~ConstString() = default;
92
93  /// C string equality binary predicate function object for ConstString
94  /// objects.
95  struct StringIsEqual {
96    /// C equality test.
97    ///
98    /// Two C strings are equal when they are contained in ConstString objects
99    /// when their pointer values are equal to each other.
100    ///
101    /// \return
102    ///     Returns \b true if the C string in \a lhs is equal to
103    ///     the C string value in \a rhs, \b false otherwise.
104    bool operator()(const char *lhs, const char *rhs) const {
105      return lhs == rhs;
106    }
107  };
108
109  /// Convert to bool operator.
110  ///
111  /// This allows code to check a ConstString object to see if it contains a
112  /// valid string using code such as:
113  ///
114  /// \code
115  /// ConstString str(...);
116  /// if (str)
117  /// { ...
118  /// \endcode
119  ///
120  /// \return
121  ///     /b True this object contains a valid non-empty C string, \b
122  ///     false otherwise.
123  explicit operator bool() const { return !IsEmpty(); }
124
125  /// Assignment operator
126  ///
127  /// Assigns the string in this object with the value from \a rhs.
128  ///
129  /// \param[in] rhs
130  ///     Another string object to copy into this object.
131  ///
132  /// \return
133  ///     A const reference to this object.
134  ConstString operator=(ConstString rhs) {
135    m_string = rhs.m_string;
136    return *this;
137  }
138
139  /// Equal to operator
140  ///
141  /// Returns true if this string is equal to the string in \a rhs. This
142  /// operation is very fast as it results in a pointer comparison since all
143  /// strings are in a uniqued in a global string pool.
144  ///
145  /// \param[in] rhs
146  ///     Another string object to compare this object to.
147  ///
148  /// \return
149  ///     \li \b true if this object is equal to \a rhs.
150  ///     \li \b false if this object is not equal to \a rhs.
151  bool operator==(ConstString rhs) const {
152    // We can do a pointer compare to compare these strings since they must
153    // come from the same pool in order to be equal.
154    return m_string == rhs.m_string;
155  }
156
157  /// Equal to operator against a non-ConstString value.
158  ///
159  /// Returns true if this string is equal to the string in \a rhs. This
160  /// overload is usually slower than comparing against a ConstString value.
161  /// However, if the rhs string not already a ConstString and it is impractical
162  /// to turn it into a non-temporary variable, then this overload is faster.
163  ///
164  /// \param[in] rhs
165  ///     Another string object to compare this object to.
166  ///
167  /// \return
168  ///     \li \b true if this object is equal to \a rhs.
169  ///     \li \b false if this object is not equal to \a rhs.
170  bool operator==(const char *rhs) const {
171    // ConstString differentiates between empty strings and nullptr strings, but
172    // StringRef doesn't. Therefore we have to do this check manually now.
173    if (m_string == nullptr && rhs != nullptr)
174      return false;
175    if (m_string != nullptr && rhs == nullptr)
176      return false;
177
178    return GetStringRef() == rhs;
179  }
180
181  /// Not equal to operator
182  ///
183  /// Returns true if this string is not equal to the string in \a rhs. This
184  /// operation is very fast as it results in a pointer comparison since all
185  /// strings are in a uniqued in a global string pool.
186  ///
187  /// \param[in] rhs
188  ///     Another string object to compare this object to.
189  ///
190  /// \return
191  ///     \li \b true if this object is not equal to \a rhs.
192  ///     \li \b false if this object is equal to \a rhs.
193  bool operator!=(ConstString rhs) const {
194    return m_string != rhs.m_string;
195  }
196
197  /// Not equal to operator against a non-ConstString value.
198  ///
199  /// Returns true if this string is not equal to the string in \a rhs. This
200  /// overload is usually slower than comparing against a ConstString value.
201  /// However, if the rhs string not already a ConstString and it is impractical
202  /// to turn it into a non-temporary variable, then this overload is faster.
203  ///
204  /// \param[in] rhs
205  ///     Another string object to compare this object to.
206  ///
207  /// \return
208  ///     \li \b true if this object is not equal to \a rhs.
209  ///     \li \b false if this object is equal to \a rhs.
210  bool operator!=(const char *rhs) const { return !(*this == rhs); }
211
212  bool operator<(ConstString rhs) const;
213
214  /// Get the string value as a C string.
215  ///
216  /// Get the value of the contained string as a NULL terminated C string
217  /// value.
218  ///
219  /// If \a value_if_empty is nullptr, then nullptr will be returned.
220  ///
221  /// \return
222  ///     Returns \a value_if_empty if the string is empty, otherwise
223  ///     the C string value contained in this object.
224  const char *AsCString(const char *value_if_empty = nullptr) const {
225    return (IsEmpty() ? value_if_empty : m_string);
226  }
227
228  /// Get the string value as a llvm::StringRef
229  ///
230  /// \return
231  ///     Returns a new llvm::StringRef object filled in with the
232  ///     needed data.
233  llvm::StringRef GetStringRef() const {
234    return llvm::StringRef(m_string, GetLength());
235  }
236
237  /// Get the string value as a C string.
238  ///
239  /// Get the value of the contained string as a NULL terminated C string
240  /// value. Similar to the ConstString::AsCString() function, yet this
241  /// function will always return nullptr if the string is not valid. So this
242  /// function is a direct accessor to the string pointer value.
243  ///
244  /// \return
245  ///     Returns nullptr the string is invalid, otherwise the C string
246  ///     value contained in this object.
247  const char *GetCString() const { return m_string; }
248
249  /// Get the length in bytes of string value.
250  ///
251  /// The string pool stores the length of the string, so we can avoid calling
252  /// strlen() on the pointer value with this function.
253  ///
254  /// \return
255  ///     Returns the number of bytes that this string occupies in
256  ///     memory, not including the NULL termination byte.
257  size_t GetLength() const;
258
259  /// Clear this object's state.
260  ///
261  /// Clear any contained string and reset the value to the empty string
262  /// value.
263  void Clear() { m_string = nullptr; }
264
265  /// Equal to operator
266  ///
267  /// Returns true if this string is equal to the string in \a rhs. If case
268  /// sensitive equality is tested, this operation is very fast as it results
269  /// in a pointer comparison since all strings are in a uniqued in a global
270  /// string pool.
271  ///
272  /// \param[in] rhs
273  ///     The Left Hand Side const ConstString object reference.
274  ///
275  /// \param[in] rhs
276  ///     The Right Hand Side const ConstString object reference.
277  ///
278  /// \param[in] case_sensitive
279  ///     Case sensitivity. If true, case sensitive equality
280  ///     will be tested, otherwise character case will be ignored
281  ///
282  /// \return
283  ///     \li \b true if this object is equal to \a rhs.
284  ///     \li \b false if this object is not equal to \a rhs.
285  static bool Equals(ConstString lhs, ConstString rhs,
286                     const bool case_sensitive = true);
287
288  /// Compare two string objects.
289  ///
290  /// Compares the C string values contained in \a lhs and \a rhs and returns
291  /// an integer result.
292  ///
293  /// NOTE: only call this function when you want a true string
294  /// comparison. If you want string equality use the, use the == operator as
295  /// it is much more efficient. Also if you want string inequality, use the
296  /// != operator for the same reasons.
297  ///
298  /// \param[in] lhs
299  ///     The Left Hand Side const ConstString object reference.
300  ///
301  /// \param[in] rhs
302  ///     The Right Hand Side const ConstString object reference.
303  ///
304  /// \param[in] case_sensitive
305  ///     Case sensitivity of compare. If true, case sensitive compare
306  ///     will be performed, otherwise character case will be ignored
307  ///
308  /// \return
309  ///     \li -1 if lhs < rhs
310  ///     \li 0 if lhs == rhs
311  ///     \li 1 if lhs > rhs
312  static int Compare(ConstString lhs, ConstString rhs,
313                     const bool case_sensitive = true);
314
315  /// Dump the object description to a stream.
316  ///
317  /// Dump the string value to the stream \a s. If the contained string is
318  /// empty, print \a value_if_empty to the stream instead. If \a
319  /// value_if_empty is nullptr, then nothing will be dumped to the stream.
320  ///
321  /// \param[in] s
322  ///     The stream that will be used to dump the object description.
323  ///
324  /// \param[in] value_if_empty
325  ///     The value to dump if the string is empty. If nullptr, nothing
326  ///     will be output to the stream.
327  void Dump(Stream *s, const char *value_if_empty = nullptr) const;
328
329  /// Dump the object debug description to a stream.
330  ///
331  /// \param[in] s
332  ///     The stream that will be used to dump the object description.
333  void DumpDebug(Stream *s) const;
334
335  /// Test for empty string.
336  ///
337  /// \return
338  ///     \li \b true if the contained string is empty.
339  ///     \li \b false if the contained string is not empty.
340  bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; }
341
342  /// Test for null string.
343  ///
344  /// \return
345  ///     \li \b true if there is no string associated with this instance.
346  ///     \li \b false if there is a string associated with this instance.
347  bool IsNull() const { return m_string == nullptr; }
348
349  /// Set the C string value.
350  ///
351  /// Set the string value in the object by uniquing the \a cstr string value
352  /// in our global string pool.
353  ///
354  /// If the C string already exists in the global string pool, it finds the
355  /// current entry and returns the existing value. If it doesn't exist, it is
356  /// added to the string pool.
357  ///
358  /// \param[in] cstr
359  ///     A NULL terminated C string to add to the string pool.
360  void SetCString(const char *cstr);
361
362  void SetString(const llvm::StringRef &s);
363
364  /// Set the C string value and its mangled counterpart.
365  ///
366  /// Object files and debug symbols often use mangled string to represent the
367  /// linkage name for a symbol, function or global. The string pool can
368  /// efficiently store these values and their counterparts so when we run
369  /// into another instance of a mangled name, we can avoid calling the name
370  /// demangler over and over on the same strings and then trying to unique
371  /// them.
372  ///
373  /// \param[in] demangled
374  ///     The demangled string to correlate with the \a mangled name.
375  ///
376  /// \param[in] mangled
377  ///     The already uniqued mangled ConstString to correlate the
378  ///     soon to be uniqued version of \a demangled.
379  void SetStringWithMangledCounterpart(llvm::StringRef demangled,
380                                       ConstString mangled);
381
382  /// Retrieve the mangled or demangled counterpart for a mangled or demangled
383  /// ConstString.
384  ///
385  /// Object files and debug symbols often use mangled string to represent the
386  /// linkage name for a symbol, function or global. The string pool can
387  /// efficiently store these values and their counterparts so when we run
388  /// into another instance of a mangled name, we can avoid calling the name
389  /// demangler over and over on the same strings and then trying to unique
390  /// them.
391  ///
392  /// \param[in] counterpart
393  ///     A reference to a ConstString object that might get filled in
394  ///     with the demangled/mangled counterpart.
395  ///
396  /// \return
397  ///     /b True if \a counterpart was filled in with the counterpart
398  ///     /b false otherwise.
399  bool GetMangledCounterpart(ConstString &counterpart) const;
400
401  /// Set the C string value with length.
402  ///
403  /// Set the string value in the object by uniquing \a cstr_len bytes
404  /// starting at the \a cstr string value in our global string pool. If trim
405  /// is true, then \a cstr_len indicates a maximum length of the CString and
406  /// if the actual length of the string is less, then it will be trimmed.
407  ///
408  /// If the C string already exists in the global string pool, it finds the
409  /// current entry and returns the existing value. If it doesn't exist, it is
410  /// added to the string pool.
411  ///
412  /// \param[in] cstr
413  ///     A NULL terminated C string to add to the string pool.
414  ///
415  /// \param[in] cstr_len
416  ///     The maximum length of the C string.
417  void SetCStringWithLength(const char *cstr, size_t cstr_len);
418
419  /// Set the C string value with the minimum length between \a fixed_cstr_len
420  /// and the actual length of the C string. This can be used for data
421  /// structures that have a fixed length to store a C string where the string
422  /// might not be NULL terminated if the string takes the entire buffer.
423  void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len);
424
425  /// Get the memory cost of this object.
426  ///
427  /// Return the size in bytes that this object takes in memory. This returns
428  /// the size in bytes of this object, which does not include any the shared
429  /// string values it may refer to.
430  ///
431  /// \return
432  ///     The number of bytes that this object occupies in memory.
433  ///
434  /// \see ConstString::StaticMemorySize ()
435  size_t MemorySize() const { return sizeof(ConstString); }
436
437  /// Get the size in bytes of the current global string pool.
438  ///
439  /// Reports the size in bytes of all shared C string values, containers and
440  /// any other values as a byte size for the entire string pool.
441  ///
442  /// \return
443  ///     The number of bytes that the global string pool occupies
444  ///     in memory.
445  static size_t StaticMemorySize();
446
447protected:
448  // Member variables
449  const char *m_string;
450};
451
452/// Stream the string value \a str to the stream \a s
453Stream &operator<<(Stream &s, ConstString str);
454
455} // namespace lldb_private
456
457namespace llvm {
458template <> struct format_provider<lldb_private::ConstString> {
459  static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS,
460                     llvm::StringRef Options);
461};
462}
463
464#endif // liblldb_ConstString_h_
465