ConstString.h revision 344779
1317027Sdim//===-- ConstString.h -------------------------------------------*- C++ -*-===//
2317027Sdim//
3317027Sdim//                     The LLVM Compiler Infrastructure
4317027Sdim//
5317027Sdim// This file is distributed under the University of Illinois Open Source
6317027Sdim// License. See LICENSE.TXT for details.
7317027Sdim//
8317027Sdim//===----------------------------------------------------------------------===//
9317027Sdim
10317027Sdim#ifndef liblldb_ConstString_h_
11317027Sdim#define liblldb_ConstString_h_
12317027Sdim
13317027Sdim#include "llvm/ADT/StringRef.h"
14344779Sdim#include "llvm/Support/FormatVariadic.h"
15317027Sdim
16344779Sdim#include <stddef.h>
17317027Sdim
// Forward declarations. This header only refers to these types through
// pointers and references (Stream *, Stream &, llvm::raw_ostream &), so their
// complete definitions are not required here.
namespace lldb_private {
class Stream;
} // namespace lldb_private
namespace llvm {
class raw_ostream;
} // namespace llvm
24317027Sdim
25317027Sdimnamespace lldb_private {
26317027Sdim
27317027Sdim//----------------------------------------------------------------------
28317027Sdim/// @class ConstString ConstString.h "lldb/Utility/ConstString.h"
29341825Sdim/// A uniqued constant string class.
30317027Sdim///
31341825Sdim/// Provides an efficient way to store strings as uniqued strings. After the
32341825Sdim/// strings are uniqued, finding strings that are equal to one another is very
33341825Sdim/// fast as just the pointers need to be compared. It also allows for many
34341825Sdim/// common strings from many different sources to be shared to keep the memory
35341825Sdim/// footprint low.
36317027Sdim///
37341825Sdim/// No reference counting is done on strings that are added to the string
38341825Sdim/// pool, once strings are added they are in the string pool for the life of
39341825Sdim/// the program.
40317027Sdim//----------------------------------------------------------------------
41317027Sdimclass ConstString {
42317027Sdimpublic:
43317027Sdim  //------------------------------------------------------------------
44317027Sdim  /// Default constructor
45317027Sdim  ///
46317027Sdim  /// Initializes the string to an empty string.
47317027Sdim  //------------------------------------------------------------------
48317027Sdim  ConstString() : m_string(nullptr) {}
49317027Sdim
50317027Sdim  //------------------------------------------------------------------
51317027Sdim  /// Copy constructor
52317027Sdim  ///
53317027Sdim  /// Copies the string value in \a rhs into this object.
54317027Sdim  ///
55317027Sdim  /// @param[in] rhs
56317027Sdim  ///     Another string object to copy.
57317027Sdim  //------------------------------------------------------------------
58317027Sdim  ConstString(const ConstString &rhs) : m_string(rhs.m_string) {}
59317027Sdim
60317027Sdim  explicit ConstString(const llvm::StringRef &s);
61317027Sdim
62317027Sdim  //------------------------------------------------------------------
63317027Sdim  /// Construct with C String value
64317027Sdim  ///
65317027Sdim  /// Constructs this object with a C string by looking to see if the
66317027Sdim  /// C string already exists in the global string pool. If it doesn't
67317027Sdim  /// exist, it is added to the string pool.
68317027Sdim  ///
69317027Sdim  /// @param[in] cstr
70317027Sdim  ///     A NULL terminated C string to add to the string pool.
71317027Sdim  //------------------------------------------------------------------
72317027Sdim  explicit ConstString(const char *cstr);
73317027Sdim
74317027Sdim  //------------------------------------------------------------------
75317027Sdim  /// Construct with C String value with max length
76317027Sdim  ///
77341825Sdim  /// Constructs this object with a C string with a length. If \a max_cstr_len
78341825Sdim  /// is greater than the actual length of the string, the string length will
79341825Sdim  /// be truncated. This allows substrings to be created without the need to
80341825Sdim  /// NULL terminate the string as it is passed into this function.
81317027Sdim  ///
82317027Sdim  /// @param[in] cstr
83317027Sdim  ///     A pointer to the first character in the C string. The C
84317027Sdim  ///     string can be NULL terminated in a buffer that contains
85317027Sdim  ///     more characters than the length of the string, or the
86317027Sdim  ///     string can be part of another string and a new substring
87317027Sdim  ///     can be created.
88317027Sdim  ///
89317027Sdim  /// @param[in] max_cstr_len
90317027Sdim  ///     The max length of \a cstr. If the string length of \a cstr
91317027Sdim  ///     is less than \a max_cstr_len, then the string will be
92317027Sdim  ///     truncated. If the string length of \a cstr is greater than
93317027Sdim  ///     \a max_cstr_len, then only max_cstr_len bytes will be used
94317027Sdim  ///     from \a cstr.
95317027Sdim  //------------------------------------------------------------------
96317027Sdim  explicit ConstString(const char *cstr, size_t max_cstr_len);
97317027Sdim
98317027Sdim  //------------------------------------------------------------------
99317027Sdim  /// Destructor
100317027Sdim  ///
101341825Sdim  /// Since constant string values are currently not reference counted, there
102341825Sdim  /// isn't much to do here.
103317027Sdim  //------------------------------------------------------------------
104317027Sdim  ~ConstString() = default;
105317027Sdim
106317027Sdim  //----------------------------------------------------------------------
107317027Sdim  /// C string equality binary predicate function object for ConstString
108317027Sdim  /// objects.
109317027Sdim  //----------------------------------------------------------------------
110317027Sdim  struct StringIsEqual {
111317027Sdim    //--------------------------------------------------------------
112317027Sdim    /// C equality test.
113317027Sdim    ///
114341825Sdim    /// Two C strings are equal when they are contained in ConstString objects
115341825Sdim    /// when their pointer values are equal to each other.
116317027Sdim    ///
117317027Sdim    /// @return
118317027Sdim    ///     Returns \b true if the C string in \a lhs is equal to
119317027Sdim    ///     the C string value in \a rhs, \b false otherwise.
120317027Sdim    //--------------------------------------------------------------
121317027Sdim    bool operator()(const char *lhs, const char *rhs) const {
122317027Sdim      return lhs == rhs;
123317027Sdim    }
124317027Sdim  };
125317027Sdim
126317027Sdim  //------------------------------------------------------------------
127317027Sdim  /// Convert to bool operator.
128317027Sdim  ///
129341825Sdim  /// This allows code to check a ConstString object to see if it contains a
130341825Sdim  /// valid string using code such as:
131317027Sdim  ///
132317027Sdim  /// @code
133317027Sdim  /// ConstString str(...);
134317027Sdim  /// if (str)
135317027Sdim  /// { ...
136317027Sdim  /// @endcode
137317027Sdim  ///
138317027Sdim  /// @return
139317027Sdim  ///     /b True this object contains a valid non-empty C string, \b
140317027Sdim  ///     false otherwise.
141317027Sdim  //------------------------------------------------------------------
142341825Sdim  explicit operator bool() const { return !IsEmpty(); }
143317027Sdim
144317027Sdim  //------------------------------------------------------------------
145317027Sdim  /// Assignment operator
146317027Sdim  ///
147317027Sdim  /// Assigns the string in this object with the value from \a rhs.
148317027Sdim  ///
149317027Sdim  /// @param[in] rhs
150317027Sdim  ///     Another string object to copy into this object.
151317027Sdim  ///
152317027Sdim  /// @return
153317027Sdim  ///     A const reference to this object.
154317027Sdim  //------------------------------------------------------------------
155317027Sdim  const ConstString &operator=(const ConstString &rhs) {
156317027Sdim    m_string = rhs.m_string;
157317027Sdim    return *this;
158317027Sdim  }
159317027Sdim
160317027Sdim  //------------------------------------------------------------------
161317027Sdim  /// Equal to operator
162317027Sdim  ///
163341825Sdim  /// Returns true if this string is equal to the string in \a rhs. This
164341825Sdim  /// operation is very fast as it results in a pointer comparison since all
165341825Sdim  /// strings are in a uniqued in a global string pool.
166317027Sdim  ///
167317027Sdim  /// @param[in] rhs
168317027Sdim  ///     Another string object to compare this object to.
169317027Sdim  ///
170317027Sdim  /// @return
171317027Sdim  ///     @li \b true if this object is equal to \a rhs.
172317027Sdim  ///     @li \b false if this object is not equal to \a rhs.
173317027Sdim  //------------------------------------------------------------------
174317027Sdim  bool operator==(const ConstString &rhs) const {
175341825Sdim    // We can do a pointer compare to compare these strings since they must
176341825Sdim    // come from the same pool in order to be equal.
177317027Sdim    return m_string == rhs.m_string;
178317027Sdim  }
179317027Sdim
180317027Sdim  //------------------------------------------------------------------
181317027Sdim  /// Not equal to operator
182317027Sdim  ///
183341825Sdim  /// Returns true if this string is not equal to the string in \a rhs. This
184341825Sdim  /// operation is very fast as it results in a pointer comparison since all
185341825Sdim  /// strings are in a uniqued in a global string pool.
186317027Sdim  ///
187317027Sdim  /// @param[in] rhs
188317027Sdim  ///     Another string object to compare this object to.
189317027Sdim  ///
190317027Sdim  /// @return
191317027Sdim  ///     @li \b true if this object is not equal to \a rhs.
192317027Sdim  ///     @li \b false if this object is equal to \a rhs.
193317027Sdim  //------------------------------------------------------------------
194317027Sdim  bool operator!=(const ConstString &rhs) const {
195317027Sdim    return m_string != rhs.m_string;
196317027Sdim  }
197317027Sdim
198317027Sdim  bool operator<(const ConstString &rhs) const;
199317027Sdim
200317027Sdim  //------------------------------------------------------------------
201317027Sdim  /// Get the string value as a C string.
202317027Sdim  ///
203341825Sdim  /// Get the value of the contained string as a NULL terminated C string
204341825Sdim  /// value.
205317027Sdim  ///
206317027Sdim  /// If \a value_if_empty is nullptr, then nullptr will be returned.
207317027Sdim  ///
208317027Sdim  /// @return
209317027Sdim  ///     Returns \a value_if_empty if the string is empty, otherwise
210317027Sdim  ///     the C string value contained in this object.
211317027Sdim  //------------------------------------------------------------------
212317027Sdim  const char *AsCString(const char *value_if_empty = nullptr) const {
213317027Sdim    return (IsEmpty() ? value_if_empty : m_string);
214317027Sdim  }
215317027Sdim
216317027Sdim  //------------------------------------------------------------------
217317027Sdim  /// Get the string value as a llvm::StringRef
218317027Sdim  ///
219317027Sdim  /// @return
220317027Sdim  ///     Returns a new llvm::StringRef object filled in with the
221317027Sdim  ///     needed data.
222317027Sdim  //------------------------------------------------------------------
223317027Sdim  llvm::StringRef GetStringRef() const {
224317027Sdim    return llvm::StringRef(m_string, GetLength());
225317027Sdim  }
226317027Sdim
227317027Sdim  //------------------------------------------------------------------
228317027Sdim  /// Get the string value as a C string.
229317027Sdim  ///
230341825Sdim  /// Get the value of the contained string as a NULL terminated C string
231341825Sdim  /// value. Similar to the ConstString::AsCString() function, yet this
232341825Sdim  /// function will always return nullptr if the string is not valid. So this
233341825Sdim  /// function is a direct accessor to the string pointer value.
234317027Sdim  ///
235317027Sdim  /// @return
236317027Sdim  ///     Returns nullptr the string is invalid, otherwise the C string
237317027Sdim  ///     value contained in this object.
238317027Sdim  //------------------------------------------------------------------
239317027Sdim  const char *GetCString() const { return m_string; }
240317027Sdim
241317027Sdim  //------------------------------------------------------------------
242317027Sdim  /// Get the length in bytes of string value.
243317027Sdim  ///
244341825Sdim  /// The string pool stores the length of the string, so we can avoid calling
245341825Sdim  /// strlen() on the pointer value with this function.
246317027Sdim  ///
247317027Sdim  /// @return
248317027Sdim  ///     Returns the number of bytes that this string occupies in
249317027Sdim  ///     memory, not including the NULL termination byte.
250317027Sdim  //------------------------------------------------------------------
251317027Sdim  size_t GetLength() const;
252317027Sdim
253317027Sdim  //------------------------------------------------------------------
254317027Sdim  /// Clear this object's state.
255317027Sdim  ///
256344779Sdim  /// Clear any contained string and reset the value to the empty string
257341825Sdim  /// value.
258317027Sdim  //------------------------------------------------------------------
259317027Sdim  void Clear() { m_string = nullptr; }
260317027Sdim
261317027Sdim  //------------------------------------------------------------------
262317027Sdim  /// Equal to operator
263317027Sdim  ///
264341825Sdim  /// Returns true if this string is equal to the string in \a rhs. If case
265341825Sdim  /// sensitive equality is tested, this operation is very fast as it results
266341825Sdim  /// in a pointer comparison since all strings are in a uniqued in a global
267341825Sdim  /// string pool.
268317027Sdim  ///
269317027Sdim  /// @param[in] rhs
270317027Sdim  ///     The Left Hand Side const ConstString object reference.
271317027Sdim  ///
272317027Sdim  /// @param[in] rhs
273317027Sdim  ///     The Right Hand Side const ConstString object reference.
274317027Sdim  ///
275317027Sdim  /// @param[in] case_sensitive
276317027Sdim  ///     Case sensitivity. If true, case sensitive equality
277317027Sdim  ///     will be tested, otherwise character case will be ignored
278317027Sdim  ///
279317027Sdim  /// @return
280317027Sdim  ///     @li \b true if this object is equal to \a rhs.
281317027Sdim  ///     @li \b false if this object is not equal to \a rhs.
282317027Sdim  //------------------------------------------------------------------
283317027Sdim  static bool Equals(const ConstString &lhs, const ConstString &rhs,
284317027Sdim                     const bool case_sensitive = true);
285317027Sdim
286317027Sdim  //------------------------------------------------------------------
287317027Sdim  /// Compare two string objects.
288317027Sdim  ///
289341825Sdim  /// Compares the C string values contained in \a lhs and \a rhs and returns
290341825Sdim  /// an integer result.
291317027Sdim  ///
292317027Sdim  /// NOTE: only call this function when you want a true string
293341825Sdim  /// comparison. If you want string equality use the, use the == operator as
294341825Sdim  /// it is much more efficient. Also if you want string inequality, use the
295341825Sdim  /// != operator for the same reasons.
296317027Sdim  ///
297317027Sdim  /// @param[in] lhs
298317027Sdim  ///     The Left Hand Side const ConstString object reference.
299317027Sdim  ///
300317027Sdim  /// @param[in] rhs
301317027Sdim  ///     The Right Hand Side const ConstString object reference.
302317027Sdim  ///
303317027Sdim  /// @param[in] case_sensitive
304317027Sdim  ///     Case sensitivity of compare. If true, case sensitive compare
305317027Sdim  ///     will be performed, otherwise character case will be ignored
306317027Sdim  ///
307317027Sdim  /// @return
308317027Sdim  ///     @li -1 if lhs < rhs
309317027Sdim  ///     @li 0 if lhs == rhs
310317027Sdim  ///     @li 1 if lhs > rhs
311317027Sdim  //------------------------------------------------------------------
312317027Sdim  static int Compare(const ConstString &lhs, const ConstString &rhs,
313317027Sdim                     const bool case_sensitive = true);
314317027Sdim
315317027Sdim  //------------------------------------------------------------------
316317027Sdim  /// Dump the object description to a stream.
317317027Sdim  ///
318341825Sdim  /// Dump the string value to the stream \a s. If the contained string is
319341825Sdim  /// empty, print \a value_if_empty to the stream instead. If \a
320341825Sdim  /// value_if_empty is nullptr, then nothing will be dumped to the stream.
321317027Sdim  ///
322317027Sdim  /// @param[in] s
323317027Sdim  ///     The stream that will be used to dump the object description.
324317027Sdim  ///
325317027Sdim  /// @param[in] value_if_empty
326317027Sdim  ///     The value to dump if the string is empty. If nullptr, nothing
327317027Sdim  ///     will be output to the stream.
328317027Sdim  //------------------------------------------------------------------
329317027Sdim  void Dump(Stream *s, const char *value_if_empty = nullptr) const;
330317027Sdim
331317027Sdim  //------------------------------------------------------------------
332317027Sdim  /// Dump the object debug description to a stream.
333317027Sdim  ///
334317027Sdim  /// @param[in] s
335317027Sdim  ///     The stream that will be used to dump the object description.
336317027Sdim  //------------------------------------------------------------------
337317027Sdim  void DumpDebug(Stream *s) const;
338317027Sdim
339317027Sdim  //------------------------------------------------------------------
340317027Sdim  /// Test for empty string.
341317027Sdim  ///
342317027Sdim  /// @return
343317027Sdim  ///     @li \b true if the contained string is empty.
344317027Sdim  ///     @li \b false if the contained string is not empty.
345317027Sdim  //------------------------------------------------------------------
346317027Sdim  bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; }
347317027Sdim
348317027Sdim  //------------------------------------------------------------------
349344779Sdim  /// Test for null string.
350344779Sdim  ///
351344779Sdim  /// @return
352344779Sdim  ///     @li \b true if there is no string associated with this instance.
353344779Sdim  ///     @li \b false if there is a string associated with this instance.
354344779Sdim  //------------------------------------------------------------------
355344779Sdim  bool IsNull() const { return m_string == nullptr; }
356344779Sdim
357344779Sdim  //------------------------------------------------------------------
358317027Sdim  /// Set the C string value.
359317027Sdim  ///
360341825Sdim  /// Set the string value in the object by uniquing the \a cstr string value
361341825Sdim  /// in our global string pool.
362317027Sdim  ///
363341825Sdim  /// If the C string already exists in the global string pool, it finds the
364341825Sdim  /// current entry and returns the existing value. If it doesn't exist, it is
365341825Sdim  /// added to the string pool.
366317027Sdim  ///
367317027Sdim  /// @param[in] cstr
368317027Sdim  ///     A NULL terminated C string to add to the string pool.
369317027Sdim  //------------------------------------------------------------------
370317027Sdim  void SetCString(const char *cstr);
371317027Sdim
372317027Sdim  void SetString(const llvm::StringRef &s);
373317027Sdim
374317027Sdim  //------------------------------------------------------------------
375317027Sdim  /// Set the C string value and its mangled counterpart.
376317027Sdim  ///
377341825Sdim  /// Object files and debug symbols often use mangled string to represent the
378341825Sdim  /// linkage name for a symbol, function or global. The string pool can
379341825Sdim  /// efficiently store these values and their counterparts so when we run
380341825Sdim  /// into another instance of a mangled name, we can avoid calling the name
381341825Sdim  /// demangler over and over on the same strings and then trying to unique
382341825Sdim  /// them.
383317027Sdim  ///
384317027Sdim  /// @param[in] demangled
385344779Sdim  ///     The demangled string to correlate with the \a mangled name.
386317027Sdim  ///
387317027Sdim  /// @param[in] mangled
388317027Sdim  ///     The already uniqued mangled ConstString to correlate the
389317027Sdim  ///     soon to be uniqued version of \a demangled.
390317027Sdim  //------------------------------------------------------------------
391344779Sdim  void SetStringWithMangledCounterpart(llvm::StringRef demangled,
392344779Sdim                                       const ConstString &mangled);
393317027Sdim
394317027Sdim  //------------------------------------------------------------------
395341825Sdim  /// Retrieve the mangled or demangled counterpart for a mangled or demangled
396341825Sdim  /// ConstString.
397317027Sdim  ///
398341825Sdim  /// Object files and debug symbols often use mangled string to represent the
399341825Sdim  /// linkage name for a symbol, function or global. The string pool can
400341825Sdim  /// efficiently store these values and their counterparts so when we run
401341825Sdim  /// into another instance of a mangled name, we can avoid calling the name
402341825Sdim  /// demangler over and over on the same strings and then trying to unique
403341825Sdim  /// them.
404317027Sdim  ///
405317027Sdim  /// @param[in] counterpart
406317027Sdim  ///     A reference to a ConstString object that might get filled in
407317027Sdim  ///     with the demangled/mangled counterpart.
408317027Sdim  ///
409317027Sdim  /// @return
410317027Sdim  ///     /b True if \a counterpart was filled in with the counterpart
411317027Sdim  ///     /b false otherwise.
412317027Sdim  //------------------------------------------------------------------
413317027Sdim  bool GetMangledCounterpart(ConstString &counterpart) const;
414317027Sdim
415317027Sdim  //------------------------------------------------------------------
416317027Sdim  /// Set the C string value with length.
417317027Sdim  ///
418317027Sdim  /// Set the string value in the object by uniquing \a cstr_len bytes
419341825Sdim  /// starting at the \a cstr string value in our global string pool. If trim
420341825Sdim  /// is true, then \a cstr_len indicates a maximum length of the CString and
421341825Sdim  /// if the actual length of the string is less, then it will be trimmed.
422317027Sdim  ///
423341825Sdim  /// If the C string already exists in the global string pool, it finds the
424341825Sdim  /// current entry and returns the existing value. If it doesn't exist, it is
425341825Sdim  /// added to the string pool.
426317027Sdim  ///
427317027Sdim  /// @param[in] cstr
428317027Sdim  ///     A NULL terminated C string to add to the string pool.
429317027Sdim  ///
430317027Sdim  /// @param[in] cstr_len
431317027Sdim  ///     The maximum length of the C string.
432317027Sdim  //------------------------------------------------------------------
433317027Sdim  void SetCStringWithLength(const char *cstr, size_t cstr_len);
434317027Sdim
435317027Sdim  //------------------------------------------------------------------
436341825Sdim  /// Set the C string value with the minimum length between \a fixed_cstr_len
437341825Sdim  /// and the actual length of the C string. This can be used for data
438341825Sdim  /// structures that have a fixed length to store a C string where the string
439341825Sdim  /// might not be NULL terminated if the string takes the entire buffer.
440317027Sdim  //------------------------------------------------------------------
441317027Sdim  void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len);
442317027Sdim
443317027Sdim  //------------------------------------------------------------------
444317027Sdim  /// Get the memory cost of this object.
445317027Sdim  ///
446341825Sdim  /// Return the size in bytes that this object takes in memory. This returns
447341825Sdim  /// the size in bytes of this object, which does not include any the shared
448341825Sdim  /// string values it may refer to.
449317027Sdim  ///
450317027Sdim  /// @return
451317027Sdim  ///     The number of bytes that this object occupies in memory.
452317027Sdim  ///
453317027Sdim  /// @see ConstString::StaticMemorySize ()
454317027Sdim  //------------------------------------------------------------------
455317027Sdim  size_t MemorySize() const { return sizeof(ConstString); }
456317027Sdim
457317027Sdim  //------------------------------------------------------------------
458317027Sdim  /// Get the size in bytes of the current global string pool.
459317027Sdim  ///
460341825Sdim  /// Reports the size in bytes of all shared C string values, containers and
461341825Sdim  /// any other values as a byte size for the entire string pool.
462317027Sdim  ///
463317027Sdim  /// @return
464317027Sdim  ///     The number of bytes that the global string pool occupies
465317027Sdim  ///     in memory.
466317027Sdim  //------------------------------------------------------------------
467317027Sdim  static size_t StaticMemorySize();
468317027Sdim
469317027Sdimprotected:
470317027Sdim  //------------------------------------------------------------------
471317027Sdim  // Member variables
472317027Sdim  //------------------------------------------------------------------
473317027Sdim  const char *m_string;
474317027Sdim};
475317027Sdim
476317027Sdim//------------------------------------------------------------------
477317027Sdim/// Stream the string value \a str to the stream \a s
478317027Sdim//------------------------------------------------------------------
479317027SdimStream &operator<<(Stream &s, const ConstString &str);
480317027Sdim
481317027Sdim} // namespace lldb_private
482317027Sdim
483317027Sdimnamespace llvm {
484317027Sdimtemplate <> struct format_provider<lldb_private::ConstString> {
485317027Sdim  static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS,
486317027Sdim                     llvm::StringRef Options);
487317027Sdim};
488317027Sdim}
489317027Sdim
490317027Sdim#endif // liblldb_ConstString_h_
491