// ConstString.h revision 360784
1317027Sdim//===-- ConstString.h -------------------------------------------*- C++ -*-===// 2317027Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6317027Sdim// 7317027Sdim//===----------------------------------------------------------------------===// 8317027Sdim 9317027Sdim#ifndef liblldb_ConstString_h_ 10317027Sdim#define liblldb_ConstString_h_ 11317027Sdim 12317027Sdim#include "llvm/ADT/StringRef.h" 13360784Sdim#include "llvm/ADT/DenseMapInfo.h" 14344779Sdim#include "llvm/Support/FormatVariadic.h" 15317027Sdim 16344779Sdim#include <stddef.h> 17317027Sdim 18317027Sdimnamespace lldb_private { 19317027Sdimclass Stream; 20317027Sdim} 21317027Sdimnamespace llvm { 22317027Sdimclass raw_ostream; 23317027Sdim} 24317027Sdim 25317027Sdimnamespace lldb_private { 26317027Sdim 27353358Sdim/// \class ConstString ConstString.h "lldb/Utility/ConstString.h" 28341825Sdim/// A uniqued constant string class. 29317027Sdim/// 30341825Sdim/// Provides an efficient way to store strings as uniqued strings. After the 31341825Sdim/// strings are uniqued, finding strings that are equal to one another is very 32341825Sdim/// fast as just the pointers need to be compared. It also allows for many 33341825Sdim/// common strings from many different sources to be shared to keep the memory 34341825Sdim/// footprint low. 35317027Sdim/// 36341825Sdim/// No reference counting is done on strings that are added to the string 37341825Sdim/// pool, once strings are added they are in the string pool for the life of 38341825Sdim/// the program. 39317027Sdimclass ConstString { 40317027Sdimpublic: 41317027Sdim /// Default constructor 42317027Sdim /// 43317027Sdim /// Initializes the string to an empty string. 
44317027Sdim ConstString() : m_string(nullptr) {} 45317027Sdim 46317027Sdim /// Copy constructor 47317027Sdim /// 48317027Sdim /// Copies the string value in \a rhs into this object. 49317027Sdim /// 50353358Sdim /// \param[in] rhs 51317027Sdim /// Another string object to copy. 52317027Sdim ConstString(const ConstString &rhs) : m_string(rhs.m_string) {} 53317027Sdim 54317027Sdim explicit ConstString(const llvm::StringRef &s); 55317027Sdim 56317027Sdim /// Construct with C String value 57317027Sdim /// 58317027Sdim /// Constructs this object with a C string by looking to see if the 59317027Sdim /// C string already exists in the global string pool. If it doesn't 60317027Sdim /// exist, it is added to the string pool. 61317027Sdim /// 62353358Sdim /// \param[in] cstr 63317027Sdim /// A NULL terminated C string to add to the string pool. 64317027Sdim explicit ConstString(const char *cstr); 65317027Sdim 66317027Sdim /// Construct with C String value with max length 67317027Sdim /// 68341825Sdim /// Constructs this object with a C string with a length. If \a max_cstr_len 69341825Sdim /// is greater than the actual length of the string, the string length will 70341825Sdim /// be truncated. This allows substrings to be created without the need to 71341825Sdim /// NULL terminate the string as it is passed into this function. 72317027Sdim /// 73353358Sdim /// \param[in] cstr 74317027Sdim /// A pointer to the first character in the C string. The C 75317027Sdim /// string can be NULL terminated in a buffer that contains 76317027Sdim /// more characters than the length of the string, or the 77317027Sdim /// string can be part of another string and a new substring 78317027Sdim /// can be created. 79317027Sdim /// 80353358Sdim /// \param[in] max_cstr_len 81317027Sdim /// The max length of \a cstr. If the string length of \a cstr 82317027Sdim /// is less than \a max_cstr_len, then the string will be 83317027Sdim /// truncated. 
If the string length of \a cstr is greater than 84317027Sdim /// \a max_cstr_len, then only max_cstr_len bytes will be used 85317027Sdim /// from \a cstr. 86317027Sdim explicit ConstString(const char *cstr, size_t max_cstr_len); 87317027Sdim 88317027Sdim /// Destructor 89317027Sdim /// 90341825Sdim /// Since constant string values are currently not reference counted, there 91341825Sdim /// isn't much to do here. 92317027Sdim ~ConstString() = default; 93317027Sdim 94317027Sdim /// C string equality binary predicate function object for ConstString 95317027Sdim /// objects. 96317027Sdim struct StringIsEqual { 97317027Sdim /// C equality test. 98317027Sdim /// 99341825Sdim /// Two C strings are equal when they are contained in ConstString objects 100341825Sdim /// when their pointer values are equal to each other. 101317027Sdim /// 102353358Sdim /// \return 103317027Sdim /// Returns \b true if the C string in \a lhs is equal to 104317027Sdim /// the C string value in \a rhs, \b false otherwise. 105317027Sdim bool operator()(const char *lhs, const char *rhs) const { 106317027Sdim return lhs == rhs; 107317027Sdim } 108317027Sdim }; 109317027Sdim 110317027Sdim /// Convert to bool operator. 111317027Sdim /// 112341825Sdim /// This allows code to check a ConstString object to see if it contains a 113341825Sdim /// valid string using code such as: 114317027Sdim /// 115353358Sdim /// \code 116317027Sdim /// ConstString str(...); 117317027Sdim /// if (str) 118317027Sdim /// { ... 119353358Sdim /// \endcode 120317027Sdim /// 121353358Sdim /// \return 122317027Sdim /// /b True this object contains a valid non-empty C string, \b 123317027Sdim /// false otherwise. 124341825Sdim explicit operator bool() const { return !IsEmpty(); } 125317027Sdim 126317027Sdim /// Assignment operator 127317027Sdim /// 128317027Sdim /// Assigns the string in this object with the value from \a rhs. 
129317027Sdim /// 130353358Sdim /// \param[in] rhs 131317027Sdim /// Another string object to copy into this object. 132317027Sdim /// 133353358Sdim /// \return 134317027Sdim /// A const reference to this object. 135353358Sdim ConstString operator=(ConstString rhs) { 136317027Sdim m_string = rhs.m_string; 137317027Sdim return *this; 138317027Sdim } 139317027Sdim 140317027Sdim /// Equal to operator 141317027Sdim /// 142341825Sdim /// Returns true if this string is equal to the string in \a rhs. This 143341825Sdim /// operation is very fast as it results in a pointer comparison since all 144341825Sdim /// strings are in a uniqued in a global string pool. 145317027Sdim /// 146353358Sdim /// \param[in] rhs 147317027Sdim /// Another string object to compare this object to. 148317027Sdim /// 149353358Sdim /// \return 150360784Sdim /// true if this object is equal to \a rhs. 151360784Sdim /// false if this object is not equal to \a rhs. 152353358Sdim bool operator==(ConstString rhs) const { 153341825Sdim // We can do a pointer compare to compare these strings since they must 154341825Sdim // come from the same pool in order to be equal. 155317027Sdim return m_string == rhs.m_string; 156317027Sdim } 157317027Sdim 158353358Sdim /// Equal to operator against a non-ConstString value. 159353358Sdim /// 160353358Sdim /// Returns true if this string is equal to the string in \a rhs. This 161353358Sdim /// overload is usually slower than comparing against a ConstString value. 162353358Sdim /// However, if the rhs string not already a ConstString and it is impractical 163353358Sdim /// to turn it into a non-temporary variable, then this overload is faster. 164353358Sdim /// 165353358Sdim /// \param[in] rhs 166353358Sdim /// Another string object to compare this object to. 167353358Sdim /// 168353358Sdim /// \return 169360784Sdim /// \b true if this object is equal to \a rhs. 170360784Sdim /// \b false if this object is not equal to \a rhs. 
171353358Sdim bool operator==(const char *rhs) const { 172353358Sdim // ConstString differentiates between empty strings and nullptr strings, but 173353358Sdim // StringRef doesn't. Therefore we have to do this check manually now. 174353358Sdim if (m_string == nullptr && rhs != nullptr) 175353358Sdim return false; 176353358Sdim if (m_string != nullptr && rhs == nullptr) 177353358Sdim return false; 178353358Sdim 179353358Sdim return GetStringRef() == rhs; 180353358Sdim } 181353358Sdim 182317027Sdim /// Not equal to operator 183317027Sdim /// 184341825Sdim /// Returns true if this string is not equal to the string in \a rhs. This 185341825Sdim /// operation is very fast as it results in a pointer comparison since all 186341825Sdim /// strings are in a uniqued in a global string pool. 187317027Sdim /// 188353358Sdim /// \param[in] rhs 189317027Sdim /// Another string object to compare this object to. 190317027Sdim /// 191353358Sdim /// \return 192360784Sdim /// \b true if this object is not equal to \a rhs. 193360784Sdim /// \b false if this object is equal to \a rhs. 194353358Sdim bool operator!=(ConstString rhs) const { 195317027Sdim return m_string != rhs.m_string; 196317027Sdim } 197317027Sdim 198353358Sdim /// Not equal to operator against a non-ConstString value. 199353358Sdim /// 200353358Sdim /// Returns true if this string is not equal to the string in \a rhs. This 201353358Sdim /// overload is usually slower than comparing against a ConstString value. 202353358Sdim /// However, if the rhs string not already a ConstString and it is impractical 203353358Sdim /// to turn it into a non-temporary variable, then this overload is faster. 204353358Sdim /// 205353358Sdim /// \param[in] rhs 206353358Sdim /// Another string object to compare this object to. 207353358Sdim /// 208360784Sdim /// \return \b true if this object is not equal to \a rhs, false otherwise. 
209353358Sdim bool operator!=(const char *rhs) const { return !(*this == rhs); } 210317027Sdim 211353358Sdim bool operator<(ConstString rhs) const; 212353358Sdim 213317027Sdim /// Get the string value as a C string. 214317027Sdim /// 215341825Sdim /// Get the value of the contained string as a NULL terminated C string 216341825Sdim /// value. 217317027Sdim /// 218317027Sdim /// If \a value_if_empty is nullptr, then nullptr will be returned. 219317027Sdim /// 220360784Sdim /// \return Returns \a value_if_empty if the string is empty, otherwise 221317027Sdim /// the C string value contained in this object. 222317027Sdim const char *AsCString(const char *value_if_empty = nullptr) const { 223317027Sdim return (IsEmpty() ? value_if_empty : m_string); 224317027Sdim } 225317027Sdim 226317027Sdim /// Get the string value as a llvm::StringRef 227317027Sdim /// 228353358Sdim /// \return 229317027Sdim /// Returns a new llvm::StringRef object filled in with the 230317027Sdim /// needed data. 231317027Sdim llvm::StringRef GetStringRef() const { 232317027Sdim return llvm::StringRef(m_string, GetLength()); 233317027Sdim } 234317027Sdim 235317027Sdim /// Get the string value as a C string. 236317027Sdim /// 237341825Sdim /// Get the value of the contained string as a NULL terminated C string 238341825Sdim /// value. Similar to the ConstString::AsCString() function, yet this 239341825Sdim /// function will always return nullptr if the string is not valid. So this 240341825Sdim /// function is a direct accessor to the string pointer value. 241317027Sdim /// 242353358Sdim /// \return 243317027Sdim /// Returns nullptr the string is invalid, otherwise the C string 244317027Sdim /// value contained in this object. 245317027Sdim const char *GetCString() const { return m_string; } 246317027Sdim 247317027Sdim /// Get the length in bytes of string value. 
248317027Sdim /// 249341825Sdim /// The string pool stores the length of the string, so we can avoid calling 250341825Sdim /// strlen() on the pointer value with this function. 251317027Sdim /// 252353358Sdim /// \return 253317027Sdim /// Returns the number of bytes that this string occupies in 254317027Sdim /// memory, not including the NULL termination byte. 255317027Sdim size_t GetLength() const; 256317027Sdim 257317027Sdim /// Clear this object's state. 258317027Sdim /// 259344779Sdim /// Clear any contained string and reset the value to the empty string 260341825Sdim /// value. 261317027Sdim void Clear() { m_string = nullptr; } 262317027Sdim 263317027Sdim /// Equal to operator 264317027Sdim /// 265341825Sdim /// Returns true if this string is equal to the string in \a rhs. If case 266341825Sdim /// sensitive equality is tested, this operation is very fast as it results 267341825Sdim /// in a pointer comparison since all strings are in a uniqued in a global 268341825Sdim /// string pool. 269317027Sdim /// 270360784Sdim /// \param[in] lhs 271317027Sdim /// The Left Hand Side const ConstString object reference. 272317027Sdim /// 273353358Sdim /// \param[in] rhs 274317027Sdim /// The Right Hand Side const ConstString object reference. 275317027Sdim /// 276353358Sdim /// \param[in] case_sensitive 277317027Sdim /// Case sensitivity. If true, case sensitive equality 278317027Sdim /// will be tested, otherwise character case will be ignored 279317027Sdim /// 280360784Sdim /// \return \b true if this object is equal to \a rhs, \b false otherwise. 281353358Sdim static bool Equals(ConstString lhs, ConstString rhs, 282317027Sdim const bool case_sensitive = true); 283317027Sdim 284317027Sdim /// Compare two string objects. 285317027Sdim /// 286341825Sdim /// Compares the C string values contained in \a lhs and \a rhs and returns 287341825Sdim /// an integer result. 
288317027Sdim /// 289317027Sdim /// NOTE: only call this function when you want a true string 290341825Sdim /// comparison. If you want string equality use the, use the == operator as 291341825Sdim /// it is much more efficient. Also if you want string inequality, use the 292341825Sdim /// != operator for the same reasons. 293317027Sdim /// 294353358Sdim /// \param[in] lhs 295317027Sdim /// The Left Hand Side const ConstString object reference. 296317027Sdim /// 297353358Sdim /// \param[in] rhs 298317027Sdim /// The Right Hand Side const ConstString object reference. 299317027Sdim /// 300353358Sdim /// \param[in] case_sensitive 301317027Sdim /// Case sensitivity of compare. If true, case sensitive compare 302317027Sdim /// will be performed, otherwise character case will be ignored 303317027Sdim /// 304360784Sdim /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs 305353358Sdim static int Compare(ConstString lhs, ConstString rhs, 306317027Sdim const bool case_sensitive = true); 307317027Sdim 308317027Sdim /// Dump the object description to a stream. 309317027Sdim /// 310341825Sdim /// Dump the string value to the stream \a s. If the contained string is 311341825Sdim /// empty, print \a value_if_empty to the stream instead. If \a 312341825Sdim /// value_if_empty is nullptr, then nothing will be dumped to the stream. 313317027Sdim /// 314353358Sdim /// \param[in] s 315317027Sdim /// The stream that will be used to dump the object description. 316317027Sdim /// 317353358Sdim /// \param[in] value_if_empty 318317027Sdim /// The value to dump if the string is empty. If nullptr, nothing 319317027Sdim /// will be output to the stream. 320317027Sdim void Dump(Stream *s, const char *value_if_empty = nullptr) const; 321317027Sdim 322317027Sdim /// Dump the object debug description to a stream. 323317027Sdim /// 324353358Sdim /// \param[in] s 325317027Sdim /// The stream that will be used to dump the object description. 
326317027Sdim void DumpDebug(Stream *s) const; 327317027Sdim 328317027Sdim /// Test for empty string. 329317027Sdim /// 330353358Sdim /// \return 331360784Sdim /// \b true if the contained string is empty. 332360784Sdim /// \b false if the contained string is not empty. 333317027Sdim bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; } 334317027Sdim 335344779Sdim /// Test for null string. 336344779Sdim /// 337353358Sdim /// \return 338360784Sdim /// \b true if there is no string associated with this instance. 339360784Sdim /// \b false if there is a string associated with this instance. 340344779Sdim bool IsNull() const { return m_string == nullptr; } 341344779Sdim 342317027Sdim /// Set the C string value. 343317027Sdim /// 344341825Sdim /// Set the string value in the object by uniquing the \a cstr string value 345341825Sdim /// in our global string pool. 346317027Sdim /// 347341825Sdim /// If the C string already exists in the global string pool, it finds the 348341825Sdim /// current entry and returns the existing value. If it doesn't exist, it is 349341825Sdim /// added to the string pool. 350317027Sdim /// 351353358Sdim /// \param[in] cstr 352317027Sdim /// A NULL terminated C string to add to the string pool. 353317027Sdim void SetCString(const char *cstr); 354317027Sdim 355317027Sdim void SetString(const llvm::StringRef &s); 356317027Sdim 357317027Sdim /// Set the C string value and its mangled counterpart. 358317027Sdim /// 359341825Sdim /// Object files and debug symbols often use mangled string to represent the 360341825Sdim /// linkage name for a symbol, function or global. The string pool can 361341825Sdim /// efficiently store these values and their counterparts so when we run 362341825Sdim /// into another instance of a mangled name, we can avoid calling the name 363341825Sdim /// demangler over and over on the same strings and then trying to unique 364341825Sdim /// them. 
365317027Sdim /// 366353358Sdim /// \param[in] demangled 367344779Sdim /// The demangled string to correlate with the \a mangled name. 368317027Sdim /// 369353358Sdim /// \param[in] mangled 370317027Sdim /// The already uniqued mangled ConstString to correlate the 371317027Sdim /// soon to be uniqued version of \a demangled. 372344779Sdim void SetStringWithMangledCounterpart(llvm::StringRef demangled, 373353358Sdim ConstString mangled); 374317027Sdim 375341825Sdim /// Retrieve the mangled or demangled counterpart for a mangled or demangled 376341825Sdim /// ConstString. 377317027Sdim /// 378341825Sdim /// Object files and debug symbols often use mangled string to represent the 379341825Sdim /// linkage name for a symbol, function or global. The string pool can 380341825Sdim /// efficiently store these values and their counterparts so when we run 381341825Sdim /// into another instance of a mangled name, we can avoid calling the name 382341825Sdim /// demangler over and over on the same strings and then trying to unique 383341825Sdim /// them. 384317027Sdim /// 385353358Sdim /// \param[in] counterpart 386317027Sdim /// A reference to a ConstString object that might get filled in 387317027Sdim /// with the demangled/mangled counterpart. 388317027Sdim /// 389353358Sdim /// \return 390317027Sdim /// /b True if \a counterpart was filled in with the counterpart 391317027Sdim /// /b false otherwise. 392317027Sdim bool GetMangledCounterpart(ConstString &counterpart) const; 393317027Sdim 394317027Sdim /// Set the C string value with length. 395317027Sdim /// 396317027Sdim /// Set the string value in the object by uniquing \a cstr_len bytes 397341825Sdim /// starting at the \a cstr string value in our global string pool. If trim 398341825Sdim /// is true, then \a cstr_len indicates a maximum length of the CString and 399341825Sdim /// if the actual length of the string is less, then it will be trimmed. 
400317027Sdim /// 401341825Sdim /// If the C string already exists in the global string pool, it finds the 402341825Sdim /// current entry and returns the existing value. If it doesn't exist, it is 403341825Sdim /// added to the string pool. 404317027Sdim /// 405353358Sdim /// \param[in] cstr 406317027Sdim /// A NULL terminated C string to add to the string pool. 407317027Sdim /// 408353358Sdim /// \param[in] cstr_len 409317027Sdim /// The maximum length of the C string. 410317027Sdim void SetCStringWithLength(const char *cstr, size_t cstr_len); 411317027Sdim 412341825Sdim /// Set the C string value with the minimum length between \a fixed_cstr_len 413341825Sdim /// and the actual length of the C string. This can be used for data 414341825Sdim /// structures that have a fixed length to store a C string where the string 415341825Sdim /// might not be NULL terminated if the string takes the entire buffer. 416317027Sdim void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len); 417317027Sdim 418317027Sdim /// Get the memory cost of this object. 419317027Sdim /// 420341825Sdim /// Return the size in bytes that this object takes in memory. This returns 421341825Sdim /// the size in bytes of this object, which does not include any the shared 422341825Sdim /// string values it may refer to. 423317027Sdim /// 424353358Sdim /// \return 425317027Sdim /// The number of bytes that this object occupies in memory. 426317027Sdim /// 427353358Sdim /// \see ConstString::StaticMemorySize () 428317027Sdim size_t MemorySize() const { return sizeof(ConstString); } 429317027Sdim 430317027Sdim /// Get the size in bytes of the current global string pool. 431317027Sdim /// 432341825Sdim /// Reports the size in bytes of all shared C string values, containers and 433341825Sdim /// any other values as a byte size for the entire string pool. 
434317027Sdim /// 435353358Sdim /// \return 436317027Sdim /// The number of bytes that the global string pool occupies 437317027Sdim /// in memory. 438317027Sdim static size_t StaticMemorySize(); 439317027Sdim 440317027Sdimprotected: 441360784Sdim template <typename T> friend struct ::llvm::DenseMapInfo; 442360784Sdim /// Only used by DenseMapInfo. 443360784Sdim static ConstString FromStringPoolPointer(const char *ptr) { 444360784Sdim ConstString s; 445360784Sdim s.m_string = ptr; 446360784Sdim return s; 447360784Sdim }; 448360784Sdim 449317027Sdim // Member variables 450317027Sdim const char *m_string; 451317027Sdim}; 452317027Sdim 453317027Sdim/// Stream the string value \a str to the stream \a s 454353358SdimStream &operator<<(Stream &s, ConstString str); 455317027Sdim 456317027Sdim} // namespace lldb_private 457317027Sdim 458317027Sdimnamespace llvm { 459317027Sdimtemplate <> struct format_provider<lldb_private::ConstString> { 460317027Sdim static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS, 461317027Sdim llvm::StringRef Options); 462317027Sdim}; 463360784Sdim 464360784Sdim/// DenseMapInfo implementation. 
465360784Sdim/// \{ 466360784Sdimtemplate <> struct DenseMapInfo<lldb_private::ConstString> { 467360784Sdim static inline lldb_private::ConstString getEmptyKey() { 468360784Sdim return lldb_private::ConstString::FromStringPoolPointer( 469360784Sdim DenseMapInfo<const char *>::getEmptyKey()); 470360784Sdim } 471360784Sdim static inline lldb_private::ConstString getTombstoneKey() { 472360784Sdim return lldb_private::ConstString::FromStringPoolPointer( 473360784Sdim DenseMapInfo<const char *>::getTombstoneKey()); 474360784Sdim } 475360784Sdim static unsigned getHashValue(lldb_private::ConstString val) { 476360784Sdim return DenseMapInfo<const char *>::getHashValue(val.m_string); 477360784Sdim } 478360784Sdim static bool isEqual(lldb_private::ConstString LHS, 479360784Sdim lldb_private::ConstString RHS) { 480360784Sdim return LHS == RHS; 481360784Sdim } 482360784Sdim}; 483360784Sdim/// \} 484317027Sdim} 485317027Sdim 486317027Sdim#endif // liblldb_ConstString_h_ 487