StringRef.h revision 198090
//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_STRINGREF_H
#define LLVM_ADT_STRINGREF_H

#include <algorithm>
#include <cassert>
#include <cstring>
#include <string>
#include <utility>

namespace llvm {

  /// StringRef - Represent a constant reference to a string, i.e. a character
  /// array and a length, which need not be null terminated.
  ///
  /// This class does not own the string data, it is expected to be used in
  /// situations where the character data resides in some other buffer, whose
  /// lifetime extends past that of the StringRef. For this reason, it is not in
  /// general safe to store a StringRef.
  class StringRef {
  public:
    typedef const char *iterator;
    static const size_t npos = ~size_t(0);
    typedef size_t size_type;

  private:
    /// The start of the string, in an external buffer.
    const char *Data;

    /// The length of the string.
    size_t Length;

    /// compareMemory - Compare \arg Len bytes of \arg LHS and \arg RHS.
    ///
    /// This wraps memcmp so that it is never invoked for a zero-length range;
    /// a default constructed StringRef has a null data pointer, and handing
    /// null to memcmp is undefined even when the length is zero.
    ///
    /// \return - 0 if the ranges are equal (or empty), otherwise the memcmp
    /// result.
    static int compareMemory(const char *LHS, const char *RHS, size_t Len) {
      if (Len == 0)
        return 0;
      return std::memcmp(LHS, RHS, Len);
    }

  public:
    /// @name Constructors
    /// @{

    /// Construct an empty string ref.
    /*implicit*/ StringRef() : Data(0), Length(0) {}

    /// Construct a string ref from a cstring. A null pointer produces an empty
    /// string ref whose data pointer is null.
    /*implicit*/ StringRef(const char *Str)
      : Data(Str), Length(Str ? std::strlen(Str) : 0) {}

    /// Construct a string ref from a pointer and length.
    ///
    /// The length is a size_t (not unsigned) so that lengths computed by
    /// substr() and slice() are not truncated on platforms where size_t is
    /// wider than unsigned.
    /*implicit*/ StringRef(const char *data, size_t length)
      : Data(data), Length(length) {}

    /// Construct a string ref from an std::string.
    /*implicit*/ StringRef(const std::string &Str)
      : Data(Str.c_str()), Length(Str.length()) {}

    /// @}
    /// @name Iterators
    /// @{

    iterator begin() const { return Data; }

    iterator end() const { return Data + Length; }

    /// @}
    /// @name String Operations
    /// @{

    /// data - Get a pointer to the start of the string (which may not be null
    /// terminated).
    const char *data() const { return Data; }

    /// empty - Check if the string is empty.
    bool empty() const { return Length == 0; }

    /// size - Get the string size.
    size_t size() const { return Length; }

    /// front - Get the first character in the string.
    char front() const {
      assert(!empty());
      return Data[0];
    }

    /// back - Get the last character in the string.
    char back() const {
      assert(!empty());
      return Data[Length-1];
    }

    /// equals - Check for string equality, this is more efficient than
    /// compare() when the relative ordering of unequal strings isn't needed.
    bool equals(const StringRef &RHS) const {
      return (Length == RHS.Length &&
              compareMemory(Data, RHS.Data, RHS.Length) == 0);
    }

    /// compare - Compare two strings; the result is -1, 0, or 1 if this string
    /// is lexicographically less than, equal to, or greater than the \arg RHS.
    int compare(const StringRef &RHS) const {
      // Check the prefix for a mismatch.
      if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
        return Res < 0 ? -1 : 1;

      // Otherwise the prefixes match, so we only need to check the lengths.
      if (Length == RHS.Length)
        return 0;
      return Length < RHS.Length ? -1 : 1;
    }

    /// str - Get the contents as an std::string.
    std::string str() const {
      // Do not hand a null pointer to the std::string constructor.
      if (!Data)
        return std::string();
      return std::string(Data, Length);
    }

    /// @}
    /// @name Operator Overloads
    /// @{

    char operator[](size_t Index) const {
      assert(Index < Length && "Invalid index!");
      return Data[Index];
    }

    /// @}
    /// @name Type Conversions
    /// @{

    operator std::string() const {
      return str();
    }

    /// @}
    /// @name String Predicates
    /// @{

    /// startswith - Check if this string starts with the given \arg Prefix.
    bool startswith(const StringRef &Prefix) const {
      // substr() clamps to the string length, so a too-long prefix yields a
      // shorter substring and the equality check fails on length.
      return substr(0, Prefix.Length).equals(Prefix);
    }

    /// endswith - Check if this string ends with the given \arg Suffix.
    bool endswith(const StringRef &Suffix) const {
      // Reject long suffixes up front rather than relying on unsigned
      // wrap-around in the subtraction below.
      if (Suffix.Length > Length)
        return false;
      return slice(Length - Suffix.Length, Length).equals(Suffix);
    }

    /// @}
    /// @name String Searching
    /// @{

    /// find - Search for the first character \arg C in the string.
    ///
    /// \return - The index of the first occurrence of \arg C, or npos if not
    /// found.
    size_t find(char C) const {
      for (size_t i = 0, e = Length; i != e; ++i)
        if (Data[i] == C)
          return i;
      return npos;
    }

    /// find - Search for the first string \arg Str in the string.
    ///
    /// \return - The index of the first occurrence of \arg Str, or npos if not
    /// found.
    size_t find(const StringRef &Str) const;

    /// rfind - Search for the last character \arg C in the string, looking
    /// only at indices below \arg From (clamped to the string length).
    ///
    /// \return - The index of the last occurrence of \arg C, or npos if not
    /// found.
    size_t rfind(char C, size_t From = npos) const {
      From = std::min(From, Length);
      size_t i = From;
      while (i != 0) {
        --i;
        if (Data[i] == C)
          return i;
      }
      return npos;
    }

    /// rfind - Search for the last string \arg Str in the string.
    ///
    /// \return - The index of the last occurrence of \arg Str, or npos if not
    /// found.
    size_t rfind(const StringRef &Str) const;

    /// find_first_of - Find the first instance of the specified character or
    /// return npos if not in string.  Same as find.
    size_type find_first_of(char C) const { return find(C); }

    /// find_first_of - Find the first character from the string 'Chars' in the
    /// current string or return npos if not in string.
    size_type find_first_of(StringRef Chars) const;

    /// find_first_not_of - Find the first character in the string that is not
    /// in the string 'Chars' or return npos if all are in string.
    size_type find_first_not_of(StringRef Chars) const;

    /// @}
    /// @name Helpful Algorithms
    /// @{

    /// count - Return the number of occurrences of \arg C in the string.
    size_t count(char C) const {
      size_t Count = 0;
      for (size_t i = 0, e = Length; i != e; ++i)
        if (Data[i] == C)
          ++Count;
      return Count;
    }

    /// count - Return the number of non-overlapped occurrences of \arg Str in
    /// the string.
    size_t count(const StringRef &Str) const;

    /// getAsInteger - Parse the current string as an integer of the specified
    /// radix.  If Radix is specified as zero, this does radix autosensing using
    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
    ///
    /// If the string is invalid or if only a subset of the string is valid,
    /// this returns true to signify the error.  The string is considered
    /// erroneous if empty.
    ///
    bool getAsInteger(unsigned Radix, long long &Result) const;
    bool getAsInteger(unsigned Radix, unsigned long long &Result) const;
    bool getAsInteger(unsigned Radix, int &Result) const;
    bool getAsInteger(unsigned Radix, unsigned &Result) const;

    // TODO: Provide overloads for int/unsigned that check for overflow.
    // NOTE(review): int/unsigned overloads are declared above; confirm against
    // their out-of-line definitions whether this TODO is stale.

    /// @}
    /// @name Substring Operations
    /// @{

    /// substr - Return a reference to the substring from [Start, Start + N).
    ///
    /// \param Start - The index of the starting character in the substring; if
    /// the index is npos or greater than the length of the string then the
    /// empty substring will be returned.
    ///
    /// \param N - The number of characters to include in the substring. If N
    /// exceeds the number of characters remaining in the string, the string
    /// suffix (starting with \arg Start) will be returned.
    StringRef substr(size_t Start, size_t N = npos) const {
      Start = std::min(Start, Length);
      return StringRef(Data + Start, std::min(N, Length - Start));
    }

    /// slice - Return a reference to the substring from [Start, End).
    ///
    /// \param Start - The index of the starting character in the substring; if
    /// the index is npos or greater than the length of the string then the
    /// empty substring will be returned.
    ///
    /// \param End - The index following the last character to include in the
    /// substring. If this is npos, or exceeds the length of the string, the
    /// string suffix (starting with \arg Start) will be returned. If this is
    /// less than \arg Start, the empty substring will be returned.
    StringRef slice(size_t Start, size_t End) const {
      Start = std::min(Start, Length);
      End = std::min(std::max(Start, End), Length);
      return StringRef(Data + Start, End - Start);
    }

    /// split - Split into two substrings around the first occurrence of a
    /// separator character.
    ///
    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
    /// such that (*this == LHS + Separator + RHS) is true and RHS is
    /// maximal. If \arg Separator is not in the string, then the result is a
    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    ///
    /// \param Separator - The character to split on.
    /// \return - The split substrings.
    std::pair<StringRef, StringRef> split(char Separator) const {
      size_t Idx = find(Separator);
      if (Idx == npos)
        return std::make_pair(*this, StringRef());
      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
    }

    /// rsplit - Split into two substrings around the last occurrence of a
    /// separator character.
    ///
    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
    /// such that (*this == LHS + Separator + RHS) is true and RHS is
    /// minimal. If \arg Separator is not in the string, then the result is a
    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    ///
    /// \param Separator - The character to split on.
    /// \return - The split substrings.
    std::pair<StringRef, StringRef> rsplit(char Separator) const {
      size_t Idx = rfind(Separator);
      if (Idx == npos)
        return std::make_pair(*this, StringRef());
      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
    }

    /// @}
  };

  /// @name StringRef Comparison Operators
  /// @{

  inline bool operator==(const StringRef &LHS, const StringRef &RHS) {
    return LHS.equals(RHS);
  }

  inline bool operator!=(const StringRef &LHS, const StringRef &RHS) {
    return !(LHS == RHS);
  }

  inline bool operator<(const StringRef &LHS, const StringRef &RHS) {
    return LHS.compare(RHS) == -1;
  }

  inline bool operator<=(const StringRef &LHS, const StringRef &RHS) {
    return LHS.compare(RHS) != 1;
  }

  inline bool operator>(const StringRef &LHS, const StringRef &RHS) {
    return LHS.compare(RHS) == 1;
  }

  inline bool operator>=(const StringRef &LHS, const StringRef &RHS) {
    return LHS.compare(RHS) != -1;
  }

  /// @}

}

#endif