StringRef.h revision 198090
1198090Srdivacky//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
2198090Srdivacky//
3198090Srdivacky//                     The LLVM Compiler Infrastructure
4198090Srdivacky//
5198090Srdivacky// This file is distributed under the University of Illinois Open Source
6198090Srdivacky// License. See LICENSE.TXT for details.
7198090Srdivacky//
8198090Srdivacky//===----------------------------------------------------------------------===//
9198090Srdivacky
10198090Srdivacky#ifndef LLVM_ADT_STRINGREF_H
11198090Srdivacky#define LLVM_ADT_STRINGREF_H
12198090Srdivacky
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <string>
#include <utility>
17198090Srdivacky
18198090Srdivackynamespace llvm {
19198090Srdivacky
20198090Srdivacky  /// StringRef - Represent a constant reference to a string, i.e. a character
21198090Srdivacky  /// array and a length, which need not be null terminated.
22198090Srdivacky  ///
23198090Srdivacky  /// This class does not own the string data, it is expected to be used in
24198090Srdivacky  /// situations where the character data resides in some other buffer, whose
25198090Srdivacky  /// lifetime extends past that of the StringRef. For this reason, it is not in
26198090Srdivacky  /// general safe to store a StringRef.
27198090Srdivacky  class StringRef {
28198090Srdivacky  public:
29198090Srdivacky    typedef const char *iterator;
30198090Srdivacky    static const size_t npos = ~size_t(0);
31198090Srdivacky    typedef size_t size_type;
32198090Srdivacky
33198090Srdivacky  private:
34198090Srdivacky    /// The start of the string, in an external buffer.
35198090Srdivacky    const char *Data;
36198090Srdivacky
37198090Srdivacky    /// The length of the string.
38198090Srdivacky    size_t Length;
39198090Srdivacky
40198090Srdivacky  public:
41198090Srdivacky    /// @name Constructors
42198090Srdivacky    /// @{
43198090Srdivacky
44198090Srdivacky    /// Construct an empty string ref.
45198090Srdivacky    /*implicit*/ StringRef() : Data(0), Length(0) {}
46198090Srdivacky
47198090Srdivacky    /// Construct a string ref from a cstring.
48198090Srdivacky    /*implicit*/ StringRef(const char *Str)
49198090Srdivacky      : Data(Str) { if (Str) Length = ::strlen(Str); else Length = 0; }
50198090Srdivacky
51198090Srdivacky    /// Construct a string ref from a pointer and length.
52198090Srdivacky    /*implicit*/ StringRef(const char *data, unsigned length)
53198090Srdivacky      : Data(data), Length(length) {}
54198090Srdivacky
55198090Srdivacky    /// Construct a string ref from an std::string.
56198090Srdivacky    /*implicit*/ StringRef(const std::string &Str)
57198090Srdivacky      : Data(Str.c_str()), Length(Str.length()) {}
58198090Srdivacky
59198090Srdivacky    /// @}
60198090Srdivacky    /// @name Iterators
61198090Srdivacky    /// @{
62198090Srdivacky
63198090Srdivacky    iterator begin() const { return Data; }
64198090Srdivacky
65198090Srdivacky    iterator end() const { return Data + Length; }
66198090Srdivacky
67198090Srdivacky    /// @}
68198090Srdivacky    /// @name String Operations
69198090Srdivacky    /// @{
70198090Srdivacky
71198090Srdivacky    /// data - Get a pointer to the start of the string (which may not be null
72198090Srdivacky    /// terminated).
73198090Srdivacky    const char *data() const { return Data; }
74198090Srdivacky
75198090Srdivacky    /// empty - Check if the string is empty.
76198090Srdivacky    bool empty() const { return Length == 0; }
77198090Srdivacky
78198090Srdivacky    /// size - Get the string size.
79198090Srdivacky    size_t size() const { return Length; }
80198090Srdivacky
81198090Srdivacky    /// front - Get the first character in the string.
82198090Srdivacky    char front() const {
83198090Srdivacky      assert(!empty());
84198090Srdivacky      return Data[0];
85198090Srdivacky    }
86198090Srdivacky
87198090Srdivacky    /// back - Get the last character in the string.
88198090Srdivacky    char back() const {
89198090Srdivacky      assert(!empty());
90198090Srdivacky      return Data[Length-1];
91198090Srdivacky    }
92198090Srdivacky
93198090Srdivacky    /// equals - Check for string equality, this is more efficient than
94198090Srdivacky    /// compare() when the relative ordering of inequal strings isn't needed.
95198090Srdivacky    bool equals(const StringRef &RHS) const {
96198090Srdivacky      return (Length == RHS.Length &&
97198090Srdivacky              memcmp(Data, RHS.Data, RHS.Length) == 0);
98198090Srdivacky    }
99198090Srdivacky
100198090Srdivacky    /// compare - Compare two strings; the result is -1, 0, or 1 if this string
101198090Srdivacky    /// is lexicographically less than, equal to, or greater than the \arg RHS.
102198090Srdivacky    int compare(const StringRef &RHS) const {
103198090Srdivacky      // Check the prefix for a mismatch.
104198090Srdivacky      if (int Res = memcmp(Data, RHS.Data, std::min(Length, RHS.Length)))
105198090Srdivacky        return Res < 0 ? -1 : 1;
106198090Srdivacky
107198090Srdivacky      // Otherwise the prefixes match, so we only need to check the lengths.
108198090Srdivacky      if (Length == RHS.Length)
109198090Srdivacky        return 0;
110198090Srdivacky      return Length < RHS.Length ? -1 : 1;
111198090Srdivacky    }
112198090Srdivacky
113198090Srdivacky    /// str - Get the contents as an std::string.
114198090Srdivacky    std::string str() const { return std::string(Data, Length); }
115198090Srdivacky
116198090Srdivacky    /// @}
117198090Srdivacky    /// @name Operator Overloads
118198090Srdivacky    /// @{
119198090Srdivacky
120198090Srdivacky    char operator[](size_t Index) const {
121198090Srdivacky      assert(Index < Length && "Invalid index!");
122198090Srdivacky      return Data[Index];
123198090Srdivacky    }
124198090Srdivacky
125198090Srdivacky    /// @}
126198090Srdivacky    /// @name Type Conversions
127198090Srdivacky    /// @{
128198090Srdivacky
129198090Srdivacky    operator std::string() const {
130198090Srdivacky      return str();
131198090Srdivacky    }
132198090Srdivacky
133198090Srdivacky    /// @}
134198090Srdivacky    /// @name String Predicates
135198090Srdivacky    /// @{
136198090Srdivacky
137198090Srdivacky    /// startswith - Check if this string starts with the given \arg Prefix.
138198090Srdivacky    bool startswith(const StringRef &Prefix) const {
139198090Srdivacky      return substr(0, Prefix.Length).equals(Prefix);
140198090Srdivacky    }
141198090Srdivacky
142198090Srdivacky    /// endswith - Check if this string ends with the given \arg Suffix.
143198090Srdivacky    bool endswith(const StringRef &Suffix) const {
144198090Srdivacky      return slice(size() - Suffix.Length, size()).equals(Suffix);
145198090Srdivacky    }
146198090Srdivacky
147198090Srdivacky    /// @}
148198090Srdivacky    /// @name String Searching
149198090Srdivacky    /// @{
150198090Srdivacky
151198090Srdivacky    /// find - Search for the first character \arg C in the string.
152198090Srdivacky    ///
153198090Srdivacky    /// \return - The index of the first occurence of \arg C, or npos if not
154198090Srdivacky    /// found.
155198090Srdivacky    size_t find(char C) const {
156198090Srdivacky      for (size_t i = 0, e = Length; i != e; ++i)
157198090Srdivacky        if (Data[i] == C)
158198090Srdivacky          return i;
159198090Srdivacky      return npos;
160198090Srdivacky    }
161198090Srdivacky
162198090Srdivacky    /// find - Search for the first string \arg Str in the string.
163198090Srdivacky    ///
164198090Srdivacky    /// \return - The index of the first occurence of \arg Str, or npos if not
165198090Srdivacky    /// found.
166198090Srdivacky    size_t find(const StringRef &Str) const;
167198090Srdivacky
168198090Srdivacky    /// rfind - Search for the last character \arg C in the string.
169198090Srdivacky    ///
170198090Srdivacky    /// \return - The index of the last occurence of \arg C, or npos if not
171198090Srdivacky    /// found.
172198090Srdivacky    size_t rfind(char C, size_t From = npos) const {
173198090Srdivacky      From = std::min(From, Length);
174198090Srdivacky      size_t i = From;
175198090Srdivacky      while (i != 0) {
176198090Srdivacky        --i;
177198090Srdivacky        if (Data[i] == C)
178198090Srdivacky          return i;
179198090Srdivacky      }
180198090Srdivacky      return npos;
181198090Srdivacky    }
182198090Srdivacky
183198090Srdivacky    /// rfind - Search for the last string \arg Str in the string.
184198090Srdivacky    ///
185198090Srdivacky    /// \return - The index of the last occurence of \arg Str, or npos if not
186198090Srdivacky    /// found.
187198090Srdivacky    size_t rfind(const StringRef &Str) const;
188198090Srdivacky
189198090Srdivacky    /// find_first_of - Find the first instance of the specified character or
190198090Srdivacky    /// return npos if not in string.  Same as find.
191198090Srdivacky    size_type find_first_of(char C) const { return find(C); }
192198090Srdivacky
193198090Srdivacky    /// find_first_of - Find the first character from the string 'Chars' in the
194198090Srdivacky    /// current string or return npos if not in string.
195198090Srdivacky    size_type find_first_of(StringRef Chars) const;
196198090Srdivacky
197198090Srdivacky    /// find_first_not_of - Find the first character in the string that is not
198198090Srdivacky    /// in the string 'Chars' or return npos if all are in string. Same as find.
199198090Srdivacky    size_type find_first_not_of(StringRef Chars) const;
200198090Srdivacky
201198090Srdivacky    /// @}
202198090Srdivacky    /// @name Helpful Algorithms
203198090Srdivacky    /// @{
204198090Srdivacky
205198090Srdivacky    /// count - Return the number of occurrences of \arg C in the string.
206198090Srdivacky    size_t count(char C) const {
207198090Srdivacky      size_t Count = 0;
208198090Srdivacky      for (size_t i = 0, e = Length; i != e; ++i)
209198090Srdivacky        if (Data[i] == C)
210198090Srdivacky          ++Count;
211198090Srdivacky      return Count;
212198090Srdivacky    }
213198090Srdivacky
214198090Srdivacky    /// count - Return the number of non-overlapped occurrences of \arg Str in
215198090Srdivacky    /// the string.
216198090Srdivacky    size_t count(const StringRef &Str) const;
217198090Srdivacky
218198090Srdivacky    /// getAsInteger - Parse the current string as an integer of the specified
219198090Srdivacky    /// radix.  If Radix is specified as zero, this does radix autosensing using
220198090Srdivacky    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
221198090Srdivacky    ///
222198090Srdivacky    /// If the string is invalid or if only a subset of the string is valid,
223198090Srdivacky    /// this returns true to signify the error.  The string is considered
224198090Srdivacky    /// erroneous if empty.
225198090Srdivacky    ///
226198090Srdivacky    bool getAsInteger(unsigned Radix, long long &Result) const;
227198090Srdivacky    bool getAsInteger(unsigned Radix, unsigned long long &Result) const;
228198090Srdivacky    bool getAsInteger(unsigned Radix, int &Result) const;
229198090Srdivacky    bool getAsInteger(unsigned Radix, unsigned &Result) const;
230198090Srdivacky
231198090Srdivacky    // TODO: Provide overloads for int/unsigned that check for overflow.
232198090Srdivacky
233198090Srdivacky    /// @}
234198090Srdivacky    /// @name Substring Operations
235198090Srdivacky    /// @{
236198090Srdivacky
237198090Srdivacky    /// substr - Return a reference to the substring from [Start, Start + N).
238198090Srdivacky    ///
239198090Srdivacky    /// \param Start - The index of the starting character in the substring; if
240198090Srdivacky    /// the index is npos or greater than the length of the string then the
241198090Srdivacky    /// empty substring will be returned.
242198090Srdivacky    ///
243198090Srdivacky    /// \param N - The number of characters to included in the substring. If N
244198090Srdivacky    /// exceeds the number of characters remaining in the string, the string
245198090Srdivacky    /// suffix (starting with \arg Start) will be returned.
246198090Srdivacky    StringRef substr(size_t Start, size_t N = npos) const {
247198090Srdivacky      Start = std::min(Start, Length);
248198090Srdivacky      return StringRef(Data + Start, std::min(N, Length - Start));
249198090Srdivacky    }
250198090Srdivacky
251198090Srdivacky    /// slice - Return a reference to the substring from [Start, End).
252198090Srdivacky    ///
253198090Srdivacky    /// \param Start - The index of the starting character in the substring; if
254198090Srdivacky    /// the index is npos or greater than the length of the string then the
255198090Srdivacky    /// empty substring will be returned.
256198090Srdivacky    ///
257198090Srdivacky    /// \param End - The index following the last character to include in the
258198090Srdivacky    /// substring. If this is npos, or less than \arg Start, or exceeds the
259198090Srdivacky    /// number of characters remaining in the string, the string suffix
260198090Srdivacky    /// (starting with \arg Start) will be returned.
261198090Srdivacky    StringRef slice(size_t Start, size_t End) const {
262198090Srdivacky      Start = std::min(Start, Length);
263198090Srdivacky      End = std::min(std::max(Start, End), Length);
264198090Srdivacky      return StringRef(Data + Start, End - Start);
265198090Srdivacky    }
266198090Srdivacky
267198090Srdivacky    /// split - Split into two substrings around the first occurence of a
268198090Srdivacky    /// separator character.
269198090Srdivacky    ///
270198090Srdivacky    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
271198090Srdivacky    /// such that (*this == LHS + Separator + RHS) is true and RHS is
272198090Srdivacky    /// maximal. If \arg Separator is not in the string, then the result is a
273198090Srdivacky    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
274198090Srdivacky    ///
275198090Srdivacky    /// \param Separator - The character to split on.
276198090Srdivacky    /// \return - The split substrings.
277198090Srdivacky    std::pair<StringRef, StringRef> split(char Separator) const {
278198090Srdivacky      size_t Idx = find(Separator);
279198090Srdivacky      if (Idx == npos)
280198090Srdivacky        return std::make_pair(*this, StringRef());
281198090Srdivacky      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
282198090Srdivacky    }
283198090Srdivacky
284198090Srdivacky    /// rsplit - Split into two substrings around the last occurence of a
285198090Srdivacky    /// separator character.
286198090Srdivacky    ///
287198090Srdivacky    /// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
288198090Srdivacky    /// such that (*this == LHS + Separator + RHS) is true and RHS is
289198090Srdivacky    /// minimal. If \arg Separator is not in the string, then the result is a
290198090Srdivacky    /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
291198090Srdivacky    ///
292198090Srdivacky    /// \param Separator - The character to split on.
293198090Srdivacky    /// \return - The split substrings.
294198090Srdivacky    std::pair<StringRef, StringRef> rsplit(char Separator) const {
295198090Srdivacky      size_t Idx = rfind(Separator);
296198090Srdivacky      if (Idx == npos)
297198090Srdivacky        return std::make_pair(*this, StringRef());
298198090Srdivacky      return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
299198090Srdivacky    }
300198090Srdivacky
301198090Srdivacky    /// @}
302198090Srdivacky  };
303198090Srdivacky
304198090Srdivacky  /// @name StringRef Comparison Operators
305198090Srdivacky  /// @{
306198090Srdivacky
307198090Srdivacky  inline bool operator==(const StringRef &LHS, const StringRef &RHS) {
308198090Srdivacky    return LHS.equals(RHS);
309198090Srdivacky  }
310198090Srdivacky
311198090Srdivacky  inline bool operator!=(const StringRef &LHS, const StringRef &RHS) {
312198090Srdivacky    return !(LHS == RHS);
313198090Srdivacky  }
314198090Srdivacky
315198090Srdivacky  inline bool operator<(const StringRef &LHS, const StringRef &RHS) {
316198090Srdivacky    return LHS.compare(RHS) == -1;
317198090Srdivacky  }
318198090Srdivacky
319198090Srdivacky  inline bool operator<=(const StringRef &LHS, const StringRef &RHS) {
320198090Srdivacky    return LHS.compare(RHS) != 1;
321198090Srdivacky  }
322198090Srdivacky
323198090Srdivacky  inline bool operator>(const StringRef &LHS, const StringRef &RHS) {
324198090Srdivacky    return LHS.compare(RHS) == 1;
325198090Srdivacky  }
326198090Srdivacky
327198090Srdivacky  inline bool operator>=(const StringRef &LHS, const StringRef &RHS) {
328198090Srdivacky    return LHS.compare(RHS) != -1;
329198090Srdivacky  }
330198090Srdivacky
331198090Srdivacky  /// @}
332198090Srdivacky
333198090Srdivacky}
334198090Srdivacky
335198090Srdivacky#endif
336