// RegularExpression.h revision 317032
//===-- RegularExpression.h -------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
938494Sobrien
1038494Sobrien#ifndef liblldb_RegularExpression_h_
1138494Sobrien#define liblldb_RegularExpression_h_
1238494Sobrien
1338494Sobrien#ifdef _WIN32
1438494Sobrien#include "../lib/Support/regex_impl.h"
1538494Sobrien
1638494Sobrientypedef llvm_regmatch_t regmatch_t;
1738494Sobrientypedef llvm_regex_t regex_t;
1838494Sobrien
1938494Sobrieninline int regcomp(llvm_regex_t *a, const char *b, int c) {
2042629Sobrien  return llvm_regcomp(a, b, c);
2138494Sobrien}
2238494Sobrien
2338494Sobrieninline size_t regerror(int a, const llvm_regex_t *b, char *c, size_t d) {
2438494Sobrien  return llvm_regerror(a, b, c, d);
2538494Sobrien}
2638494Sobrien
2738494Sobrieninline int regexec(const llvm_regex_t *a, const char *b, size_t c,
2838494Sobrien                   llvm_regmatch_t d[], int e) {
2938494Sobrien  return llvm_regexec(a, b, c, d, e);
3038494Sobrien}
3138494Sobrien
3238494Sobrieninline void regfree(llvm_regex_t *a) { llvm_regfree(a); }
3338494Sobrien#else
3438494Sobrien#ifdef __ANDROID__
3538494Sobrien#include <regex>
3638494Sobrien#endif
3738494Sobrien#include <regex.h>
3838494Sobrien#endif
3938494Sobrien
40174294Sobrien#include <string>
4138494Sobrien#include <vector>
4238494Sobrien
4338494Sobrien#include <stddef.h> // for size_t
44174294Sobrien#include <stdint.h>
45174294Sobrien
// Forward declaration only: this header mentions llvm::StringRef solely in
// function declarations, so the complete type is not required here.
namespace llvm {
class StringRef;
} // end namespace llvm
49174294Sobrien
50174294Sobriennamespace lldb_private {
51174294Sobrien
52174294Sobrien//----------------------------------------------------------------------
53174294Sobrien/// @class RegularExpression RegularExpression.h
54174294Sobrien/// "lldb/Utility/RegularExpression.h"
55174294Sobrien/// @brief A C++ wrapper class for regex.
56174294Sobrien///
57174294Sobrien/// This regular expression class wraps the posix regex functions
58174294Sobrien/// \c regcomp(), \c regerror(), \c regexec(), and \c regfree() from
59174294Sobrien/// the header file in \c /usr/include/regex\.h.
60174294Sobrien//----------------------------------------------------------------------
61174294Sobrienclass RegularExpression {
62174294Sobrienpublic:
63174294Sobrien  class Match {
64174294Sobrien  public:
65174294Sobrien    Match(uint32_t max_matches) : m_matches() {
66174294Sobrien      if (max_matches > 0)
67174294Sobrien        m_matches.resize(max_matches + 1);
68174294Sobrien    }
69174294Sobrien
70174294Sobrien    void Clear() {
71174294Sobrien      const size_t num_matches = m_matches.size();
72174294Sobrien      regmatch_t invalid_match = {-1, -1};
73174294Sobrien      for (size_t i = 0; i < num_matches; ++i)
74174294Sobrien        m_matches[i] = invalid_match;
75174294Sobrien    }
76174294Sobrien
77174294Sobrien    size_t GetSize() const { return m_matches.size(); }
78174294Sobrien
79174294Sobrien    regmatch_t *GetData() {
80174294Sobrien      return (m_matches.empty() ? nullptr : m_matches.data());
81174294Sobrien    }
82174294Sobrien
83174294Sobrien    bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
84174294Sobrien                         std::string &match_str) const;
85174294Sobrien
86174294Sobrien    bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
87174294Sobrien                         llvm::StringRef &match_str) const;
88174294Sobrien
89174294Sobrien    bool GetMatchSpanningIndices(llvm::StringRef s, uint32_t idx1,
90174294Sobrien                                 uint32_t idx2,
91174294Sobrien                                 llvm::StringRef &match_str) const;
92174294Sobrien
93174294Sobrien  protected:
94174294Sobrien    std::vector<regmatch_t>
95174294Sobrien        m_matches; ///< Where parenthesized subexpressions results are stored
96174294Sobrien  };
97174294Sobrien
98174294Sobrien  //------------------------------------------------------------------
99174294Sobrien  /// Default constructor.
100174294Sobrien  ///
101174294Sobrien  /// The default constructor that initializes the object state such
102174294Sobrien  /// that it contains no compiled regular expression.
103174294Sobrien  //------------------------------------------------------------------
104174294Sobrien  RegularExpression();
105174294Sobrien
106174294Sobrien  explicit RegularExpression(llvm::StringRef string);
107174294Sobrien
108174294Sobrien  //------------------------------------------------------------------
109174294Sobrien  /// Destructor.
110174294Sobrien  ///
111174294Sobrien  /// Any previously compiled regular expression contained in this
112174294Sobrien  /// object will be freed.
113174294Sobrien  //------------------------------------------------------------------
114174294Sobrien  ~RegularExpression();
115174294Sobrien
116174294Sobrien  RegularExpression(const RegularExpression &rhs);
117174294Sobrien
118174294Sobrien  const RegularExpression &operator=(const RegularExpression &rhs);
119174294Sobrien
120174294Sobrien  //------------------------------------------------------------------
121174294Sobrien  /// Compile a regular expression.
122174294Sobrien  ///
123174294Sobrien  /// Compile a regular expression using the supplied regular
124174294Sobrien  /// expression text. The compiled regular expression lives
125174294Sobrien  /// in this object so that it can be readily used for regular
126174294Sobrien  /// expression matches. Execute() can be called after the regular
127174294Sobrien  /// expression is compiled. Any previously compiled regular
128174294Sobrien  /// expression contained in this object will be freed.
129174294Sobrien  ///
130174294Sobrien  /// @param[in] re
131174294Sobrien  ///     A NULL terminated C string that represents the regular
132174294Sobrien  ///     expression to compile.
133174294Sobrien  ///
134174294Sobrien  /// @return
135174294Sobrien  ///     \b true if the regular expression compiles successfully,
136174294Sobrien  ///     \b false otherwise.
137174294Sobrien  //------------------------------------------------------------------
138174294Sobrien  bool Compile(llvm::StringRef string);
139174294Sobrien  bool Compile(const char *) = delete;
140174294Sobrien
141174294Sobrien  //------------------------------------------------------------------
142174294Sobrien  /// Executes a regular expression.
143174294Sobrien  ///
144174294Sobrien  /// Execute a regular expression match using the compiled regular
145174294Sobrien  /// expression that is already in this object against the match
146174294Sobrien  /// string \a s. If any parens are used for regular expression
147174294Sobrien  /// matches \a match_count should indicate the number of regmatch_t
148174294Sobrien  /// values that are present in \a match_ptr.
149174294Sobrien  ///
150174294Sobrien  /// @param[in] string
151174294Sobrien  ///     The string to match against the compile regular expression.
152174294Sobrien  ///
153174294Sobrien  /// @param[in] match
154174294Sobrien  ///     A pointer to a RegularExpression::Match structure that was
155174294Sobrien  ///     properly initialized with the desired number of maximum
156174294Sobrien  ///     matches, or nullptr if no parenthesized matching is needed.
157174294Sobrien  ///
158174294Sobrien  /// @return
159174294Sobrien  ///     \b true if \a string matches the compiled regular
160174294Sobrien  ///     expression, \b false otherwise.
161174294Sobrien  //------------------------------------------------------------------
162174294Sobrien  bool Execute(llvm::StringRef string, Match *match = nullptr) const;
163174294Sobrien  bool Execute(const char *, Match * = nullptr) = delete;
164174294Sobrien
165174294Sobrien  size_t GetErrorAsCString(char *err_str, size_t err_str_max_len) const;
166174294Sobrien
16738494Sobrien  //------------------------------------------------------------------
168174294Sobrien  /// Free the compiled regular expression.
169174294Sobrien  ///
170174294Sobrien  /// If this object contains a valid compiled regular expression,
171174294Sobrien  /// this function will free any resources it was consuming.
172174294Sobrien  //------------------------------------------------------------------
173174294Sobrien  void Free();
174174294Sobrien
175174294Sobrien  //------------------------------------------------------------------
176174294Sobrien  /// Access the regular expression text.
177174294Sobrien  ///
178174294Sobrien  /// Returns the text that was used to compile the current regular
179174294Sobrien  /// expression.
180174294Sobrien  ///
18138494Sobrien  /// @return
18238494Sobrien  ///     The NULL terminated C string that was used to compile the
18338494Sobrien  ///     current regular expression
18438494Sobrien  //------------------------------------------------------------------
18538494Sobrien  llvm::StringRef GetText() const;
18638494Sobrien
18738494Sobrien  //------------------------------------------------------------------
18838494Sobrien  /// Test if valid.
18938494Sobrien  ///
19038494Sobrien  /// Test if this object contains a valid regular expression.
19138494Sobrien  ///
19238494Sobrien  /// @return
19338494Sobrien  ///     \b true if the regular expression compiled and is ready
19438494Sobrien  ///     for execution, \b false otherwise.
19538494Sobrien  //------------------------------------------------------------------
19638494Sobrien  bool IsValid() const;
19738494Sobrien
19838494Sobrien  void Clear() {
19938494Sobrien    Free();
20038494Sobrien    m_re.clear();
20138494Sobrien    m_comp_err = 1;
20238494Sobrien  }
20338494Sobrien
20438494Sobrien  int GetErrorCode() const { return m_comp_err; }
20538494Sobrien
20638494Sobrien  bool operator<(const RegularExpression &rhs) const;
20738494Sobrien
20838494Sobrienprivate:
20938494Sobrien  //------------------------------------------------------------------
21038494Sobrien  // Member variables
21138494Sobrien  //------------------------------------------------------------------
21238494Sobrien  std::string m_re; ///< A copy of the original regular expression text
21338494Sobrien  int m_comp_err;   ///< Error code for the regular expression compilation
21438494Sobrien  regex_t m_preg;   ///< The compiled regular expression
21538494Sobrien};
21638494Sobrien
21738494Sobrien} // namespace lldb_private
21838494Sobrien
21938494Sobrien#endif // liblldb_RegularExpression_h_
22038494Sobrien