1/* 2 * Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2013, Rene Gollent, rene@gollent.com. 4 * Distributed under the terms of the MIT License. 5 */ 6 7 8#include <RegExp.h> 9 10#include <new> 11 12#include <regex.h> 13 14#include <String.h> 15 16#include <Referenceable.h> 17 18 19// #pragma mark - RegExp::Data 20 21 22struct RegExp::Data : public BReferenceable { 23 Data(const char* pattern, PatternType patternType, bool caseSensitive) 24 : 25 BReferenceable() 26 { 27 // convert the shell pattern to a regular expression 28 BString patternString; 29 if (patternType == PATTERN_TYPE_WILDCARD) { 30 while (*pattern != '\0') { 31 char c = *pattern++; 32 switch (c) { 33 case '?': 34 patternString += '.'; 35 continue; 36 case '*': 37 patternString += ".*"; 38 continue; 39 case '[': 40 { 41 // find the matching ']' first 42 const char* end = pattern; 43 while (*end != ']') { 44 if (*end++ == '\0') { 45 fError = REG_EBRACK; 46 return; 47 } 48 } 49 50 if (pattern == end) { 51 // Empty bracket expression. It will never match 52 // anything. Strictly speaking this is not 53 // considered an error, but we handle it like one. 54 fError = REG_EBRACK; 55 return; 56 } 57 58 patternString += '['; 59 60 // We need to avoid "[." ... ".]", "[=" ... "=]", and 61 // "[:" ... ":]" sequences, since those have special 62 // meaning in regular expressions. If we encounter 63 // a '[' followed by either of '.', '=', or ':', we 64 // replace the '[' by "[.[.]". 65 while (pattern < end) { 66 c = *pattern++; 67 if (c == '[' && pattern < end) { 68 switch (*pattern) { 69 case '.': 70 case '=': 71 case ':': 72 patternString += "[.[.]"; 73 continue; 74 } 75 } 76 patternString += c; 77 } 78 79 pattern++; 80 patternString += ']'; 81 break; 82 } 83 84 case '\\': 85 { 86 // Quotes the next character. Works the same way for 87 // regular expressions. 88 if (*pattern == '\0') { 89 fError = REG_EESCAPE; 90 return; 91 } 92 93 patternString += '\\'; 94 patternString += *pattern++; 95 break; 96 } 97 98 case '^': 99 case '.': 100 case '$': 101 case '(': 102 case ')': 103 case '|': 104 case '+': 105 case '{': 106 // need to be quoted 107 patternString += '\\'; 108 // fall through 109 default: 110 patternString += c; 111 break; 112 } 113 } 114 115 pattern = patternString.String(); 116 } 117 118 int flags = REG_EXTENDED; 119 if (!caseSensitive) 120 flags |= REG_ICASE; 121 122 fError = regcomp(&fCompiledExpression, pattern, flags); 123 } 124 125 ~Data() 126 { 127 if (fError == 0) 128 regfree(&fCompiledExpression); 129 } 130 131 bool IsValid() const 132 { 133 return fError == 0; 134 } 135 136 const regex_t* CompiledExpression() const 137 { 138 return &fCompiledExpression; 139 } 140 141private: 142 int fError; 143 regex_t fCompiledExpression; 144}; 145 146 147// #pragma mark - RegExp::MatchResultData 148 149 150struct RegExp::MatchResultData : public BReferenceable { 151 MatchResultData(const regex_t* compiledExpression, const char* string) 152 : 153 BReferenceable(), 154 fMatchCount(0), 155 fMatches(NULL) 156 { 157 // fMatchCount is always set to the number of matching groups in the 158 // expression (or 0 if an error occured). Some of the "matches" in 159 // the array may still point to the (-1,-1) range if they don't 160 // actually match anything. 161 fMatchCount = compiledExpression->re_nsub + 1; 162 fMatches = new regmatch_t[fMatchCount]; 163 if (regexec(compiledExpression, string, fMatchCount, fMatches, 0) 164 != 0) { 165 delete[] fMatches; 166 fMatches = NULL; 167 fMatchCount = 0; 168 } 169 } 170 171 ~MatchResultData() 172 { 173 delete[] fMatches; 174 } 175 176 size_t MatchCount() const 177 { 178 return fMatchCount; 179 } 180 181 const regmatch_t* Matches() const 182 { 183 return fMatches; 184 } 185 186private: 187 size_t fMatchCount; 188 regmatch_t* fMatches; 189}; 190 191 192// #pragma mark - RegExp 193 194 195RegExp::RegExp() 196 : 197 fData(NULL) 198{ 199} 200 201 202RegExp::RegExp(const char* pattern, PatternType patternType, 203 bool caseSensitive) 204 : 205 fData(NULL) 206{ 207 SetPattern(pattern, patternType, caseSensitive); 208} 209 210 211RegExp::RegExp(const RegExp& other) 212 : 213 fData(other.fData) 214{ 215 if (fData != NULL) 216 fData->AcquireReference(); 217} 218 219 220RegExp::~RegExp() 221{ 222 if (fData != NULL) 223 fData->ReleaseReference(); 224} 225 226 227bool 228RegExp::SetPattern(const char* pattern, PatternType patternType, 229 bool caseSensitive) 230{ 231 if (fData != NULL) { 232 fData->ReleaseReference(); 233 fData = NULL; 234 } 235 236 Data* newData = new(std::nothrow) Data(pattern, patternType, caseSensitive); 237 if (newData == NULL) 238 return false; 239 240 BReference<Data> dataReference(newData, true); 241 if (!newData->IsValid()) 242 return false; 243 244 fData = dataReference.Detach(); 245 return true; 246} 247 248 249RegExp::MatchResult 250RegExp::Match(const char* string) const 251{ 252 if (!IsValid()) 253 return MatchResult(); 254 255 return MatchResult( 256 new(std::nothrow) MatchResultData(fData->CompiledExpression(), 257 string)); 258} 259 260 261RegExp& 262RegExp::operator=(const RegExp& other) 263{ 264 if (fData != NULL) 265 fData->ReleaseReference(); 266 267 fData = other.fData; 268 269 if (fData != NULL) 270 fData->AcquireReference(); 271 272 return *this; 273} 274 275 276// #pragma mark - RegExp::MatchResult 277 278 279RegExp::MatchResult::MatchResult() 280 : 281 fData(NULL) 282{ 283} 284 285 286RegExp::MatchResult::MatchResult(MatchResultData* data) 287 : 288 fData(data) 289{ 290} 291 292 293RegExp::MatchResult::MatchResult(const MatchResult& other) 294 : 295 fData(other.fData) 296{ 297 if (fData != NULL) 298 fData->AcquireReference(); 299} 300 301 302RegExp::MatchResult::~MatchResult() 303{ 304 if (fData != NULL) 305 fData->ReleaseReference(); 306} 307 308 309bool 310RegExp::MatchResult::HasMatched() const 311{ 312 return fData != NULL && fData->MatchCount() > 0; 313} 314 315 316size_t 317RegExp::MatchResult::StartOffset() const 318{ 319 return fData != NULL && fData->MatchCount() > 0 320 ? fData->Matches()[0].rm_so : 0; 321} 322 323 324size_t 325RegExp::MatchResult::EndOffset() const 326{ 327 return fData != NULL && fData->MatchCount() > 0 328 ? fData->Matches()[0].rm_eo : 0; 329} 330 331 332size_t 333RegExp::MatchResult::GroupCount() const 334{ 335 if (fData == NULL) 336 return 0; 337 338 size_t matchCount = fData->MatchCount(); 339 return matchCount > 0 ? matchCount - 1 : 0; 340} 341 342 343size_t 344RegExp::MatchResult::GroupStartOffsetAt(size_t index) const 345{ 346 return fData != NULL && fData->MatchCount() > index + 1 347 ? fData->Matches()[index + 1].rm_so : 0; 348} 349 350 351size_t 352RegExp::MatchResult::GroupEndOffsetAt(size_t index) const 353{ 354 return fData != NULL && fData->MatchCount() > index + 1 355 ? fData->Matches()[index + 1].rm_eo : 0; 356} 357 358 359RegExp::MatchResult& 360RegExp::MatchResult::operator=(const MatchResult& other) 361{ 362 if (fData != NULL) 363 fData->ReleaseReference(); 364 365 fData = other.fData; 366 367 if (fData != NULL) 368 fData->AcquireReference(); 369 370 return *this; 371} 372