1/*
2 * Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3 * Copyright 2013, Rene Gollent, rene@gollent.com.
4 * Distributed under the terms of the MIT License.
5 */
6
7
8#include <RegExp.h>
9
10#include <new>
11
12#include <regex.h>
13
14#include <String.h>
15
16#include <Referenceable.h>
17
18
19// #pragma mark - RegExp::Data
20
21
22struct RegExp::Data : public BReferenceable {
23	Data(const char* pattern, PatternType patternType, bool caseSensitive)
24		:
25		BReferenceable()
26	{
27		// convert the shell pattern to a regular expression
28		BString patternString;
29		if (patternType == PATTERN_TYPE_WILDCARD) {
30			while (*pattern != '\0') {
31				char c = *pattern++;
32				switch (c) {
33					case '?':
34						patternString += '.';
35						continue;
36					case '*':
37						patternString += ".*";
38						continue;
39					case '[':
40					{
41						// find the matching ']' first
42						const char* end = pattern;
43						while (*end != ']') {
44							if (*end++ == '\0') {
45								fError = REG_EBRACK;
46								return;
47							}
48						}
49
50						if (pattern == end) {
51							// Empty bracket expression. It will never match
52							// anything. Strictly speaking this is not
53							// considered an error, but we handle it like one.
54							fError = REG_EBRACK;
55							return;
56						}
57
58						patternString += '[';
59
60						// We need to avoid "[." ... ".]", "[=" ... "=]", and
61						// "[:" ... ":]" sequences, since those have special
62						// meaning in regular expressions. If we encounter
63						// a '[' followed by either of '.', '=', or ':', we
64						// replace the '[' by "[.[.]".
65						while (pattern < end) {
66							c = *pattern++;
67							if (c == '[' && pattern < end) {
68								switch (*pattern) {
69									case '.':
70									case '=':
71									case ':':
72										patternString += "[.[.]";
73										continue;
74								}
75							}
76							patternString += c;
77						}
78
79						pattern++;
80						patternString += ']';
81						break;
82					}
83
84					case '\\':
85					{
86						// Quotes the next character. Works the same way for
87						// regular expressions.
88						if (*pattern == '\0') {
89							fError = REG_EESCAPE;
90							return;
91						}
92
93						patternString += '\\';
94						patternString += *pattern++;
95						break;
96					}
97
98					case '^':
99					case '.':
100					case '$':
101					case '(':
102					case ')':
103					case '|':
104					case '+':
105					case '{':
106						// need to be quoted
107						patternString += '\\';
108						// fall through
109					default:
110						patternString += c;
111						break;
112				}
113			}
114
115			pattern = patternString.String();
116		}
117
118		int flags = REG_EXTENDED;
119		if (!caseSensitive)
120			flags |= REG_ICASE;
121
122		fError = regcomp(&fCompiledExpression, pattern, flags);
123	}
124
125	~Data()
126	{
127		if (fError == 0)
128			regfree(&fCompiledExpression);
129	}
130
131	bool IsValid() const
132	{
133		return fError == 0;
134	}
135
136	const regex_t* CompiledExpression() const
137	{
138		return &fCompiledExpression;
139	}
140
141private:
142	int		fError;
143	regex_t	fCompiledExpression;
144};
145
146
147// #pragma mark - RegExp::MatchResultData
148
149
150struct RegExp::MatchResultData : public BReferenceable {
151	MatchResultData(const regex_t* compiledExpression, const char* string)
152		:
153		BReferenceable(),
154		fMatchCount(0),
155		fMatches(NULL)
156	{
157		// fMatchCount is always set to the number of matching groups in the
158		// expression (or 0 if an error occured). Some of the "matches" in
159		// the array may still point to the (-1,-1) range if they don't
160		// actually match anything.
161		fMatchCount = compiledExpression->re_nsub + 1;
162		fMatches = new regmatch_t[fMatchCount];
163		if (regexec(compiledExpression, string, fMatchCount, fMatches, 0)
164				!= 0) {
165			delete[] fMatches;
166			fMatches = NULL;
167			fMatchCount = 0;
168		}
169	}
170
171	~MatchResultData()
172	{
173		delete[] fMatches;
174	}
175
176	size_t MatchCount() const
177	{
178		return fMatchCount;
179	}
180
181	const regmatch_t* Matches() const
182	{
183		return fMatches;
184	}
185
186private:
187	size_t		fMatchCount;
188	regmatch_t*	fMatches;
189};
190
191
192// #pragma mark - RegExp
193
194
195RegExp::RegExp()
196	:
197	fData(NULL)
198{
199}
200
201
202RegExp::RegExp(const char* pattern, PatternType patternType,
203	bool caseSensitive)
204	:
205	fData(NULL)
206{
207	SetPattern(pattern, patternType, caseSensitive);
208}
209
210
211RegExp::RegExp(const RegExp& other)
212	:
213	fData(other.fData)
214{
215	if (fData != NULL)
216		fData->AcquireReference();
217}
218
219
220RegExp::~RegExp()
221{
222	if (fData != NULL)
223		fData->ReleaseReference();
224}
225
226
227bool
228RegExp::SetPattern(const char* pattern, PatternType patternType,
229	bool caseSensitive)
230{
231	if (fData != NULL) {
232		fData->ReleaseReference();
233		fData = NULL;
234	}
235
236	Data* newData = new(std::nothrow) Data(pattern, patternType, caseSensitive);
237	if (newData == NULL)
238		return false;
239
240	BReference<Data> dataReference(newData, true);
241	if (!newData->IsValid())
242		return false;
243
244	fData = dataReference.Detach();
245	return true;
246}
247
248
249RegExp::MatchResult
250RegExp::Match(const char* string) const
251{
252	if (!IsValid())
253		return MatchResult();
254
255	return MatchResult(
256		new(std::nothrow) MatchResultData(fData->CompiledExpression(),
257			string));
258}
259
260
261RegExp&
262RegExp::operator=(const RegExp& other)
263{
264	if (fData != NULL)
265		fData->ReleaseReference();
266
267	fData = other.fData;
268
269	if (fData != NULL)
270		fData->AcquireReference();
271
272	return *this;
273}
274
275
276// #pragma mark - RegExp::MatchResult
277
278
279RegExp::MatchResult::MatchResult()
280	:
281	fData(NULL)
282{
283}
284
285
286RegExp::MatchResult::MatchResult(MatchResultData* data)
287	:
288	fData(data)
289{
290}
291
292
293RegExp::MatchResult::MatchResult(const MatchResult& other)
294	:
295	fData(other.fData)
296{
297	if (fData != NULL)
298		fData->AcquireReference();
299}
300
301
302RegExp::MatchResult::~MatchResult()
303{
304	if (fData != NULL)
305		fData->ReleaseReference();
306}
307
308
309bool
310RegExp::MatchResult::HasMatched() const
311{
312	return fData != NULL && fData->MatchCount() > 0;
313}
314
315
316size_t
317RegExp::MatchResult::StartOffset() const
318{
319	return fData != NULL && fData->MatchCount() > 0
320		? fData->Matches()[0].rm_so : 0;
321}
322
323
324size_t
325RegExp::MatchResult::EndOffset() const
326{
327	return fData != NULL && fData->MatchCount() > 0
328		? fData->Matches()[0].rm_eo : 0;
329}
330
331
332size_t
333RegExp::MatchResult::GroupCount() const
334{
335	if (fData == NULL)
336		return 0;
337
338	size_t matchCount = fData->MatchCount();
339	return matchCount > 0 ? matchCount - 1 : 0;
340}
341
342
343size_t
344RegExp::MatchResult::GroupStartOffsetAt(size_t index) const
345{
346	return fData != NULL && fData->MatchCount() > index + 1
347		? fData->Matches()[index + 1].rm_so : 0;
348}
349
350
351size_t
352RegExp::MatchResult::GroupEndOffsetAt(size_t index) const
353{
354	return fData != NULL && fData->MatchCount() > index + 1
355		? fData->Matches()[index + 1].rm_eo : 0;
356}
357
358
359RegExp::MatchResult&
360RegExp::MatchResult::operator=(const MatchResult& other)
361{
362	if (fData != NULL)
363		fData->ReleaseReference();
364
365	fData = other.fData;
366
367	if (fData != NULL)
368		fData->AcquireReference();
369
370	return *this;
371}
372