1/*
2Open Tracker License
3
4Terms and Conditions
5
6Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7
8Permission is hereby granted, free of charge, to any person obtaining a copy of
9this software and associated documentation files (the "Software"), to deal in
10the Software without restriction, including without limitation the rights to
11use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12of the Software, and to permit persons to whom the Software is furnished to do
13so, subject to the following conditions:
14
15The above copyright notice and this permission notice applies to all licensees
16and shall be included in all copies or substantial portions of the Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
25Except as contained in this notice, the name of Be Incorporated shall not be
26used in advertising or otherwise to promote the sale, use or other dealings in
27this Software without prior written authorization from Be Incorporated.
28
29Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30of Be Incorporated in the United States and other countries. Other brand product
31names are registered trademarks or trademarks of their respective holders.
32All rights reserved.
33*/
34
35#include "TrackerString.h"
36
37#include <stdio.h>
38#include <stdlib.h>
39#include <strings.h>
40
41
42//	#pragma mark - TrackerString
43
44
45TrackerString::TrackerString()
46{
47}
48
49
50TrackerString::TrackerString(const char* string)
51	:
52	BString(string)
53{
54}
55
56
57TrackerString::TrackerString(const TrackerString &string)
58	:
59	BString(string)
60{
61}
62
63
64TrackerString::TrackerString(const char* string, int32 maxLength)
65	:
66	BString(string, maxLength)
67{
68}
69
70
71TrackerString::~TrackerString()
72{
73}
74
75
76bool
77TrackerString::Matches(const char* string, bool caseSensitivity,
78	TrackerStringExpressionType expressionType) const
79{
80	switch (expressionType) {
81		default:
82		case kNone:
83			return false;
84
85		case kStartsWith:
86			return StartsWith(string, caseSensitivity);
87
88		case kEndsWith:
89			return EndsWith(string, caseSensitivity);
90
91		case kContains:
92			return Contains(string, caseSensitivity);
93
94		case kGlobMatch:
95			return MatchesGlob(string, caseSensitivity);
96
97		case kRegexpMatch:
98			return MatchesRegExp(string, caseSensitivity);
99	}
100}
101
102
103bool
104TrackerString::MatchesRegExp(const char* pattern, bool caseSensitivity) const
105{
106	BString patternString(pattern);
107	BString textString(String());
108
109	if (caseSensitivity == false) {
110		patternString.ToLower();
111		textString.ToLower();
112	}
113
114	RegExp expression(patternString);
115
116	if (expression.InitCheck() != B_OK)
117		return false;
118
119	return expression.Matches(textString);
120}
121
122
123bool
124TrackerString::MatchesGlob(const char* string, bool caseSensitivity) const
125{
126	return StringMatchesPattern(String(), string, caseSensitivity);
127}
128
129
130bool
131TrackerString::EndsWith(const char* string, bool caseSensitivity) const
132{
133	// If "string" is longer than "this",
134	// we should simply return false
135	int32 position = Length() - (int32)strlen(string);
136	if (position < 0)
137		return false;
138
139	if (caseSensitivity)
140		return FindLast(string) == position;
141	else
142		return IFindLast(string) == position;
143}
144
145
146bool
147TrackerString::StartsWith(const char* string, bool caseSensitivity) const
148{
149	if (caseSensitivity)
150		return FindFirst(string) == 0;
151	else
152		return IFindFirst(string) == 0;
153}
154
155
156bool
157TrackerString::Contains(const char* string, bool caseSensitivity) const
158{
159	if (caseSensitivity)
160		return FindFirst(string) > -1;
161	else
162		return IFindFirst(string) > -1;
163}
164
165
166// MatchesBracketExpression() assumes 'pattern' to point to the
167// character following the initial '[' in a bracket expression.
168// The reason is that an encountered '[' will be taken literally.
169// (Makes it possible to match a '[' with the expression '[[]').
170bool
171TrackerString::MatchesBracketExpression(const char* string,
172	const char* pattern, bool caseSensitivity) const
173{
174	bool GlyphMatch = IsStartOfGlyph(string[0]);
175
176	if (IsInsideGlyph(string[0]))
177		return false;
178
179	char testChar = ConditionalToLower(string[0], caseSensitivity);
180	bool match = false;
181
182	bool inverse = *pattern == '^' || *pattern == '!';
183		// We allow both ^ and ! as a initial inverting character.
184
185	if (inverse)
186		pattern++;
187
188	while (!match && *pattern != ']' && *pattern != '\0') {
189		switch (*pattern) {
190			case '-':
191			{
192				char start = ConditionalToLower(*(pattern - 1),
193					caseSensitivity);
194				char stop = ConditionalToLower(*(pattern + 1),
195					caseSensitivity);
196
197				if (IsGlyph(start) || IsGlyph(stop))
198					return false;
199						// Not a valid range!
200
201				if ((islower(start) && islower(stop))
202					|| (isupper(start) && isupper(stop))
203					|| (isdigit(start) && isdigit(stop))) {
204					// Make sure 'start' and 'stop' are of the same type.
205					match = start <= testChar && testChar <= stop;
206				} else {
207					// If no valid range, we've got a syntax error.
208					return false;
209				}
210
211				break;
212			}
213
214			default:
215				if (GlyphMatch)
216					match = UTF8CharsAreEqual(string, pattern);
217				else
218					match = CharsAreEqual(testChar, *pattern, caseSensitivity);
219				break;
220		}
221
222		if (!match) {
223			pattern++;
224			if (IsInsideGlyph(pattern[0]))
225				pattern = MoveToEndOfGlyph(pattern);
226		}
227	}
228
229	// Consider an unmatched bracket a failure
230	// (i.e. when detecting a '\0' instead of a ']'.)
231	if (*pattern == '\0')
232		return false;
233
234	return (match ^ inverse) != 0;
235}
236
237
238bool
239TrackerString::StringMatchesPattern(const char* string, const char* pattern,
240	bool caseSensitivity) const
241{
242	// One could do this dynamically, counting the number of *'s,
243	// but then you have to free them at every exit of this
244	// function, which is awkward and ugly.
245	const int32 kWildCardMaximum = 100;
246	const char* pStorage[kWildCardMaximum];
247	const char* sStorage[kWildCardMaximum];
248
249	int32 patternLevel = 0;
250
251	if (string == NULL || pattern == NULL)
252		return false;
253
254	while (*pattern != '\0') {
255		switch (*pattern) {
256			case '?':
257				pattern++;
258				string++;
259				if (IsInsideGlyph(string[0]))
260					string = MoveToEndOfGlyph(string);
261				break;
262
263			case '*':
264			{
265				// Collapse any ** and *? constructions:
266				while (*pattern == '*' || *pattern == '?') {
267					pattern++;
268					if (*pattern == '?' && *string != '\0') {
269						string++;
270						if (IsInsideGlyph(string[0]))
271							string = MoveToEndOfGlyph(string);
272					}
273				}
274
275				if (*pattern == '\0') {
276					// An ending * matches all strings.
277					return true;
278				}
279
280				bool match = false;
281				const char* pBefore = pattern - 1;
282
283				if (*pattern == '[') {
284					pattern++;
285
286					while (!match && *string != '\0') {
287						match = MatchesBracketExpression(string++, pattern,
288							caseSensitivity);
289					}
290
291					while (*pattern != ']' && *pattern != '\0') {
292						// Skip the rest of the bracket:
293						pattern++;
294					}
295
296					if (*pattern == '\0') {
297						// Failure if no closing bracket;
298						return false;
299					}
300				} else {
301					// No bracket, just one character:
302					while (!match && *string != '\0') {
303						if (IsGlyph(string[0]))
304							match = UTF8CharsAreEqual(string++, pattern);
305						else {
306							match = CharsAreEqual(*string++, *pattern,
307								caseSensitivity);
308						}
309					}
310				}
311
312				if (!match)
313					return false;
314				else {
315					pStorage[patternLevel] = pBefore;
316					if (IsInsideGlyph(string[0]))
317						string = MoveToEndOfGlyph(string);
318
319					sStorage[patternLevel++] = string;
320					if (patternLevel > kWildCardMaximum)
321						return false;
322
323					pattern++;
324					if (IsInsideGlyph(pattern[0]))
325						pattern = MoveToEndOfGlyph(pattern);
326				}
327				break;
328			}
329
330			case '[':
331				pattern++;
332
333				if (!MatchesBracketExpression(string, pattern,
334						caseSensitivity)) {
335					if (patternLevel > 0) {
336						pattern = pStorage[--patternLevel];
337						string = sStorage[patternLevel];
338					} else
339						return false;
340				} else {
341					// Skip the rest of the bracket:
342					while (*pattern != ']' && *pattern != '\0')
343						pattern++;
344
345					// Failure if no closing bracket;
346					if (*pattern == '\0')
347						return false;
348
349					string++;
350					if (IsInsideGlyph(string[0]))
351						string = MoveToEndOfGlyph(string);
352					pattern++;
353				}
354				break;
355
356			default:
357			{
358				bool equal = false;
359				if (IsGlyph(string[0]))
360					equal = UTF8CharsAreEqual(string, pattern);
361				else
362					equal = CharsAreEqual(*string, *pattern, caseSensitivity);
363
364				if (equal) {
365					pattern++;
366					if (IsInsideGlyph(pattern[0]))
367						pattern = MoveToEndOfGlyph(pattern);
368					string++;
369					if (IsInsideGlyph(string[0]))
370						string = MoveToEndOfGlyph(string);
371				} else if (patternLevel > 0) {
372						pattern = pStorage[--patternLevel];
373						string = sStorage[patternLevel];
374				} else
375					return false;
376				break;
377			}
378		}
379
380		if (*pattern == '\0' && *string != '\0' && patternLevel > 0) {
381			pattern = pStorage[--patternLevel];
382			string = sStorage[patternLevel];
383		}
384	}
385
386	return *string == '\0' && *pattern == '\0';
387}
388
389
390bool
391TrackerString::UTF8CharsAreEqual(const char* string1,
392	const char* string2) const
393{
394	const char* s1 = string1;
395	const char* s2 = string2;
396
397	if (IsStartOfGlyph(*s1) && *s1 == *s2) {
398		s1++;
399		s2++;
400
401		while (IsInsideGlyph(*s1) && *s1 == *s2) {
402			s1++;
403			s2++;
404		}
405
406		return !IsInsideGlyph(*s1)
407			&& !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1);
408	} else
409		return false;
410}
411
412
413const char*
414TrackerString::MoveToEndOfGlyph(const char* string) const
415{
416	const char* ptr = string;
417
418	while (IsInsideGlyph(*ptr))
419		ptr++;
420
421	return ptr;
422}
423
424
425bool
426TrackerString::IsGlyph(char ch) const
427{
428	return (ch & 0x80) == 0x80;
429}
430
431
432bool
433TrackerString::IsInsideGlyph(char ch) const
434{
435	return (ch & 0xC0) == 0x80;
436}
437
438
439bool
440TrackerString::IsStartOfGlyph(char ch) const
441{
442	return (ch & 0xC0) == 0xC0;
443}
444