1/*
2Open Tracker License
3
4Terms and Conditions
5
6Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7
8Permission is hereby granted, free of charge, to any person obtaining a copy of
9this software and associated documentation files (the "Software"), to deal in
10the Software without restriction, including without limitation the rights to
11use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12of the Software, and to permit persons to whom the Software is furnished to do
13so, subject to the following conditions:
14
15The above copyright notice and this permission notice applies to all licensees
16and shall be included in all copies or substantial portions of the Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
25Except as contained in this notice, the name of Be Incorporated shall not be
26used in advertising or otherwise to promote the sale, use or other dealings in
27this Software without prior written authorization from Be Incorporated.
28
29Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30of Be Incorporated in the United States and other countries. Other brand product
31names are registered trademarks or trademarks of their respective holders.
32All rights reserved.
33*/
34
35#include "TrackerString.h"
36
37#include <stdio.h>
38#include <stdlib.h>
39
40TrackerString::TrackerString()
41{
42}
43
44
45TrackerString::TrackerString(const char* string)
46	:	BString(string)
47{
48}
49
50
51TrackerString::TrackerString(const TrackerString &string)
52	:	BString(string)
53{
54}
55
56
57TrackerString::TrackerString(const char* string, int32 maxLength)
58	:	BString(string, maxLength)
59{
60}
61
62
63TrackerString::~TrackerString()
64{
65}
66
67
68bool
69TrackerString::Matches(const char* string, bool caseSensitivity,
70	TrackerStringExpressionType expressionType) const
71{
72	switch (expressionType) {
73		default:
74		case kNone:
75			return false;
76
77		case kStartsWith:
78			return StartsWith(string, caseSensitivity);
79
80		case kEndsWith:
81			return EndsWith(string, caseSensitivity);
82
83		case kContains:
84			return Contains(string, caseSensitivity);
85
86		case kGlobMatch:
87			return MatchesGlob(string, caseSensitivity);
88
89		case kRegexpMatch:
90			return MatchesRegExp(string, caseSensitivity);
91	}
92}
93
94
95bool
96TrackerString::MatchesRegExp(const char* pattern, bool caseSensitivity) const
97{
98	BString patternString(pattern);
99	BString textString(String());
100
101	if (caseSensitivity == false) {
102		patternString.ToLower();
103		textString.ToLower();
104	}
105
106	RegExp expression(patternString);
107
108	if (expression.InitCheck() != B_OK)
109		return false;
110
111	return expression.Matches(textString);
112}
113
114
115bool
116TrackerString::MatchesGlob(const char* string, bool caseSensitivity) const
117{
118	return StringMatchesPattern(String(), string, caseSensitivity);
119}
120
121
122bool
123TrackerString::EndsWith(const char* string, bool caseSensitivity) const
124{
125	// If "string" is longer than "this",
126	// we should simply return false
127	int32 position = Length() - (int32)strlen(string);
128	if (position < 0)
129		return false;
130
131	if (caseSensitivity)
132		return FindLast(string) == position;
133	else
134		return IFindLast(string) == position;
135}
136
137
138bool
139TrackerString::StartsWith(const char* string, bool caseSensitivity) const
140{
141	if (caseSensitivity)
142		return FindFirst(string) == 0;
143	else
144		return IFindFirst(string) == 0;
145}
146
147
148bool
149TrackerString::Contains(const char* string, bool caseSensitivity) const
150{
151	if (caseSensitivity)
152		return FindFirst(string) > -1;
153	else
154		return IFindFirst(string) > -1;
155}
156
157
158// About the ?Find* functions:
159// The leading star here has been compliance with BString,
160// simplicity and performance. Therefore unncessary copying
161// has been avoided, as unncessary function calls.
162// The copying has been avoided by implementing the
163// ?Find*(const char*) functions rather than
164// the ?Find*(TrackerString &) functions.
165// The function calls has been avoided by
166// inserting a check on the first character
167// before calling the str*cmp functions.
168
169
170int32
171TrackerString::FindFirst(const BString &string) const
172{
173	return FindFirst(string.String(), 0);
174}
175
176
177int32
178TrackerString::FindFirst(const char* string) const
179{
180	return FindFirst(string, 0);
181}
182
183
184int32
185TrackerString::FindFirst(const BString &string, int32 fromOffset) const
186{
187	return FindFirst(string.String(), fromOffset);
188}
189
190
191int32
192TrackerString::FindFirst(const char* string, int32 fromOffset) const
193{
194	if (!string)
195		return -1;
196
197	int32 length = Length();
198	uint32 stringLength = strlen(string);
199
200	// The following two checks are required to be compatible
201	// with BString:
202	if (length <= 0)
203		return -1;
204
205	if (stringLength == 0)
206		return fromOffset;
207
208	int32 stop = length - static_cast<int32>(stringLength);
209	int32 start = MAX(0, MIN(fromOffset, stop));
210	int32 position = -1;
211
212	for (int32 i = start; i <= stop; i++) {
213		if (string[0] == ByteAt(i)) {
214			// This check is to avoid mute str*cmp() calls. Performance.
215			if (strncmp(string, String() + i, stringLength) == 0) {
216				position = i;
217				break;
218			}
219		}
220	}
221
222	return position;
223}
224
225
226int32
227TrackerString::FindFirst(char ch) const
228{
229	char string[2] = {ch, '\0'};
230	return FindFirst(string, 0);
231}
232
233
234int32
235TrackerString::FindFirst(char ch, int32 fromOffset) const
236{
237	char string[2] = {ch, '\0'};
238	return FindFirst(string, fromOffset);
239}
240
241
242int32
243TrackerString::FindLast(const BString &string) const
244{
245	return FindLast(string.String(), Length() - 1);
246}
247
248
249int32
250TrackerString::FindLast(const char* string) const
251{
252	return FindLast(string, Length() - 1);
253}
254
255
256int32
257TrackerString::FindLast(const BString &string, int32 beforeOffset) const
258{
259	return FindLast(string.String(), beforeOffset);
260}
261
262
263int32
264TrackerString::FindLast(const char* string, int32 beforeOffset) const
265{
266	if (!string)
267		return -1;
268
269	int32 length = Length();
270	uint32 stringLength = strlen(string);
271
272	// The following two checks are required to be compatible
273	// with BString:
274	if (length <= 0)
275		return -1;
276
277	if (stringLength == 0)
278		return beforeOffset;
279
280	int32 start = MIN(beforeOffset,
281		length - static_cast<int32>(stringLength));
282	int32 stop = 0;
283	int32 position = -1;
284
285	for (int32 i = start; i >= stop; i--) {
286		if (string[0] == ByteAt(i)) {
287			// This check is to avoid mute str*cmp() calls. Performance.
288			if (strncmp(string, String() + i, stringLength) == 0) {
289				position = i;
290				break;
291			}
292		}
293	}
294
295	return position;
296}
297
298
299int32
300TrackerString::FindLast(char ch) const
301{
302	char string[2] = {ch, '\0'};
303	return FindLast(string, Length() - 1);
304}
305
306
307int32
308TrackerString::FindLast(char ch, int32 beforeOffset) const
309{
310	char string[2] = {ch, '\0'};
311	return FindLast(string, beforeOffset);
312}
313
314
315int32
316TrackerString::IFindFirst(const BString &string) const
317{
318	return IFindFirst(string.String(), 0);
319}
320
321
322int32
323TrackerString::IFindFirst(const char* string) const
324{
325	return IFindFirst(string, 0);
326}
327
328
329int32
330TrackerString::IFindFirst(const BString &string, int32 fromOffset) const
331{
332	return IFindFirst(string.String(), fromOffset);
333}
334
335
336int32
337TrackerString::IFindFirst(const char* string, int32 fromOffset) const
338{
339	if (!string)
340		return -1;
341
342	int32 length = Length();
343	uint32 stringLength = strlen(string);
344
345	// The following two checks are required to be compatible
346	// with BString:
347	if (length <= 0)
348		return -1;
349
350	if (stringLength == 0)
351		return fromOffset;
352
353	int32 stop = length - static_cast<int32>(stringLength);
354	int32 start = MAX(0, MIN(fromOffset, stop));
355	int32 position = -1;
356
357	for (int32 i = start; i <= stop; i++) {
358		if (tolower(string[0]) == tolower(ByteAt(i))) {
359			// This check is to avoid mute str*cmp() calls. Performance.
360			if (strncasecmp(string, String() + i, stringLength) == 0) {
361				position = i;
362				break;
363			}
364		}
365	}
366
367	return position;
368}
369
370
371int32
372TrackerString::IFindLast(const BString &string) const
373{
374	return IFindLast(string.String(), Length() - 1);
375}
376
377
378int32
379TrackerString::IFindLast(const char* string) const
380{
381	return IFindLast(string, Length() - 1);
382}
383
384
385int32
386TrackerString::IFindLast(const BString &string, int32 beforeOffset) const
387{
388	return IFindLast(string.String(), beforeOffset);
389}
390
391
392int32
393TrackerString::IFindLast(const char* string, int32 beforeOffset) const
394{
395	if (!string)
396		return -1;
397
398	int32 length = Length();
399	uint32 stringLength = strlen(string);
400
401	// The following two checks are required to be compatible
402	// with BString:
403	if (length <= 0)
404		return -1;
405
406	if (stringLength == 0)
407		return beforeOffset;
408
409	int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength));
410	int32 stop = 0;
411	int32 position = -1;
412
413	for (int32 i = start; i >= stop; i--) {
414		if (tolower(string[0]) == tolower(ByteAt(i))) {
415			// This check is to avoid mute str*cmp() calls. Performance.
416			if (strncasecmp(string, String() + i, stringLength) == 0) {
417				position = i;
418				break;
419			}
420		}
421	}
422
423	return position;
424}
425
426
427// MatchesBracketExpression() assumes 'pattern' to point to the
428// character following the initial '[' in a bracket expression.
429// The reason is that an encountered '[' will be taken literally.
430// (Makes it possible to match a '[' with the expression '[[]').
431bool
432TrackerString::MatchesBracketExpression(const char* string,
433	const char* pattern, bool caseSensitivity) const
434{
435	bool GlyphMatch = IsStartOfGlyph(string[0]);
436
437	if (IsInsideGlyph(string[0]))
438		return false;
439
440	char testChar = ConditionalToLower(string[0], caseSensitivity);
441	bool match = false;
442
443	bool inverse = *pattern == '^' || *pattern == '!';
444		// We allow both ^ and ! as a initial inverting character.
445
446	if (inverse)
447		pattern++;
448
449	while (!match && *pattern != ']' && *pattern != '\0') {
450		switch (*pattern) {
451			case '-':
452			{
453				char start = ConditionalToLower(*(pattern - 1),
454					caseSensitivity);
455				char stop = ConditionalToLower(*(pattern + 1),
456					caseSensitivity);
457
458				if (IsGlyph(start) || IsGlyph(stop))
459					return false;
460						// Not a valid range!
461
462				if ((islower(start) && islower(stop))
463					|| (isupper(start) && isupper(stop))
464					|| (isdigit(start) && isdigit(stop))) {
465					// Make sure 'start' and 'stop' are of the same type.
466					match = start <= testChar && testChar <= stop;
467				} else {
468					// If no valid range, we've got a syntax error.
469					return false;
470				}
471
472				break;
473			}
474
475			default:
476				if (GlyphMatch)
477					match = UTF8CharsAreEqual(string, pattern);
478				else
479					match = CharsAreEqual(testChar, *pattern, caseSensitivity);
480				break;
481		}
482
483		if (!match) {
484			pattern++;
485			if (IsInsideGlyph(pattern[0]))
486				pattern = MoveToEndOfGlyph(pattern);
487		}
488	}
489
490	// Consider an unmatched bracket a failure
491	// (i.e. when detecting a '\0' instead of a ']'.)
492	if (*pattern == '\0')
493		return false;
494
495	return (match ^ inverse) != 0;
496}
497
498
499bool
500TrackerString::StringMatchesPattern(const char* string, const char* pattern,
501	bool caseSensitivity) const
502{
503	// One could do this dynamically, counting the number of *'s,
504	// but then you have to free them at every exit of this
505	// function, which is awkward and ugly.
506	const int32 kWildCardMaximum = 100;
507	const char* pStorage[kWildCardMaximum];
508	const char* sStorage[kWildCardMaximum];
509
510	int32 patternLevel = 0;
511
512	if (string == NULL || pattern == NULL)
513		return false;
514
515	while (*pattern != '\0') {
516		switch (*pattern) {
517			case '?':
518				pattern++;
519				string++;
520				if (IsInsideGlyph(string[0]))
521					string = MoveToEndOfGlyph(string);
522
523				break;
524
525			case '*':
526			{
527				// Collapse any ** and *? constructions:
528				while (*pattern == '*' || *pattern == '?') {
529					pattern++;
530					if (*pattern == '?' && string != '\0') {
531						string++;
532						if (IsInsideGlyph(string[0]))
533							string = MoveToEndOfGlyph(string);
534					}
535				}
536
537				if (*pattern == '\0') {
538					// An ending * matches all strings.
539					return true;
540				}
541
542				bool match = false;
543				const char* pBefore = pattern - 1;
544
545				if (*pattern == '[') {
546					pattern++;
547
548					while (!match && *string != '\0') {
549						match = MatchesBracketExpression(string++, pattern,
550							caseSensitivity);
551					}
552
553					while (*pattern != ']' && *pattern != '\0') {
554						// Skip the rest of the bracket:
555						pattern++;
556					}
557
558					if (*pattern == '\0') {
559						// Failure if no closing bracket;
560						return false;
561					}
562				} else {
563					// No bracket, just one character:
564					while (!match && *string != '\0') {
565						if (IsGlyph(string[0]))
566							match = UTF8CharsAreEqual(string++, pattern);
567						else {
568							match = CharsAreEqual(*string++, *pattern,
569								caseSensitivity);
570						}
571					}
572				}
573
574				if (!match)
575					return false;
576				else {
577					pStorage[patternLevel] = pBefore;
578					if (IsInsideGlyph(string[0]))
579						string = MoveToEndOfGlyph(string);
580
581					sStorage[patternLevel++] = string;
582					if (patternLevel > kWildCardMaximum)
583						return false;
584
585					pattern++;
586					if (IsInsideGlyph(pattern[0]))
587						pattern = MoveToEndOfGlyph(pattern);
588				}
589				break;
590			}
591
592			case '[':
593				pattern++;
594
595				if (!MatchesBracketExpression(string, pattern,
596						caseSensitivity)) {
597					if (patternLevel > 0) {
598						pattern = pStorage[--patternLevel];
599						string = sStorage[patternLevel];
600					} else
601						return false;
602				} else {
603					// Skip the rest of the bracket:
604					while (*pattern != ']' && *pattern != '\0')
605						pattern++;
606
607					// Failure if no closing bracket;
608					if (*pattern == '\0')
609						return false;
610
611					string++;
612					if (IsInsideGlyph(string[0]))
613						string = MoveToEndOfGlyph(string);
614					pattern++;
615				}
616				break;
617
618			default:
619			{
620				bool equal = false;
621				if (IsGlyph(string[0]))
622					equal = UTF8CharsAreEqual(string, pattern);
623				else
624					equal = CharsAreEqual(*string, *pattern, caseSensitivity);
625
626				if (equal) {
627					pattern++;
628					if (IsInsideGlyph(pattern[0]))
629						pattern = MoveToEndOfGlyph(pattern);
630					string++;
631					if (IsInsideGlyph(string[0]))
632						string = MoveToEndOfGlyph(string);
633				} else if (patternLevel > 0) {
634						pattern = pStorage[--patternLevel];
635						string = sStorage[patternLevel];
636				} else
637					return false;
638
639				break;
640			}
641		}
642
643		if (*pattern == '\0' && *string != '\0' && patternLevel > 0) {
644			pattern = pStorage[--patternLevel];
645			string = sStorage[patternLevel];
646		}
647	}
648
649	return *string == '\0' && *pattern == '\0';
650}
651
652
653bool
654TrackerString::UTF8CharsAreEqual(const char* string1,
655	const char* string2) const
656{
657	const char* s1 = string1;
658	const char* s2 = string2;
659
660	if (IsStartOfGlyph(*s1) && *s1 == *s2) {
661		s1++;
662		s2++;
663
664		while (IsInsideGlyph(*s1) && *s1 == *s2) {
665			s1++;
666			s2++;
667		}
668
669		return !IsInsideGlyph(*s1)
670			&& !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1);
671	} else
672		return false;
673}
674
675
676const char*
677TrackerString::MoveToEndOfGlyph(const char* string) const
678{
679	const char* ptr = string;
680
681	while (IsInsideGlyph(*ptr))
682		ptr++;
683
684	return ptr;
685}
686
687
688bool
689TrackerString::IsGlyph(char ch) const
690{
691	return (ch & 0x80) == 0x80;
692}
693
694
695bool
696TrackerString::IsInsideGlyph(char ch) const
697{
698	return (ch & 0xC0) == 0x80;
699}
700
701
702bool
703TrackerString::IsStartOfGlyph(char ch) const
704{
705	return (ch & 0xC0) == 0xC0;
706}
707