1/* 2Open Tracker License 3 4Terms and Conditions 5 6Copyright (c) 1991-2000, Be Incorporated. All rights reserved. 7 8Permission is hereby granted, free of charge, to any person obtaining a copy of 9this software and associated documentation files (the "Software"), to deal in 10the Software without restriction, including without limitation the rights to 11use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12of the Software, and to permit persons to whom the Software is furnished to do 13so, subject to the following conditions: 14 15The above copyright notice and this permission notice applies to all licensees 16and shall be included in all copies or substantial portions of the Software. 17 18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, 20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION 23WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25Except as contained in this notice, the name of Be Incorporated shall not be 26used in advertising or otherwise to promote the sale, use or other dealings in 27this Software without prior written authorization from Be Incorporated. 28 29Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks 30of Be Incorporated in the United States and other countries. Other brand product 31names are registered trademarks or trademarks of their respective holders. 32All rights reserved. 33*/ 34 35#include "TrackerString.h" 36 37#include <stdio.h> 38#include <stdlib.h> 39 40TrackerString::TrackerString() 41{ 42} 43 44 45TrackerString::TrackerString(const char* string) 46 : BString(string) 47{ 48} 49 50 51TrackerString::TrackerString(const TrackerString &string) 52 : BString(string) 53{ 54} 55 56 57TrackerString::TrackerString(const char* string, int32 maxLength) 58 : BString(string, maxLength) 59{ 60} 61 62 63TrackerString::~TrackerString() 64{ 65} 66 67 68bool 69TrackerString::Matches(const char* string, bool caseSensitivity, 70 TrackerStringExpressionType expressionType) const 71{ 72 switch (expressionType) { 73 default: 74 case kNone: 75 return false; 76 77 case kStartsWith: 78 return StartsWith(string, caseSensitivity); 79 80 case kEndsWith: 81 return EndsWith(string, caseSensitivity); 82 83 case kContains: 84 return Contains(string, caseSensitivity); 85 86 case kGlobMatch: 87 return MatchesGlob(string, caseSensitivity); 88 89 case kRegexpMatch: 90 return MatchesRegExp(string, caseSensitivity); 91 } 92} 93 94 95bool 96TrackerString::MatchesRegExp(const char* pattern, bool caseSensitivity) const 97{ 98 BString patternString(pattern); 99 BString textString(String()); 100 101 if (caseSensitivity == false) { 102 patternString.ToLower(); 103 textString.ToLower(); 104 } 105 106 RegExp expression(patternString); 107 108 if (expression.InitCheck() != B_OK) 109 return false; 110 111 return expression.Matches(textString); 112} 113 114 115bool 116TrackerString::MatchesGlob(const char* string, bool caseSensitivity) const 117{ 118 return StringMatchesPattern(String(), string, caseSensitivity); 119} 120 121 122bool 123TrackerString::EndsWith(const char* string, bool caseSensitivity) const 124{ 125 // If "string" is longer than "this", 126 // we should simply return false 127 int32 position = Length() - (int32)strlen(string); 128 if (position < 0) 129 return false; 130 131 if (caseSensitivity) 132 return FindLast(string) == position; 133 else 134 return IFindLast(string) == position; 135} 136 137 138bool 139TrackerString::StartsWith(const char* string, bool caseSensitivity) const 140{ 141 if (caseSensitivity) 142 return FindFirst(string) == 0; 143 else 144 return IFindFirst(string) == 0; 145} 146 147 148bool 149TrackerString::Contains(const char* string, bool caseSensitivity) const 150{ 151 if (caseSensitivity) 152 return FindFirst(string) > -1; 153 else 154 return IFindFirst(string) > -1; 155} 156 157 158// About the ?Find* functions: 159// The leading star here has been compliance with BString, 160// simplicity and performance. Therefore unncessary copying 161// has been avoided, as unncessary function calls. 162// The copying has been avoided by implementing the 163// ?Find*(const char*) functions rather than 164// the ?Find*(TrackerString &) functions. 165// The function calls has been avoided by 166// inserting a check on the first character 167// before calling the str*cmp functions. 168 169 170int32 171TrackerString::FindFirst(const BString &string) const 172{ 173 return FindFirst(string.String(), 0); 174} 175 176 177int32 178TrackerString::FindFirst(const char* string) const 179{ 180 return FindFirst(string, 0); 181} 182 183 184int32 185TrackerString::FindFirst(const BString &string, int32 fromOffset) const 186{ 187 return FindFirst(string.String(), fromOffset); 188} 189 190 191int32 192TrackerString::FindFirst(const char* string, int32 fromOffset) const 193{ 194 if (!string) 195 return -1; 196 197 int32 length = Length(); 198 uint32 stringLength = strlen(string); 199 200 // The following two checks are required to be compatible 201 // with BString: 202 if (length <= 0) 203 return -1; 204 205 if (stringLength == 0) 206 return fromOffset; 207 208 int32 stop = length - static_cast<int32>(stringLength); 209 int32 start = MAX(0, MIN(fromOffset, stop)); 210 int32 position = -1; 211 212 for (int32 i = start; i <= stop; i++) { 213 if (string[0] == ByteAt(i)) { 214 // This check is to avoid mute str*cmp() calls. Performance. 215 if (strncmp(string, String() + i, stringLength) == 0) { 216 position = i; 217 break; 218 } 219 } 220 } 221 222 return position; 223} 224 225 226int32 227TrackerString::FindFirst(char ch) const 228{ 229 char string[2] = {ch, '\0'}; 230 return FindFirst(string, 0); 231} 232 233 234int32 235TrackerString::FindFirst(char ch, int32 fromOffset) const 236{ 237 char string[2] = {ch, '\0'}; 238 return FindFirst(string, fromOffset); 239} 240 241 242int32 243TrackerString::FindLast(const BString &string) const 244{ 245 return FindLast(string.String(), Length() - 1); 246} 247 248 249int32 250TrackerString::FindLast(const char* string) const 251{ 252 return FindLast(string, Length() - 1); 253} 254 255 256int32 257TrackerString::FindLast(const BString &string, int32 beforeOffset) const 258{ 259 return FindLast(string.String(), beforeOffset); 260} 261 262 263int32 264TrackerString::FindLast(const char* string, int32 beforeOffset) const 265{ 266 if (!string) 267 return -1; 268 269 int32 length = Length(); 270 uint32 stringLength = strlen(string); 271 272 // The following two checks are required to be compatible 273 // with BString: 274 if (length <= 0) 275 return -1; 276 277 if (stringLength == 0) 278 return beforeOffset; 279 280 int32 start = MIN(beforeOffset, 281 length - static_cast<int32>(stringLength)); 282 int32 stop = 0; 283 int32 position = -1; 284 285 for (int32 i = start; i >= stop; i--) { 286 if (string[0] == ByteAt(i)) { 287 // This check is to avoid mute str*cmp() calls. Performance. 288 if (strncmp(string, String() + i, stringLength) == 0) { 289 position = i; 290 break; 291 } 292 } 293 } 294 295 return position; 296} 297 298 299int32 300TrackerString::FindLast(char ch) const 301{ 302 char string[2] = {ch, '\0'}; 303 return FindLast(string, Length() - 1); 304} 305 306 307int32 308TrackerString::FindLast(char ch, int32 beforeOffset) const 309{ 310 char string[2] = {ch, '\0'}; 311 return FindLast(string, beforeOffset); 312} 313 314 315int32 316TrackerString::IFindFirst(const BString &string) const 317{ 318 return IFindFirst(string.String(), 0); 319} 320 321 322int32 323TrackerString::IFindFirst(const char* string) const 324{ 325 return IFindFirst(string, 0); 326} 327 328 329int32 330TrackerString::IFindFirst(const BString &string, int32 fromOffset) const 331{ 332 return IFindFirst(string.String(), fromOffset); 333} 334 335 336int32 337TrackerString::IFindFirst(const char* string, int32 fromOffset) const 338{ 339 if (!string) 340 return -1; 341 342 int32 length = Length(); 343 uint32 stringLength = strlen(string); 344 345 // The following two checks are required to be compatible 346 // with BString: 347 if (length <= 0) 348 return -1; 349 350 if (stringLength == 0) 351 return fromOffset; 352 353 int32 stop = length - static_cast<int32>(stringLength); 354 int32 start = MAX(0, MIN(fromOffset, stop)); 355 int32 position = -1; 356 357 for (int32 i = start; i <= stop; i++) { 358 if (tolower(string[0]) == tolower(ByteAt(i))) { 359 // This check is to avoid mute str*cmp() calls. Performance. 360 if (strncasecmp(string, String() + i, stringLength) == 0) { 361 position = i; 362 break; 363 } 364 } 365 } 366 367 return position; 368} 369 370 371int32 372TrackerString::IFindLast(const BString &string) const 373{ 374 return IFindLast(string.String(), Length() - 1); 375} 376 377 378int32 379TrackerString::IFindLast(const char* string) const 380{ 381 return IFindLast(string, Length() - 1); 382} 383 384 385int32 386TrackerString::IFindLast(const BString &string, int32 beforeOffset) const 387{ 388 return IFindLast(string.String(), beforeOffset); 389} 390 391 392int32 393TrackerString::IFindLast(const char* string, int32 beforeOffset) const 394{ 395 if (!string) 396 return -1; 397 398 int32 length = Length(); 399 uint32 stringLength = strlen(string); 400 401 // The following two checks are required to be compatible 402 // with BString: 403 if (length <= 0) 404 return -1; 405 406 if (stringLength == 0) 407 return beforeOffset; 408 409 int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength)); 410 int32 stop = 0; 411 int32 position = -1; 412 413 for (int32 i = start; i >= stop; i--) { 414 if (tolower(string[0]) == tolower(ByteAt(i))) { 415 // This check is to avoid mute str*cmp() calls. Performance. 416 if (strncasecmp(string, String() + i, stringLength) == 0) { 417 position = i; 418 break; 419 } 420 } 421 } 422 423 return position; 424} 425 426 427// MatchesBracketExpression() assumes 'pattern' to point to the 428// character following the initial '[' in a bracket expression. 429// The reason is that an encountered '[' will be taken literally. 430// (Makes it possible to match a '[' with the expression '[[]'). 431bool 432TrackerString::MatchesBracketExpression(const char* string, 433 const char* pattern, bool caseSensitivity) const 434{ 435 bool GlyphMatch = IsStartOfGlyph(string[0]); 436 437 if (IsInsideGlyph(string[0])) 438 return false; 439 440 char testChar = ConditionalToLower(string[0], caseSensitivity); 441 bool match = false; 442 443 bool inverse = *pattern == '^' || *pattern == '!'; 444 // We allow both ^ and ! as a initial inverting character. 445 446 if (inverse) 447 pattern++; 448 449 while (!match && *pattern != ']' && *pattern != '\0') { 450 switch (*pattern) { 451 case '-': 452 { 453 char start = ConditionalToLower(*(pattern - 1), 454 caseSensitivity); 455 char stop = ConditionalToLower(*(pattern + 1), 456 caseSensitivity); 457 458 if (IsGlyph(start) || IsGlyph(stop)) 459 return false; 460 // Not a valid range! 461 462 if ((islower(start) && islower(stop)) 463 || (isupper(start) && isupper(stop)) 464 || (isdigit(start) && isdigit(stop))) { 465 // Make sure 'start' and 'stop' are of the same type. 466 match = start <= testChar && testChar <= stop; 467 } else { 468 // If no valid range, we've got a syntax error. 469 return false; 470 } 471 472 break; 473 } 474 475 default: 476 if (GlyphMatch) 477 match = UTF8CharsAreEqual(string, pattern); 478 else 479 match = CharsAreEqual(testChar, *pattern, caseSensitivity); 480 break; 481 } 482 483 if (!match) { 484 pattern++; 485 if (IsInsideGlyph(pattern[0])) 486 pattern = MoveToEndOfGlyph(pattern); 487 } 488 } 489 490 // Consider an unmatched bracket a failure 491 // (i.e. when detecting a '\0' instead of a ']'.) 492 if (*pattern == '\0') 493 return false; 494 495 return (match ^ inverse) != 0; 496} 497 498 499bool 500TrackerString::StringMatchesPattern(const char* string, const char* pattern, 501 bool caseSensitivity) const 502{ 503 // One could do this dynamically, counting the number of *'s, 504 // but then you have to free them at every exit of this 505 // function, which is awkward and ugly. 506 const int32 kWildCardMaximum = 100; 507 const char* pStorage[kWildCardMaximum]; 508 const char* sStorage[kWildCardMaximum]; 509 510 int32 patternLevel = 0; 511 512 if (string == NULL || pattern == NULL) 513 return false; 514 515 while (*pattern != '\0') { 516 switch (*pattern) { 517 case '?': 518 pattern++; 519 string++; 520 if (IsInsideGlyph(string[0])) 521 string = MoveToEndOfGlyph(string); 522 523 break; 524 525 case '*': 526 { 527 // Collapse any ** and *? constructions: 528 while (*pattern == '*' || *pattern == '?') { 529 pattern++; 530 if (*pattern == '?' && string != '\0') { 531 string++; 532 if (IsInsideGlyph(string[0])) 533 string = MoveToEndOfGlyph(string); 534 } 535 } 536 537 if (*pattern == '\0') { 538 // An ending * matches all strings. 539 return true; 540 } 541 542 bool match = false; 543 const char* pBefore = pattern - 1; 544 545 if (*pattern == '[') { 546 pattern++; 547 548 while (!match && *string != '\0') { 549 match = MatchesBracketExpression(string++, pattern, 550 caseSensitivity); 551 } 552 553 while (*pattern != ']' && *pattern != '\0') { 554 // Skip the rest of the bracket: 555 pattern++; 556 } 557 558 if (*pattern == '\0') { 559 // Failure if no closing bracket; 560 return false; 561 } 562 } else { 563 // No bracket, just one character: 564 while (!match && *string != '\0') { 565 if (IsGlyph(string[0])) 566 match = UTF8CharsAreEqual(string++, pattern); 567 else { 568 match = CharsAreEqual(*string++, *pattern, 569 caseSensitivity); 570 } 571 } 572 } 573 574 if (!match) 575 return false; 576 else { 577 pStorage[patternLevel] = pBefore; 578 if (IsInsideGlyph(string[0])) 579 string = MoveToEndOfGlyph(string); 580 581 sStorage[patternLevel++] = string; 582 if (patternLevel > kWildCardMaximum) 583 return false; 584 585 pattern++; 586 if (IsInsideGlyph(pattern[0])) 587 pattern = MoveToEndOfGlyph(pattern); 588 } 589 break; 590 } 591 592 case '[': 593 pattern++; 594 595 if (!MatchesBracketExpression(string, pattern, 596 caseSensitivity)) { 597 if (patternLevel > 0) { 598 pattern = pStorage[--patternLevel]; 599 string = sStorage[patternLevel]; 600 } else 601 return false; 602 } else { 603 // Skip the rest of the bracket: 604 while (*pattern != ']' && *pattern != '\0') 605 pattern++; 606 607 // Failure if no closing bracket; 608 if (*pattern == '\0') 609 return false; 610 611 string++; 612 if (IsInsideGlyph(string[0])) 613 string = MoveToEndOfGlyph(string); 614 pattern++; 615 } 616 break; 617 618 default: 619 { 620 bool equal = false; 621 if (IsGlyph(string[0])) 622 equal = UTF8CharsAreEqual(string, pattern); 623 else 624 equal = CharsAreEqual(*string, *pattern, caseSensitivity); 625 626 if (equal) { 627 pattern++; 628 if (IsInsideGlyph(pattern[0])) 629 pattern = MoveToEndOfGlyph(pattern); 630 string++; 631 if (IsInsideGlyph(string[0])) 632 string = MoveToEndOfGlyph(string); 633 } else if (patternLevel > 0) { 634 pattern = pStorage[--patternLevel]; 635 string = sStorage[patternLevel]; 636 } else 637 return false; 638 639 break; 640 } 641 } 642 643 if (*pattern == '\0' && *string != '\0' && patternLevel > 0) { 644 pattern = pStorage[--patternLevel]; 645 string = sStorage[patternLevel]; 646 } 647 } 648 649 return *string == '\0' && *pattern == '\0'; 650} 651 652 653bool 654TrackerString::UTF8CharsAreEqual(const char* string1, 655 const char* string2) const 656{ 657 const char* s1 = string1; 658 const char* s2 = string2; 659 660 if (IsStartOfGlyph(*s1) && *s1 == *s2) { 661 s1++; 662 s2++; 663 664 while (IsInsideGlyph(*s1) && *s1 == *s2) { 665 s1++; 666 s2++; 667 } 668 669 return !IsInsideGlyph(*s1) 670 && !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1); 671 } else 672 return false; 673} 674 675 676const char* 677TrackerString::MoveToEndOfGlyph(const char* string) const 678{ 679 const char* ptr = string; 680 681 while (IsInsideGlyph(*ptr)) 682 ptr++; 683 684 return ptr; 685} 686 687 688bool 689TrackerString::IsGlyph(char ch) const 690{ 691 return (ch & 0x80) == 0x80; 692} 693 694 695bool 696TrackerString::IsInsideGlyph(char ch) const 697{ 698 return (ch & 0xC0) == 0x80; 699} 700 701 702bool 703TrackerString::IsStartOfGlyph(char ch) const 704{ 705 return (ch & 0xC0) == 0xC0; 706} 707