1/* Query - query parsing and evaluation 2 * 3 * The pattern matching is roughly based on code originally written 4 * by J. Kercheval, and on code written by Kenneth Almquist, though 5 * it shares no code. 6 * 7 * Copyright 2001-2008, Axel Dörfler, axeld@pinc-software.de. 8 * This file may be used under the terms of the MIT License. 9 */ 10 11 12#include "fsproto.h" // include first for hacky reasons 13 14#include "Query.h" 15#include "bfs.h" 16#include "Debug.h" 17#include "Stack.h" 18#include "Volume.h" 19#include "Inode.h" 20#include "BPlusTree.h" 21#include "Index.h" 22 23#include <util/kernel_cpp.h> 24#include <SupportDefs.h> 25#include <NodeMonitor.h> 26#include <TypeConstants.h> 27#include <AppDefs.h> 28#include <fs_query.h> 29 30#include <malloc.h> 31#include <stdio.h> 32#include <string.h> 33 34 35// The parser has a very static design, but it will do what is required. 36// 37// ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator 38// precedence, that is =,!=,>,<,>=,<= .. && .. ||. 39// Apparently, the "!" (not) can only be used with brackets. 40// 41// If you think that there are too few NULL pointer checks in some places 42// of the code, just read the beginning of the query constructor. 43// The API is not fully available, just the Query and the Expression class 44// are. 45 46 47enum ops { 48 OP_NONE, 49 50 OP_AND, 51 OP_OR, 52 53 OP_EQUATION, 54 55 OP_EQUAL, 56 OP_UNEQUAL, 57 OP_GREATER_THAN, 58 OP_LESS_THAN, 59 OP_GREATER_THAN_OR_EQUAL, 60 OP_LESS_THAN_OR_EQUAL, 61}; 62 63enum match { 64 NO_MATCH = 0, 65 MATCH_OK = 1, 66 67 MATCH_BAD_PATTERN = -2, 68 MATCH_INVALID_CHARACTER 69}; 70 71// return values from isValidPattern() 72enum { 73 PATTERN_INVALID_ESCAPE = -3, 74 PATTERN_INVALID_RANGE, 75 PATTERN_INVALID_SET 76}; 77 78union value { 79 int64 Int64; 80 uint64 Uint64; 81 int32 Int32; 82 uint32 Uint32; 83 float Float; 84 double Double; 85 char String[INODE_FILE_NAME_LENGTH]; 86}; 87 88// B_MIME_STRING_TYPE is defined in storage/Mime.h, but we 89// don't need the whole file here; the type can't change anyway 90#ifndef _MIME_H 91# define B_MIME_STRING_TYPE 'MIMS' 92#endif 93 94class Term { 95 public: 96 Term(int8 op) : fOp(op), fParent(NULL) {} 97 virtual ~Term() {} 98 99 int8 Op() const { return fOp; } 100 101 void SetParent(Term *parent) { fParent = parent; } 102 Term *Parent() const { return fParent; } 103 104 virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0, 105 const uint8 *key = NULL,size_t size = 0) = 0; 106 virtual void Complement() = 0; 107 108 virtual void CalculateScore(Index &index) = 0; 109 virtual int32 Score() const = 0; 110 111 virtual status_t InitCheck() = 0; 112 113#ifdef DEBUG 114 virtual void PrintToStream() = 0; 115#endif 116 117 protected: 118 int8 fOp; 119 Term *fParent; 120}; 121 122// Although an Equation object is quite independent from the volume on which 123// the query is run, there are some dependencies that are produced while 124// querying: 125// The type/size of the value, the score, and if it has an index or not. 126// So you could run more than one query on the same volume, but it might return 127// wrong values when it runs concurrently on another volume. 128// That's not an issue right now, because we run single-threaded and don't use 129// queries more than once. 130 131class Equation : public Term { 132 public: 133 Equation(char **expr); 134 virtual ~Equation(); 135 136 virtual status_t InitCheck(); 137 138 status_t ParseQuotedString(char **_start, char **_end); 139 char *CopyString(char *start, char *end); 140 141 virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0, 142 const uint8 *key = NULL, size_t size = 0); 143 virtual void Complement(); 144 145 status_t PrepareQuery(Volume *volume, Index &index, TreeIterator **iterator, 146 bool queryNonIndexed); 147 status_t GetNextMatching(Volume *volume, TreeIterator *iterator, 148 struct dirent *dirent, size_t bufferSize); 149 150 virtual void CalculateScore(Index &index); 151 virtual int32 Score() const { return fScore; } 152 153#ifdef DEBUG 154 virtual void PrintToStream(); 155#endif 156 157 private: 158 Equation(const Equation &); 159 Equation &operator=(const Equation &); 160 // no implementation 161 162 status_t ConvertValue(type_code type); 163 bool CompareTo(const uint8 *value, uint16 size); 164 uint8 *Value() const { return (uint8 *)&fValue; } 165 status_t MatchEmptyString(); 166 167 char *fAttribute; 168 char *fString; 169 union value fValue; 170 type_code fType; 171 size_t fSize; 172 bool fIsPattern; 173 bool fIsSpecialTime; 174 175 int32 fScore; 176 bool fHasIndex; 177}; 178 179class Operator : public Term { 180 public: 181 Operator(Term *,int8,Term *); 182 virtual ~Operator(); 183 184 Term *Left() const { return fLeft; } 185 Term *Right() const { return fRight; } 186 187 virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0, 188 const uint8 *key = NULL, size_t size = 0); 189 virtual void Complement(); 190 191 virtual void CalculateScore(Index &index); 192 virtual int32 Score() const; 193 194 virtual status_t InitCheck(); 195 196 //Term *Copy() const; 197#ifdef DEBUG 198 virtual void PrintToStream(); 199#endif 200 201 private: 202 Operator(const Operator &); 203 Operator &operator=(const Operator &); 204 // no implementation 205 206 Term *fLeft,*fRight; 207}; 208 209 210//--------------------------------- 211 212 213void 214skipWhitespace(char **expr, int32 skip = 0) 215{ 216 char *string = (*expr) + skip; 217 while (*string == ' ' || *string == '\t') string++; 218 *expr = string; 219} 220 221 222void 223skipWhitespaceReverse(char **expr,char *stop) 224{ 225 char *string = *expr; 226 while (string > stop && (*string == ' ' || *string == '\t')) string--; 227 *expr = string; 228} 229 230 231// #pragma mark - 232 233 234uint32 235utf8ToUnicode(char **string) 236{ 237 uint8 *bytes = (uint8 *)*string; 238 int32 length; 239 uint8 mask = 0x1f; 240 241 switch (bytes[0] & 0xf0) { 242 case 0xc0: 243 case 0xd0: length = 2; break; 244 case 0xe0: length = 3; break; 245 case 0xf0: 246 mask = 0x0f; 247 length = 4; 248 break; 249 default: 250 // valid 1-byte character 251 // and invalid characters 252 (*string)++; 253 return bytes[0]; 254 } 255 uint32 c = bytes[0] & mask; 256 int32 i = 1; 257 for (;i < length && (bytes[i] & 0x80) > 0;i++) 258 c = (c << 6) | (bytes[i] & 0x3f); 259 260 if (i < length) { 261 // invalid character 262 (*string)++; 263 return (uint32)bytes[0]; 264 } 265 *string += length; 266 return c; 267} 268 269 270int32 271getFirstPatternSymbol(char *string) 272{ 273 char c; 274 275 for (int32 index = 0;(c = *string++);index++) { 276 if (c == '*' || c == '?' || c == '[') 277 return index; 278 } 279 return -1; 280} 281 282 283bool 284isPattern(char *string) 285{ 286 return getFirstPatternSymbol(string) >= 0 ? true : false; 287} 288 289 290status_t 291isValidPattern(char *pattern) 292{ 293 while (*pattern) { 294 switch (*pattern++) { 295 case '\\': 296 // the escape character must not be at the end of the pattern 297 if (!*pattern++) 298 return PATTERN_INVALID_ESCAPE; 299 break; 300 301 case '[': 302 if (pattern[0] == ']' || !pattern[0]) 303 return PATTERN_INVALID_SET; 304 305 while (*pattern != ']') { 306 if (*pattern == '\\' && !*++pattern) 307 return PATTERN_INVALID_ESCAPE; 308 309 if (!*pattern) 310 return PATTERN_INVALID_SET; 311 312 if (pattern[0] == '-' && pattern[1] == '-') 313 return PATTERN_INVALID_RANGE; 314 315 pattern++; 316 } 317 break; 318 } 319 } 320 return B_OK; 321} 322 323 324/** Matches the string against the given wildcard pattern. 325 * Returns either MATCH_OK, or NO_MATCH when everything went fine, 326 * or values < 0 (see enum at the top of Query.cpp) if an error 327 * occurs 328 */ 329 330status_t 331matchString(char *pattern, char *string) 332{ 333 while (*pattern) { 334 // end of string == valid end of pattern? 335 if (!string[0]) { 336 while (pattern[0] == '*') 337 pattern++; 338 return !pattern[0] ? MATCH_OK : NO_MATCH; 339 } 340 341 switch (*pattern++) { 342 case '?': 343 { 344 // match exactly one UTF-8 character; we are 345 // not interested in the result 346 utf8ToUnicode(&string); 347 break; 348 } 349 350 case '*': 351 { 352 // compact pattern 353 while (true) { 354 if (pattern[0] == '?') { 355 if (!*++string) 356 return NO_MATCH; 357 } else if (pattern[0] != '*') 358 break; 359 360 pattern++; 361 } 362 363 // if the pattern is done, we have matched the string 364 if (!pattern[0]) 365 return MATCH_OK; 366 367 while(true) { 368 // we have removed all occurences of '*' and '?' 369 if (pattern[0] == string[0] 370 || pattern[0] == '[' 371 || pattern[0] == '\\') { 372 status_t status = matchString(pattern,string); 373 if (status < B_OK || status == MATCH_OK) 374 return status; 375 } 376 377 // we could be nice here and just jump to the next 378 // UTF-8 character - but we wouldn't gain that much 379 // and it'd be slower (since we're checking for 380 // equality before entering the recursion) 381 if (!*++string) 382 return NO_MATCH; 383 } 384 break; 385 } 386 387 case '[': 388 { 389 bool invert = false; 390 if (pattern[0] == '^' || pattern[0] == '!') { 391 invert = true; 392 pattern++; 393 } 394 395 if (!pattern[0] || pattern[0] == ']') 396 return MATCH_BAD_PATTERN; 397 398 uint32 c = utf8ToUnicode(&string); 399 bool matched = false; 400 401 while (pattern[0] != ']') { 402 if (!pattern[0]) 403 return MATCH_BAD_PATTERN; 404 405 if (pattern[0] == '\\') 406 pattern++; 407 408 uint32 first = utf8ToUnicode(&pattern); 409 410 // Does this character match, or is this a range? 411 if (first == c) { 412 matched = true; 413 break; 414 } else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) { 415 pattern++; 416 417 if (pattern[0] == '\\') { 418 pattern++; 419 if (!pattern[0]) 420 return MATCH_BAD_PATTERN; 421 } 422 uint32 last = utf8ToUnicode(&pattern); 423 424 if (c >= first && c <= last) { 425 matched = true; 426 break; 427 } 428 } 429 } 430 431 if (invert) 432 matched = !matched; 433 434 if (matched) { 435 while (pattern[0] != ']') { 436 if (!pattern[0]) 437 return MATCH_BAD_PATTERN; 438 pattern++; 439 } 440 pattern++; 441 break; 442 } 443 return NO_MATCH; 444 } 445 446 case '\\': 447 if (!pattern[0]) 448 return MATCH_BAD_PATTERN; 449 // supposed to fall through 450 default: 451 if (pattern[-1] != string[0]) 452 return NO_MATCH; 453 string++; 454 break; 455 } 456 } 457 458 if (string[0]) 459 return NO_MATCH; 460 461 return MATCH_OK; 462} 463 464 465// #pragma mark - 466 467 468Equation::Equation(char **expr) 469 : Term(OP_EQUATION), 470 fAttribute(NULL), 471 fString(NULL), 472 fType(0), 473 fIsPattern(false) 474{ 475 char *string = *expr; 476 char *start = string; 477 char *end = NULL; 478 479 // Since the equation is the integral part of any query, we're just parsing 480 // the whole thing here. 481 // The whitespace at the start is already removed in Expression::ParseEquation() 482 483 if (*start == '"' || *start == '\'') { 484 // string is quoted (start has to be on the beginning of a string) 485 if (ParseQuotedString(&start, &end) < B_OK) 486 return; 487 488 // set string to a valid start of the equation symbol 489 string = end + 2; 490 skipWhitespace(&string); 491 if (*string != '=' && *string != '<' && *string != '>' && *string != '!') { 492 *expr = string; 493 return; 494 } 495 } else { 496 // search the (in)equation for the actual equation symbol (and for other operators 497 // in case the equation is malformed) 498 while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!' 499 && *string != '&' && *string != '|') 500 string++; 501 502 // get the attribute string (and trim whitespace), in case 503 // the string was not quoted 504 end = string - 1; 505 skipWhitespaceReverse(&end, start); 506 } 507 508 // attribute string is empty (which is not allowed) 509 if (start > end) 510 return; 511 512 // at this point, "start" points to the beginning of the string, "end" points 513 // to the last character of the string, and "string" points to the first 514 // character of the equation symbol 515 516 // test for the right symbol (as this doesn't need any memory) 517 switch (*string) { 518 case '=': 519 fOp = OP_EQUAL; 520 break; 521 case '>': 522 fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN; 523 break; 524 case '<': 525 fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN; 526 break; 527 case '!': 528 if (*(string + 1) != '=') 529 return; 530 fOp = OP_UNEQUAL; 531 break; 532 533 // any invalid characters will be rejected 534 default: 535 *expr = string; 536 return; 537 } 538 // lets change "start" to point to the first character after the symbol 539 if (*(string + 1) == '=') 540 string++; 541 string++; 542 skipWhitespace(&string); 543 544 // allocate & copy the attribute string 545 546 fAttribute = CopyString(start, end); 547 if (fAttribute == NULL) 548 return; 549 550 start = string; 551 if (*start == '"' || *start == '\'') { 552 // string is quoted (start has to be on the beginning of a string) 553 if (ParseQuotedString(&start, &end) < B_OK) 554 return; 555 556 string = end + 2; 557 skipWhitespace(&string); 558 } else { 559 while (*string && *string != '&' && *string != '|' && *string != ')') 560 string++; 561 562 end = string - 1; 563 skipWhitespaceReverse(&end, start); 564 } 565 566 // at this point, "start" will point to the first character of the value, 567 // "end" will point to its last character, and "start" to the first non- 568 // whitespace character after the value string 569 570 fString = CopyString(start, end); 571 if (fString == NULL) 572 return; 573 574 // patterns are only allowed for these operations (and strings) 575 if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) { 576 fIsPattern = isPattern(fString); 577 if (fIsPattern && isValidPattern(fString) < B_OK) { 578 // we only want to have valid patterns; setting fString 579 // to NULL will cause InitCheck() to fail 580 free(fString); 581 fString = NULL; 582 } 583 } 584 585 // The special time flag is set if the time values are shifted 586 // 64-bit values to reduce the number of duplicates. 587 // We have to be able to compare them against unshifted values 588 // later. The only index which needs this is the last_modified 589 // index, but we may want to open that feature for other indices, 590 // too one day. 591 fIsSpecialTime = !strcmp(fAttribute, "last_modified"); 592 593 *expr = string; 594} 595 596 597Equation::~Equation() 598{ 599 if (fAttribute != NULL) 600 free(fAttribute); 601 if (fString != NULL) 602 free(fString); 603} 604 605 606status_t 607Equation::InitCheck() 608{ 609 if (fAttribute == NULL 610 || fString == NULL 611 || fOp == OP_NONE) 612 return B_BAD_VALUE; 613 614 return B_OK; 615} 616 617 618status_t 619Equation::ParseQuotedString(char **_start, char **_end) 620{ 621 char *start = *_start; 622 char quote = *start++; 623 char *end = start; 624 625 for (;*end && *end != quote;end++) { 626 if (*end == '\\') 627 end++; 628 } 629 if (*end == '\0') 630 return B_BAD_VALUE; 631 632 *_start = start; 633 *_end = end - 1; 634 635 return B_OK; 636} 637 638 639char * 640Equation::CopyString(char *start, char *end) 641{ 642 // end points to the last character of the string - and the length 643 // also has to include the null-termination 644 int32 length = end + 2 - start; 645 // just to make sure; since that's the max. attribute name length and 646 // the max. string in an index, it make sense to have it that way 647 if (length > INODE_FILE_NAME_LENGTH || length <= 0) 648 return NULL; 649 650 char *copy = (char *)malloc(length); 651 if (copy == NULL) 652 return NULL; 653 654 memcpy(copy,start,length - 1); 655 copy[length - 1] = '\0'; 656 657 return copy; 658} 659 660 661status_t 662Equation::ConvertValue(type_code type) 663{ 664 // Has the type already been converted? 665 if (type == fType) 666 return B_OK; 667 668 char *string = fString; 669 670 switch (type) { 671 case B_MIME_STRING_TYPE: 672 type = B_STRING_TYPE; 673 // supposed to fall through 674 case B_STRING_TYPE: 675 strncpy(fValue.String, string, INODE_FILE_NAME_LENGTH); 676 fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0'; 677 fSize = strlen(fValue.String); 678 break; 679 case B_INT32_TYPE: 680 fValue.Int32 = strtol(string, &string, 0); 681 fSize = sizeof(int32); 682 break; 683 case B_UINT32_TYPE: 684 fValue.Int32 = strtoul(string, &string, 0); 685 fSize = sizeof(uint32); 686 break; 687 case B_INT64_TYPE: 688 fValue.Int64 = strtoll(string, &string, 0); 689 fSize = sizeof(int64); 690 break; 691 case B_UINT64_TYPE: 692 fValue.Uint64 = strtoull(string, &string, 0); 693 fSize = sizeof(uint64); 694 break; 695 case B_FLOAT_TYPE: 696 fValue.Float = strtod(string, &string); 697 fSize = sizeof(float); 698 break; 699 case B_DOUBLE_TYPE: 700 fValue.Double = strtod(string, &string); 701 fSize = sizeof(double); 702 break; 703 default: 704 FATAL(("query value conversion to 0x%lx requested!\n", type)); 705 // should we fail here or just do a safety int32 conversion? 706 return B_ERROR; 707 } 708 709 fType = type; 710 711 // patterns are only allowed for string types 712 if (fType != B_STRING_TYPE && fIsPattern) 713 fIsPattern = false; 714 715 return B_OK; 716} 717 718 719/** Returns true when the key matches the equation. You have to 720 * call ConvertValue() before this one. 721 */ 722 723bool 724Equation::CompareTo(const uint8 *value, uint16 size) 725{ 726 int32 compare; 727 728 // fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL 729 if (fIsPattern) { 730 // we have already validated the pattern, so we don't check for failing 731 // here - if something is broken, and matchString() returns an error, 732 // we just don't match 733 compare = matchString(fValue.String, (char *)value) == MATCH_OK ? 0 : 1; 734 } else if (fIsSpecialTime) { 735 // the index is a shifted int64 index, but we have to match 736 // against an unshifted value (i.e. the last_modified index) 737 int64 timeValue = *(int64 *)value >> INODE_TIME_SHIFT; 738 compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64, sizeof(int64)); 739 } else 740 compare = compareKeys(fType, value, size, Value(), fSize); 741 742 switch (fOp) { 743 case OP_EQUAL: 744 return compare == 0; 745 case OP_UNEQUAL: 746 return compare != 0; 747 case OP_LESS_THAN: 748 return compare < 0; 749 case OP_LESS_THAN_OR_EQUAL: 750 return compare <= 0; 751 case OP_GREATER_THAN: 752 return compare > 0; 753 case OP_GREATER_THAN_OR_EQUAL: 754 return compare >= 0; 755 } 756 FATAL(("Unknown/Unsupported operation: %d\n", fOp)); 757 return false; 758} 759 760 761void 762Equation::Complement() 763{ 764 D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) { 765 FATAL(("op out of range!")); 766 return; 767 }); 768 769 int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL, 770 OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN}; 771 fOp = complementOp[fOp - OP_EQUAL]; 772} 773 774 775status_t 776Equation::MatchEmptyString() 777{ 778 // there is no matching attribute, we will just bail out if we 779 // already know that our value is not of a string type. 780 // If not, it will be converted to a string - and then be compared with "". 781 // That's why we have to call ConvertValue() here - but it will be 782 // a cheap call for the next time 783 // Should we do this only for OP_UNEQUAL? 784 if (fType != 0 && fType != B_STRING_TYPE) 785 return NO_MATCH; 786 787 status_t status = ConvertValue(B_STRING_TYPE); 788 if (status == B_OK) 789 status = CompareTo((const uint8 *)"", fSize) ? MATCH_OK : NO_MATCH; 790 791 return status; 792} 793 794 795/** Matches the inode's attribute value with the equation. 796 * Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong 797 */ 798 799status_t 800Equation::Match(Inode *inode, const char *attributeName, int32 type, const uint8 *key, size_t size) 801{ 802 // get a pointer to the attribute in question 803 union value value; 804 uint8 *buffer; 805 bool locked = false; 806 807 // first, check if we are matching for a live query and use that value 808 if (attributeName != NULL && !strcmp(fAttribute, attributeName)) { 809 if (key == NULL) { 810 if (type == B_STRING_TYPE) 811 return MatchEmptyString(); 812 813 return NO_MATCH; 814 } 815 buffer = const_cast<uint8 *>(key); 816 } else if (!strcmp(fAttribute, "name")) { 817 // we need to lock before accessing Inode::Name() 818 inode->SmallDataLock().Lock(); 819 locked = true; 820 821 // if not, check for "fake" attributes, "name", "size", "last_modified", 822 buffer = (uint8 *)inode->Name(); 823 if (buffer == NULL) { 824 inode->SmallDataLock().Unlock(); 825 return B_ERROR; 826 } 827 828 type = B_STRING_TYPE; 829 size = strlen((const char *)buffer); 830 } else if (!strcmp(fAttribute,"size")) { 831 buffer = (uint8 *)&inode->Node()->data.size; 832 type = B_INT64_TYPE; 833 } else if (!strcmp(fAttribute,"last_modified")) { 834 buffer = (uint8 *)&inode->Node()->last_modified_time; 835 type = B_INT64_TYPE; 836 } else { 837 // then for attributes in the small_data section, and finally for the 838 // real attributes 839 Inode *attribute; 840 841 inode->SmallDataLock().Lock(); 842 small_data *smallData = inode->FindSmallData(fAttribute); 843 if (smallData != NULL) { 844 buffer = smallData->Data(); 845 type = smallData->type; 846 size = smallData->data_size; 847 locked = true; 848 } else { 849 // needed to unlock the small_data section as fast as possible 850 inode->SmallDataLock().Unlock(); 851 852 if (inode->GetAttribute(fAttribute, &attribute) == B_OK) { 853 buffer = (uint8 *)&value; 854 type = attribute->Node()->type; 855 size = attribute->Size(); 856 857 if (size > INODE_FILE_NAME_LENGTH) 858 size = INODE_FILE_NAME_LENGTH; 859 860 if (attribute->ReadAt(0, buffer, &size) < B_OK) { 861 inode->ReleaseAttribute(attribute); 862 return B_IO_ERROR; 863 } 864 inode->ReleaseAttribute(attribute); 865 } else 866 return MatchEmptyString(); 867 } 868 } 869 // prepare own value for use, if it is possible to convert it 870 status_t status = ConvertValue(type); 871 if (status == B_OK) 872 status = CompareTo(buffer, size) ? MATCH_OK : NO_MATCH; 873 874 if (locked) 875 inode->SmallDataLock().Unlock(); 876 877 RETURN_ERROR(status); 878} 879 880 881void 882Equation::CalculateScore(Index &index) 883{ 884 // As always, these values could be tuned and refined. 885 // And the code could also need some real world testing :-) 886 887 // do we have to operate on a "foreign" index? 888 if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) { 889 fScore = 0; 890 return; 891 } 892 893 // if we have a pattern, how much does it help our search? 894 if (fIsPattern) 895 fScore = getFirstPatternSymbol(fString) << 3; 896 else { 897 // Score by operator 898 if (fOp == OP_EQUAL) 899 // higher than pattern="255 chars+*" 900 fScore = 2048; 901 else 902 // the pattern search is regarded cheaper when you have at 903 // least one character to set your index to 904 fScore = 5; 905 } 906 907 // take index size into account (1024 is the current node size 908 // in our B+trees) 909 // 2048 * 2048 == 4194304 is the maximum score (for an empty 910 // tree, since the header + 1 node are already 2048 bytes) 911 fScore = fScore * ((2048 * 1024LL) / index.Node()->Size()); 912} 913 914 915status_t 916Equation::PrepareQuery(Volume */*volume*/, Index &index, TreeIterator **iterator, bool queryNonIndexed) 917{ 918 status_t status = index.SetTo(fAttribute); 919 920 // if we should query attributes without an index, we can just proceed here 921 if (status < B_OK && !queryNonIndexed) 922 return B_ENTRY_NOT_FOUND; 923 924 type_code type; 925 926 // special case for OP_UNEQUAL - it will always operate through the whole index 927 // but we need the call to the original index to get the correct type 928 if (status < B_OK || fOp == OP_UNEQUAL) { 929 // Try to get an index that holds all files (name) 930 // Also sets the default type for all attributes without index 931 // to string. 932 type = status < B_OK ? B_STRING_TYPE : index.Type(); 933 934 if (index.SetTo("name") < B_OK) 935 return B_ENTRY_NOT_FOUND; 936 937 fHasIndex = false; 938 } else { 939 fHasIndex = true; 940 type = index.Type(); 941 } 942 943 if (ConvertValue(type) < B_OK) 944 return B_BAD_VALUE; 945 946 BPlusTree *tree; 947 if (index.Node()->GetTree(&tree) < B_OK) 948 return B_ERROR; 949 950 *iterator = new TreeIterator(tree); 951 if (*iterator == NULL) 952 return B_NO_MEMORY; 953 954 if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL 955 || fIsPattern) 956 && fHasIndex) { 957 // set iterator to the exact position 958 959 int32 keySize = index.KeySize(); 960 961 // at this point, fIsPattern is only true if it's a string type, and fOp 962 // is either OP_EQUAL or OP_UNEQUAL 963 if (fIsPattern) { 964 // let's see if we can use the beginning of the key for positioning 965 // the iterator and adjust the key size; if not, just leave the 966 // iterator at the start and return success 967 keySize = getFirstPatternSymbol(fString); 968 if (keySize <= 0) 969 return B_OK; 970 } 971 972 if (keySize == 0) { 973 // B_STRING_TYPE doesn't have a fixed length, so it was set 974 // to 0 before - we compute the correct value here 975 if (fType == B_STRING_TYPE) { 976 keySize = strlen(fValue.String); 977 978 // The empty string is a special case - we normally don't check 979 // for the trailing null byte, in the case for the empty string 980 // we do it explicitly, because there can't be keys in the B+tree 981 // with a length of zero 982 if (keySize == 0) 983 keySize = 1; 984 } else 985 RETURN_ERROR(B_ENTRY_NOT_FOUND); 986 } 987 988 if (fIsSpecialTime) { 989 // we have to find the first matching shifted value 990 off_t value = fValue.Int64 << INODE_TIME_SHIFT; 991 status = (*iterator)->Find((uint8 *)&value, keySize); 992 if (status == B_ENTRY_NOT_FOUND) 993 return B_OK; 994 } else { 995 status = (*iterator)->Find(Value(), keySize); 996 if (fOp == OP_EQUAL && !fIsPattern) 997 return status; 998 else if (status == B_ENTRY_NOT_FOUND 999 && (fIsPattern || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL)) 1000 return B_OK; 1001 } 1002 1003 RETURN_ERROR(status); 1004 } 1005 1006 return B_OK; 1007} 1008 1009 1010status_t 1011Equation::GetNextMatching(Volume *volume, TreeIterator *iterator, 1012 struct dirent *dirent, size_t bufferSize) 1013{ 1014 while (true) { 1015 union value indexValue; 1016 uint16 keyLength; 1017 uint16 duplicate; 1018 off_t offset; 1019 1020 status_t status = iterator->GetNextEntry(&indexValue, &keyLength, 1021 (uint16)sizeof(indexValue), &offset, &duplicate); 1022 if (status < B_OK) 1023 return status; 1024 1025 // only compare against the index entry when this is the correct 1026 // index for the equation 1027 if (fHasIndex && duplicate < 2 && !CompareTo((uint8 *)&indexValue, keyLength)) { 1028 // They aren't equal? let the operation decide what to do 1029 // Since we always start at the beginning of the index (or the correct 1030 // position), only some needs to be stopped if the entry doesn't fit. 1031 if (fOp == OP_LESS_THAN 1032 || fOp == OP_LESS_THAN_OR_EQUAL 1033 || (fOp == OP_EQUAL && !fIsPattern)) 1034 return B_ENTRY_NOT_FOUND; 1035 1036 if (duplicate > 0) 1037 iterator->SkipDuplicates(); 1038 continue; 1039 } 1040 1041 Vnode vnode(volume, offset); 1042 Inode *inode; 1043 if ((status = vnode.Get(&inode)) != B_OK) { 1044 REPORT_ERROR(status); 1045 FATAL(("could not get inode %Ld in index \"%s\"!\n", offset, fAttribute)); 1046 // try with next 1047 continue; 1048 } 1049 1050 // ToDo: check user permissions here - but which one?! 1051 // we could filter out all those where we don't have 1052 // read access... (we should check for every parent 1053 // directory if the X_OK is allowed) 1054 // Although it's quite expensive to open all parents, 1055 // it's likely that the application that runs the 1056 // query will do something similar (and we don't have 1057 // to do it for root, either). 1058 1059 // go up in the tree until a &&-operator is found, and check if the 1060 // inode matches with the rest of the expression - we don't have to 1061 // check ||-operators for that 1062 Term *term = this; 1063 status = MATCH_OK; 1064 1065 if (!fHasIndex) 1066 status = Match(inode); 1067 1068 while (term != NULL && status == MATCH_OK) { 1069 Operator *parent = (Operator *)term->Parent(); 1070 if (parent == NULL) 1071 break; 1072 1073 if (parent->Op() == OP_AND) { 1074 // choose the other child of the parent 1075 Term *other = parent->Right(); 1076 if (other == term) 1077 other = parent->Left(); 1078 1079 if (other == NULL) { 1080 FATAL(("&&-operator has only one child... (parent = %p)\n", parent)); 1081 break; 1082 } 1083 status = other->Match(inode); 1084 if (status < 0) { 1085 REPORT_ERROR(status); 1086 status = NO_MATCH; 1087 } 1088 } 1089 term = (Term *)parent; 1090 } 1091 1092 if (status == MATCH_OK) { 1093 dirent->d_dev = volume->ID(); 1094 dirent->d_ino = offset; 1095 dirent->d_pdev = volume->ID(); 1096 dirent->d_pino = volume->ToVnode(inode->Parent()); 1097 1098 if (inode->GetName(dirent->d_name) < B_OK) 1099 FATAL(("inode %Ld in query has no name!\n", inode->BlockNumber())); 1100 1101#ifdef KEEP_WRONG_DIRENT_RECLEN 1102 // ToDo: The available file systems in BeOS apparently don't set the 1103 // correct d_reclen - we are copying that behaviour if requested, but 1104 // if it doesn't break compatibility, we will remove it. 1105 dirent->d_reclen = strlen(dirent->d_name); 1106#else 1107 dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name); 1108#endif 1109 } 1110 1111 if (status == MATCH_OK) 1112 return B_OK; 1113 } 1114 RETURN_ERROR(B_ERROR); 1115} 1116 1117 1118// #pragma mark - 1119 1120 1121Operator::Operator(Term *left, int8 op, Term *right) 1122 : Term(op), 1123 fLeft(left), 1124 fRight(right) 1125{ 1126 if (left) 1127 left->SetParent(this); 1128 if (right) 1129 right->SetParent(this); 1130} 1131 1132 1133Operator::~Operator() 1134{ 1135 delete fLeft; 1136 delete fRight; 1137} 1138 1139 1140status_t 1141Operator::Match(Inode *inode, const char *attribute, int32 type, const uint8 *key, size_t size) 1142{ 1143 if (fOp == OP_AND) { 1144 status_t status = fLeft->Match(inode, attribute, type, key, size); 1145 if (status != MATCH_OK) 1146 return status; 1147 1148 return fRight->Match(inode, attribute, type, key, size); 1149 } else { 1150 // choose the term with the better score for OP_OR 1151 if (fRight->Score() > fLeft->Score()) { 1152 status_t status = fRight->Match(inode, attribute, type, key, size); 1153 if (status != NO_MATCH) 1154 return status; 1155 } 1156 return fLeft->Match(inode, attribute, type, key, size); 1157 } 1158} 1159 1160 1161void 1162Operator::Complement() 1163{ 1164 if (fOp == OP_AND) 1165 fOp = OP_OR; 1166 else 1167 fOp = OP_AND; 1168 1169 fLeft->Complement(); 1170 fRight->Complement(); 1171} 1172 1173 1174void 1175Operator::CalculateScore(Index &index) 1176{ 1177 fLeft->CalculateScore(index); 1178 fRight->CalculateScore(index); 1179} 1180 1181 1182int32 1183Operator::Score() const 1184{ 1185 if (fOp == OP_AND) { 1186 // return the one with the better score 1187 if (fRight->Score() > fLeft->Score()) 1188 return fRight->Score(); 1189 1190 return fLeft->Score(); 1191 } 1192 1193 // for OP_OR, be honest, and return the one with the worse score 1194 if (fRight->Score() < fLeft->Score()) 1195 return fRight->Score(); 1196 1197 return fLeft->Score(); 1198} 1199 1200 1201status_t 1202Operator::InitCheck() 1203{ 1204 if (fOp != OP_AND && fOp != OP_OR 1205 || fLeft == NULL || fLeft->InitCheck() < B_OK 1206 || fRight == NULL || fRight->InitCheck() < B_OK) 1207 return B_ERROR; 1208 1209 return B_OK; 1210} 1211 1212 1213#if 0 1214Term * 1215Operator::Copy() const 1216{ 1217 if (fEquation != NULL) { 1218 Equation *equation = new Equation(*fEquation); 1219 if (equation == NULL) 1220 return NULL; 1221 1222 Term *term = new Term(equation); 1223 if (term == NULL) 1224 delete equation; 1225 1226 return term; 1227 } 1228 1229 Term *left = NULL, *right = NULL; 1230 1231 if (fLeft != NULL && (left = fLeft->Copy()) == NULL) 1232 return NULL; 1233 if (fRight != NULL && (right = fRight->Copy()) == NULL) { 1234 delete left; 1235 return NULL; 1236 } 1237 1238 Term *term = new Term(left,fOp,right); 1239 if (term == NULL) { 1240 delete left; 1241 delete right; 1242 return NULL; 1243 } 1244 return term; 1245} 1246#endif 1247 1248 1249// #pragma mark - 1250 1251#ifdef DEBUG 1252void 1253Operator::PrintToStream() 1254{ 1255 D(__out("( ")); 1256 if (fLeft != NULL) 1257 fLeft->PrintToStream(); 1258 1259 char *op; 1260 switch (fOp) { 1261 case OP_OR: op = "OR"; break; 1262 case OP_AND: op = "AND"; break; 1263 default: op = "?"; break; 1264 } 1265 D(__out(" %s ",op)); 1266 1267 if (fRight != NULL) 1268 fRight->PrintToStream(); 1269 1270 D(__out(" )")); 1271} 1272 1273 1274void 1275Equation::PrintToStream() 1276{ 1277 char *symbol = "???"; 1278 switch (fOp) { 1279 case OP_EQUAL: symbol = "=="; break; 1280 case OP_UNEQUAL: symbol = "!="; break; 1281 case OP_GREATER_THAN: symbol = ">"; break; 1282 case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break; 1283 case OP_LESS_THAN: symbol = "<"; break; 1284 case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break; 1285 } 1286 D(__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString)); 1287} 1288 1289#endif /* DEBUG */ 1290 1291// #pragma mark - 1292 1293 1294Expression::Expression(char *expr) 1295{ 1296 if (expr == NULL) 1297 return; 1298 1299 fTerm = ParseOr(&expr); 1300 if (fTerm != NULL && fTerm->InitCheck() < B_OK) { 1301 FATAL(("Corrupt tree in expression!\n")); 1302 delete fTerm; 1303 fTerm = NULL; 1304 } 1305 D(if (fTerm != NULL) { 1306 fTerm->PrintToStream(); 1307 D(__out("\n")); 1308 if (*expr != '\0') 1309 PRINT(("Unexpected end of string: \"%s\"!\n", expr)); 1310 }); 1311 fPosition = expr; 1312} 1313 1314 1315Expression::~Expression() 1316{ 1317 delete fTerm; 1318} 1319 1320 1321Term * 1322Expression::ParseEquation(char **expr) 1323{ 1324 skipWhitespace(expr); 1325 1326 bool _not = false; 1327 if (**expr == '!') { 1328 skipWhitespace(expr, 1); 1329 if (**expr != '(') 1330 return NULL; 1331 1332 _not = true; 1333 } 1334 1335 if (**expr == ')') { 1336 // shouldn't be handled here 1337 return NULL; 1338 } else if (**expr == '(') { 1339 skipWhitespace(expr, 1); 1340 1341 Term *term = ParseOr(expr); 1342 1343 skipWhitespace(expr); 1344 1345 if (**expr != ')') { 1346 delete term; 1347 return NULL; 1348 } 1349 1350 // If the term is negated, we just complement the tree, to get 1351 // rid of the not, a.k.a. DeMorgan's Law. 1352 if (_not) 1353 term->Complement(); 1354 1355 skipWhitespace(expr, 1); 1356 1357 return term; 1358 } 1359 1360 Equation *equation = new Equation(expr); 1361 if (equation == NULL || equation->InitCheck() < B_OK) { 1362 delete equation; 1363 return NULL; 1364 } 1365 return equation; 1366} 1367 1368 1369Term * 1370Expression::ParseAnd(char **expr) 1371{ 1372 Term *left = ParseEquation(expr); 1373 if (left == NULL) 1374 return NULL; 1375 1376 while (IsOperator(expr,'&')) { 1377 Term *right = ParseAnd(expr); 1378 Term *newParent = NULL; 1379 1380 if (right == NULL || (newParent = new Operator(left, OP_AND, right)) == NULL) { 1381 delete left; 1382 delete right; 1383 1384 return NULL; 1385 } 1386 left = newParent; 1387 } 1388 1389 return left; 1390} 1391 1392 1393Term * 1394Expression::ParseOr(char **expr) 1395{ 1396 Term *left = ParseAnd(expr); 1397 if (left == NULL) 1398 return NULL; 1399 1400 while (IsOperator(expr,'|')) { 1401 Term *right = ParseAnd(expr); 1402 Term *newParent = NULL; 1403 1404 if (right == NULL || (newParent = new Operator(left, OP_OR, right)) == NULL) { 1405 delete left; 1406 delete right; 1407 1408 return NULL; 1409 } 1410 left = newParent; 1411 } 1412 1413 return left; 1414} 1415 1416 1417bool 1418Expression::IsOperator(char **expr, char op) 1419{ 1420 char *string = *expr; 1421 1422 if (*string == op && *(string + 1) == op) { 1423 *expr += 2; 1424 return true; 1425 } 1426 return false; 1427} 1428 1429 1430status_t 1431Expression::InitCheck() 1432{ 1433 if (fTerm == NULL) 1434 return B_BAD_VALUE; 1435 1436 return B_OK; 1437} 1438 1439 1440// #pragma mark - 1441 1442 1443Query::Query(Volume *volume, Expression *expression, uint32 flags) 1444 : 1445 fVolume(volume), 1446 fExpression(expression), 1447 fCurrent(NULL), 1448 fIterator(NULL), 1449 fIndex(volume), 1450 fFlags(flags), 1451 fPort(-1) 1452{ 1453 // if the expression has a valid root pointer, the whole tree has 1454 // already passed the sanity check, so that we don't have to check 1455 // every pointer 1456 if (volume == NULL || expression == NULL || expression->Root() == NULL) 1457 return; 1458 1459 // create index on the stack and delete it afterwards 1460 fExpression->Root()->CalculateScore(fIndex); 1461 fIndex.Unset(); 1462 1463 Stack<Term *> stack; 1464 stack.Push(fExpression->Root()); 1465 1466 Term *term; 1467 while (stack.Pop(&term)) { 1468 if (term->Op() < OP_EQUATION) { 1469 Operator *op = (Operator *)term; 1470 1471 if (op->Op() == OP_OR) { 1472 stack.Push(op->Left()); 1473 stack.Push(op->Right()); 1474 } else { 1475 // For OP_AND, we can use the scoring system to decide which path to add 1476 if (op->Right()->Score() > op->Left()->Score()) 1477 stack.Push(op->Right()); 1478 else 1479 stack.Push(op->Left()); 1480 } 1481 } else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK) 1482 FATAL(("Unknown term on stack or stack error")); 1483 } 1484 1485 if (fFlags & B_LIVE_QUERY) 1486 volume->AddQuery(this); 1487} 1488 1489 1490Query::~Query() 1491{ 1492 if (fFlags & B_LIVE_QUERY) 1493 fVolume->RemoveQuery(this); 1494} 1495 1496 1497status_t 1498Query::GetNextEntry(struct dirent *dirent, size_t size) 1499{ 1500 // If we don't have an equation to use yet/anymore, get a new one 1501 // from the stack 1502 while (true) { 1503 if (fIterator == NULL) { 1504 if (!fStack.Pop(&fCurrent) 1505 || fCurrent == NULL 1506 || fCurrent->PrepareQuery(fVolume, fIndex, &fIterator, 1507 false/*fFlags & B_QUERY_NON_INDEXED*/) < B_OK) 1508 return B_ENTRY_NOT_FOUND; 1509 } 1510 if (fCurrent == NULL) 1511 RETURN_ERROR(B_ERROR); 1512 1513 status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent, size); 1514 if (status < B_OK) { 1515 delete fIterator; 1516 fIterator = NULL; 1517 fCurrent = NULL; 1518 } else { 1519 // only return if we have another entry 1520 return B_OK; 1521 } 1522 } 1523} 1524 1525 1526void 1527Query::SetLiveMode(port_id port, int32 token) 1528{ 1529 fPort = port; 1530 fToken = token; 1531 1532 if ((fFlags & B_LIVE_QUERY) == 0) { 1533 // you can decide at any point to set the live query mode, 1534 // only live queries have to be updated by attribute changes 1535 fFlags |= B_LIVE_QUERY; 1536 fVolume->AddQuery(this); 1537 } 1538} 1539 1540 1541void 1542Query::LiveUpdate(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey, 1543 size_t oldLength, const uint8 *newKey, size_t newLength) 1544{ 1545 if (fPort < 0 || fExpression == NULL || attribute == NULL) 1546 return; 1547 1548 // ToDo: check if the attribute is part of the query at all... 1549 1550 status_t oldStatus = fExpression->Root()->Match(inode, attribute, type, oldKey, oldLength); 1551 status_t newStatus = fExpression->Root()->Match(inode, attribute, type, newKey, newLength); 1552 1553 int32 op; 1554 if (oldStatus == MATCH_OK && newStatus == MATCH_OK) { 1555 // only send out a notification if the name was changed 1556 if (oldKey == NULL || strcmp(attribute, "name")) 1557 return; 1558 1559 send_notification(fPort, fToken, B_QUERY_UPDATE, B_ENTRY_REMOVED, fVolume->ID(), 0, 1560 fVolume->ToVnode(inode->Parent()), 0, inode->ID(), (const char *)oldKey); 1561 op = B_ENTRY_CREATED; 1562 } else if (oldStatus != MATCH_OK && newStatus != MATCH_OK) { 1563 // nothing has changed 1564 return; 1565 } else if (oldStatus == MATCH_OK && newStatus != MATCH_OK) 1566 op = B_ENTRY_REMOVED; 1567 else 1568 op = B_ENTRY_CREATED; 1569 1570 // if "value" is NULL, send_notification() crashes... 1571 const char *value = (const char *)newKey; 1572 if (type != B_STRING_TYPE || value == NULL) 1573 value = ""; 1574 1575 send_notification(fPort, fToken, B_QUERY_UPDATE, op, fVolume->ID(), 0, 1576 fVolume->ToVnode(inode->Parent()), 0, inode->ID(), value); 1577} 1578 1579