1/* Query - query parsing and evaluation
2 *
3 * The pattern matching is roughly based on code originally written
4 * by J. Kercheval, and on code written by Kenneth Almquist, though
5 * it shares no code.
6 *
7 * Copyright 2001-2008, Axel Dörfler, axeld@pinc-software.de.
8 * This file may be used under the terms of the MIT License.
9 */
10
11
12#include "fsproto.h"	// include first for hacky reasons
13
14#include "Query.h"
15#include "bfs.h"
16#include "Debug.h"
17#include "Stack.h"
18#include "Volume.h"
19#include "Inode.h"
20#include "BPlusTree.h"
21#include "Index.h"
22
23#include <util/kernel_cpp.h>
24#include <SupportDefs.h>
25#include <NodeMonitor.h>
26#include <TypeConstants.h>
27#include <AppDefs.h>
28#include <fs_query.h>
29
30#include <malloc.h>
31#include <stdio.h>
32#include <string.h>
33
34
35// The parser has a very static design, but it will do what is required.
36//
37// ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator
38// precedence, that is =,!=,>,<,>=,<= .. && .. ||.
39// Apparently, the "!" (not) can only be used with brackets.
40//
41// If you think that there are too few NULL pointer checks in some places
42// of the code, just read the beginning of the query constructor.
43// The API is not fully available, just the Query and the Expression class
44// are.
45
46
47enum ops {
48	OP_NONE,
49
50	OP_AND,
51	OP_OR,
52
53	OP_EQUATION,
54
55	OP_EQUAL,
56	OP_UNEQUAL,
57	OP_GREATER_THAN,
58	OP_LESS_THAN,
59	OP_GREATER_THAN_OR_EQUAL,
60	OP_LESS_THAN_OR_EQUAL,
61};
62
63enum match {
64	NO_MATCH = 0,
65	MATCH_OK = 1,
66
67	MATCH_BAD_PATTERN = -2,
68	MATCH_INVALID_CHARACTER
69};
70
71// return values from isValidPattern()
72enum {
73	PATTERN_INVALID_ESCAPE = -3,
74	PATTERN_INVALID_RANGE,
75	PATTERN_INVALID_SET
76};
77
78union value {
79	int64	Int64;
80	uint64	Uint64;
81	int32	Int32;
82	uint32	Uint32;
83	float	Float;
84	double	Double;
85	char	String[INODE_FILE_NAME_LENGTH];
86};
87
88// B_MIME_STRING_TYPE is defined in storage/Mime.h, but we
89// don't need the whole file here; the type can't change anyway
90#ifndef _MIME_H
91#	define B_MIME_STRING_TYPE 'MIMS'
92#endif
93
94class Term {
95	public:
96		Term(int8 op) : fOp(op), fParent(NULL) {}
97		virtual ~Term() {}
98
99		int8		Op() const { return fOp; }
100
101		void		SetParent(Term *parent) { fParent = parent; }
102		Term		*Parent() const { return fParent; }
103
104		virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0,
105				const uint8 *key = NULL,size_t size = 0) = 0;
106		virtual void Complement() = 0;
107
108		virtual void CalculateScore(Index &index) = 0;
109		virtual int32 Score() const = 0;
110
111		virtual status_t InitCheck() = 0;
112
113#ifdef DEBUG
114		virtual void	PrintToStream() = 0;
115#endif
116
117	protected:
118		int8	fOp;
119		Term	*fParent;
120};
121
122// Although an Equation object is quite independent from the volume on which
123// the query is run, there are some dependencies that are produced while
124// querying:
125// The type/size of the value, the score, and if it has an index or not.
126// So you could run more than one query on the same volume, but it might return
127// wrong values when it runs concurrently on another volume.
128// That's not an issue right now, because we run single-threaded and don't use
129// queries more than once.
130
131class Equation : public Term {
132	public:
133		Equation(char **expr);
134		virtual ~Equation();
135
136		virtual status_t InitCheck();
137
138		status_t	ParseQuotedString(char **_start, char **_end);
139		char		*CopyString(char *start, char *end);
140
141		virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0,
142						const uint8 *key = NULL, size_t size = 0);
143		virtual void Complement();
144
145		status_t	PrepareQuery(Volume *volume, Index &index, TreeIterator **iterator,
146						bool queryNonIndexed);
147		status_t	GetNextMatching(Volume *volume, TreeIterator *iterator,
148						struct dirent *dirent, size_t bufferSize);
149
150		virtual void CalculateScore(Index &index);
151		virtual int32 Score() const { return fScore; }
152
153#ifdef DEBUG
154		virtual void PrintToStream();
155#endif
156
157	private:
158		Equation(const Equation &);
159		Equation &operator=(const Equation &);
160			// no implementation
161
162		status_t	ConvertValue(type_code type);
163		bool		CompareTo(const uint8 *value, uint16 size);
164		uint8		*Value() const { return (uint8 *)&fValue; }
165		status_t	MatchEmptyString();
166
167		char		*fAttribute;
168		char		*fString;
169		union value fValue;
170		type_code	fType;
171		size_t		fSize;
172		bool		fIsPattern;
173		bool		fIsSpecialTime;
174
175		int32		fScore;
176		bool		fHasIndex;
177};
178
179class Operator : public Term {
180	public:
181		Operator(Term *,int8,Term *);
182		virtual ~Operator();
183
184		Term		*Left() const { return fLeft; }
185		Term		*Right() const { return fRight; }
186
187		virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0,
188			const uint8 *key = NULL, size_t size = 0);
189		virtual void Complement();
190
191		virtual void CalculateScore(Index &index);
192		virtual int32 Score() const;
193
194		virtual status_t InitCheck();
195
196		//Term		*Copy() const;
197#ifdef DEBUG
198		virtual void PrintToStream();
199#endif
200
201	private:
202		Operator(const Operator &);
203		Operator &operator=(const Operator &);
204			// no implementation
205
206		Term		*fLeft,*fRight;
207};
208
209
210//---------------------------------
211
212
213void
214skipWhitespace(char **expr, int32 skip = 0)
215{
216	char *string = (*expr) + skip;
217	while (*string == ' ' || *string == '\t') string++;
218	*expr = string;
219}
220
221
222void
223skipWhitespaceReverse(char **expr,char *stop)
224{
225	char *string = *expr;
226	while (string > stop && (*string == ' ' || *string == '\t')) string--;
227	*expr = string;
228}
229
230
231//	#pragma mark -
232
233
234uint32
235utf8ToUnicode(char **string)
236{
237	uint8 *bytes = (uint8 *)*string;
238	int32 length;
239	uint8 mask = 0x1f;
240
241	switch (bytes[0] & 0xf0) {
242		case 0xc0:
243		case 0xd0:	length = 2; break;
244		case 0xe0:	length = 3; break;
245		case 0xf0:
246			mask = 0x0f;
247			length = 4;
248			break;
249		default:
250			// valid 1-byte character
251			// and invalid characters
252			(*string)++;
253			return bytes[0];
254	}
255	uint32 c = bytes[0] & mask;
256	int32 i = 1;
257	for (;i < length && (bytes[i] & 0x80) > 0;i++)
258		c = (c << 6) | (bytes[i] & 0x3f);
259
260	if (i < length) {
261		// invalid character
262		(*string)++;
263		return (uint32)bytes[0];
264	}
265	*string += length;
266	return c;
267}
268
269
270int32
271getFirstPatternSymbol(char *string)
272{
273	char c;
274
275	for (int32 index = 0;(c = *string++);index++) {
276		if (c == '*' || c == '?' || c == '[')
277			return index;
278	}
279	return -1;
280}
281
282
283bool
284isPattern(char *string)
285{
286	return getFirstPatternSymbol(string) >= 0 ? true : false;
287}
288
289
290status_t
291isValidPattern(char *pattern)
292{
293	while (*pattern) {
294		switch (*pattern++) {
295			case '\\':
296				// the escape character must not be at the end of the pattern
297				if (!*pattern++)
298					return PATTERN_INVALID_ESCAPE;
299				break;
300
301			case '[':
302				if (pattern[0] == ']' || !pattern[0])
303					return PATTERN_INVALID_SET;
304
305				while (*pattern != ']') {
306					if (*pattern == '\\' && !*++pattern)
307						return PATTERN_INVALID_ESCAPE;
308
309					if (!*pattern)
310						return PATTERN_INVALID_SET;
311
312					if (pattern[0] == '-' && pattern[1] == '-')
313						return PATTERN_INVALID_RANGE;
314
315					pattern++;
316				}
317				break;
318		}
319	}
320	return B_OK;
321}
322
323
324/**	Matches the string against the given wildcard pattern.
325 *	Returns either MATCH_OK, or NO_MATCH when everything went fine,
326 *	or values < 0 (see enum at the top of Query.cpp) if an error
327 *	occurs
328 */
329
330status_t
331matchString(char *pattern, char *string)
332{
333	while (*pattern) {
334		// end of string == valid end of pattern?
335		if (!string[0]) {
336			while (pattern[0] == '*')
337				pattern++;
338			return !pattern[0] ? MATCH_OK : NO_MATCH;
339		}
340
341		switch (*pattern++) {
342			case '?':
343			{
344				// match exactly one UTF-8 character; we are
345				// not interested in the result
346				utf8ToUnicode(&string);
347				break;
348			}
349
350			case '*':
351			{
352				// compact pattern
353				while (true) {
354					if (pattern[0] == '?') {
355						if (!*++string)
356							return NO_MATCH;
357					} else if (pattern[0] != '*')
358						break;
359
360					pattern++;
361				}
362
363				// if the pattern is done, we have matched the string
364				if (!pattern[0])
365					return MATCH_OK;
366
367				while(true) {
368					// we have removed all occurences of '*' and '?'
369					if (pattern[0] == string[0]
370						|| pattern[0] == '['
371						|| pattern[0] == '\\') {
372						status_t status = matchString(pattern,string);
373						if (status < B_OK || status == MATCH_OK)
374							return status;
375					}
376
377					// we could be nice here and just jump to the next
378					// UTF-8 character - but we wouldn't gain that much
379					// and it'd be slower (since we're checking for
380					// equality before entering the recursion)
381					if (!*++string)
382						return NO_MATCH;
383				}
384				break;
385			}
386
387			case '[':
388			{
389				bool invert = false;
390				if (pattern[0] == '^' || pattern[0] == '!') {
391					invert = true;
392					pattern++;
393				}
394
395				if (!pattern[0] || pattern[0] == ']')
396					return MATCH_BAD_PATTERN;
397
398				uint32 c = utf8ToUnicode(&string);
399				bool matched = false;
400
401				while (pattern[0] != ']') {
402					if (!pattern[0])
403						return MATCH_BAD_PATTERN;
404
405					if (pattern[0] == '\\')
406						pattern++;
407
408					uint32 first = utf8ToUnicode(&pattern);
409
410					// Does this character match, or is this a range?
411					if (first == c) {
412						matched = true;
413						break;
414					} else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) {
415						pattern++;
416
417						if (pattern[0] == '\\') {
418							pattern++;
419							if (!pattern[0])
420								return MATCH_BAD_PATTERN;
421						}
422						uint32 last = utf8ToUnicode(&pattern);
423
424						if (c >= first && c <= last) {
425							matched = true;
426							break;
427						}
428					}
429				}
430
431				if (invert)
432					matched = !matched;
433
434				if (matched) {
435					while (pattern[0] != ']') {
436						if (!pattern[0])
437							return MATCH_BAD_PATTERN;
438						pattern++;
439					}
440					pattern++;
441					break;
442				}
443				return NO_MATCH;
444			}
445
446            case '\\':
447				if (!pattern[0])
448					return MATCH_BAD_PATTERN;
449				// supposed to fall through
450			default:
451				if (pattern[-1] != string[0])
452					return NO_MATCH;
453				string++;
454				break;
455		}
456	}
457
458	if (string[0])
459		return NO_MATCH;
460
461	return MATCH_OK;
462}
463
464
465//	#pragma mark -
466
467
468Equation::Equation(char **expr)
469	: Term(OP_EQUATION),
470	fAttribute(NULL),
471	fString(NULL),
472	fType(0),
473	fIsPattern(false)
474{
475	char *string = *expr;
476	char *start = string;
477	char *end = NULL;
478
479	// Since the equation is the integral part of any query, we're just parsing
480	// the whole thing here.
481	// The whitespace at the start is already removed in Expression::ParseEquation()
482
483	if (*start == '"' || *start == '\'') {
484		// string is quoted (start has to be on the beginning of a string)
485		if (ParseQuotedString(&start, &end) < B_OK)
486			return;
487
488		// set string to a valid start of the equation symbol
489		string = end + 2;
490		skipWhitespace(&string);
491		if (*string != '=' && *string != '<' && *string != '>' && *string != '!') {
492			*expr = string;
493			return;
494		}
495	} else {
496		// search the (in)equation for the actual equation symbol (and for other operators
497		// in case the equation is malformed)
498		while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!'
499			&& *string != '&' && *string != '|')
500			string++;
501
502		// get the attribute string	(and trim whitespace), in case
503		// the string was not quoted
504		end = string - 1;
505		skipWhitespaceReverse(&end, start);
506	}
507
508	// attribute string is empty (which is not allowed)
509	if (start > end)
510		return;
511
512	// at this point, "start" points to the beginning of the string, "end" points
513	// to the last character of the string, and "string" points to the first
514	// character of the equation symbol
515
516	// test for the right symbol (as this doesn't need any memory)
517	switch (*string) {
518		case '=':
519			fOp = OP_EQUAL;
520			break;
521		case '>':
522			fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN;
523			break;
524		case '<':
525			fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN;
526			break;
527		case '!':
528			if (*(string + 1) != '=')
529				return;
530			fOp = OP_UNEQUAL;
531			break;
532
533		// any invalid characters will be rejected
534		default:
535			*expr = string;
536			return;
537	}
538	// lets change "start" to point to the first character after the symbol
539	if (*(string + 1) == '=')
540		string++;
541	string++;
542	skipWhitespace(&string);
543
544	// allocate & copy the attribute string
545
546	fAttribute = CopyString(start, end);
547	if (fAttribute == NULL)
548		return;
549
550	start = string;
551	if (*start == '"' || *start == '\'') {
552		// string is quoted (start has to be on the beginning of a string)
553		if (ParseQuotedString(&start, &end) < B_OK)
554			return;
555
556		string = end + 2;
557		skipWhitespace(&string);
558	} else {
559		while (*string && *string != '&' && *string != '|' && *string != ')')
560			string++;
561
562		end = string - 1;
563		skipWhitespaceReverse(&end, start);
564	}
565
566	// at this point, "start" will point to the first character of the value,
567	// "end" will point to its last character, and "start" to the first non-
568	// whitespace character after the value string
569
570	fString = CopyString(start, end);
571	if (fString == NULL)
572		return;
573
574	// patterns are only allowed for these operations (and strings)
575	if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) {
576		fIsPattern = isPattern(fString);
577		if (fIsPattern && isValidPattern(fString) < B_OK) {
578			// we only want to have valid patterns; setting fString
579			// to NULL will cause InitCheck() to fail
580			free(fString);
581			fString = NULL;
582		}
583	}
584
585	// The special time flag is set if the time values are shifted
586	// 64-bit values to reduce the number of duplicates.
587	// We have to be able to compare them against unshifted values
588	// later. The only index which needs this is the last_modified
589	// index, but we may want to open that feature for other indices,
590	// too one day.
591	fIsSpecialTime = !strcmp(fAttribute, "last_modified");
592
593	*expr = string;
594}
595
596
597Equation::~Equation()
598{
599	if (fAttribute != NULL)
600		free(fAttribute);
601	if (fString != NULL)
602		free(fString);
603}
604
605
606status_t
607Equation::InitCheck()
608{
609	if (fAttribute == NULL
610		|| fString == NULL
611		|| fOp == OP_NONE)
612		return B_BAD_VALUE;
613
614	return B_OK;
615}
616
617
618status_t
619Equation::ParseQuotedString(char **_start, char **_end)
620{
621	char *start = *_start;
622	char quote = *start++;
623	char *end = start;
624
625	for (;*end && *end != quote;end++) {
626		if (*end == '\\')
627			end++;
628	}
629	if (*end == '\0')
630		return B_BAD_VALUE;
631
632	*_start = start;
633	*_end = end - 1;
634
635	return B_OK;
636}
637
638
639char *
640Equation::CopyString(char *start, char *end)
641{
642	// end points to the last character of the string - and the length
643	// also has to include the null-termination
644	int32 length = end + 2 - start;
645	// just to make sure; since that's the max. attribute name length and
646	// the max. string in an index, it make sense to have it that way
647	if (length > INODE_FILE_NAME_LENGTH || length <= 0)
648		return NULL;
649
650	char *copy = (char *)malloc(length);
651	if (copy == NULL)
652		return NULL;
653
654	memcpy(copy,start,length - 1);
655	copy[length - 1] = '\0';
656
657	return copy;
658}
659
660
661status_t
662Equation::ConvertValue(type_code type)
663{
664	// Has the type already been converted?
665	if (type == fType)
666		return B_OK;
667
668	char *string = fString;
669
670	switch (type) {
671		case B_MIME_STRING_TYPE:
672			type = B_STRING_TYPE;
673			// supposed to fall through
674		case B_STRING_TYPE:
675			strncpy(fValue.String, string, INODE_FILE_NAME_LENGTH);
676			fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0';
677			fSize = strlen(fValue.String);
678			break;
679		case B_INT32_TYPE:
680			fValue.Int32 = strtol(string, &string, 0);
681			fSize = sizeof(int32);
682			break;
683		case B_UINT32_TYPE:
684			fValue.Int32 = strtoul(string, &string, 0);
685			fSize = sizeof(uint32);
686			break;
687		case B_INT64_TYPE:
688			fValue.Int64 = strtoll(string, &string, 0);
689			fSize = sizeof(int64);
690			break;
691		case B_UINT64_TYPE:
692			fValue.Uint64 = strtoull(string, &string, 0);
693			fSize = sizeof(uint64);
694			break;
695		case B_FLOAT_TYPE:
696			fValue.Float = strtod(string, &string);
697			fSize = sizeof(float);
698			break;
699		case B_DOUBLE_TYPE:
700			fValue.Double = strtod(string, &string);
701			fSize = sizeof(double);
702			break;
703		default:
704			FATAL(("query value conversion to 0x%lx requested!\n", type));
705			// should we fail here or just do a safety int32 conversion?
706			return B_ERROR;
707	}
708
709	fType = type;
710
711	// patterns are only allowed for string types
712	if (fType != B_STRING_TYPE && fIsPattern)
713		fIsPattern = false;
714
715	return B_OK;
716}
717
718
719/**	Returns true when the key matches the equation. You have to
720 *	call ConvertValue() before this one.
721 */
722
723bool
724Equation::CompareTo(const uint8 *value, uint16 size)
725{
726	int32 compare;
727
728	// fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL
729	if (fIsPattern) {
730		// we have already validated the pattern, so we don't check for failing
731		// here - if something is broken, and matchString() returns an error,
732		// we just don't match
733		compare = matchString(fValue.String, (char *)value) == MATCH_OK ? 0 : 1;
734	} else if (fIsSpecialTime) {
735		// the index is a shifted int64 index, but we have to match
736		// against an unshifted value (i.e. the last_modified index)
737		int64 timeValue = *(int64 *)value >> INODE_TIME_SHIFT;
738		compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64, sizeof(int64));
739	} else
740		compare = compareKeys(fType, value, size, Value(), fSize);
741
742	switch (fOp) {
743		case OP_EQUAL:
744			return compare == 0;
745		case OP_UNEQUAL:
746			return compare != 0;
747		case OP_LESS_THAN:
748			return compare < 0;
749		case OP_LESS_THAN_OR_EQUAL:
750			return compare <= 0;
751		case OP_GREATER_THAN:
752			return compare > 0;
753		case OP_GREATER_THAN_OR_EQUAL:
754			return compare >= 0;
755	}
756	FATAL(("Unknown/Unsupported operation: %d\n", fOp));
757	return false;
758}
759
760
761void
762Equation::Complement()
763{
764	D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) {
765		FATAL(("op out of range!"));
766		return;
767	});
768
769	int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL,
770			OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN};
771	fOp = complementOp[fOp - OP_EQUAL];
772}
773
774
775status_t
776Equation::MatchEmptyString()
777{
778	// there is no matching attribute, we will just bail out if we
779	// already know that our value is not of a string type.
780	// If not, it will be converted to a string - and then be compared with "".
781	// That's why we have to call ConvertValue() here - but it will be
782	// a cheap call for the next time
783	// Should we do this only for OP_UNEQUAL?
784	if (fType != 0 && fType != B_STRING_TYPE)
785		return NO_MATCH;
786
787	status_t status = ConvertValue(B_STRING_TYPE);
788	if (status == B_OK)
789		status = CompareTo((const uint8 *)"", fSize) ? MATCH_OK : NO_MATCH;
790
791	return status;
792}
793
794
795/**	Matches the inode's attribute value with the equation.
796 *	Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong
797 */
798
799status_t
800Equation::Match(Inode *inode, const char *attributeName, int32 type, const uint8 *key, size_t size)
801{
802	// get a pointer to the attribute in question
803	union value value;
804	uint8 *buffer;
805	bool locked = false;
806
807	// first, check if we are matching for a live query and use that value
808	if (attributeName != NULL && !strcmp(fAttribute, attributeName)) {
809		if (key == NULL) {
810			if (type == B_STRING_TYPE)
811				return MatchEmptyString();
812
813			return NO_MATCH;
814		}
815		buffer = const_cast<uint8 *>(key);
816	} else if (!strcmp(fAttribute, "name")) {
817		// we need to lock before accessing Inode::Name()
818		inode->SmallDataLock().Lock();
819		locked = true;
820
821		// if not, check for "fake" attributes, "name", "size", "last_modified",
822		buffer = (uint8 *)inode->Name();
823		if (buffer == NULL) {
824			inode->SmallDataLock().Unlock();
825			return B_ERROR;
826		}
827
828		type = B_STRING_TYPE;
829		size = strlen((const char *)buffer);
830	} else if (!strcmp(fAttribute,"size")) {
831		buffer = (uint8 *)&inode->Node()->data.size;
832		type = B_INT64_TYPE;
833	} else if (!strcmp(fAttribute,"last_modified")) {
834		buffer = (uint8 *)&inode->Node()->last_modified_time;
835		type = B_INT64_TYPE;
836	} else {
837		// then for attributes in the small_data section, and finally for the
838		// real attributes
839		Inode *attribute;
840
841		inode->SmallDataLock().Lock();
842		small_data *smallData = inode->FindSmallData(fAttribute);
843		if (smallData != NULL) {
844			buffer = smallData->Data();
845			type = smallData->type;
846			size = smallData->data_size;
847			locked = true;
848		} else {
849			// needed to unlock the small_data section as fast as possible
850			inode->SmallDataLock().Unlock();
851
852			if (inode->GetAttribute(fAttribute, &attribute) == B_OK) {
853				buffer = (uint8 *)&value;
854				type = attribute->Node()->type;
855				size = attribute->Size();
856
857				if (size > INODE_FILE_NAME_LENGTH)
858					size = INODE_FILE_NAME_LENGTH;
859
860				if (attribute->ReadAt(0, buffer, &size) < B_OK) {
861					inode->ReleaseAttribute(attribute);
862					return B_IO_ERROR;
863				}
864				inode->ReleaseAttribute(attribute);
865			} else
866				return MatchEmptyString();
867		}
868	}
869	// prepare own value for use, if it is possible to convert it
870	status_t status = ConvertValue(type);
871	if (status == B_OK)
872		status = CompareTo(buffer, size) ? MATCH_OK : NO_MATCH;
873
874	if (locked)
875		inode->SmallDataLock().Unlock();
876
877	RETURN_ERROR(status);
878}
879
880
881void
882Equation::CalculateScore(Index &index)
883{
884	// As always, these values could be tuned and refined.
885	// And the code could also need some real world testing :-)
886
887	// do we have to operate on a "foreign" index?
888	if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) {
889		fScore = 0;
890		return;
891	}
892
893	// if we have a pattern, how much does it help our search?
894	if (fIsPattern)
895		fScore = getFirstPatternSymbol(fString) << 3;
896	else {
897		// Score by operator
898		if (fOp == OP_EQUAL)
899			// higher than pattern="255 chars+*"
900			fScore = 2048;
901		else
902			// the pattern search is regarded cheaper when you have at
903			// least one character to set your index to
904			fScore = 5;
905	}
906
907	// take index size into account (1024 is the current node size
908	// in our B+trees)
909	// 2048 * 2048 == 4194304 is the maximum score (for an empty
910	// tree, since the header + 1 node are already 2048 bytes)
911	fScore = fScore * ((2048 * 1024LL) / index.Node()->Size());
912}
913
914
915status_t
916Equation::PrepareQuery(Volume */*volume*/, Index &index, TreeIterator **iterator, bool queryNonIndexed)
917{
918	status_t status = index.SetTo(fAttribute);
919
920	// if we should query attributes without an index, we can just proceed here
921	if (status < B_OK && !queryNonIndexed)
922		return B_ENTRY_NOT_FOUND;
923
924	type_code type;
925
926	// special case for OP_UNEQUAL - it will always operate through the whole index
927	// but we need the call to the original index to get the correct type
928	if (status < B_OK || fOp == OP_UNEQUAL) {
929		// Try to get an index that holds all files (name)
930		// Also sets the default type for all attributes without index
931		// to string.
932		type = status < B_OK ? B_STRING_TYPE : index.Type();
933
934		if (index.SetTo("name") < B_OK)
935			return B_ENTRY_NOT_FOUND;
936
937		fHasIndex = false;
938	} else {
939		fHasIndex = true;
940		type = index.Type();
941	}
942
943	if (ConvertValue(type) < B_OK)
944		return B_BAD_VALUE;
945
946	BPlusTree *tree;
947	if (index.Node()->GetTree(&tree) < B_OK)
948		return B_ERROR;
949
950	*iterator = new TreeIterator(tree);
951	if (*iterator == NULL)
952		return B_NO_MEMORY;
953
954	if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL
955		|| fIsPattern)
956		&& fHasIndex) {
957		// set iterator to the exact position
958
959		int32 keySize = index.KeySize();
960
961		// at this point, fIsPattern is only true if it's a string type, and fOp
962		// is either OP_EQUAL or OP_UNEQUAL
963		if (fIsPattern) {
964			// let's see if we can use the beginning of the key for positioning
965			// the iterator and adjust the key size; if not, just leave the
966			// iterator at the start and return success
967			keySize = getFirstPatternSymbol(fString);
968			if (keySize <= 0)
969				return B_OK;
970		}
971
972		if (keySize == 0) {
973			// B_STRING_TYPE doesn't have a fixed length, so it was set
974			// to 0 before - we compute the correct value here
975			if (fType == B_STRING_TYPE) {
976				keySize = strlen(fValue.String);
977
978				// The empty string is a special case - we normally don't check
979				// for the trailing null byte, in the case for the empty string
980				// we do it explicitly, because there can't be keys in the B+tree
981				// with a length of zero
982				if (keySize == 0)
983					keySize = 1;
984			} else
985				RETURN_ERROR(B_ENTRY_NOT_FOUND);
986		}
987
988		if (fIsSpecialTime) {
989			// we have to find the first matching shifted value
990			off_t value = fValue.Int64 << INODE_TIME_SHIFT;
991			status = (*iterator)->Find((uint8 *)&value, keySize);
992			if (status == B_ENTRY_NOT_FOUND)
993				return B_OK;
994		} else {
995			status = (*iterator)->Find(Value(), keySize);
996			if (fOp == OP_EQUAL && !fIsPattern)
997				return status;
998			else if (status == B_ENTRY_NOT_FOUND
999				&& (fIsPattern || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL))
1000				return B_OK;
1001		}
1002
1003		RETURN_ERROR(status);
1004	}
1005
1006	return B_OK;
1007}
1008
1009
1010status_t
1011Equation::GetNextMatching(Volume *volume, TreeIterator *iterator,
1012	struct dirent *dirent, size_t bufferSize)
1013{
1014	while (true) {
1015		union value indexValue;
1016		uint16 keyLength;
1017		uint16 duplicate;
1018		off_t offset;
1019
1020		status_t status = iterator->GetNextEntry(&indexValue, &keyLength,
1021			(uint16)sizeof(indexValue), &offset, &duplicate);
1022		if (status < B_OK)
1023			return status;
1024
1025		// only compare against the index entry when this is the correct
1026		// index for the equation
1027		if (fHasIndex && duplicate < 2 && !CompareTo((uint8 *)&indexValue, keyLength)) {
1028			// They aren't equal? let the operation decide what to do
1029			// Since we always start at the beginning of the index (or the correct
1030			// position), only some needs to be stopped if the entry doesn't fit.
1031			if (fOp == OP_LESS_THAN
1032				|| fOp == OP_LESS_THAN_OR_EQUAL
1033				|| (fOp == OP_EQUAL && !fIsPattern))
1034				return B_ENTRY_NOT_FOUND;
1035
1036			if (duplicate > 0)
1037				iterator->SkipDuplicates();
1038			continue;
1039		}
1040
1041		Vnode vnode(volume, offset);
1042		Inode *inode;
1043		if ((status = vnode.Get(&inode)) != B_OK) {
1044			REPORT_ERROR(status);
1045			FATAL(("could not get inode %Ld in index \"%s\"!\n", offset, fAttribute));
1046			// try with next
1047			continue;
1048		}
1049
1050		// ToDo: check user permissions here - but which one?!
1051		// we could filter out all those where we don't have
1052		// read access... (we should check for every parent
1053		// directory if the X_OK is allowed)
1054		// Although it's quite expensive to open all parents,
1055		// it's likely that the application that runs the
1056		// query will do something similar (and we don't have
1057		// to do it for root, either).
1058
1059		// go up in the tree until a &&-operator is found, and check if the
1060		// inode matches with the rest of the expression - we don't have to
1061		// check ||-operators for that
1062		Term *term = this;
1063		status = MATCH_OK;
1064
1065		if (!fHasIndex)
1066			status = Match(inode);
1067
1068		while (term != NULL && status == MATCH_OK) {
1069			Operator *parent = (Operator *)term->Parent();
1070			if (parent == NULL)
1071				break;
1072
1073			if (parent->Op() == OP_AND) {
1074				// choose the other child of the parent
1075				Term *other = parent->Right();
1076				if (other == term)
1077					other = parent->Left();
1078
1079				if (other == NULL) {
1080					FATAL(("&&-operator has only one child... (parent = %p)\n", parent));
1081					break;
1082				}
1083				status = other->Match(inode);
1084				if (status < 0) {
1085					REPORT_ERROR(status);
1086					status = NO_MATCH;
1087				}
1088			}
1089			term = (Term *)parent;
1090		}
1091
1092		if (status == MATCH_OK) {
1093			dirent->d_dev = volume->ID();
1094			dirent->d_ino = offset;
1095			dirent->d_pdev = volume->ID();
1096			dirent->d_pino = volume->ToVnode(inode->Parent());
1097
1098			if (inode->GetName(dirent->d_name) < B_OK)
1099				FATAL(("inode %Ld in query has no name!\n", inode->BlockNumber()));
1100
1101#ifdef KEEP_WRONG_DIRENT_RECLEN
1102			// ToDo: The available file systems in BeOS apparently don't set the
1103			// correct d_reclen - we are copying that behaviour if requested, but
1104			// if it doesn't break compatibility, we will remove it.
1105			dirent->d_reclen = strlen(dirent->d_name);
1106#else
1107			dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name);
1108#endif
1109		}
1110
1111		if (status == MATCH_OK)
1112			return B_OK;
1113	}
1114	RETURN_ERROR(B_ERROR);
1115}
1116
1117
1118//	#pragma mark -
1119
1120
1121Operator::Operator(Term *left, int8 op, Term *right)
1122	: Term(op),
1123	fLeft(left),
1124	fRight(right)
1125{
1126	if (left)
1127		left->SetParent(this);
1128	if (right)
1129		right->SetParent(this);
1130}
1131
1132
1133Operator::~Operator()
1134{
1135	delete fLeft;
1136	delete fRight;
1137}
1138
1139
1140status_t
1141Operator::Match(Inode *inode, const char *attribute, int32 type, const uint8 *key, size_t size)
1142{
1143	if (fOp == OP_AND) {
1144		status_t status = fLeft->Match(inode, attribute, type, key, size);
1145		if (status != MATCH_OK)
1146			return status;
1147
1148		return fRight->Match(inode, attribute, type, key, size);
1149	} else {
1150		// choose the term with the better score for OP_OR
1151		if (fRight->Score() > fLeft->Score()) {
1152			status_t status = fRight->Match(inode, attribute, type, key, size);
1153			if (status != NO_MATCH)
1154				return status;
1155		}
1156		return fLeft->Match(inode, attribute, type, key, size);
1157	}
1158}
1159
1160
1161void
1162Operator::Complement()
1163{
1164	if (fOp == OP_AND)
1165		fOp = OP_OR;
1166	else
1167		fOp = OP_AND;
1168
1169	fLeft->Complement();
1170	fRight->Complement();
1171}
1172
1173
1174void
1175Operator::CalculateScore(Index &index)
1176{
1177	fLeft->CalculateScore(index);
1178	fRight->CalculateScore(index);
1179}
1180
1181
1182int32
1183Operator::Score() const
1184{
1185	if (fOp == OP_AND) {
1186		// return the one with the better score
1187		if (fRight->Score() > fLeft->Score())
1188			return fRight->Score();
1189
1190		return fLeft->Score();
1191	}
1192
1193	// for OP_OR, be honest, and return the one with the worse score
1194	if (fRight->Score() < fLeft->Score())
1195		return fRight->Score();
1196
1197	return fLeft->Score();
1198}
1199
1200
1201status_t
1202Operator::InitCheck()
1203{
1204	if (fOp != OP_AND && fOp != OP_OR
1205		|| fLeft == NULL || fLeft->InitCheck() < B_OK
1206		|| fRight == NULL || fRight->InitCheck() < B_OK)
1207		return B_ERROR;
1208
1209	return B_OK;
1210}
1211
1212
1213#if 0
1214Term *
1215Operator::Copy() const
1216{
1217	if (fEquation != NULL) {
1218		Equation *equation = new Equation(*fEquation);
1219		if (equation == NULL)
1220			return NULL;
1221
1222		Term *term = new Term(equation);
1223		if (term == NULL)
1224			delete equation;
1225
1226		return term;
1227	}
1228
1229	Term *left = NULL, *right = NULL;
1230
1231	if (fLeft != NULL && (left = fLeft->Copy()) == NULL)
1232		return NULL;
1233	if (fRight != NULL && (right = fRight->Copy()) == NULL) {
1234		delete left;
1235		return NULL;
1236	}
1237
1238	Term *term = new Term(left,fOp,right);
1239	if (term == NULL) {
1240		delete left;
1241		delete right;
1242		return NULL;
1243	}
1244	return term;
1245}
1246#endif
1247
1248
1249//	#pragma mark -
1250
1251#ifdef DEBUG
1252void
1253Operator::PrintToStream()
1254{
1255	D(__out("( "));
1256	if (fLeft != NULL)
1257		fLeft->PrintToStream();
1258
1259	char *op;
1260	switch (fOp) {
1261		case OP_OR: op = "OR"; break;
1262		case OP_AND: op = "AND"; break;
1263		default: op = "?"; break;
1264	}
1265	D(__out(" %s ",op));
1266
1267	if (fRight != NULL)
1268		fRight->PrintToStream();
1269
1270	D(__out(" )"));
1271}
1272
1273
1274void
1275Equation::PrintToStream()
1276{
1277	char *symbol = "???";
1278	switch (fOp) {
1279		case OP_EQUAL: symbol = "=="; break;
1280		case OP_UNEQUAL: symbol = "!="; break;
1281		case OP_GREATER_THAN: symbol = ">"; break;
1282		case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break;
1283		case OP_LESS_THAN: symbol = "<"; break;
1284		case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break;
1285	}
1286	D(__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString));
1287}
1288
1289#endif	/* DEBUG */
1290
1291//	#pragma mark -
1292
1293
1294Expression::Expression(char *expr)
1295{
1296	if (expr == NULL)
1297		return;
1298
1299	fTerm = ParseOr(&expr);
1300	if (fTerm != NULL && fTerm->InitCheck() < B_OK) {
1301		FATAL(("Corrupt tree in expression!\n"));
1302		delete fTerm;
1303		fTerm = NULL;
1304	}
1305	D(if (fTerm != NULL) {
1306		fTerm->PrintToStream();
1307		D(__out("\n"));
1308		if (*expr != '\0')
1309			PRINT(("Unexpected end of string: \"%s\"!\n", expr));
1310	});
1311	fPosition = expr;
1312}
1313
1314
1315Expression::~Expression()
1316{
1317	delete fTerm;
1318}
1319
1320
1321Term *
1322Expression::ParseEquation(char **expr)
1323{
1324	skipWhitespace(expr);
1325
1326	bool _not = false;
1327	if (**expr == '!') {
1328		skipWhitespace(expr, 1);
1329		if (**expr != '(')
1330			return NULL;
1331
1332		_not = true;
1333	}
1334
1335	if (**expr == ')') {
1336		// shouldn't be handled here
1337		return NULL;
1338	} else if (**expr == '(') {
1339		skipWhitespace(expr, 1);
1340
1341		Term *term = ParseOr(expr);
1342
1343		skipWhitespace(expr);
1344
1345		if (**expr != ')') {
1346			delete term;
1347			return NULL;
1348		}
1349
1350		// If the term is negated, we just complement the tree, to get
1351		// rid of the not, a.k.a. DeMorgan's Law.
1352		if (_not)
1353			term->Complement();
1354
1355		skipWhitespace(expr, 1);
1356
1357		return term;
1358	}
1359
1360	Equation *equation = new Equation(expr);
1361	if (equation == NULL || equation->InitCheck() < B_OK) {
1362		delete equation;
1363		return NULL;
1364	}
1365	return equation;
1366}
1367
1368
1369Term *
1370Expression::ParseAnd(char **expr)
1371{
1372	Term *left = ParseEquation(expr);
1373	if (left == NULL)
1374		return NULL;
1375
1376	while (IsOperator(expr,'&')) {
1377		Term *right = ParseAnd(expr);
1378		Term *newParent = NULL;
1379
1380		if (right == NULL || (newParent = new Operator(left, OP_AND, right)) == NULL) {
1381			delete left;
1382			delete right;
1383
1384			return NULL;
1385		}
1386		left = newParent;
1387	}
1388
1389	return left;
1390}
1391
1392
1393Term *
1394Expression::ParseOr(char **expr)
1395{
1396	Term *left = ParseAnd(expr);
1397	if (left == NULL)
1398		return NULL;
1399
1400	while (IsOperator(expr,'|')) {
1401		Term *right = ParseAnd(expr);
1402		Term *newParent = NULL;
1403
1404		if (right == NULL || (newParent = new Operator(left, OP_OR, right)) == NULL) {
1405			delete left;
1406			delete right;
1407
1408			return NULL;
1409		}
1410		left = newParent;
1411	}
1412
1413	return left;
1414}
1415
1416
1417bool
1418Expression::IsOperator(char **expr, char op)
1419{
1420	char *string = *expr;
1421
1422	if (*string == op && *(string + 1) == op) {
1423		*expr += 2;
1424		return true;
1425	}
1426	return false;
1427}
1428
1429
1430status_t
1431Expression::InitCheck()
1432{
1433	if (fTerm == NULL)
1434		return B_BAD_VALUE;
1435
1436	return B_OK;
1437}
1438
1439
1440//	#pragma mark -
1441
1442
1443Query::Query(Volume *volume, Expression *expression, uint32 flags)
1444	:
1445	fVolume(volume),
1446	fExpression(expression),
1447	fCurrent(NULL),
1448	fIterator(NULL),
1449	fIndex(volume),
1450	fFlags(flags),
1451	fPort(-1)
1452{
1453	// if the expression has a valid root pointer, the whole tree has
1454	// already passed the sanity check, so that we don't have to check
1455	// every pointer
1456	if (volume == NULL || expression == NULL || expression->Root() == NULL)
1457		return;
1458
1459	// create index on the stack and delete it afterwards
1460	fExpression->Root()->CalculateScore(fIndex);
1461	fIndex.Unset();
1462
1463	Stack<Term *> stack;
1464	stack.Push(fExpression->Root());
1465
1466	Term *term;
1467	while (stack.Pop(&term)) {
1468		if (term->Op() < OP_EQUATION) {
1469			Operator *op = (Operator *)term;
1470
1471			if (op->Op() == OP_OR) {
1472				stack.Push(op->Left());
1473				stack.Push(op->Right());
1474			} else {
1475				// For OP_AND, we can use the scoring system to decide which path to add
1476				if (op->Right()->Score() > op->Left()->Score())
1477					stack.Push(op->Right());
1478				else
1479					stack.Push(op->Left());
1480			}
1481		} else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK)
1482			FATAL(("Unknown term on stack or stack error"));
1483	}
1484
1485	if (fFlags & B_LIVE_QUERY)
1486		volume->AddQuery(this);
1487}
1488
1489
1490Query::~Query()
1491{
1492	if (fFlags & B_LIVE_QUERY)
1493		fVolume->RemoveQuery(this);
1494}
1495
1496
1497status_t
1498Query::GetNextEntry(struct dirent *dirent, size_t size)
1499{
1500	// If we don't have an equation to use yet/anymore, get a new one
1501	// from the stack
1502	while (true) {
1503		if (fIterator == NULL) {
1504			if (!fStack.Pop(&fCurrent)
1505				|| fCurrent == NULL
1506				|| fCurrent->PrepareQuery(fVolume, fIndex, &fIterator,
1507						false/*fFlags & B_QUERY_NON_INDEXED*/) < B_OK)
1508				return B_ENTRY_NOT_FOUND;
1509		}
1510		if (fCurrent == NULL)
1511			RETURN_ERROR(B_ERROR);
1512
1513		status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent, size);
1514		if (status < B_OK) {
1515			delete fIterator;
1516			fIterator = NULL;
1517			fCurrent = NULL;
1518		} else {
1519			// only return if we have another entry
1520			return B_OK;
1521		}
1522	}
1523}
1524
1525
1526void
1527Query::SetLiveMode(port_id port, int32 token)
1528{
1529	fPort = port;
1530	fToken = token;
1531
1532	if ((fFlags & B_LIVE_QUERY) == 0) {
1533		// you can decide at any point to set the live query mode,
1534		// only live queries have to be updated by attribute changes
1535		fFlags |= B_LIVE_QUERY;
1536		fVolume->AddQuery(this);
1537	}
1538}
1539
1540
1541void
1542Query::LiveUpdate(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey,
1543	size_t oldLength, const uint8 *newKey, size_t newLength)
1544{
1545	if (fPort < 0 || fExpression == NULL || attribute == NULL)
1546		return;
1547
1548	// ToDo: check if the attribute is part of the query at all...
1549
1550	status_t oldStatus = fExpression->Root()->Match(inode, attribute, type, oldKey, oldLength);
1551	status_t newStatus = fExpression->Root()->Match(inode, attribute, type, newKey, newLength);
1552
1553	int32 op;
1554	if (oldStatus == MATCH_OK && newStatus == MATCH_OK) {
1555		// only send out a notification if the name was changed
1556		if (oldKey == NULL || strcmp(attribute, "name"))
1557			return;
1558
1559		send_notification(fPort, fToken, B_QUERY_UPDATE, B_ENTRY_REMOVED, fVolume->ID(), 0,
1560			fVolume->ToVnode(inode->Parent()), 0, inode->ID(), (const char *)oldKey);
1561		op = B_ENTRY_CREATED;
1562	} else if (oldStatus != MATCH_OK && newStatus != MATCH_OK) {
1563		// nothing has changed
1564		return;
1565	} else if (oldStatus == MATCH_OK && newStatus != MATCH_OK)
1566		op = B_ENTRY_REMOVED;
1567	else
1568		op = B_ENTRY_CREATED;
1569
1570	// if "value" is NULL, send_notification() crashes...
1571	const char *value = (const char *)newKey;
1572	if (type != B_STRING_TYPE || value == NULL)
1573		value = "";
1574
1575	send_notification(fPort, fToken, B_QUERY_UPDATE, op, fVolume->ID(), 0,
1576		fVolume->ToVnode(inode->Parent()), 0, inode->ID(), value);
1577}
1578
1579