1/*
2 * Copyright 2001-2014, Axel Dörfler, axeld@pinc-software.de.
3 * Copyright 2010, Clemens Zeidler <haiku@clemens-zeidler.de>
4 * Copyright 2011, Ingo Weinhold, ingo_weinhold@gmx.de.
5 * This file may be used under the terms of the MIT License.
6 */
7
8
9// This needs to be the first include because of the fs shell API wrapper
10#include <algorithm>
11
12#include <file_systems/QueryParserUtils.h>
13
14#ifndef FS_SHELL
15#	include <string.h>
16
17#	include <TypeConstants.h>
18#endif
19
20
21namespace QueryParser {
22
23
24template<typename Key>
25static inline int
26compare_integral(const Key& a, const Key& b)
27{
28	if (a < b)
29		return -1;
30	if (a > b)
31		return 1;
32	return 0;
33}
34
35
36// #pragma mark -
37
38
39void
40skipWhitespace(char** expr, int32 skip)
41{
42	char* string = (*expr) + skip;
43	while (*string == ' ' || *string == '\t') string++;
44	*expr = string;
45}
46
47
48void
49skipWhitespaceReverse(char** expr, char* stop)
50{
51	char* string = *expr;
52	while (string > stop && (*string == ' ' || *string == '\t'))
53		string--;
54	*expr = string;
55}
56
57
58int
59compareKeys(uint32 type, const void* key1, size_t length1, const void* key2,
60	size_t length2)
61{
62	switch (type) {
63		case B_INT32_TYPE:
64			return compare_integral(*(int32*)key1, *(int32*)key2);
65		case B_UINT32_TYPE:
66			return compare_integral(*(uint32*)key1, *(uint32*)key2);
67		case B_INT64_TYPE:
68			return compare_integral(*(int64*)key1, *(int64*)key2);
69		case B_UINT64_TYPE:
70			return compare_integral(*(uint64*)key1, *(uint64*)key2);
71		case B_FLOAT_TYPE:
72			return compare_integral(*(float*)key1, *(float*)key2);
73		case B_DOUBLE_TYPE:
74			return compare_integral(*(double*)key1, *(double*)key2);
75		case B_STRING_TYPE:
76		case B_MIME_STRING_TYPE:
77		{
78			int result = strncmp((const char*)key1, (const char*)key2,
79				std::min(length1, length2));
80			if (result == 0) {
81				result = compare_integral(strnlen((const char*)key1, length1),
82					strnlen((const char*)key2, length2));
83			}
84			return result;
85		}
86	}
87	return -1;
88}
89
90
91//	#pragma mark -
92
93
94uint32
95utf8ToUnicode(char** string)
96{
97	uint8* bytes = (uint8*)*string;
98	int32 length;
99	uint8 mask = 0x1f;
100
101	switch (bytes[0] & 0xf0) {
102		case 0xc0:
103		case 0xd0:
104			length = 2;
105			break;
106		case 0xe0:
107			length = 3;
108			break;
109		case 0xf0:
110			mask = 0x0f;
111			length = 4;
112			break;
113		default:
114			// valid 1-byte character
115			// and invalid characters
116			(*string)++;
117			return bytes[0];
118	}
119	uint32 c = bytes[0] & mask;
120	int32 i = 1;
121	for (; i < length && (bytes[i] & 0x80) > 0; i++)
122		c = (c << 6) | (bytes[i] & 0x3f);
123
124	if (i < length) {
125		// invalid character
126		(*string)++;
127		return (uint32)bytes[0];
128	}
129	*string += length;
130	return c;
131}
132
133
134int32
135getFirstPatternSymbol(char* string)
136{
137	char c;
138
139	for (int32 index = 0; (c = *string++); index++) {
140		if (c == '*' || c == '?' || c == '[')
141			return index;
142	}
143	return -1;
144}
145
146
147status_t
148isValidPattern(char* pattern)
149{
150	while (*pattern) {
151		switch (*pattern++) {
152			case '\\':
153				// the escape character must not be at the end of the pattern
154				if (!*pattern++)
155					return PATTERN_INVALID_ESCAPE;
156				break;
157
158			case '[':
159				if (pattern[0] == ']' || !pattern[0])
160					return PATTERN_INVALID_SET;
161
162				while (*pattern != ']') {
163					if (*pattern == '\\' && !*++pattern)
164						return PATTERN_INVALID_ESCAPE;
165
166					if (!*pattern)
167						return PATTERN_INVALID_SET;
168
169					if (pattern[0] == '-' && pattern[1] == '-')
170						return PATTERN_INVALID_RANGE;
171
172					pattern++;
173				}
174				break;
175		}
176	}
177	return B_OK;
178}
179
180
181/*!	Matches the string against the given wildcard pattern.
182	Returns either MATCH_OK, or NO_MATCH when everything went fine, or
183	values < 0 (see enum at the top of Query.cpp) if an error occurs.
184*/
185status_t
186matchString(char* pattern, char* string)
187{
188	while (*pattern) {
189		// end of string == valid end of pattern?
190		if (!string[0]) {
191			while (pattern[0] == '*')
192				pattern++;
193			return !pattern[0] ? MATCH_OK : NO_MATCH;
194		}
195
196		switch (*pattern++) {
197			case '?':
198			{
199				// match exactly one UTF-8 character; we are
200				// not interested in the result
201				utf8ToUnicode(&string);
202				break;
203			}
204
205			case '*':
206			{
207				// compact pattern
208				while (true) {
209					if (pattern[0] == '?') {
210						if (!*++string)
211							return NO_MATCH;
212					} else if (pattern[0] != '*')
213						break;
214
215					pattern++;
216				}
217
218				// if the pattern is done, we have matched the string
219				if (!pattern[0])
220					return MATCH_OK;
221
222				while(true) {
223					// we have removed all occurences of '*' and '?'
224					if (pattern[0] == string[0]
225						|| pattern[0] == '['
226						|| pattern[0] == '\\') {
227						status_t status = matchString(pattern, string);
228						if (status < B_OK || status == MATCH_OK)
229							return status;
230					}
231
232					// we could be nice here and just jump to the next
233					// UTF-8 character - but we wouldn't gain that much
234					// and it'd be slower (since we're checking for
235					// equality before entering the recursion)
236					if (!*++string)
237						return NO_MATCH;
238				}
239				break;
240			}
241
242			case '[':
243			{
244				bool invert = false;
245				if (pattern[0] == '^' || pattern[0] == '!') {
246					invert = true;
247					pattern++;
248				}
249
250				if (!pattern[0] || pattern[0] == ']')
251					return MATCH_BAD_PATTERN;
252
253				uint32 c = utf8ToUnicode(&string);
254				bool matched = false;
255
256				while (pattern[0] != ']') {
257					if (!pattern[0])
258						return MATCH_BAD_PATTERN;
259
260					if (pattern[0] == '\\')
261						pattern++;
262
263					uint32 first = utf8ToUnicode(&pattern);
264
265					// Does this character match, or is this a range?
266					if (first == c) {
267						matched = true;
268						break;
269					} else if (pattern[0] == '-' && pattern[1] != ']'
270							&& pattern[1]) {
271						pattern++;
272
273						if (pattern[0] == '\\') {
274							pattern++;
275							if (!pattern[0])
276								return MATCH_BAD_PATTERN;
277						}
278						uint32 last = utf8ToUnicode(&pattern);
279
280						if (c >= first && c <= last) {
281							matched = true;
282							break;
283						}
284					}
285				}
286
287				if (invert)
288					matched = !matched;
289
290				if (matched) {
291					while (pattern[0] != ']') {
292						if (!pattern[0])
293							return MATCH_BAD_PATTERN;
294						pattern++;
295					}
296					pattern++;
297					break;
298				}
299				return NO_MATCH;
300			}
301
302            case '\\':
303				if (!pattern[0])
304					return MATCH_BAD_PATTERN;
305				// supposed to fall through
306			default:
307				if (pattern[-1] != string[0])
308					return NO_MATCH;
309				string++;
310				break;
311		}
312	}
313
314	if (string[0])
315		return NO_MATCH;
316
317	return MATCH_OK;
318}
319
320
321}	// namespace QueryParser
322