1// MimeSnifferTest.cpp
2
3#include "MimeSnifferTest.h"
4
5#include <cppunit/Test.h>
6#include <cppunit/TestSuite.h>
7#include <cppunit/TestCaller.h>
8#include <sniffer/Rule.h>
9#include <sniffer/Parser.h>
10#include <DataIO.h>
11#include <Mime.h>
12#include <String.h>		// BString
13#include <TestUtils.h>
14
15#include <stdio.h>
16#include <string>
17
18using namespace BPrivate::Storage::Sniffer;
19
20// Suite
21CppUnit::Test*
22MimeSnifferTest::Suite() {
23	CppUnit::TestSuite *suite = new CppUnit::TestSuite();
24	typedef CppUnit::TestCaller<MimeSnifferTest> TC;
25
26	suite->addTest( new TC("Mime Sniffer::Scanner Test",
27						   &MimeSnifferTest::ScannerTest) );
28	suite->addTest( new TC("Mime Sniffer::Parser Test",
29						   &MimeSnifferTest::ParserTest) );
30	suite->addTest( new TC("Mime Sniffer::Sniffer Test",
31						   &MimeSnifferTest::SnifferTest) );
32
33	return suite;
34}
35
36// Scanner Test
37void
38MimeSnifferTest::ScannerTest() {
39#if TEST_R5
40	Outputf("(no tests actually performed for R5 version)\n");
41#else	// TEST_R5
42
43
44	// tests:
45	// Internal TokenStream and CharStream classes
46
47// Define some useful macros for dynamically allocating
48// various Token classes
49#define T(type) (new Token(type, -1))
50#define S(str) (new StringToken(str, -1))
51#define I(val) (new IntToken(val, -1))
52#define F(val) (new FloatToken(val, -1))
53
54	struct test_case {
55		const char *rule;
56		int tokenCount;
57		Token *tokens[256];
58	} testCases[] = {
59		{ "'Hey'[]:", 4,
60			{	S("Hey"),
61				T(LeftBracket),
62				T(RightBracket),
63				T(Colon)
64			}
65		},
66		{ "1", 1, { I(1) } },
67		{ "1.0", 1, { F(1.0) } },
68
69		{ "1.0 (\"ABCD\")", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } },
70		{ "1.0 ('ABCD')", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } },
71		{ "  1.0 ('ABCD')  ", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } },
72		{ "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", 11,
73			{	F(0.8),
74				T(LeftBracket),
75				I(0),
76				T(Colon),
77				I(3),
78				T(RightBracket),
79				T(LeftParen),
80				S("ABCDEFG"),
81				T(Divider),
82				S("abcdefghij"),
83				T(RightParen)
84			}
85		},
86		{ "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", 17,
87			{	F(0.5),
88				T(LeftParen),
89				T(LeftBracket),
90				I(10),
91				T(RightBracket),
92				S("ABCD"),
93				T(Divider),
94				T(LeftBracket),
95				I(17),
96				T(RightBracket),
97				S("abcd"),
98				T(Divider),
99				T(LeftBracket),
100				I(13),
101				T(RightBracket),
102				S("EFGH"),
103				T(RightParen)
104			}
105		},
106		{ "0.5  \n   [0:3]  \t ('ABCD' \n | 'abcd' | 'EFGH')", 13,
107			{	F(0.5),
108				T(LeftBracket),
109				I(0),
110				T(Colon),
111				I(3),
112				T(RightBracket),
113				T(LeftParen),
114				S("ABCD"),
115				T(Divider),
116				S("abcd"),
117				T(Divider),
118				S("EFGH"),
119				T(RightParen)
120			}
121		},
122		{ "0.8 [  0  :  3  ] ('ABCDEFG' | 'abcdefghij')", 11,
123			{	F(0.8),
124				T(LeftBracket),
125				I(0),
126				T(Colon),
127				I(3),
128				T(RightBracket),
129				T(LeftParen),
130				S("ABCDEFG"),
131				T(Divider),
132				S("abcdefghij"),
133				T(RightParen)
134			}
135		},
136		{ "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", 11,
137			{	F(0.8),
138				T(LeftBracket),
139				I(0),
140				T(Colon),
141				I(3),
142				T(RightBracket),
143				T(LeftParen),
144				S("ABCDEFG"),
145				T(Ampersand),
146				S("abcdefg"),
147				T(RightParen)
148			}
149		},
150		{ "1.0 ('ABCD') | ('EFGH')", 8,
151			{	F(1.0),
152				T(LeftParen),
153				S("ABCD"),
154				T(RightParen),
155				T(Divider),
156				T(LeftParen),
157				S("EFGH"),
158				T(RightParen)
159			}
160		},
161		{ "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", 18,
162			{	F(1.0),
163				T(LeftBracket),
164				I(0),
165				T(Colon),
166				I(3),
167				T(RightBracket),
168				T(LeftParen),
169				S("ABCD"),
170				T(RightParen),
171				T(Divider),
172				T(LeftBracket),
173				I(2),
174				T(Colon),
175				I(4),
176				T(RightBracket),
177				T(LeftParen),
178				S("EFGH"),
179				T(RightParen)
180			}
181		},
182		{ "0.8 [0:4] (\\077Mkj0x34 & 'abcdefgh')", 11,
183			{	F(0.8),
184				T(LeftBracket),
185				I(0),
186				T(Colon),
187				I(4),
188				T(RightBracket),
189				T(LeftParen),
190				S("\077Mkj0x34"),
191				T(Ampersand),
192				S("abcdefgh"),
193				T(RightParen)
194			}
195		},
196		{ "0.8 [0:4] (\\077Mkj\\x34 & 'abcdefgh')", 11,
197			{	F(0.8),
198				T(LeftBracket),
199				I(0),
200				T(Colon),
201				I(4),
202				T(RightBracket),
203				T(LeftParen),
204				S("\077Mkj\x34"),
205				T(Ampersand),
206				S("abcdefgh"),
207				T(RightParen)
208			}
209		},
210		{ "0.8 [0:3] (\\077034 & 'abcd')", 11,
211			{	F(0.8),
212				T(LeftBracket),
213				I(0),
214				T(Colon),
215				I(3),
216				T(RightBracket),
217				T(LeftParen),
218				S("\077034"),
219				T(Ampersand),
220				S("abcd"),
221				T(RightParen)
222			}
223		},
224		{ "0.8 [0:3] (\\077\\034 & 'ab')", 11,
225			{	F(0.8),
226				T(LeftBracket),
227				I(0),
228				T(Colon),
229				I(3),
230				T(RightBracket),
231				T(LeftParen),
232				S("\077\034"),
233				T(Ampersand),
234				S("ab"),
235				T(RightParen)
236			}
237		},
238		{ "0.8 [0:3] (\\77\\034 & 'ab')", 11,
239			{	F(0.8),
240				T(LeftBracket),
241				I(0),
242				T(Colon),
243				I(3),
244				T(RightBracket),
245				T(LeftParen),
246				S("\077\034"),
247				T(Ampersand),
248				S("ab"),
249				T(RightParen)
250			}
251		},
252		{ "0.8 [0:3] (\\7 & 'a')", 11,
253			{	F(0.8),
254				T(LeftBracket),
255				I(0),
256				T(Colon),
257				I(3),
258				T(RightBracket),
259				T(LeftParen),
260				S("\007"),
261				T(Ampersand),
262				S("a"),
263				T(RightParen)
264			}
265		},
266		{ "0.8 [0:3] (\"\\17\" & 'a')", 11,
267			{	F(0.8),
268				T(LeftBracket),
269				I(0),
270				T(Colon),
271				I(3),
272				T(RightBracket),
273				T(LeftParen),
274				S("\017"),
275				T(Ampersand),
276				S("a"),
277				T(RightParen)
278			}
279		},
280		{ "0.8 [0:3] ('\\17' & 'a')", 11,
281			{	F(0.8),
282				T(LeftBracket),
283				I(0),
284				T(Colon),
285				I(3),
286				T(RightBracket),
287				T(LeftParen),
288				S("\017"),
289				T(Ampersand),
290				S("a"),
291				T(RightParen)
292			}
293		},
294		{ "0.8 [0:3] (\\g & 'a')", 11,
295			{	F(0.8),
296				T(LeftBracket),
297				I(0),
298				T(Colon),
299				I(3),
300				T(RightBracket),
301				T(LeftParen),
302				S("g"),
303				T(Ampersand),
304				S("a"),
305				T(RightParen)
306			}
307		},
308		{ "0.8 [0:3] (\\g&\\b)", 11,
309			{	F(0.8),
310				T(LeftBracket),
311				I(0),
312				T(Colon),
313				I(3),
314				T(RightBracket),
315				T(LeftParen),
316				S("g"),
317				T(Ampersand),
318				S("\b"),
319				T(RightParen)
320			}
321		},
322		{ "0.8 [0:3] (\\g\\&b & 'abc')", 11,
323			{	F(0.8),
324				T(LeftBracket),
325				I(0),
326				T(Colon),
327				I(3),
328				T(RightBracket),
329				T(LeftParen),
330				S("g&b"),
331				T(Ampersand),
332				S("abc"),
333				T(RightParen)
334			}
335		},
336		{ "0.8 [0:3] (0x3457 & 'ab')", 11,
337			{	F(0.8),
338				T(LeftBracket),
339				I(0),
340				T(Colon),
341				I(3),
342				T(RightBracket),
343				T(LeftParen),
344				S("\x34\x57"),
345				T(Ampersand),
346				S("ab"),
347				T(RightParen)
348			}
349		},
350		{ "0.8 [0:3] (\\x34\\x57 & 'ab')", 11,
351			{	F(0.8),
352				T(LeftBracket),
353				I(0),
354				T(Colon),
355				I(3),
356				T(RightBracket),
357				T(LeftParen),
358				S("\x34\x57"),
359				T(Ampersand),
360				S("ab"),
361				T(RightParen)
362			}
363		},
364		{ "0.8 [0:3] (0xA4b7 & 'ab')", 11,
365			{	F(0.8),
366				T(LeftBracket),
367				I(0),
368				T(Colon),
369				I(3),
370				T(RightBracket),
371				T(LeftParen),
372				S("\xA4\xb7"),
373				T(Ampersand),
374				S("ab"),
375				T(RightParen)
376			}
377		},
378		{ "0.8 [0:3] (\\xA4\\xb7 & 'ab')", 11,
379			{	F(0.8),
380				T(LeftBracket),
381				I(0),
382				T(Colon),
383				I(3),
384				T(RightBracket),
385				T(LeftParen),
386				S("\xA4\xb7"),
387				T(Ampersand),
388				S("ab"),
389				T(RightParen)
390			}
391		},
392		{ "0.8 [0:3] (\"\\xA4\\xb7\" & 'ab')", 11,
393			{	F(0.8),
394				T(LeftBracket),
395				I(0),
396				T(Colon),
397				I(3),
398				T(RightBracket),
399				T(LeftParen),
400				S("\xA4\xb7"),
401				T(Ampersand),
402				S("ab"),
403				T(RightParen)
404			}
405		},
406		{ "0.8 [0:3] (\'\\xA4\\xb7\' & 'ab')", 11,
407			{	F(0.8),
408				T(LeftBracket),
409				I(0),
410				T(Colon),
411				I(3),
412				T(RightBracket),
413				T(LeftParen),
414				S("\xA4\xb7"),
415				T(Ampersand),
416				S("ab"),
417				T(RightParen)
418			}
419		},
420		{ "0.8 [0:3] ('ab\"' & 'abc')", 11,
421			{	F(0.8),
422				T(LeftBracket),
423				I(0),
424				T(Colon),
425				I(3),
426				T(RightBracket),
427				T(LeftParen),
428				S("ab\""),
429				T(Ampersand),
430				S("abc"),
431				T(RightParen)
432			}
433		},
434		{ "0.8 [0:3] (\"ab\\\"\" & 'abc')", 11,
435			{	F(0.8),
436				T(LeftBracket),
437				I(0),
438				T(Colon),
439				I(3),
440				T(RightBracket),
441				T(LeftParen),
442				S("ab\""),
443				T(Ampersand),
444				S("abc"),
445				T(RightParen)
446			}
447		},
448		{ "0.8 [0:3] (\"ab\\A\" & 'abc')", 11,
449			{	F(0.8),
450				T(LeftBracket),
451				I(0),
452				T(Colon),
453				I(3),
454				T(RightBracket),
455				T(LeftParen),
456				S("abA"),
457				T(Ampersand),
458				S("abc"),
459				T(RightParen)
460			}
461		},
462		{ "0.8 [0:3] (\"ab'\" & 'abc')", 11,
463			{	F(0.8),
464				T(LeftBracket),
465				I(0),
466				T(Colon),
467				I(3),
468				T(RightBracket),
469				T(LeftParen),
470				S("ab'"),
471				T(Ampersand),
472				S("abc"),
473				T(RightParen)
474			}
475		},
476		{ "0.8 [0:3] (\"ab\\\\\" & 'abc')", 11,
477			{	F(0.8),
478				T(LeftBracket),
479				I(0),
480				T(Colon),
481				I(3),
482				T(RightBracket),
483				T(LeftParen),
484				S("ab\\"),
485				T(Ampersand),
486				S("abc"),
487				T(RightParen)
488			}
489		},
490		{ "0.8 [-5:-3] (\"abc\" & 'abc')", 11,
491			{	F(0.8),
492				T(LeftBracket),
493				I(-5),
494				T(Colon),
495				I(-3),
496				T(RightBracket),
497				T(LeftParen),
498				S("abc"),
499				T(Ampersand),
500				S("abc"),
501				T(RightParen)
502			}
503		},
504		{ "0.8 [5:3] (\"abc\" & 'abc')", 11,
505			{	F(0.8),
506				T(LeftBracket),
507				I(5),
508				T(Colon),
509				I(3),
510				T(RightBracket),
511				T(LeftParen),
512				S("abc"),
513				T(Ampersand),
514				S("abc"),
515				T(RightParen)
516			}
517		},
518		{ "1.2 ('ABCD')", 4,
519			{	F(1.2),
520				T(LeftParen),
521				S("ABCD"),
522				T(RightParen)
523			}
524		},
525		{ ".2 ('ABCD')", 4,
526			{	F(0.2),
527				T(LeftParen),
528				S("ABCD"),
529				T(RightParen)
530			}
531		},
532		{ "0. ('ABCD')", 4,
533			{	F(0.0),
534				T(LeftParen),
535				S("ABCD"),
536				T(RightParen)
537			}
538		},
539		// Signed integers
540		{ "-1 ('ABCD')", 4,
541			{	I(-1),
542				T(LeftParen),
543				S("ABCD"),
544				T(RightParen)
545			}
546		},
547		{ "+1 ('ABCD')", 4,
548			{	I(1),
549				T(LeftParen),
550				S("ABCD"),
551				T(RightParen)
552			}
553		},
554		// Unsigned extended floats
555		{ "1E25 ('ABCD')", 4,
556			{	F(1e25),
557				T(LeftParen),
558				S("ABCD"),
559				T(RightParen)
560			}
561		},
562		{ "1e25 ('ABCD')", 4,
563			{	F(1e25),
564				T(LeftParen),
565				S("ABCD"),
566				T(RightParen)
567			}
568		},
569		{ "1E+25 ('ABCD')", 4,
570			{	F(1e25),
571				T(LeftParen),
572				S("ABCD"),
573				T(RightParen)
574			}
575		},
576		{ "1e+25 ('ABCD')", 4,
577			{	F(1e25),
578				T(LeftParen),
579				S("ABCD"),
580				T(RightParen)
581			}
582		},
583		{ "1E-25 ('ABCD')", 4,
584			{	F(1e-25),
585				T(LeftParen),
586				S("ABCD"),
587				T(RightParen)
588			}
589		},
590		{ "1e-25 ('ABCD')", 4,
591			{	F(1e-25),
592				T(LeftParen),
593				S("ABCD"),
594				T(RightParen)
595			}
596		},
597		// Positive signed extended floats
598		{ "+1E25 ('ABCD')", 4,
599			{	F(1e25),
600				T(LeftParen),
601				S("ABCD"),
602				T(RightParen)
603			}
604		},
605		{ "+1e25 ('ABCD')", 4,
606			{	F(1e25),
607				T(LeftParen),
608				S("ABCD"),
609				T(RightParen)
610			}
611		},
612		{ "+1E+25 ('ABCD')", 4,
613			{	F(1e25),
614				T(LeftParen),
615				S("ABCD"),
616				T(RightParen)
617			}
618		},
619		{ "+1e+25 ('ABCD')", 4,
620			{	F(1e25),
621				T(LeftParen),
622				S("ABCD"),
623				T(RightParen)
624			}
625		},
626		{ "+1E-25 ('ABCD')", 4,
627			{	F(1e-25),
628				T(LeftParen),
629				S("ABCD"),
630				T(RightParen)
631			}
632		},
633		{ "+1e-25 ('ABCD')", 4,
634			{	F(1e-25),
635				T(LeftParen),
636				S("ABCD"),
637				T(RightParen)
638			}
639		},
640		// Negative signed extended floats
641		{ "-1E25 ('ABCD')", 4,
642			{	F(-1e25),
643				T(LeftParen),
644				S("ABCD"),
645				T(RightParen)
646			}
647		},
648		{ "-1e25 ('ABCD')", 4,
649			{	F(-1e25),
650				T(LeftParen),
651				S("ABCD"),
652				T(RightParen)
653			}
654		},
655		{ "-1E+25 ('ABCD')", 4,
656			{	F(-1e25),
657				T(LeftParen),
658				S("ABCD"),
659				T(RightParen)
660			}
661		},
662		{ "-1e+25 ('ABCD')", 4,
663			{	F(-1e25),
664				T(LeftParen),
665				S("ABCD"),
666				T(RightParen)
667			}
668		},
669		{ "-1E-25 ('ABCD')", 4,
670			{	F(-1e-25),
671				T(LeftParen),
672				S("ABCD"),
673				T(RightParen)
674			}
675		},
676		{ "-1e-25 ('ABCD')", 4,
677			{	F(-1e-25),
678				T(LeftParen),
679				S("ABCD"),
680				T(RightParen)
681			}
682		},
683		// Miscellaneous extended floats
684		{ ".1E-25 ('ABCD')", 4,
685			{	F(0.1e-25),
686				T(LeftParen),
687				S("ABCD"),
688				T(RightParen)
689			}
690		},
691		{ "-.1e-25 ('ABCD')", 4,
692			{	F(-0.1e-25),
693				T(LeftParen),
694				S("ABCD"),
695				T(RightParen)
696			}
697		},
698		// Signed floats
699		{ "-1.0 ('ABCD')", 4,
700			{	F(-1.0),
701				T(LeftParen),
702				S("ABCD"),
703				T(RightParen)
704			}
705		},
706		{ "+1.0 ('ABCD')", 4,
707			{	F(1.0),
708				T(LeftParen),
709				S("ABCD"),
710				T(RightParen)
711			}
712		},
713		// The uber test
714		{ "0 -0 +0 1 -2 +3 0. -0. +0. 1. -2. +3. 0.0 -0.1 +0.2 1.0 -2.1 +3.2 "
715		  "0.e0 0.e-1 0.e+2 1.e1 2.e-2 3.e+3 -1.e1 -2.e-2 -3.e+3 +1.e1 +2.e-2 +3.e+3 "
716		  "0.012345 1.23456 ( ) [ ] | & : -i "
717		  " \"abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \\\" ' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 \" "
718		  " 'abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" \\' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 ' "
719		  " \\000abc_xyz123\"'\"'456 \\xA1a1 \\!\\?\\\\ "
720		  " 0x00 0x12 0xabCD 0xaBcD 0x0123456789aBcDeFfEdCbA", 50,
721		  	{	I(0), I(0), I(0), I(1), I(-2), I(3), F(0.0), F(0.0), F(0.0),
722		  			F(1.0), F(-2.0), F(3.0), F(0.0), F(-0.1), F(0.2), F(1.0), F(-2.1), F(3.2),
723		  		F(0.0), F(0.0e-1), F(0.0e2), F(1.0e1), F(2.0e-2), F(3.0e3),
724		  			F(-1.0e1), F(-2.0e-2), F(-3.0e3), F(1.0e1), F(2.0e-2), F(3.0e3),
725		  		F(0.012345), F(1.23456), T(LeftParen), T(RightParen), T(LeftBracket),
726		  			T(RightBracket), T(Divider), T(Ampersand), T(Colon), T(CaseInsensitiveFlag),
727		  		S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)),
728		  		S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)),
729		  		S(std::string("\000abc_xyz123\"'\"'456", 18)),
730		  		S("\241a1"),
731		  		S("!?\\"),
732		  		S(std::string("\x00", 1)), S("\x12"), S("\xAB\xCD"), S("\xAB\xCD"),
733		  			S("\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA")
734		  	}
735		},
736	};
737
738// Undefine our nasty macros
739#undef T(type)
740#undef S(str)
741#undef I(val)
742#undef F(val)
743
744	const int testCaseCount = sizeof(testCases) / sizeof(test_case);
745	for (int i = 0; i < testCaseCount; i++) {
746		NextSubTest();
747//		cout << endl << testCases[i].rule << endl;
748		TokenStream stream;
749		try {
750			stream.SetTo(testCases[i].rule);
751
752			CHK(stream.InitCheck() == B_OK);
753			for (int j = 0; j < testCases[i].tokenCount; j++) {
754				const Token *token = stream.Get();
755				CHK(token);
756/*
757				cout << tokenTypeToString(token->Type()) << endl;
758
759				if (token->Type() == CharacterString)
760					cout << " token1 == " << token->String() << endl;
761				if (testCases[i].tokens[j]->Type() == CharacterString)
762					cout << " token2 == " << (testCases[i].tokens[j])->String() << endl;
763
764				if (token->Type() == CharacterString)
765				{
766					const std::string &str = token->String();
767					printf("parser: ");
768					for (int i = 0; i < str.length(); i++)
769						printf("%x ", str[i]);
770					printf("\n");
771				}
772				if (testCases[i].tokens[j]->Type() == CharacterString)
773				{
774					const std::string &str = (testCases[i].tokens[j])->String();
775					printf("tester: ");
776					for (int i = 0; i < str.length(); i++)
777						printf("%x ", str[i]);
778					printf("\n");
779				}
780
781				switch (token->Type()) {
782					case CharacterString:
783						cout << " string == " << token->String() << endl;
784						break;
785					case Integer:
786						cout << " int == " << token->Int() << endl;
787						break;
788					case FloatingPoint:
789						cout << " float == " << token->Float() << endl;
790						break;
791				}
792*/
793				CHK(*token == *(testCases[i].tokens[j]));
794				delete testCases[i].tokens[j];
795			}
796			CHK(stream.IsEmpty());
797		} catch (Err *e) {
798			CppUnit::Exception *err = new CppUnit::Exception(e->Msg());
799			delete e;
800			throw *err;
801		}
802	}
803
804#endif	// !TEST_R5
805}
806
807// Parser Test
808void
809MimeSnifferTest::ParserTest() {
810	// test a couple of valid and invalid rules
811	struct test_case {
812		const char	*rule;
813		const char	*error;	// NULL, if valid
814	} testCases[] = {
815		// valid rules
816		{ "1.0 (\"ABCD\")", NULL },
817		{ "1.0 ('ABCD')", NULL },
818		{ "  1.0 ('ABCD')  ", NULL },
819		{ "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", NULL },
820		{ "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", NULL } ,
821		{ "0.5  \n   [0:3]  \t ('ABCD' \n | 'abcd' | 'EFGH')", NULL },
822		{ "0.8 [  0  :  3  ] ('ABCDEFG' | 'abcdefghij')", NULL },
823		{ "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", NULL },
824// These two rules are accepted by the R5 sniffer checker, but not
825// by the parser. Thus, we're not accepting them with either.
826//		{ "1.0 ('ABCD') | ('EFGH')", NULL },
827//		{ "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", NULL },
828		{ "0.8 [0:3] (\\077Mkl0x34 & 'abcdefgh')", NULL },
829		{ "0.8 [0:3] (\\077034 & 'abcd')", NULL },
830		{ "0.8 [0:3] (\\077\\034 & 'ab')", NULL },
831		{ "0.8 [0:3] (\\77\\034 & 'ab')", NULL },
832		{ "0.8 [0:3] (\\7 & 'a')", NULL },
833		{ "0.8 [0:3] (\"\\17\" & 'a')", NULL },
834		{ "0.8 [0:3] ('\\17' & 'a')", NULL },
835		{ "0.8 [0:3] (\\g & 'a')", NULL },
836		{ "0.8 [0:3] (\\g&\\b)", NULL },
837		{ "0.8 [0:3] (\\g\\&b & 'abc')", NULL },
838		{ "0.8 [0:3] (0x3457 & 'ab')", NULL },
839		{ "0.8 [0:3] (0xA4b7 & 'ab')", NULL },
840		{ "0.8 [0:3] ('ab\"' & 'abc')", NULL },
841		{ "0.8 [0:3] (\"ab\\\"\" & 'abc')", NULL },
842		{ "0.8 [0:3] (\"ab\\A\" & 'abc')", NULL },
843		{ "0.8 [0:3] (\"ab'\" & 'abc')", NULL },
844		{ "0.8 [0:3] (\"ab\\\\\" & 'abc')", NULL },
845		{ "0.8 [-5:-3] (\"abc\" & 'abc')", NULL },
846// Also accepted by the R5 sniffer but not the R5 parser. We reject.
847//		{ "0.8 [5:3] (\"abc\" & 'abc')", NULL },
848		{ "1.0 ('ABCD')", NULL },
849		{ ".2 ('ABCD')", NULL },
850		{ "0. ('ABCD')", NULL },
851		{ "1 ('ABCD')", NULL },
852		{ "+1 ('ABCD')", NULL },
853// We accept extended notation floating point numbers now, but
854// not invalid priorities.
855//		{ "1E25 ('ABCD')", NULL },
856//		{ "1e25 ('ABCD')", NULL },
857// R5 chokes on this rule :-(
858#if !TEST_R5
859		{ "1e-3 ('ABCD')", NULL },
860#endif
861		{ "+.003e2 ('ABCD')", NULL },
862// This one too. See how much better our parser is? :-)
863#if !TEST_R5
864		{ "-123e-9999999999 ('ABCD')", NULL },	// Hooray for the stunning accuracy of floating point ;-)
865#endif
866		// invalid rules
867		{ "0.0 ('')",
868			"Sniffer pattern error: illegal empty pattern" },
869		{ "('ABCD')",
870			"Sniffer pattern error: match level expected" },
871		{ "[0:3] ('ABCD')",
872			"Sniffer pattern error: match level expected" },
873		{ "0.8 [0:3] ( | 'abcdefghij')",
874		  "Sniffer pattern error: missing pattern" },
875		{ "0.8 [0:3] ('ABCDEFG' | )",
876		  "Sniffer pattern error: missing pattern" },
877		{ "[0:3] ('ABCD')",
878			"Sniffer pattern error: match level expected" },
879		{ "1.0 (ABCD')",
880#if TEST_R5
881			"Sniffer pattern error: misplaced single quote"
882#else
883			"Sniffer pattern error: invalid character 'A'"
884#endif
885		},
886		{ "1.0 ('ABCD)",
887#if TEST_R5
888			"Sniffer pattern error: unterminated rule"
889#else
890			"Sniffer pattern error: unterminated single-quoted string"
891#endif
892		},
893		{ "1.0 (ABCD)",
894#if TEST_R5
895			"Sniffer pattern error: missing pattern"
896#else
897			"Sniffer pattern error: invalid character 'A'"
898#endif
899		},
900		{ "1.0 (ABCD 'ABCD')",
901#if TEST_R5
902			"Sniffer pattern error: missing pattern"
903#else
904			"Sniffer pattern error: invalid character 'A'"
905#endif
906		},
907		{ "1.0 'ABCD')",
908#if TEST_R5
909			"Sniffer pattern error: missing pattern"
910#else
911			"Sniffer pattern error: missing pattern"
912#endif
913		},
914		{ "1.0 ('ABCD'",
915			"Sniffer pattern error: unterminated rule" },
916		{ "1.0 'ABCD'",
917#if TEST_R5
918			"Sniffer pattern error: missing sniff pattern"
919#else
920			"Sniffer pattern error: missing pattern"
921#endif
922		},
923		{ "0.5 [0:3] ('ABCD' | 'abcd' | [13] 'EFGH')",
924		  	"Sniffer pattern error: missing pattern" },
925		{ "0.5('ABCD'|'abcd'|[13]'EFGH')",
926		  	"Sniffer pattern error: missing pattern" },
927		{ "0.5[0:3]([10]'ABCD'|[17]'abcd'|[13]'EFGH')",
928		  	"Sniffer pattern error: missing pattern" },
929		{ "0.8 [0x10:3] ('ABCDEFG' | 'abcdefghij')",
930		  	"Sniffer pattern error: pattern offset expected" },
931		{ "0.8 [0:A] ('ABCDEFG' | 'abcdefghij')",
932#if TEST_R5
933		  	"Sniffer pattern error: pattern range end expected"
934#else
935			"Sniffer pattern error: invalid character 'A'"
936#endif
937		},
938		{ "0.8 [0:3] ('ABCDEFG' & 'abcdefghij')",
939		  	"Sniffer pattern error: pattern and mask lengths do not match" },
940		{ "0.8 [0:3] ('ABCDEFG' & 'abcdefg' & 'xyzwmno')",
941#if TEST_R5
942		  	"Sniffer pattern error: unterminated rule"
943#else
944			"Sniffer pattern error: expecting '|', ')', or possibly '&'"
945#endif
946		},
947		{ "0.8 [0:3] (\\g&b & 'a')",
948#if TEST_R5
949			"Sniffer pattern error: missing mask"
950#else
951			"Sniffer pattern error: invalid character 'b'"
952#endif
953		},
954		{ "0.8 [0:3] (\\19 & 'a')",
955		  	"Sniffer pattern error: pattern and mask lengths do not match" },
956		{ "0.8 [0:3] (0x345 & 'ab')",
957		  	"Sniffer pattern error: bad hex literal" },
958		{ "0.8 [0:3] (0x3457M & 'abc')",
959#if TEST_R5
960		  	"Sniffer pattern error: expecting '|' or '&'"
961#else
962			"Sniffer pattern error: invalid character 'M'"
963#endif
964		},
965		{ "0.8 [0:3] (0x3457\\7 & 'abc')",
966#if TEST_R5
967		  	"Sniffer pattern error: expecting '|' or '&'"
968#else
969			"Sniffer pattern error: expecting '|', ')', or possibly '&'"
970#endif
971		},
972
973		// Miscellaneous tests designed to hit every remaining
974		// relevant "throw new Err()" statement in the scanner.
975		// R5 versions will come later...
976#if !TEST_R5
977		{ "\x03  ", "Sniffer pattern error: invalid character '\x03'" },
978		{ "\"blah", "Sniffer pattern error: unterminated double-quoted string" },
979		{ "0xThisIsNotAHexCode", "Sniffer pattern error: incomplete hex code" },
980		{ "0xAndNeitherIsThis:-)", "Sniffer pattern error: bad hex literal" },
981		{ ".NotAFloat", "Sniffer pattern error: incomplete floating point number" },
982		{ "-NotANumber", "Sniffer pattern error: incomplete signed number" },
983		{ "+NotANumber", "Sniffer pattern error: incomplete signed number" },
984
985		{ "0.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
986		{ "1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
987		{ ".0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
988		{ "0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
989		{ "1e", "Sniffer pattern error: incomplete extended-notation floating point number" },
990		{ "-1e", "Sniffer pattern error: incomplete extended-notation floating point number" },
991		{ "+1e", "Sniffer pattern error: incomplete extended-notation floating point number" },
992		{ "-1.e", "Sniffer pattern error: incomplete extended-notation floating point number" },
993		{ "+1.e", "Sniffer pattern error: incomplete extended-notation floating point number" },
994		{ "-1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
995		{ "+1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
996
997		{ "0.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
998		{ "1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
999		{ ".0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1000		{ "0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1001		{ "1e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1002		{ "-1e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1003		{ "+1e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1004		{ "-1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1005		{ "+1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1006		{ "-1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1007		{ "+1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1008
1009		{ "0.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1010		{ "1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1011		{ ".0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1012		{ "0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1013		{ "1e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1014		{ "-1e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1015		{ "+1e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1016		{ "-1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1017		{ "+1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1018		{ "-1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1019		{ "+1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1020
1021		{ "\\11\\", "Sniffer pattern error: incomplete escape sequence" },
1022		{ "\"Escape!! \\", "Sniffer pattern error: incomplete escape sequence" },
1023		{ "'Escape!! \\", "Sniffer pattern error: incomplete escape sequence" },
1024
1025		{ "\\x", "Sniffer pattern error: incomplete escaped hex code" },
1026		{ "\\xNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" },
1027		{ "\\xAlsoNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" },
1028		{ "\\x0", "Sniffer pattern error: incomplete escaped hex code" },
1029
1030		{ "1.0 (\\377)", NULL },
1031		{ "\\400", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" },
1032		{ "\\777", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" },
1033		{ "1.0 (\\800)", NULL },
1034
1035		{ NULL, "Sniffer pattern error: NULL pattern" },
1036
1037		{ "-2", "Sniffer pattern error: invalid priority" },
1038		{ "+2", "Sniffer pattern error: invalid priority" },
1039
1040		{ "1.0", "Sniffer pattern error: missing expression" },
1041#endif	// !TEST_R5
1042
1043
1044//		{ "1E-25 ('ABCD')", "Sniffer pattern error: missing pattern" },
1045			// I don't currently understand what's wrong with the above rule... R5
1046			// rejects it though, for some reason.
1047	};
1048	const int testCaseCount = sizeof(testCases) / sizeof(test_case);
1049	BMimeType type;
1050	for (int32 i = 0; i < testCaseCount; i++) {
1051//cout << endl << "----------------------------------------------------------------------" << endl;
1052		NextSubTest();
1053		test_case &testCase = testCases[i];
1054//cout << endl << testCase.rule << endl;
1055		BString parseError;
1056		status_t error = BMimeType::CheckSnifferRule(testCase.rule,
1057													 &parseError);
1058		if (testCase.error == NULL) {
1059			if (error != B_OK) {
1060				cout << endl << "This sucker's gonna fail..."
1061				     << endl << "RULE: '" << testCase.rule << "'"
1062				     << endl << "ERROR: "
1063				     << endl << parseError.String()
1064				     << endl;
1065			}
1066			CHK(error == B_OK);
1067		} else {
1068
1069//			if (parseError.FindLast(testCase.error) >= 0) {
1070//				cout << endl << parseError.String(); // << endl;
1071//				cout << endl << testCase.error << endl;
1072//			}
1073//			cout << endl << parseError.String(); // << endl;
1074/*
1075			if (parseError.FindLast(testCase.error) >= 0) {
1076				cout << " -- OKAY" << endl;
1077			} else {
1078				cout << " -- NOGO" << endl;
1079				cout << testCase.error << endl;
1080			}
1081*/
1082if (testCase.rule && error != B_BAD_MIME_SNIFFER_RULE) {
1083printf("rule: `%s'", testCase.rule);
1084RES(error);
1085}
1086			CHK(error == (testCase.rule ? B_BAD_MIME_SNIFFER_RULE : B_BAD_VALUE));
1087			CHK(parseError.FindLast(testCase.error) >= 0);
1088		}
1089	}
1090}
1091
// dumpStr
//
// Debugging helper: prints every byte of the given string to stdout as a
// hexadecimal value followed by a space, then a newline. If label is
// non-NULL, the output is prefixed with "<label>: ".
void dumpStr(const std::string &string, const char *label = NULL) {
	if (label)
		printf("%s: ", label);
	for (std::string::size_type i = 0; i < string.length(); i++) {
		// Convert through unsigned char so that bytes >= 0x80 are not
		// sign-extended where char is signed, and pass the unsigned int
		// that %x expects.
		printf("%x ", static_cast<unsigned int>(
			static_cast<unsigned char>(string[i])));
	}
	printf("\n");
}
1099
1100
1101void
1102MimeSnifferTest::SnifferTest() {
1103#if TEST_R5
1104	Outputf("(no tests actually performed for R5 version)\n");
1105#else	// TEST_R5
1106	const char *rules[] = {
1107		// General tests
1108		"1.0 ('#include')",
1109		"0.0 [0:32] ('#include')",
1110		"0.e-230 [0:32] (\\#include | \\#ifndef)",
1111		".2 ([0:32] \"#include\" | [0] '#define' | [0:200] 'int main(')",
1112		"1.0 [0:32] ('<html>' | '<head>' | '<body>')",
1113		// Range tests
1114		"1.0 [0:9] ('rock')",
1115		"1.0 ([0:9] 'roll')",
1116		"1.0 ([0:9] 'rock' | [0:9] 'roll')",
1117		"1.0 [0:9] ('rock' | 'roll')",
1118		"1.0 ([0] 'rock')",
1119		"1.0 ([0] 'rock' | [0:9] 'roll')",
1120		"1.0 ([9] 'rock' | [10] 'roll')",
1121		// Mask, octal, and hex tests
1122		"1.0 (\\xFF\\xFF & '\\xF0\\xF0')",
1123		"1.0 ('\\33\\34' & \\xFF\\x00)",
1124		"1.0 (\\33\\34 & \"\\x00\\xFF\")",
1125		"1.0 (\\xFF & \\x05)",
1126		// Conjunctions
1127		"1.0 ([4] 'rock') ([9] 'roll')",
1128		"1.0 [5] ('roll') [10] ('rock')",
1129		"1.0 [4] ('rock' | 'roll') ([9] 'rock' | [10] 'roll')",
1130		// Case insensitivity tests
1131		"1.0 [4] (-i 'Rock' | 'Roll')",
1132		"1.0 [9] ('Rock' | -i 'Roll')",
1133		"1.0 (-i [4] 'Rock' | [9] 'Roll')",
1134		"1.0 ([9] 'Rock' | -i [4] 'Roll')",
1135	};
1136	const int ruleCount = sizeof(rules)/sizeof(char*);
1137	struct test_case {
1138		const std::string data;
1139		const bool	result[ruleCount];
1140	} tests[] = {
1141
1142//------------------------------
1143{
1144"#include <stdio.h>		\n\
1145#include <stdlib.h>		\n\
1146						\n\
1147int main() {			\n\
1148	return 0;			\n\
1149}						\n\
1150\n\
1151",	{	true, true, true, true, false,
1152		false, false, false, false, false, false, false,
1153		false, false, false, false,
1154		false, false, false,
1155		false, false, false, false
1156	}
1157},
1158//------------------------------
1159{
1160"	#include <stdio.h>		\n\
1161	#include <stdlib.h>		\n\
1162						\n\
1163	int main() {			\n\
1164		return 0;			\n\
1165	}						\n\
1166\n\
1167",	{	false, true, true, true, false,
1168		false, false, false, false, false, false, false,
1169		false, false, false, false,
1170		false, false, false,
1171		false, false, false, false
1172	}
1173},
1174//------------------------------
1175{
1176"#ifndef SOME_TEST_H		\n\
1177#define SOME_TEST_H			\n\
1178							\n\
1179void main();				\n\
1180							\n\
1181#endif	// SOME_TEST_H		\n\
1182							\n\
1183",	{	false, false, true, false, false,
1184		false, false, false, false, false, false, false,
1185		false, false, false, false,
1186		false, false, false,
1187		false, false, false, false
1188	}
1189},
1190//------------------------------
1191{
1192"//------------------		\n\
1193// SomeTest.cpp				\n\
1194//------------------		\n\
1195#include <stdio.h>			\n\
1196							\n\
1197int main() {				\n\
1198	return 0;				\n\
1199}							\n\
1200							\n\
1201",	{	false, false, false, true, false,
1202		false, false, false, false, false, false, false,
1203		false, false, false, true,
1204		//                   ^^^^ <= coincedence
1205		false, false, false,
1206		false, false, false, false
1207	}
1208},
1209//------------------------------
1210{
1211"<html>									\n\
1212<body bgcolor='#ffffff'>				\n\
1213HTML is boring as hell		<br>		\n\
1214when i write it too much	<br>		\n\
1215my head starts to swell		<br>		\n\
1216<br>									\n\
1217HTML is stupid and dumb		<br>		\n\
1218running through traffic		<br>		\n\
1219is ten times as fun			<br>		\n\
1220</body>									\n\
1221</html>									\n\
1222",	{	false, false, false, false, true,
1223		false, false, false, false, false, false, false,
1224		false, false, false, false,
1225		false, false, false,
1226		false, false, false, false
1227	}
1228},
1229//---------  <= Ten characters in
1230{
1231"     rock&roll",		// 5,10
1232	{	false, false, false, false, false,
1233		true, false, true, true, false, false, true,
1234		false, false, false, false,
1235		false, false, false,
1236		false, false, false, false
1237	}
1238},
1239//---------  <= Ten characters in
1240{
1241"    rock&roll",		// 4,9
1242	{ 	false, false, false, false, false,
1243		true, true, true, true, false, true, false,
1244		false, false, false, false,
1245		true, false, false,
1246		true, true, true, false
1247	}
1248},
1249//---------  <= Ten characters in
1250{
1251"     roll&rock",		// 5,10
1252	{	false, false, false, false, false,
1253		false, true, true, true, false, true, false,
1254		false, false, false, false,
1255		false, true, false,
1256		false, false, false, false
1257	}
1258},
1259//---------  <= Ten characters in
1260{
1261"    roll&rock",		// 4,9
1262	{ 	false, false, false, false, false,
1263		true, true, true, true, false, true, true,
1264		false, false, false, false,
1265		false, false, true,
1266		true, true, false, true
1267	}
1268},
1269//---------  <= Ten characters in
1270{
1271"    ROCK&ROLL",		// 4,9
1272	{ 	false, false, false, false, false,
1273		false, false, false, false, false, false, false,
1274		false, false, false, false,
1275		false, false, false,
1276		true, true, true, false
1277	}
1278},
1279//---------  <= Ten characters in
1280{
1281"    rOlL&RoCk",		// 4,9
1282	{ 	false, false, false, false, false,
1283		false, false, false, false, false, false, false,
1284		false, false, false, false,
1285		false, false, false,
1286		true, true, false, true
1287	}
1288},
1289//------------------------------
1290{
1291"\xFF\xFF	FF FF",
1292	{	false, false, false, false, false,
1293		false, false, false, false, false, false, false,
1294		true, false, false, true,
1295		false, false, false,
1296		false, false, false, false
1297	}
1298},
1299//------------------------------
1300{
1301"\xFA\xFA	FA FA",
1302	{	false, false, false, false, false,
1303		false, false, false, false, false, false, false,
1304		true, false, false, false,
1305		false, false, false,
1306		false, false, false, false
1307	}
1308},
1309//------------------------------
1310{
1311"\xAF\xAF	AF AF",
1312	{	false, false, false, false, false,
1313		false, false, false, false, false, false, false,
1314		false, false, false, true,
1315		false, false, false,
1316		false, false, false, false
1317	}
1318},
1319//------------------------------
1320{
1321std::string("\033\000	033 000", 10),	// Otherwise, it thinks the NULL is the end of the string
1322	{	false, false, false, false, false,
1323		false, false, false, false, false, false, false,
1324		false, true, false, false,
1325		false, false, false,
1326		false, false, false, false
1327	}
1328},
1329//------------------------------
1330{
1331std::string("\000\034	000 034", 10),	// Otherwise, it thinks the NULL is the end of the string
1332	{	false, false, false, false, false,
1333		false, false, false, false, false, false, false,
1334		false, false, true, false,
1335		false, false, false,
1336		false, false, false, false
1337	}
1338},
1339//------------------------------
1340{
1341"\033\034	033 034",
1342	{	false, false, false, false, false,
1343		false, false, false, false, false, false, false,
1344		false, true, true, false,
1345		false, false, false,
1346		false, false, false, false
1347	}
1348},
1349	};	// tests[]
1350	const int32 testCount = sizeof(tests)/sizeof(test_case);
1351
1352	for (int i = 0; i < testCount; i++) {
1353		if (i > 0)
1354			NextSubTestBlock();
1355		test_case &test = tests[i];
1356//		cout << "--------------------------------------------------------------------------------" << endl;
1357//		cout << test.data << endl;
1358
1359		for (int j = 0; j < ruleCount; j++) {
1360			NextSubTest();
1361//			cout << "############################################################" << endl;
1362//			cout << rules[j] << endl;
1363//			cout << test.result[j] << endl;
1364			Rule rule;
1365			BString errorMsg;
1366			status_t err = parse(rules[j], &rule, &errorMsg);
1367//			dumpStr(test.data, "str ");
1368			if (err) {
1369//				cout << "PARSE FAILURE!!!" << endl;
1370//				cout << errorMsg.String() << endl;
1371			}
1372			CHK(err == B_OK);
1373			if (!err) {
1374				BMallocIO data;
1375				data.Write(test.data.data(), test.data.length());//strlen(test.data));
1376				bool match = rule.Sniff(&data);
1377//				cout << match << endl;
1378//				cout << "match == " << (match ? "yes" : "no") << ", "
1379//					 << ((match == test.result[j]) ? "SUCCESS" : "FAILURE") << endl;
1380				CHK(match == test.result[j]);
1381			}
1382		}
1383	}
1384#endif // !TEST_R5
1385}
1386