1// MimeSnifferTest.cpp
2
3#include "MimeSnifferTest.h"
4
5#include <cppunit/Test.h>
6#include <cppunit/TestSuite.h>
7#include <cppunit/TestCaller.h>
8#include <sniffer/Rule.h>
9#include <sniffer/Parser.h>
10#include <DataIO.h>
11#include <Mime.h>
12#include <String.h>		// BString
13#include <TestUtils.h>
14
15#include <stdio.h>
16
17#include <iostream>
18using std::cout;
19using std::endl;
20
21using namespace BPrivate::Storage::Sniffer;
22
23// Suite
24CppUnit::Test*
25MimeSnifferTest::Suite() {
26	CppUnit::TestSuite *suite = new CppUnit::TestSuite();
27	typedef CppUnit::TestCaller<MimeSnifferTest> TC;
28
29	suite->addTest( new TC("Mime Sniffer::Scanner Test",
30						   &MimeSnifferTest::ScannerTest) );
31	suite->addTest( new TC("Mime Sniffer::Parser Test",
32						   &MimeSnifferTest::ParserTest) );
33	suite->addTest( new TC("Mime Sniffer::Sniffer Test",
34						   &MimeSnifferTest::SnifferTest) );
35
36	return suite;
37}
38
39// Scanner Test
40void
41MimeSnifferTest::ScannerTest() {
42#if TEST_R5
43	Outputf("(no tests actually performed for R5 version)\n");
44#else	// TEST_R5
45
46
47	// tests:
48	// Internal TokenStream and CharStream classes
49
50// Define some useful macros for dynamically allocating
51// various Token classes
52#define T(type) (new Token(type, -1))
53#define S(str) (new StringToken(str, -1))
54#define I(val) (new IntToken(val, -1))
55#define F(val) (new FloatToken(val, -1))
56
57	struct test_case {
58		const char *rule;
59		int tokenCount;
60		Token *tokens[256];
61	} testCases[] = {
62		{ "'Hey'[]:", 4,
63			{	S("Hey"),
64				T(LeftBracket),
65				T(RightBracket),
66				T(Colon)
67			}
68		},
69		{ "1", 1, { I(1) } },
70		{ "1.0", 1, { F(1.0) } },
71
72		{ "1.0 (\"ABCD\")", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } },
73		{ "1.0 ('ABCD')", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } },
74		{ "  1.0 ('ABCD')  ", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } },
75		{ "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", 11,
76			{	F(0.8),
77				T(LeftBracket),
78				I(0),
79				T(Colon),
80				I(3),
81				T(RightBracket),
82				T(LeftParen),
83				S("ABCDEFG"),
84				T(Divider),
85				S("abcdefghij"),
86				T(RightParen)
87			}
88		},
89		{ "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", 17,
90			{	F(0.5),
91				T(LeftParen),
92				T(LeftBracket),
93				I(10),
94				T(RightBracket),
95				S("ABCD"),
96				T(Divider),
97				T(LeftBracket),
98				I(17),
99				T(RightBracket),
100				S("abcd"),
101				T(Divider),
102				T(LeftBracket),
103				I(13),
104				T(RightBracket),
105				S("EFGH"),
106				T(RightParen)
107			}
108		},
109		{ "0.5  \n   [0:3]  \t ('ABCD' \n | 'abcd' | 'EFGH')", 13,
110			{	F(0.5),
111				T(LeftBracket),
112				I(0),
113				T(Colon),
114				I(3),
115				T(RightBracket),
116				T(LeftParen),
117				S("ABCD"),
118				T(Divider),
119				S("abcd"),
120				T(Divider),
121				S("EFGH"),
122				T(RightParen)
123			}
124		},
125		{ "0.8 [  0  :  3  ] ('ABCDEFG' | 'abcdefghij')", 11,
126			{	F(0.8),
127				T(LeftBracket),
128				I(0),
129				T(Colon),
130				I(3),
131				T(RightBracket),
132				T(LeftParen),
133				S("ABCDEFG"),
134				T(Divider),
135				S("abcdefghij"),
136				T(RightParen)
137			}
138		},
139		{ "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", 11,
140			{	F(0.8),
141				T(LeftBracket),
142				I(0),
143				T(Colon),
144				I(3),
145				T(RightBracket),
146				T(LeftParen),
147				S("ABCDEFG"),
148				T(Ampersand),
149				S("abcdefg"),
150				T(RightParen)
151			}
152		},
153		{ "1.0 ('ABCD') | ('EFGH')", 8,
154			{	F(1.0),
155				T(LeftParen),
156				S("ABCD"),
157				T(RightParen),
158				T(Divider),
159				T(LeftParen),
160				S("EFGH"),
161				T(RightParen)
162			}
163		},
164		{ "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", 18,
165			{	F(1.0),
166				T(LeftBracket),
167				I(0),
168				T(Colon),
169				I(3),
170				T(RightBracket),
171				T(LeftParen),
172				S("ABCD"),
173				T(RightParen),
174				T(Divider),
175				T(LeftBracket),
176				I(2),
177				T(Colon),
178				I(4),
179				T(RightBracket),
180				T(LeftParen),
181				S("EFGH"),
182				T(RightParen)
183			}
184		},
185		{ "0.8 [0:4] (\\077Mkj0x34 & 'abcdefgh')", 11,
186			{	F(0.8),
187				T(LeftBracket),
188				I(0),
189				T(Colon),
190				I(4),
191				T(RightBracket),
192				T(LeftParen),
193				S("\077Mkj0x34"),
194				T(Ampersand),
195				S("abcdefgh"),
196				T(RightParen)
197			}
198		},
199		{ "0.8 [0:4] (\\077Mkj\\x34 & 'abcdefgh')", 11,
200			{	F(0.8),
201				T(LeftBracket),
202				I(0),
203				T(Colon),
204				I(4),
205				T(RightBracket),
206				T(LeftParen),
207				S("\077Mkj\x34"),
208				T(Ampersand),
209				S("abcdefgh"),
210				T(RightParen)
211			}
212		},
213		{ "0.8 [0:3] (\\077034 & 'abcd')", 11,
214			{	F(0.8),
215				T(LeftBracket),
216				I(0),
217				T(Colon),
218				I(3),
219				T(RightBracket),
220				T(LeftParen),
221				S("\077034"),
222				T(Ampersand),
223				S("abcd"),
224				T(RightParen)
225			}
226		},
227		{ "0.8 [0:3] (\\077\\034 & 'ab')", 11,
228			{	F(0.8),
229				T(LeftBracket),
230				I(0),
231				T(Colon),
232				I(3),
233				T(RightBracket),
234				T(LeftParen),
235				S("\077\034"),
236				T(Ampersand),
237				S("ab"),
238				T(RightParen)
239			}
240		},
241		{ "0.8 [0:3] (\\77\\034 & 'ab')", 11,
242			{	F(0.8),
243				T(LeftBracket),
244				I(0),
245				T(Colon),
246				I(3),
247				T(RightBracket),
248				T(LeftParen),
249				S("\077\034"),
250				T(Ampersand),
251				S("ab"),
252				T(RightParen)
253			}
254		},
255		{ "0.8 [0:3] (\\7 & 'a')", 11,
256			{	F(0.8),
257				T(LeftBracket),
258				I(0),
259				T(Colon),
260				I(3),
261				T(RightBracket),
262				T(LeftParen),
263				S("\007"),
264				T(Ampersand),
265				S("a"),
266				T(RightParen)
267			}
268		},
269		{ "0.8 [0:3] (\"\\17\" & 'a')", 11,
270			{	F(0.8),
271				T(LeftBracket),
272				I(0),
273				T(Colon),
274				I(3),
275				T(RightBracket),
276				T(LeftParen),
277				S("\017"),
278				T(Ampersand),
279				S("a"),
280				T(RightParen)
281			}
282		},
283		{ "0.8 [0:3] ('\\17' & 'a')", 11,
284			{	F(0.8),
285				T(LeftBracket),
286				I(0),
287				T(Colon),
288				I(3),
289				T(RightBracket),
290				T(LeftParen),
291				S("\017"),
292				T(Ampersand),
293				S("a"),
294				T(RightParen)
295			}
296		},
297		{ "0.8 [0:3] (\\g & 'a')", 11,
298			{	F(0.8),
299				T(LeftBracket),
300				I(0),
301				T(Colon),
302				I(3),
303				T(RightBracket),
304				T(LeftParen),
305				S("g"),
306				T(Ampersand),
307				S("a"),
308				T(RightParen)
309			}
310		},
311		{ "0.8 [0:3] (\\g&\\b)", 11,
312			{	F(0.8),
313				T(LeftBracket),
314				I(0),
315				T(Colon),
316				I(3),
317				T(RightBracket),
318				T(LeftParen),
319				S("g"),
320				T(Ampersand),
321				S("\b"),
322				T(RightParen)
323			}
324		},
325		{ "0.8 [0:3] (\\g\\&b & 'abc')", 11,
326			{	F(0.8),
327				T(LeftBracket),
328				I(0),
329				T(Colon),
330				I(3),
331				T(RightBracket),
332				T(LeftParen),
333				S("g&b"),
334				T(Ampersand),
335				S("abc"),
336				T(RightParen)
337			}
338		},
339		{ "0.8 [0:3] (0x3457 & 'ab')", 11,
340			{	F(0.8),
341				T(LeftBracket),
342				I(0),
343				T(Colon),
344				I(3),
345				T(RightBracket),
346				T(LeftParen),
347				S("\x34\x57"),
348				T(Ampersand),
349				S("ab"),
350				T(RightParen)
351			}
352		},
353		{ "0.8 [0:3] (\\x34\\x57 & 'ab')", 11,
354			{	F(0.8),
355				T(LeftBracket),
356				I(0),
357				T(Colon),
358				I(3),
359				T(RightBracket),
360				T(LeftParen),
361				S("\x34\x57"),
362				T(Ampersand),
363				S("ab"),
364				T(RightParen)
365			}
366		},
367		{ "0.8 [0:3] (0xA4b7 & 'ab')", 11,
368			{	F(0.8),
369				T(LeftBracket),
370				I(0),
371				T(Colon),
372				I(3),
373				T(RightBracket),
374				T(LeftParen),
375				S("\xA4\xb7"),
376				T(Ampersand),
377				S("ab"),
378				T(RightParen)
379			}
380		},
381		{ "0.8 [0:3] (\\xA4\\xb7 & 'ab')", 11,
382			{	F(0.8),
383				T(LeftBracket),
384				I(0),
385				T(Colon),
386				I(3),
387				T(RightBracket),
388				T(LeftParen),
389				S("\xA4\xb7"),
390				T(Ampersand),
391				S("ab"),
392				T(RightParen)
393			}
394		},
395		{ "0.8 [0:3] (\"\\xA4\\xb7\" & 'ab')", 11,
396			{	F(0.8),
397				T(LeftBracket),
398				I(0),
399				T(Colon),
400				I(3),
401				T(RightBracket),
402				T(LeftParen),
403				S("\xA4\xb7"),
404				T(Ampersand),
405				S("ab"),
406				T(RightParen)
407			}
408		},
409		{ "0.8 [0:3] (\'\\xA4\\xb7\' & 'ab')", 11,
410			{	F(0.8),
411				T(LeftBracket),
412				I(0),
413				T(Colon),
414				I(3),
415				T(RightBracket),
416				T(LeftParen),
417				S("\xA4\xb7"),
418				T(Ampersand),
419				S("ab"),
420				T(RightParen)
421			}
422		},
423		{ "0.8 [0:3] ('ab\"' & 'abc')", 11,
424			{	F(0.8),
425				T(LeftBracket),
426				I(0),
427				T(Colon),
428				I(3),
429				T(RightBracket),
430				T(LeftParen),
431				S("ab\""),
432				T(Ampersand),
433				S("abc"),
434				T(RightParen)
435			}
436		},
437		{ "0.8 [0:3] (\"ab\\\"\" & 'abc')", 11,
438			{	F(0.8),
439				T(LeftBracket),
440				I(0),
441				T(Colon),
442				I(3),
443				T(RightBracket),
444				T(LeftParen),
445				S("ab\""),
446				T(Ampersand),
447				S("abc"),
448				T(RightParen)
449			}
450		},
451		{ "0.8 [0:3] (\"ab\\A\" & 'abc')", 11,
452			{	F(0.8),
453				T(LeftBracket),
454				I(0),
455				T(Colon),
456				I(3),
457				T(RightBracket),
458				T(LeftParen),
459				S("abA"),
460				T(Ampersand),
461				S("abc"),
462				T(RightParen)
463			}
464		},
465		{ "0.8 [0:3] (\"ab'\" & 'abc')", 11,
466			{	F(0.8),
467				T(LeftBracket),
468				I(0),
469				T(Colon),
470				I(3),
471				T(RightBracket),
472				T(LeftParen),
473				S("ab'"),
474				T(Ampersand),
475				S("abc"),
476				T(RightParen)
477			}
478		},
479		{ "0.8 [0:3] (\"ab\\\\\" & 'abc')", 11,
480			{	F(0.8),
481				T(LeftBracket),
482				I(0),
483				T(Colon),
484				I(3),
485				T(RightBracket),
486				T(LeftParen),
487				S("ab\\"),
488				T(Ampersand),
489				S("abc"),
490				T(RightParen)
491			}
492		},
493		{ "0.8 [-5:-3] (\"abc\" & 'abc')", 11,
494			{	F(0.8),
495				T(LeftBracket),
496				I(-5),
497				T(Colon),
498				I(-3),
499				T(RightBracket),
500				T(LeftParen),
501				S("abc"),
502				T(Ampersand),
503				S("abc"),
504				T(RightParen)
505			}
506		},
507		{ "0.8 [5:3] (\"abc\" & 'abc')", 11,
508			{	F(0.8),
509				T(LeftBracket),
510				I(5),
511				T(Colon),
512				I(3),
513				T(RightBracket),
514				T(LeftParen),
515				S("abc"),
516				T(Ampersand),
517				S("abc"),
518				T(RightParen)
519			}
520		},
521		{ "1.2 ('ABCD')", 4,
522			{	F(1.2),
523				T(LeftParen),
524				S("ABCD"),
525				T(RightParen)
526			}
527		},
528		{ ".2 ('ABCD')", 4,
529			{	F(0.2),
530				T(LeftParen),
531				S("ABCD"),
532				T(RightParen)
533			}
534		},
535		{ "0. ('ABCD')", 4,
536			{	F(0.0),
537				T(LeftParen),
538				S("ABCD"),
539				T(RightParen)
540			}
541		},
542		// Signed integers
543		{ "-1 ('ABCD')", 4,
544			{	I(-1),
545				T(LeftParen),
546				S("ABCD"),
547				T(RightParen)
548			}
549		},
550		{ "+1 ('ABCD')", 4,
551			{	I(1),
552				T(LeftParen),
553				S("ABCD"),
554				T(RightParen)
555			}
556		},
557		// Unsigned extended floats
558		{ "1E25 ('ABCD')", 4,
559			{	F(1e25),
560				T(LeftParen),
561				S("ABCD"),
562				T(RightParen)
563			}
564		},
565		{ "1e25 ('ABCD')", 4,
566			{	F(1e25),
567				T(LeftParen),
568				S("ABCD"),
569				T(RightParen)
570			}
571		},
572		{ "1E+25 ('ABCD')", 4,
573			{	F(1e25),
574				T(LeftParen),
575				S("ABCD"),
576				T(RightParen)
577			}
578		},
579		{ "1e+25 ('ABCD')", 4,
580			{	F(1e25),
581				T(LeftParen),
582				S("ABCD"),
583				T(RightParen)
584			}
585		},
586		{ "1E-25 ('ABCD')", 4,
587			{	F(1e-25),
588				T(LeftParen),
589				S("ABCD"),
590				T(RightParen)
591			}
592		},
593		{ "1e-25 ('ABCD')", 4,
594			{	F(1e-25),
595				T(LeftParen),
596				S("ABCD"),
597				T(RightParen)
598			}
599		},
600		// Positive signed extended floats
601		{ "+1E25 ('ABCD')", 4,
602			{	F(1e25),
603				T(LeftParen),
604				S("ABCD"),
605				T(RightParen)
606			}
607		},
608		{ "+1e25 ('ABCD')", 4,
609			{	F(1e25),
610				T(LeftParen),
611				S("ABCD"),
612				T(RightParen)
613			}
614		},
615		{ "+1E+25 ('ABCD')", 4,
616			{	F(1e25),
617				T(LeftParen),
618				S("ABCD"),
619				T(RightParen)
620			}
621		},
622		{ "+1e+25 ('ABCD')", 4,
623			{	F(1e25),
624				T(LeftParen),
625				S("ABCD"),
626				T(RightParen)
627			}
628		},
629		{ "+1E-25 ('ABCD')", 4,
630			{	F(1e-25),
631				T(LeftParen),
632				S("ABCD"),
633				T(RightParen)
634			}
635		},
636		{ "+1e-25 ('ABCD')", 4,
637			{	F(1e-25),
638				T(LeftParen),
639				S("ABCD"),
640				T(RightParen)
641			}
642		},
643		// Negative signed extended floats
644		{ "-1E25 ('ABCD')", 4,
645			{	F(-1e25),
646				T(LeftParen),
647				S("ABCD"),
648				T(RightParen)
649			}
650		},
651		{ "-1e25 ('ABCD')", 4,
652			{	F(-1e25),
653				T(LeftParen),
654				S("ABCD"),
655				T(RightParen)
656			}
657		},
658		{ "-1E+25 ('ABCD')", 4,
659			{	F(-1e25),
660				T(LeftParen),
661				S("ABCD"),
662				T(RightParen)
663			}
664		},
665		{ "-1e+25 ('ABCD')", 4,
666			{	F(-1e25),
667				T(LeftParen),
668				S("ABCD"),
669				T(RightParen)
670			}
671		},
672		{ "-1E-25 ('ABCD')", 4,
673			{	F(-1e-25),
674				T(LeftParen),
675				S("ABCD"),
676				T(RightParen)
677			}
678		},
679		{ "-1e-25 ('ABCD')", 4,
680			{	F(-1e-25),
681				T(LeftParen),
682				S("ABCD"),
683				T(RightParen)
684			}
685		},
686		// Miscellaneous extended floats
687		{ ".1E-25 ('ABCD')", 4,
688			{	F(0.1e-25),
689				T(LeftParen),
690				S("ABCD"),
691				T(RightParen)
692			}
693		},
694		{ "-.1e-25 ('ABCD')", 4,
695			{	F(-0.1e-25),
696				T(LeftParen),
697				S("ABCD"),
698				T(RightParen)
699			}
700		},
701		// Signed floats
702		{ "-1.0 ('ABCD')", 4,
703			{	F(-1.0),
704				T(LeftParen),
705				S("ABCD"),
706				T(RightParen)
707			}
708		},
709		{ "+1.0 ('ABCD')", 4,
710			{	F(1.0),
711				T(LeftParen),
712				S("ABCD"),
713				T(RightParen)
714			}
715		},
716		// The uber test
717		{ "0 -0 +0 1 -2 +3 0. -0. +0. 1. -2. +3. 0.0 -0.1 +0.2 1.0 -2.1 +3.2 "
718		  "0.e0 0.e-1 0.e+2 1.e1 2.e-2 3.e+3 -1.e1 -2.e-2 -3.e+3 +1.e1 +2.e-2 +3.e+3 "
719		  "0.012345 1.23456 ( ) [ ] | & : -i "
720		  " \"abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \\\" ' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 \" "
721		  " 'abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" \\' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 ' "
722		  " \\000abc_xyz123\"'\"'456 \\xA1a1 \\!\\?\\\\ "
723		  " 0x00 0x12 0xabCD 0xaBcD 0x0123456789aBcDeFfEdCbA", 50,
724		  	{	I(0), I(0), I(0), I(1), I(-2), I(3), F(0.0), F(0.0), F(0.0),
725		  			F(1.0), F(-2.0), F(3.0), F(0.0), F(-0.1), F(0.2), F(1.0), F(-2.1), F(3.2),
726		  		F(0.0), F(0.0e-1), F(0.0e2), F(1.0e1), F(2.0e-2), F(3.0e3),
727		  			F(-1.0e1), F(-2.0e-2), F(-3.0e3), F(1.0e1), F(2.0e-2), F(3.0e3),
728		  		F(0.012345), F(1.23456), T(LeftParen), T(RightParen), T(LeftBracket),
729		  			T(RightBracket), T(Divider), T(Ampersand), T(Colon), T(CaseInsensitiveFlag),
730		  		S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)),
731		  		S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)),
732		  		S(std::string("\000abc_xyz123\"'\"'456", 18)),
733		  		S("\241a1"),
734		  		S("!?\\"),
735		  		S(std::string("\x00", 1)), S("\x12"), S("\xAB\xCD"), S("\xAB\xCD"),
736		  			S("\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA")
737		  	}
738		},
739	};
740
741// Undefine our nasty macros
742#undef T
743#undef S
744#undef I
745#undef F
746
747	const int testCaseCount = sizeof(testCases) / sizeof(test_case);
748	for (int i = 0; i < testCaseCount; i++) {
749		NextSubTest();
750//		cout << endl << testCases[i].rule << endl;
751		TokenStream stream;
752		try {
753			stream.SetTo(testCases[i].rule);
754
755			CHK(stream.InitCheck() == B_OK);
756			for (int j = 0; j < testCases[i].tokenCount; j++) {
757				const Token *token = stream.Get();
758				CHK(token);
759/*
760				cout << tokenTypeToString(token->Type()) << endl;
761
762				if (token->Type() == CharacterString)
763					cout << " token1 == " << token->String() << endl;
764				if (testCases[i].tokens[j]->Type() == CharacterString)
765					cout << " token2 == " << (testCases[i].tokens[j])->String() << endl;
766
767				if (token->Type() == CharacterString)
768				{
769					const std::string &str = token->String();
770					printf("parser: ");
771					for (int i = 0; i < str.length(); i++)
772						printf("%x ", str[i]);
773					printf("\n");
774				}
775				if (testCases[i].tokens[j]->Type() == CharacterString)
776				{
777					const std::string &str = (testCases[i].tokens[j])->String();
778					printf("tester: ");
779					for (int i = 0; i < str.length(); i++)
780						printf("%x ", str[i]);
781					printf("\n");
782				}
783
784				switch (token->Type()) {
785					case CharacterString:
786						cout << " string == " << token->String() << endl;
787						break;
788					case Integer:
789						cout << " int == " << token->Int() << endl;
790						break;
791					case FloatingPoint:
792						cout << " float == " << token->Float() << endl;
793						break;
794				}
795*/
796				CHK(*token == *(testCases[i].tokens[j]));
797				delete testCases[i].tokens[j];
798			}
799			CHK(stream.IsEmpty());
800		} catch (Err *e) {
801			CppUnit::Exception *err = new CppUnit::Exception(e->Msg());
802			delete e;
803			throw *err;
804		}
805	}
806
807#endif	// !TEST_R5
808}
809
810// Parser Test
811void
812MimeSnifferTest::ParserTest() {
813	// test a couple of valid and invalid rules
814	struct test_case {
815		const char	*rule;
816		const char	*error;	// NULL, if valid
817	} testCases[] = {
818		// valid rules
819		{ "1.0 (\"ABCD\")", NULL },
820		{ "1.0 ('ABCD')", NULL },
821		{ "  1.0 ('ABCD')  ", NULL },
822		{ "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", NULL },
823		{ "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", NULL } ,
824		{ "0.5  \n   [0:3]  \t ('ABCD' \n | 'abcd' | 'EFGH')", NULL },
825		{ "0.8 [  0  :  3  ] ('ABCDEFG' | 'abcdefghij')", NULL },
826		{ "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", NULL },
827// These two rules are accepted by the R5 sniffer checker, but not
828// by the parser. Thus, we're not accepting them with either.
829//		{ "1.0 ('ABCD') | ('EFGH')", NULL },
830//		{ "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", NULL },
831		{ "0.8 [0:3] (\\077Mkl0x34 & 'abcdefgh')", NULL },
832		{ "0.8 [0:3] (\\077034 & 'abcd')", NULL },
833		{ "0.8 [0:3] (\\077\\034 & 'ab')", NULL },
834		{ "0.8 [0:3] (\\77\\034 & 'ab')", NULL },
835		{ "0.8 [0:3] (\\7 & 'a')", NULL },
836		{ "0.8 [0:3] (\"\\17\" & 'a')", NULL },
837		{ "0.8 [0:3] ('\\17' & 'a')", NULL },
838		{ "0.8 [0:3] (\\g & 'a')", NULL },
839		{ "0.8 [0:3] (\\g&\\b)", NULL },
840		{ "0.8 [0:3] (\\g\\&b & 'abc')", NULL },
841		{ "0.8 [0:3] (0x3457 & 'ab')", NULL },
842		{ "0.8 [0:3] (0xA4b7 & 'ab')", NULL },
843		{ "0.8 [0:3] ('ab\"' & 'abc')", NULL },
844		{ "0.8 [0:3] (\"ab\\\"\" & 'abc')", NULL },
845		{ "0.8 [0:3] (\"ab\\A\" & 'abc')", NULL },
846		{ "0.8 [0:3] (\"ab'\" & 'abc')", NULL },
847		{ "0.8 [0:3] (\"ab\\\\\" & 'abc')", NULL },
848		{ "0.8 [-5:-3] (\"abc\" & 'abc')", NULL },
849// Also accepted by the R5 sniffer but not the R5 parser. We reject.
850//		{ "0.8 [5:3] (\"abc\" & 'abc')", NULL },
851		{ "1.0 ('ABCD')", NULL },
852		{ ".2 ('ABCD')", NULL },
853		{ "0. ('ABCD')", NULL },
854		{ "1 ('ABCD')", NULL },
855		{ "+1 ('ABCD')", NULL },
856// We accept extended notation floating point numbers now, but
857// not invalid priorities.
858//		{ "1E25 ('ABCD')", NULL },
859//		{ "1e25 ('ABCD')", NULL },
860// R5 chokes on this rule :-(
861#if !TEST_R5
862		{ "1e-3 ('ABCD')", NULL },
863#endif
864		{ "+.003e2 ('ABCD')", NULL },
865// This one too. See how much better our parser is? :-)
866#if !TEST_R5
867		{ "-123e-9999999999 ('ABCD')", NULL },	// Hooray for the stunning accuracy of floating point ;-)
868#endif
869		// invalid rules
870		{ "0.0 ('')",
871			"Sniffer pattern error: illegal empty pattern" },
872		{ "('ABCD')",
873			"Sniffer pattern error: match level expected" },
874		{ "[0:3] ('ABCD')",
875			"Sniffer pattern error: match level expected" },
876		{ "0.8 [0:3] ( | 'abcdefghij')",
877		  "Sniffer pattern error: missing pattern" },
878		{ "0.8 [0:3] ('ABCDEFG' | )",
879		  "Sniffer pattern error: missing pattern" },
880		{ "[0:3] ('ABCD')",
881			"Sniffer pattern error: match level expected" },
882		{ "1.0 (ABCD')",
883#if TEST_R5
884			"Sniffer pattern error: misplaced single quote"
885#else
886			"Sniffer pattern error: invalid character 'A'"
887#endif
888		},
889		{ "1.0 ('ABCD)",
890#if TEST_R5
891			"Sniffer pattern error: unterminated rule"
892#else
893			"Sniffer pattern error: unterminated single-quoted string"
894#endif
895		},
896		{ "1.0 (ABCD)",
897#if TEST_R5
898			"Sniffer pattern error: missing pattern"
899#else
900			"Sniffer pattern error: invalid character 'A'"
901#endif
902		},
903		{ "1.0 (ABCD 'ABCD')",
904#if TEST_R5
905			"Sniffer pattern error: missing pattern"
906#else
907			"Sniffer pattern error: invalid character 'A'"
908#endif
909		},
910		{ "1.0 'ABCD')",
911#if TEST_R5
912			"Sniffer pattern error: missing pattern"
913#else
914			"Sniffer pattern error: missing pattern"
915#endif
916		},
917		{ "1.0 ('ABCD'",
918			"Sniffer pattern error: unterminated rule" },
919		{ "1.0 'ABCD'",
920#if TEST_R5
921			"Sniffer pattern error: missing sniff pattern"
922#else
923			"Sniffer pattern error: missing pattern"
924#endif
925		},
926		{ "0.5 [0:3] ('ABCD' | 'abcd' | [13] 'EFGH')",
927		  	"Sniffer pattern error: missing pattern" },
928		{ "0.5('ABCD'|'abcd'|[13]'EFGH')",
929		  	"Sniffer pattern error: missing pattern" },
930		{ "0.5[0:3]([10]'ABCD'|[17]'abcd'|[13]'EFGH')",
931		  	"Sniffer pattern error: missing pattern" },
932		{ "0.8 [0x10:3] ('ABCDEFG' | 'abcdefghij')",
933		  	"Sniffer pattern error: pattern offset expected" },
934		{ "0.8 [0:A] ('ABCDEFG' | 'abcdefghij')",
935#if TEST_R5
936		  	"Sniffer pattern error: pattern range end expected"
937#else
938			"Sniffer pattern error: invalid character 'A'"
939#endif
940		},
941		{ "0.8 [0:3] ('ABCDEFG' & 'abcdefghij')",
942		  	"Sniffer pattern error: pattern and mask lengths do not match" },
943		{ "0.8 [0:3] ('ABCDEFG' & 'abcdefg' & 'xyzwmno')",
944#if TEST_R5
945		  	"Sniffer pattern error: unterminated rule"
946#else
947			"Sniffer pattern error: expecting '|', ')', or possibly '&'"
948#endif
949		},
950		{ "0.8 [0:3] (\\g&b & 'a')",
951#if TEST_R5
952			"Sniffer pattern error: missing mask"
953#else
954			"Sniffer pattern error: invalid character 'b'"
955#endif
956		},
957		{ "0.8 [0:3] (\\19 & 'a')",
958		  	"Sniffer pattern error: pattern and mask lengths do not match" },
959		{ "0.8 [0:3] (0x345 & 'ab')",
960		  	"Sniffer pattern error: bad hex literal" },
961		{ "0.8 [0:3] (0x3457M & 'abc')",
962#if TEST_R5
963		  	"Sniffer pattern error: expecting '|' or '&'"
964#else
965			"Sniffer pattern error: invalid character 'M'"
966#endif
967		},
968		{ "0.8 [0:3] (0x3457\\7 & 'abc')",
969#if TEST_R5
970		  	"Sniffer pattern error: expecting '|' or '&'"
971#else
972			"Sniffer pattern error: expecting '|', ')', or possibly '&'"
973#endif
974		},
975
976		// Miscellaneous tests designed to hit every remaining
977		// relevant "throw new Err()" statement in the scanner.
978		// R5 versions will come later...
979#if !TEST_R5
980		{ "\x03  ", "Sniffer pattern error: invalid character '\x03'" },
981		{ "\"blah", "Sniffer pattern error: unterminated double-quoted string" },
982		{ "0xThisIsNotAHexCode", "Sniffer pattern error: incomplete hex code" },
983		{ "0xAndNeitherIsThis:-)", "Sniffer pattern error: bad hex literal" },
984		{ ".NotAFloat", "Sniffer pattern error: incomplete floating point number" },
985		{ "-NotANumber", "Sniffer pattern error: incomplete signed number" },
986		{ "+NotANumber", "Sniffer pattern error: incomplete signed number" },
987
988		{ "0.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
989		{ "1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
990		{ ".0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
991		{ "0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
992		{ "1e", "Sniffer pattern error: incomplete extended-notation floating point number" },
993		{ "-1e", "Sniffer pattern error: incomplete extended-notation floating point number" },
994		{ "+1e", "Sniffer pattern error: incomplete extended-notation floating point number" },
995		{ "-1.e", "Sniffer pattern error: incomplete extended-notation floating point number" },
996		{ "+1.e", "Sniffer pattern error: incomplete extended-notation floating point number" },
997		{ "-1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
998		{ "+1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
999
1000		{ "0.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1001		{ "1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1002		{ ".0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1003		{ "0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1004		{ "1e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1005		{ "-1e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1006		{ "+1e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1007		{ "-1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1008		{ "+1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1009		{ "-1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1010		{ "+1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1011
1012		{ "0.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1013		{ "1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1014		{ ".0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1015		{ "0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1016		{ "1e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1017		{ "-1e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1018		{ "+1e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1019		{ "-1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1020		{ "+1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1021		{ "-1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1022		{ "+1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1023
1024		{ "\\11\\", "Sniffer pattern error: incomplete escape sequence" },
1025		{ "\"Escape!! \\", "Sniffer pattern error: incomplete escape sequence" },
1026		{ "'Escape!! \\", "Sniffer pattern error: incomplete escape sequence" },
1027
1028		{ "\\x", "Sniffer pattern error: incomplete escaped hex code" },
1029		{ "\\xNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" },
1030		{ "\\xAlsoNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" },
1031		{ "\\x0", "Sniffer pattern error: incomplete escaped hex code" },
1032
1033		{ "1.0 (\\377)", NULL },
1034		{ "\\400", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" },
1035		{ "\\777", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" },
1036		{ "1.0 (\\800)", NULL },
1037
1038		{ NULL, "Sniffer pattern error: NULL pattern" },
1039
1040		{ "-2", "Sniffer pattern error: invalid priority" },
1041		{ "+2", "Sniffer pattern error: invalid priority" },
1042
1043		{ "1.0", "Sniffer pattern error: missing expression" },
1044#endif	// !TEST_R5
1045
1046
1047//		{ "1E-25 ('ABCD')", "Sniffer pattern error: missing pattern" },
1048			// I don't currently understand what's wrong with the above rule... R5
1049			// rejects it though, for some reason.
1050	};
1051	const int testCaseCount = sizeof(testCases) / sizeof(test_case);
1052	BMimeType type;
1053	for (int32 i = 0; i < testCaseCount; i++) {
1054//cout << endl << "----------------------------------------------------------------------" << endl;
1055		NextSubTest();
1056		test_case &testCase = testCases[i];
1057//cout << endl << testCase.rule << endl;
1058		BString parseError;
1059		status_t error = BMimeType::CheckSnifferRule(testCase.rule,
1060													 &parseError);
1061		if (testCase.error == NULL) {
1062			if (error != B_OK) {
1063				cout << endl << "This sucker's gonna fail..."
1064				     << endl << "RULE: '" << testCase.rule << "'"
1065				     << endl << "ERROR: "
1066				     << endl << parseError.String()
1067				     << endl;
1068			}
1069			CHK(error == B_OK);
1070		} else {
1071
1072//			if (parseError.FindLast(testCase.error) >= 0) {
1073//				cout << endl << parseError.String(); // << endl;
1074//				cout << endl << testCase.error << endl;
1075//			}
1076//			cout << endl << parseError.String(); // << endl;
1077/*
1078			if (parseError.FindLast(testCase.error) >= 0) {
1079				cout << " -- OKAY" << endl;
1080			} else {
1081				cout << " -- NOGO" << endl;
1082				cout << testCase.error << endl;
1083			}
1084*/
1085if (testCase.rule && error != B_BAD_MIME_SNIFFER_RULE) {
1086printf("rule: `%s'", testCase.rule);
1087RES(error);
1088}
1089			CHK(error == (testCase.rule ? B_BAD_MIME_SNIFFER_RULE : B_BAD_VALUE));
1090			CHK(parseError.FindLast(testCase.error) >= 0);
1091		}
1092	}
1093}
1094
// Debug helper: prints the bytes of a string to stdout as lowercase hex.
//
// string - the bytes to dump; embedded NUL bytes are printed like any other
//          byte since the loop runs over string.length().
// label  - optional prefix; when non-NULL, "<label>: " is printed first.
//
// Every byte is followed by one space and the line is ended with '\n'.
// Each byte is converted through unsigned char before printing. Passing the
// plain char would sign-extend bytes >= 0x80 where char is signed and print
// them as e.g. "ffffffa4" instead of "a4".
void dumpStr(const std::string &string, const char *label = NULL) {
	if (label)
		printf("%s: ", label);
	for (size_t i = 0; i < string.length(); i++) {
		const unsigned char byte = static_cast<unsigned char>(string[i]);
		// %x expects an unsigned int argument.
		printf("%x ", static_cast<unsigned int>(byte));
	}
	printf("\n");
}
1102
1103
// Sniffer Test
//
// Parses every sniffer rule in rules[] and runs it against every data
// sample in tests[], checking that Rule::Sniff() returns the expected
// match/no-match value recorded for that (sample, rule) pair.
// When TEST_R5 is set, no tests are run and only a notice is printed.
void
MimeSnifferTest::SnifferTest() {
#if TEST_R5
	Outputf("(no tests actually performed for R5 version)\n");
#else	// TEST_R5
	// Rules under test. The order matters: test_case::result[] below is
	// indexed by position in this array.
	// Group sizes (5, 7, 4, 3, 4) match the row layout of each result table.
	const char *rules[] = {
		// General tests
		"1.0 ('#include')",
		"0.0 [0:32] ('#include')",
		"0.e-230 [0:32] (\\#include | \\#ifndef)",
		".2 ([0:32] \"#include\" | [0] '#define' | [0:200] 'int main(')",
		"1.0 [0:32] ('<html>' | '<head>' | '<body>')",
		// Range tests
		"1.0 [0:9] ('rock')",
		"1.0 ([0:9] 'roll')",
		"1.0 ([0:9] 'rock' | [0:9] 'roll')",
		"1.0 [0:9] ('rock' | 'roll')",
		"1.0 ([0] 'rock')",
		"1.0 ([0] 'rock' | [0:9] 'roll')",
		"1.0 ([9] 'rock' | [10] 'roll')",
		// Mask, octal, and hex tests
		"1.0 (\\xFF\\xFF & '\\xF0\\xF0')",
		"1.0 ('\\33\\34' & \\xFF\\x00)",
		"1.0 (\\33\\34 & \"\\x00\\xFF\")",
		"1.0 (\\xFF & \\x05)",
		// Conjunctions
		"1.0 ([4] 'rock') ([9] 'roll')",
		"1.0 [5] ('roll') [10] ('rock')",
		"1.0 [4] ('rock' | 'roll') ([9] 'rock' | [10] 'roll')",
		// Case insensitivity tests
		"1.0 [4] (-i 'Rock' | 'Roll')",
		"1.0 [9] ('Rock' | -i 'Roll')",
		"1.0 (-i [4] 'Rock' | [9] 'Roll')",
		"1.0 ([9] 'Rock' | -i [4] 'Roll')",
	};
	const int ruleCount = sizeof(rules)/sizeof(char*);
	// One data sample plus the expected Sniff() result for each rule.
	// result[j] is the expected outcome of rules[j] applied to data; each
	// initializer below is laid out in five rows that follow the rule groups
	// above (general, range, mask/octal/hex, conjunctions, case insensitivity).
	// The whitespace inside the data literals is significant, since several
	// rules match at fixed byte offsets.
	struct test_case {
		const std::string data;
		const bool	result[ruleCount];
	} tests[] = {

//------------------------------
// C source that begins with "#include" at offset 0
{
"#include <stdio.h>		\n\
#include <stdlib.h>		\n\
						\n\
int main() {			\n\
	return 0;			\n\
}						\n\
\n\
",	{	true, true, true, true, false,
		false, false, false, false, false, false, false,
		false, false, false, false,
		false, false, false,
		false, false, false, false
	}
},
//------------------------------
// Same source with every line indented by a tab, so "#include" is at offset 1
{
"	#include <stdio.h>		\n\
	#include <stdlib.h>		\n\
						\n\
	int main() {			\n\
		return 0;			\n\
	}						\n\
\n\
",	{	false, true, true, true, false,
		false, false, false, false, false, false, false,
		false, false, false, false,
		false, false, false,
		false, false, false, false
	}
},
//------------------------------
// Header-guard style file that begins with "#ifndef"
{
"#ifndef SOME_TEST_H		\n\
#define SOME_TEST_H			\n\
							\n\
void main();				\n\
							\n\
#endif	// SOME_TEST_H		\n\
							\n\
",	{	false, false, true, false, false,
		false, false, false, false, false, false, false,
		false, false, false, false,
		false, false, false,
		false, false, false, false
	}
},
//------------------------------
// C source preceded by a comment banner, pushing "#include" past offset 32
{
"//------------------		\n\
// SomeTest.cpp				\n\
//------------------		\n\
#include <stdio.h>			\n\
							\n\
int main() {				\n\
	return 0;				\n\
}							\n\
							\n\
",	{	false, false, false, true, false,
		false, false, false, false, false, false, false,
		false, false, false, true,
		//                   ^^^^ <= coincidence
		// (presumably the leading '/' (0x2F) happens to satisfy the
		// "\xFF & \x05" mask rule -- TODO confirm against the mask semantics)
		false, false, false,
		false, false, false, false
	}
},
//------------------------------
// HTML document that begins with "<html>"
{
"<html>									\n\
<body bgcolor='#ffffff'>				\n\
HTML is boring as hell		<br>		\n\
when i write it too much	<br>		\n\
my head starts to swell		<br>		\n\
<br>									\n\
HTML is stupid and dumb		<br>		\n\
running through traffic		<br>		\n\
is ten times as fun			<br>		\n\
</body>									\n\
</html>									\n\
",	{	false, false, false, false, true,
		false, false, false, false, false, false, false,
		false, false, false, false,
		false, false, false,
		false, false, false, false
	}
},
// The trailing "// a,b" comments on the following samples give the byte
// offsets of the first and second word in the data string.
//---------  <= Ten characters in
{
"     rock&roll",		// 5,10
	{	false, false, false, false, false,
		true, false, true, true, false, false, true,
		false, false, false, false,
		false, false, false,
		false, false, false, false
	}
},
//---------  <= Ten characters in
{
"    rock&roll",		// 4,9
	{ 	false, false, false, false, false,
		true, true, true, true, false, true, false,
		false, false, false, false,
		true, false, false,
		true, true, true, false
	}
},
//---------  <= Ten characters in
{
"     roll&rock",		// 5,10
	{	false, false, false, false, false,
		false, true, true, true, false, true, false,
		false, false, false, false,
		false, true, false,
		false, false, false, false
	}
},
//---------  <= Ten characters in
{
"    roll&rock",		// 4,9
	{ 	false, false, false, false, false,
		true, true, true, true, false, true, true,
		false, false, false, false,
		false, false, true,
		true, true, false, true
	}
},
//---------  <= Ten characters in
{
"    ROCK&ROLL",		// 4,9
	{ 	false, false, false, false, false,
		false, false, false, false, false, false, false,
		false, false, false, false,
		false, false, false,
		true, true, true, false
	}
},
//---------  <= Ten characters in
{
"    rOlL&RoCk",		// 4,9
	{ 	false, false, false, false, false,
		false, false, false, false, false, false, false,
		false, false, false, false,
		false, false, false,
		true, true, false, true
	}
},
//------------------------------
// Binary samples for the mask, octal, and hex rules: the first two bytes
// are raw values, followed by a readable copy of those values.
{
"\xFF\xFF	FF FF",
	{	false, false, false, false, false,
		false, false, false, false, false, false, false,
		true, false, false, true,
		false, false, false,
		false, false, false, false
	}
},
//------------------------------
{
"\xFA\xFA	FA FA",
	{	false, false, false, false, false,
		false, false, false, false, false, false, false,
		true, false, false, false,
		false, false, false,
		false, false, false, false
	}
},
//------------------------------
{
"\xAF\xAF	AF AF",
	{	false, false, false, false, false,
		false, false, false, false, false, false, false,
		false, false, false, true,
		false, false, false,
		false, false, false, false
	}
},
//------------------------------
{
std::string("\033\000	033 000", 10),	// Otherwise, it thinks the NULL is the end of the string
	{	false, false, false, false, false,
		false, false, false, false, false, false, false,
		false, true, false, false,
		false, false, false,
		false, false, false, false
	}
},
//------------------------------
{
std::string("\000\034	000 034", 10),	// Otherwise, it thinks the NULL is the end of the string
	{	false, false, false, false, false,
		false, false, false, false, false, false, false,
		false, false, true, false,
		false, false, false,
		false, false, false, false
	}
},
//------------------------------
{
"\033\034	033 034",
	{	false, false, false, false, false,
		false, false, false, false, false, false, false,
		false, true, true, false,
		false, false, false,
		false, false, false, false
	}
},
	};	// tests[]
	const int32 testCount = sizeof(tests)/sizeof(test_case);

	// Run every rule against every data sample.
	for (int i = 0; i < testCount; i++) {
		// NOTE(review): NextSubTestBlock()/NextSubTest() come from the test
		// framework; presumably they only mark progress in the test output.
		if (i > 0)
			NextSubTestBlock();
		test_case &test = tests[i];
//		cout << "--------------------------------------------------------------------------------" << endl;
//		cout << test.data << endl;

		for (int j = 0; j < ruleCount; j++) {
			NextSubTest();
//			cout << "############################################################" << endl;
//			cout << rules[j] << endl;
//			cout << test.result[j] << endl;
			// Parse the rule text afresh for each (sample, rule) pair.
			Rule rule;
			BString errorMsg;
			status_t err = parse(rules[j], &rule, &errorMsg);
//			dumpStr(test.data, "str ");
			// Debugging hook: uncomment the lines below to print the parser's
			// error message when a rule fails to parse.
			if (err) {
//				cout << "PARSE FAILURE!!!" << endl;
//				cout << errorMsg.String() << endl;
			}
			// Every rule in rules[] is expected to parse successfully.
			CHK(err == B_OK);
			if (!err) {
				// Copy the sample into an in-memory BMallocIO for sniffing.
				// length() is used instead of strlen() so that samples with
				// embedded NUL bytes are written in full.
				BMallocIO data;
				data.Write(test.data.data(), test.data.length());
				bool match = rule.Sniff(&data);
//				cout << match << endl;
//				cout << "match == " << (match ? "yes" : "no") << ", "
//					 << ((match == test.result[j]) ? "SUCCESS" : "FAILURE") << endl;
				// The sniff result must equal the expectation for this pair.
				CHK(match == test.result[j]);
			}
		}
	}
#endif // !TEST_R5
}
1389