1/*
2 * Copyright 2002-2006, Haiku Inc.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *		Tyler Dauwalder
7 *		Ingo Weinhold, bonefish@users.sf.net
8 */
9
10/*!
11	\file SnifferRules.cpp
12	SnifferRules class implementation
13*/
14
15#include <mime/SnifferRules.h>
16
17#include <stdio.h>
18#include <sys/stat.h>
19
20#include <Directory.h>
21#include <Entry.h>
22#include <File.h>
23#include <MimeType.h>
24#include <mime/database_support.h>
25#include <mime/DatabaseDirectory.h>
26#include <mime/DatabaseLocation.h>
27#include <mime/MimeSniffer.h>
28#include <sniffer/Parser.h>
29#include <sniffer/Rule.h>
30#include <StorageDefs.h>
31#include <storage_support.h>
32#include <String.h>
33
34
35#define DBG(x) x
36//#define DBG(x)
37#define OUT printf
38
39namespace BPrivate {
40namespace Storage {
41namespace Mime {
42
43using namespace BPrivate::Storage;
44
45/*!
46	\struct SnifferRules::sniffer_rule
47	\brief A parsed sniffer rule and its corresponding mime type and rule string
48
	The parsed sniffer rule is stored in the \c rule member, which is a pointer
	to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule
	objects (as opposed to \c sniffer_rule pointers) to be used with STL objects
52	without unnecessary copying. As a consequence of this decision, the
53	\c SnifferRules object managing the rule list is responsible for actually
54	deleting each \c sniffer_rule's \c Sniffer::Rule object.
55*/
56
57// sniffer_rule Constructor
58//! Creates a new \c sniffer_rule object
59SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule)
60	: rule(rule)
61{
62}
63
64// sniffer_rule Destructor
65//! Destroys the \c sniffer_rule object.
66/*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule
67	object's \c rule member is *NOT* deleted by this function.
68*/
69SnifferRules::sniffer_rule::~sniffer_rule()
70{
71}
72
73// private functions
74/*! \brief Returns true if \a left's priority is greater than \a right's
75
76	This may seem slightly backwards, but since sort() using
77	operator<() sorts in ascending order, we say "left < right"
78	if "left.priority > right.priority" to get them sorted in
79	ascending order. Super, no?
80
81	Also, sniffer_rule objects with \c NULL \c rule members are
82	treated as having minimal priority (and thus are placed at
83	the end of the list of rules).
84
85	Finally, sniffer_rule objects that are otherwise equal are
86	sorted in reverse alphabetic order (thus placing sniffer
87	rules for supertypes *after* sniffer rules for subtypes
88	of said supertype when both rules have identical priorities).
89*/
90bool operator<(const SnifferRules::sniffer_rule &left, const SnifferRules::sniffer_rule &right)
91{
92	if (left.rule && right.rule) {
93		double leftPriority = left.rule->Priority();
94		double rightPriority = right.rule->Priority();
95		if (leftPriority > rightPriority) {
96			return true;	// left < right
97		} else if (rightPriority > leftPriority) {
98			return false;	// right < left
99		} else {
100			return left.type > right.type;
101		}
102	} else if (left.rule) {
103		return true; 	// left < right
104	} else {
105		return false;	// right < left
106	}
107}
108
109/*!
110	\class SnifferRules
111	\brief Manages the sniffer rules for the entire database
112*/
113
114// Constructor
115//! Constructs a new SnifferRules object
116SnifferRules::SnifferRules(DatabaseLocation* databaseLocation,
117	MimeSniffer* mimeSniffer)
118	:
119	fDatabaseLocation(databaseLocation),
120	fMimeSniffer(mimeSniffer),
121	fMaxBytesNeeded(0),
122	fHaveDoneFullBuild(false)
123{
124}
125
126// Destructor
127/*! \brief Destroys the \c SnifferRules object and all dynamically allocated
128	\c Sniffer::Rule objects scattered throughout the rule list in
129	\c sniffer_rule::rule members.
130*/
131SnifferRules::~SnifferRules()
132{
133	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
134		   i != fRuleList.end(); i++) {
135		delete i->rule;
136		i->rule = NULL;
137	}
138}
139
140// GuessMimeType
141/*!	\brief Guesses a MIME type for the supplied entry_ref.
142
143	Only the data in the given entry is considered, not the filename or
144	its extension. Please see GuessMimeType(BFile*, const void *, int32,
145	BString*) for more details.
146
147	\param ref The entry to sniff
148	\param type Pointer to a pre-allocated BString which is set to the
149		   resulting MIME type.
150	\return
151	- \c B_OK: success
152	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
153	- error code: failure
154*/
155status_t
156SnifferRules::GuessMimeType(const entry_ref *ref, BString *type)
157{
158	status_t err = ref && type ? B_OK : B_BAD_VALUE;
159	ssize_t bytes = 0;
160	char *buffer = NULL;
161	BFile file;
162
163	// First find out the max number of bytes we need to read
164	// from the file to fully accomodate all of our currently
165	// installed sniffer rules
166	if (!err) {
167		bytes = MaxBytesNeeded();
168		if (bytes < 0)
169			err = bytes;
170	}
171
172	// Next read that many bytes (or fewer, if the file isn't
173	// that long) into a buffer
174	if (!err) {
175		buffer = new(std::nothrow) char[bytes];
176		if (!buffer)
177			err = B_NO_MEMORY;
178	}
179
180	if (!err)
181		err = file.SetTo(ref, B_READ_ONLY);
182	if (!err) {
183		bytes = file.Read(buffer, bytes);
184		if (bytes < 0)
185			err = bytes;
186	}
187
188	// Now sniff the buffer
189	if (!err)
190		err = GuessMimeType(&file, buffer, bytes, type);
191
192	delete[] buffer;
193
194	return err;
195}
196
197// GuessMimeType
198/*!	\brief Guesses a MIME type for the given chunk of data.
199
200	Please see GuessMimeType(BFile*, const void *, int32, BString*) for more
201	details.
202
203	\param buffer Pointer to a data buffer to sniff
204	\param length The length of the data buffer pointed to by \a buffer
205	\param type Pointer to a pre-allocated BString which is set to the
206		   resulting MIME type.
207	\return
208	- \c B_OK: success
209	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
210	- error code: failure
211*/
212status_t
213SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type)
214{
215	return GuessMimeType(NULL, buffer, length, type);
216}
217
218// SetSnifferRule
219/*! Updates the sniffer rule for the given type
220
221	If the a rule currently exists in the rule list for the given type,
222	it is first removed before the new rule is inserted.
223
224	The new rule is inserted in its proper, sorted position in the list.
225
226	\param type The type of interest
227	\param rule The new sniffer rule
228	\return
229	- \c B_OK: success
230	- other error code: failure
231*/
232status_t
233SnifferRules::SetSnifferRule(const char *type, const char *rule)
234{
235	status_t err = type && rule ? B_OK : B_BAD_VALUE;
236	if (!err && !fHaveDoneFullBuild)
237		return B_OK;
238
239	sniffer_rule item(new Sniffer::Rule());
240	BString parseError;
241
242	// Check the mem alloc
243	if (!err)
244		err = item.rule ? B_OK : B_NO_MEMORY;
245	// Prepare the sniffer_rule
246	if (!err) {
247		item.type = type;
248		item.rule_string = rule;
249		err = Sniffer::parse(rule, item.rule, &parseError);
250		if (err)
251			DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n",
252				parseError.String()));
253	}
254	// Remove any previous rule for this type
255	if (!err)
256		err = DeleteSnifferRule(type);
257	// Insert the new rule at the proper position in
258	// the sorted rule list (remembering that our list
259	// is sorted in ascending order using
260	// operator<(sniffer_rule&, sniffer_rule&))
261	if (!err) {
262		std::list<sniffer_rule>::iterator i;
263		for (i = fRuleList.begin(); i != fRuleList.end(); i++) {
264			 if (item < (*i)) {
265			 	fRuleList.insert(i, item);
266			 	break;
267			 }
268		}
269		if (i == fRuleList.end())
270			fRuleList.push_back(item);
271	}
272
273	return err;
274}
275
276// DeleteSnifferRule
277/*! \brief Removes the sniffer rule for the given type from the rule list
278	\param type The type of interest
279	\return
280	- \c B_OK: success (even if no rule existed for the given type)
281	- other error code: failure
282*/
283status_t
284SnifferRules::DeleteSnifferRule(const char *type)
285{
286	status_t err = type ? B_OK : B_BAD_VALUE;
287	if (!err && !fHaveDoneFullBuild)
288		return B_OK;
289
290	// Find the rule in the list and remove it
291	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
292		   i != fRuleList.end(); i++) {
293		if (i->type == type) {
294			fRuleList.erase(i);
295			break;
296		}
297	}
298
299	return err;
300}
301
302// PrintToStream
303//! Dumps the list of sniffer rules in sorted order to standard output
304void
305SnifferRules::PrintToStream() const
306{
307	printf("\n");
308	printf("--------------\n");
309	printf("Sniffer Rules:\n");
310	printf("--------------\n");
311
312	if (fHaveDoneFullBuild) {
313		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
314			   i != fRuleList.end(); i++) {
315			printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str());
316		}
317	} else {
318		printf("You haven't built your rule list yet, chump. ;-)\n");
319	}
320}
321
322// BuildRuleList
323/*! \brief Crawls through the database, parses each sniffer rule it finds, adds
324	each parsed rule to the rule list, and sorts the list by priority, largest first.
325
326	Initial MaxBytesNeeded() info is compiled by this function as well.
327*/
328status_t
329SnifferRules::BuildRuleList()
330{
331	fRuleList.clear();
332
333	ssize_t maxBytesNeeded = 0;
334	ssize_t bytesNeeded = 0;
335	DatabaseDirectory root;
336
337	status_t err = root.Init(fDatabaseLocation);
338	if (!err) {
339		root.Rewind();
340		while (true) {
341			BEntry entry;
342			err = root.GetNextEntry(&entry);
343			if (err) {
344				// If we've come to the end of list, it's not an error
345				if (err == B_ENTRY_NOT_FOUND)
346					err = B_OK;
347				break;
348			} else {
349				// Check that this entry is both a directory and a valid MIME string
350				char supertype[B_PATH_NAME_LENGTH];
351				if (entry.IsDirectory()
352				      && entry.GetName(supertype) == B_OK
353				         && BMimeType::IsValid(supertype)) {
354					// Make sure the supertype string is all lowercase
355					BPrivate::Storage::to_lower(supertype);
356
357					// First, iterate through this supertype directory and process
358					// all of its subtypes
359					DatabaseDirectory dir;
360					if (dir.Init(fDatabaseLocation, supertype) == B_OK) {
361						dir.Rewind();
362						while (true) {
363							BEntry subEntry;
364							err = dir.GetNextEntry(&subEntry);
365							if (err) {
366								// If we've come to the end of list, it's not an error
367								if (err == B_ENTRY_NOT_FOUND)
368									err = B_OK;
369								break;
370							} else {
371								// Get the subtype's name
372								char subtype[B_PATH_NAME_LENGTH];
373								if (subEntry.GetName(subtype) == B_OK) {
374									BPrivate::Storage::to_lower(subtype);
375
376									char fulltype[B_PATH_NAME_LENGTH];
377									snprintf(fulltype, B_PATH_NAME_LENGTH, "%s/%s",
378										supertype, subtype);
379
380									// Process the subtype
381									ProcessType(fulltype, &bytesNeeded);
382									if (bytesNeeded > maxBytesNeeded)
383										maxBytesNeeded = bytesNeeded;
384								}
385							}
386						}
387					} else {
388						DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
389						          "Failed opening supertype directory '%s'\n",
390						            supertype));
391					}
392
393					// Second, process the supertype
394					ProcessType(supertype, &bytesNeeded);
395					if (bytesNeeded > maxBytesNeeded)
396						maxBytesNeeded = bytesNeeded;
397				}
398			}
399		}
400	} else {
401		DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
402		          "Failed opening mime database directory.\n"));
403	}
404
405	if (!err) {
406		fRuleList.sort();
407		fMaxBytesNeeded = maxBytesNeeded;
408		fHaveDoneFullBuild = true;
409//		PrintToStream();
410	} else {
411		DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%"
412			B_PRIx32 "\n", err));
413	}
414	return err;
415}
416
417// GuessMimeType
418/*!	\brief Guesses a MIME type for the supplied chunk of data.
419
420	This is accomplished by searching through the currently installed
421	list of sniffer rules for a rule that matches on the given data buffer.
422	Rules are searched in order of priority (higher priority first). Rules
423	of equal priority are searched in reverse-alphabetical order (that way
424	"supertype/subtype" form rules are checked before "supertype-only" form
425	rules if their priorities happen to be identical).
426
427	\param file The file to sniff. May be \c NULL. \a buffer is always given.
428	\param buffer Pointer to a data buffer to sniff
429	\param length The length of the data buffer pointed to by \a buffer
430	\param type Pointer to a pre-allocated BString which is set to the
431		   resulting MIME type.
432	\return
433	- \c B_OK: success
434	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
435	- error code: failure
436*/
437status_t
438SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length,
439	BString *type)
440{
441	status_t err = buffer && type ? B_OK : B_BAD_VALUE;
442	if (err)
443		return err;
444
445	// wrap the buffer by a BMemoryIO
446	BMemoryIO data(buffer, length);
447
448	if (!fHaveDoneFullBuild)
449		err = BuildRuleList();
450
451	// first ask the MIME sniffer for a suitable type
452	float addonPriority = -1;
453	BMimeType mimeType;
454	if (!err && fMimeSniffer != NULL) {
455		addonPriority = fMimeSniffer->GuessMimeType(file, buffer, length,
456			&mimeType);
457	}
458
459	if (!err) {
460		// Run through our rule list, which is sorted in order of
461		// descreasing priority, and see if one of the rules sniffs
462		// out a match
463		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
464			   i != fRuleList.end(); i++) {
465			if (i->rule) {
466				// If an add-on identified the type with a priority at least
467				// as great as the remaining rules, we can stop further
468				// processing and return the type found by the add-on.
469				if (i->rule->Priority() <= addonPriority) {
470					*type = mimeType.Type();
471					return B_OK;
472				}
473
474				if (i->rule->Sniff(&data)) {
475					type->SetTo(i->type.c_str());
476					return B_OK;
477				}
478			} else {
479				DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): "
480					"NULL sniffer_rule::rule member found in rule list for type == '%s', "
481					"rule_string == '%s'\n",
482					i->type.c_str(), i->rule_string.c_str()));
483			}
484		}
485
486		// The sniffer add-on manager might have returned a low priority
487		// (lower than any of a rule).
488		if (addonPriority >= 0) {
489			*type = mimeType.Type();
490			return B_OK;
491		}
492
493		// If we get here, we didn't find a damn thing
494		err = kMimeGuessFailureError;
495	}
496	return err;
497}
498
499// MaxBytesNeeded
500/*! \brief Returns the maxmimum number of bytes needed in a data buffer for
501	all the currently installed rules to be able to perform a complete sniff,
502	or an error code if something goes wrong.
503
504	If the internal rule list has not yet been built (this includes parsing
505	all the installed rules), it will be.
506
507	\return: If the return value is non-negative, it represents	the max number
508	of bytes needed to do a complete sniff. Otherwise, the number returned is
509	an error code.
510*/
511ssize_t
512SnifferRules::MaxBytesNeeded()
513{
514	ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList();
515	if (!err) {
516		err = fMaxBytesNeeded;
517
518		if (fMimeSniffer != NULL) {
519			fMaxBytesNeeded = max_c(fMaxBytesNeeded,
520				(ssize_t)fMimeSniffer->MinimalBufferSize());
521		}
522	}
523	return err;
524}
525
526// ProcessType
527/*! \brief Handles a portion of the initial rule list construction for
528	the given mime type.
529
530	\note To be called by BuildRuleList() *ONLY*. :-)
531
532	\param type The mime type of interest. The mime string is expected to be valid
533	            and lowercase. Both "supertype" and "supertype/subtype" mime types
534	            are allowed.
535	\param bytesNeeded Returns the minimum number of bytes needed for this rule to
536	                   perform a complete sniff. May not be NULL because I'm lazy
537	                   and this function is for internal use only anyway.
538	\return
539	The return value is essentially ignored (as this function prints out the
540	debug warning if a parse fails), but that being said:
541	- \c B_OK: success
542	- \c other error code: failure
543*/
544status_t
545SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded)
546{
547	status_t err = type && bytesNeeded ? B_OK : B_BAD_VALUE;
548	if (!err)
549		*bytesNeeded = 0;
550
551	BString str;
552	BString errorMsg;
553	sniffer_rule rule(new Sniffer::Rule());
554
555	// Check the mem alloc
556	if (!err)
557		err = rule.rule ? B_OK : B_NO_MEMORY;
558	// Read the attr
559	if (!err) {
560		err = fDatabaseLocation->ReadStringAttribute(type, kSnifferRuleAttr,
561			str);
562	}
563	// Parse the rule
564	if (!err) {
565		err = Sniffer::parse(str.String(), rule.rule, &errorMsg);
566		if (err)
567			DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String()));
568	}
569	if (!err) {
570		// Note the bytes needed
571		*bytesNeeded = rule.rule->BytesNeeded();
572
573		// Add the rule to the list
574		rule.type = type;
575		rule.rule_string = str.String();
576		fRuleList.push_back(rule);
577	}
578	return err;
579}
580
581} // namespace Mime
582} // namespace Storage
583} // namespace BPrivate
584
585