174cd43ecSTyler Dauwalder//----------------------------------------------------------------------
274cd43ecSTyler Dauwalder//  This software is part of the OpenBeOS distribution and is covered
3b6f76ebeSAugustin Cavalier//  by the MIT License.
474cd43ecSTyler Dauwalder//---------------------------------------------------------------------
574cd43ecSTyler Dauwalder/*!
674cd43ecSTyler Dauwalder	\file Pattern.cpp
774cd43ecSTyler Dauwalder	MIME sniffer pattern implementation
874cd43ecSTyler Dauwalder*/
974cd43ecSTyler Dauwalder
1074cd43ecSTyler Dauwalder#include <sniffer/Err.h>
1174cd43ecSTyler Dauwalder#include <sniffer/Pattern.h>
1274cd43ecSTyler Dauwalder#include <DataIO.h>
1374cd43ecSTyler Dauwalder#include <stdio.h>	// for SEEK_* defines
14e9e56038STyler Dauwalder#include <new>
1574cd43ecSTyler Dauwalder
16b4080297SIngo Weinhold#include <AutoDeleter.h>
17b4080297SIngo Weinhold
1809d84e61STyler Dauwalderusing namespace BPrivate::Storage::Sniffer;
1974cd43ecSTyler Dauwalder
20fe70cd16STyler DauwalderPattern::Pattern(const std::string &string, const std::string &mask)
2174cd43ecSTyler Dauwalder	: fCStatus(B_NO_INIT)
2274cd43ecSTyler Dauwalder	, fErrorMessage(NULL)
2374cd43ecSTyler Dauwalder{
2474cd43ecSTyler Dauwalder	SetTo(string, mask);
2574cd43ecSTyler Dauwalder}
2674cd43ecSTyler Dauwalder
27fe70cd16STyler DauwalderPattern::Pattern(const std::string &string)
28fe70cd16STyler Dauwalder	: fCStatus(B_NO_INIT)
29fe70cd16STyler Dauwalder	, fErrorMessage(NULL)
30fe70cd16STyler Dauwalder{
31fe70cd16STyler Dauwalder	// Build a mask with all bits turned on of the
32fe70cd16STyler Dauwalder	// appropriate length
33fe70cd16STyler Dauwalder	std::string mask = "";
340a87d01cSTyler Dauwalder	for (uint i = 0; i < string.length(); i++)
35fe70cd16STyler Dauwalder		mask += (char)0xFF;
36fe70cd16STyler Dauwalder	SetTo(string, mask);
37fe70cd16STyler Dauwalder}
38fe70cd16STyler Dauwalder
3974cd43ecSTyler DauwalderPattern::~Pattern() {
4074cd43ecSTyler Dauwalder	delete fErrorMessage;
4174cd43ecSTyler Dauwalder}
4274cd43ecSTyler Dauwalder
4374cd43ecSTyler Dauwalderstatus_t
4474cd43ecSTyler DauwalderPattern::InitCheck() const {
4574cd43ecSTyler Dauwalder	return fCStatus;
4674cd43ecSTyler Dauwalder}
4774cd43ecSTyler Dauwalder
4874cd43ecSTyler DauwalderErr*
4974cd43ecSTyler DauwalderPattern::GetErr() const {
5074cd43ecSTyler Dauwalder	if (fCStatus == B_OK)
5174cd43ecSTyler Dauwalder		return NULL;
5274cd43ecSTyler Dauwalder	else
5317e6de7aSshadow		return new(std::nothrow) Err(*fErrorMessage);
5474cd43ecSTyler Dauwalder}
5574cd43ecSTyler Dauwalder
/*!	\brief Debugging helper that prints every byte of \a string to stdout
	as a hexadecimal number followed by a space, terminated by a newline.

	\param string The bytes to dump.
	\param label Optional text printed as "label: " in front of the bytes;
		nothing is printed for it when NULL.
*/
void dumpStr(const std::string &string, const char *label = NULL) {
	if (label)
		printf("%s: ", label);
	for (size_t i = 0; i < string.length(); i++) {
		// Go through unsigned char so that bytes >= 0x80 are printed as
		// e.g. "ff" rather than a sign-extended "ffffffff" where plain
		// char is signed.
		printf("%x ", static_cast<unsigned int>(
			static_cast<unsigned char>(string[i])));
	}
	printf("\n");
}
63fe70cd16STyler Dauwalder
6474cd43ecSTyler Dauwalderstatus_t
65fe70cd16STyler DauwalderPattern::SetTo(const std::string &string, const std::string &mask) {
66fe70cd16STyler Dauwalder	fString = string;
67fe70cd16STyler Dauwalder	if (fString.length() == 0) {
68fe70cd16STyler Dauwalder		SetStatus(B_BAD_VALUE, "Sniffer pattern error: illegal empty pattern");
6974cd43ecSTyler Dauwalder	} else {
70fe70cd16STyler Dauwalder		fMask = mask;
71fe70cd16STyler Dauwalder//		dumpStr(string, "data");
72fe70cd16STyler Dauwalder//		dumpStr(mask, "mask");
73fe70cd16STyler Dauwalder		if (fString.length() != fMask.length()) {
74fe70cd16STyler Dauwalder			SetStatus(B_BAD_VALUE, "Sniffer pattern error: pattern and mask lengths do not match");
75fe70cd16STyler Dauwalder		} else {
76fe70cd16STyler Dauwalder			SetStatus(B_OK);
77fe70cd16STyler Dauwalder		}
7874cd43ecSTyler Dauwalder	}
79201e236eSIngo Weinhold	return fCStatus;
8074cd43ecSTyler Dauwalder}
8174cd43ecSTyler Dauwalder
8210db8711STyler Dauwalder/*! \brief Looks for a pattern match in the given data stream, starting from
8310db8711STyler Dauwalder	each offset withing the given range. Returns true is a match is found,
8410db8711STyler Dauwalder	false if not.
8510db8711STyler Dauwalder*/
8674cd43ecSTyler Dauwalderbool
8793d145bbSTyler DauwalderPattern::Sniff(Range range, BPositionIO *data, bool caseInsensitive) const {
8874cd43ecSTyler Dauwalder	int32 start = range.Start();
8974cd43ecSTyler Dauwalder	int32 end = range.End();
9074cd43ecSTyler Dauwalder	off_t size = data->Seek(0, SEEK_END);
9174cd43ecSTyler Dauwalder	if (end >= size)
9293d145bbSTyler Dauwalder		end = size-1;	// Don't bother searching beyond the end of the stream
9374cd43ecSTyler Dauwalder	for (int i = start; i <= end; i++) {
9493d145bbSTyler Dauwalder		if (Sniff(i, size, data, caseInsensitive))
9574cd43ecSTyler Dauwalder			return true;
9674cd43ecSTyler Dauwalder	}
97fe70cd16STyler Dauwalder	return false;
9874cd43ecSTyler Dauwalder}
9974cd43ecSTyler Dauwalder
1002150894bSTyler Dauwalder// BytesNeeded
1012150894bSTyler Dauwalder/*! \brief Returns the number of bytes needed to perform a complete sniff, or an error
1022150894bSTyler Dauwalder	code if something goes wrong.
1032150894bSTyler Dauwalder*/
1042150894bSTyler Dauwalderssize_t
1052150894bSTyler DauwalderPattern::BytesNeeded() const
1062150894bSTyler Dauwalder{
1072150894bSTyler Dauwalder	ssize_t result = InitCheck();
1082150894bSTyler Dauwalder	if (result == B_OK)
1092150894bSTyler Dauwalder		result = fString.length();
1102150894bSTyler Dauwalder	return result;
1112150894bSTyler Dauwalder}
1122150894bSTyler Dauwalder
1132150894bSTyler Dauwalder//#define OPTIMIZATION_IS_FOR_CHUMPS
1142150894bSTyler Dauwalder#if OPTIMIZATION_IS_FOR_CHUMPS
1152150894bSTyler Dauwalderbool
1162150894bSTyler DauwalderPattern::Sniff(off_t start, off_t size, BPositionIO *data, bool caseInsensitive) const {
1172150894bSTyler Dauwalder	off_t len = fString.length();
1182150894bSTyler Dauwalder	char *buffer = new(nothrow) char[len+1];
1192150894bSTyler Dauwalder	if (buffer) {
120b4080297SIngo Weinhold		ArrayDeleter<char> _(buffer);
1212150894bSTyler Dauwalder		ssize_t bytesRead = data->ReadAt(start, buffer, len);
1222150894bSTyler Dauwalder		// \todo If there are fewer bytes left in the data stream
1232150894bSTyler Dauwalder		// from the given position than the length of our data
1242150894bSTyler Dauwalder		// string, should we just return false (which is what we're
1252150894bSTyler Dauwalder		// doing now), or should we compare as many bytes as we
1262150894bSTyler Dauwalder		// can and return true if those match?
1272150894bSTyler Dauwalder		if (bytesRead < len)
1282150894bSTyler Dauwalder			return false;
1292150894bSTyler Dauwalder		else {
1302150894bSTyler Dauwalder			bool result = true;
1312150894bSTyler Dauwalder			if (caseInsensitive) {
1322150894bSTyler Dauwalder				for (int i = 0; i < len; i++) {
1332150894bSTyler Dauwalder					char secondChar;
1342150894bSTyler Dauwalder					if ('A' <= fString[i] && fString[i] <= 'Z')
1352150894bSTyler Dauwalder						secondChar = 'a' + (fString[i] - 'A');	// Also check lowercase
1362150894bSTyler Dauwalder					else if ('a' <= fString[i] && fString[i] <= 'z')
1372150894bSTyler Dauwalder						secondChar = 'A' + (fString[i] - 'a');	// Also check uppercase
1382150894bSTyler Dauwalder					else
1392150894bSTyler Dauwalder						secondChar = fString[i]; // Check the same char twice as punishment for doing a case insensitive search ;-)
1402150894bSTyler Dauwalder					if (((fString[i] & fMask[i]) != (buffer[i] & fMask[i]))
1412150894bSTyler Dauwalder					     && ((secondChar & fMask[i]) != (buffer[i] & fMask[i])))
1422150894bSTyler Dauwalder					{
1432150894bSTyler Dauwalder						result = false;
1442150894bSTyler Dauwalder						break;
1452150894bSTyler Dauwalder					}
1462150894bSTyler Dauwalder				}
1472150894bSTyler Dauwalder			} else {
1482150894bSTyler Dauwalder				for (int i = 0; i < len; i++) {
1492150894bSTyler Dauwalder					if ((fString[i] & fMask[i]) != (buffer[i] & fMask[i])) {
1502150894bSTyler Dauwalder						result = false;
1512150894bSTyler Dauwalder						break;
1522150894bSTyler Dauwalder					}
1532150894bSTyler Dauwalder				}
1542150894bSTyler Dauwalder			}
1552150894bSTyler Dauwalder			return result;
1562150894bSTyler Dauwalder		}
1572150894bSTyler Dauwalder	} else
1582150894bSTyler Dauwalder		return false;
1592150894bSTyler Dauwalder}
1602150894bSTyler Dauwalder#else
16174cd43ecSTyler Dauwalderbool
16293d145bbSTyler DauwalderPattern::Sniff(off_t start, off_t size, BPositionIO *data, bool caseInsensitive) const {
16374cd43ecSTyler Dauwalder	off_t len = fString.length();
16417e6de7aSshadow	char *buffer = new(std::nothrow) char[len+1];
16574cd43ecSTyler Dauwalder	if (buffer) {
166b4080297SIngo Weinhold		ArrayDeleter<char> _(buffer);
16774cd43ecSTyler Dauwalder		ssize_t bytesRead = data->ReadAt(start, buffer, len);
16893d145bbSTyler Dauwalder		// \todo If there are fewer bytes left in the data stream
16974cd43ecSTyler Dauwalder		// from the given position than the length of our data
17074cd43ecSTyler Dauwalder		// string, should we just return false (which is what we're
17174cd43ecSTyler Dauwalder		// doing now), or should we compare as many bytes as we
17274cd43ecSTyler Dauwalder		// can and return true if those match?
17374cd43ecSTyler Dauwalder		if (bytesRead < len)
17474cd43ecSTyler Dauwalder			return false;
17574cd43ecSTyler Dauwalder		else {
17674cd43ecSTyler Dauwalder			bool result = true;
17793d145bbSTyler Dauwalder			if (caseInsensitive) {
17893d145bbSTyler Dauwalder				for (int i = 0; i < len; i++) {
17993d145bbSTyler Dauwalder					char secondChar;
18093d145bbSTyler Dauwalder					if ('A' <= fString[i] && fString[i] <= 'Z')
18193d145bbSTyler Dauwalder						secondChar = 'a' + (fString[i] - 'A');	// Also check lowercase
18293d145bbSTyler Dauwalder					else if ('a' <= fString[i] && fString[i] <= 'z')
18393d145bbSTyler Dauwalder						secondChar = 'A' + (fString[i] - 'a');	// Also check uppercase
18493d145bbSTyler Dauwalder					else
18593d145bbSTyler Dauwalder						secondChar = fString[i]; // Check the same char twice as punishment for doing a case insensitive search ;-)
18693d145bbSTyler Dauwalder					if (((fString[i] & fMask[i]) != (buffer[i] & fMask[i]))
18793d145bbSTyler Dauwalder					     && ((secondChar & fMask[i]) != (buffer[i] & fMask[i])))
18893d145bbSTyler Dauwalder					{
18993d145bbSTyler Dauwalder						result = false;
19093d145bbSTyler Dauwalder						break;
19193d145bbSTyler Dauwalder					}
19293d145bbSTyler Dauwalder				}
19393d145bbSTyler Dauwalder			} else {
19493d145bbSTyler Dauwalder				for (int i = 0; i < len; i++) {
19593d145bbSTyler Dauwalder					if ((fString[i] & fMask[i]) != (buffer[i] & fMask[i])) {
19693d145bbSTyler Dauwalder						result = false;
19793d145bbSTyler Dauwalder						break;
19893d145bbSTyler Dauwalder					}
19974cd43ecSTyler Dauwalder				}
20074cd43ecSTyler Dauwalder			}
20174cd43ecSTyler Dauwalder			return result;
20274cd43ecSTyler Dauwalder		}
20374cd43ecSTyler Dauwalder	} else
20474cd43ecSTyler Dauwalder		return false;
20574cd43ecSTyler Dauwalder}
2062150894bSTyler Dauwalder#endif
20774cd43ecSTyler Dauwalder
20874cd43ecSTyler Dauwaldervoid
20974cd43ecSTyler DauwalderPattern::SetStatus(status_t status, const char *msg) {
21074cd43ecSTyler Dauwalder	fCStatus = status;
21174cd43ecSTyler Dauwalder	if (status == B_OK)
21274cd43ecSTyler Dauwalder		SetErrorMessage(NULL);
21374cd43ecSTyler Dauwalder	else {
21474cd43ecSTyler Dauwalder		if (msg)
21574cd43ecSTyler Dauwalder			SetErrorMessage(msg);
21674cd43ecSTyler Dauwalder		else {
21774cd43ecSTyler Dauwalder			SetErrorMessage("Sniffer parser error: Pattern::SetStatus() -- NULL msg with non-B_OK status.\n"
21874cd43ecSTyler Dauwalder				"(This is officially the most helpful error message you will ever receive ;-)");
21974cd43ecSTyler Dauwalder		}
22074cd43ecSTyler Dauwalder	}
22174cd43ecSTyler Dauwalder}
22274cd43ecSTyler Dauwalder
22374cd43ecSTyler Dauwaldervoid
22474cd43ecSTyler DauwalderPattern::SetErrorMessage(const char *msg) {
22574cd43ecSTyler Dauwalder	delete fErrorMessage;
22617e6de7aSshadow	fErrorMessage = (msg) ? (new(std::nothrow) Err(msg, -1)) : (NULL);
22774cd43ecSTyler Dauwalder}
22809d84e61STyler Dauwalder
22909d84e61STyler Dauwalder
23009d84e61STyler Dauwalder