1/*
2 * Copyright 2002-2011, Haiku, Inc. All rights reserved.
3 * Copyright 2002 Alexander G. M. Smith.
4 * Copyright 2011, Clemens Zeidler <haiku@clemens-zeidler.de>
5 * Distributed under the terms of the MIT License.
6 */
7#ifndef AGMS_BAYESIAN_SPAM_FILTER_H
8#define AGMS_BAYESIAN_SPAM_FILTER_H
9/******************************************************************************
10 * $Id: SpamFilter.h 13952 2005-08-13 19:31:42Z agmsmith $
11 *
12 * SpamFilter - Uses Bayesian statistics to evaluate the spaminess of a
13 * message.  The evaluation is done by a separate server, this add-on just gets
14 * the text and uses scripting commands to get an evaluation from the server.
15 * If the server isn't running, it will be found and started up.  Once the
16 * evaluation has been received, it is added to the message as an attribute and
17 * optionally as an addition to the subject.  Some other add-on later in the
18 * pipeline will use the attribute to delete the message or move it to some
19 * other folder.
20 *
21 * Public Domain 2002, by Alexander G. M. Smith, no warranty.
22 *
23 * $Log: SpamFilter.cpp,v $ (SVN doesn't support log messages so manually done)
24 * r11769 | bonefish | 2005-03-17 03:30:54 -0500 (Thu, 17 Mar 2005) | 1 line
25 * Move trunk into respective module.
26 *
27 * r9669 | brunoga | 2004-10-30 18:23:26 -0400 (Sat, 30 Oct 2004) | 2 lines
28 * AGMS Spam Filter.
29 *
30 * Revision 1.8  2004/09/20 15:57:30  nwhitehorn
31 * Mostly updated the tree to Be/Haiku style identifier naming conventions. I have a few more things to work out, mostly in mail_util.h, and then I'm proceeding to jamify the build system. Then we go into Haiku CVS.
32 *
33 * Revision 1.7  2003/05/27 17:12:59  nwhitehorn
34 * Massive refactoring of the Protocol/ChainRunner/Filter system. You can probably
35 * examine its scope by examining the number of files changed. Regardless, this is
36 * preparation for lots of new features, and REAL WORKING IMAP. Yes, you heard me.
37 * Enjoy, and prepare for bugs (although I've fixed all the ones I've found, I
38 * suspect there are some memory leaks in ChainRunner).
39 *
40 * Revision 1.6  2003/02/08 21:54:17  agmsmith
41 * Updated the AGMSBayesianSpamServer documentation to match the current
42 * version.  Also removed the Beep options from the spam filter, now they
43 * are turned on or off in the system sound preferences.
44 *
45 * Revision 1.5  2002/12/18 02:27:45  agmsmith
46 * Added uncertain classification as suggested by BiPolar.
47 *
48 * Revision 1.4  2002/12/12 00:56:28  agmsmith
49 * Added some new spam filter options - self training (not implemented yet)
50 * and a button to edit the server settings.
51 *
52 * Revision 1.3  2002/11/28 20:20:57  agmsmith
53 * Now checks if the spam database is running in headers only mode, and
54 * then only downloads headers if that is the case.
55 *
56 * Revision 1.2  2002/11/10 19:36:27  agmsmith
57 * Retry launching server a few times, but not too many.
58 *
59 * Revision 1.1  2002/11/03 02:06:15  agmsmith
60 * Added initial version.
61 *
62 * Revision 1.5  2002/10/21 16:13:59  agmsmith
63 * Added option to have no words mean spam.
64 *
65 * Revision 1.4  2002/10/11 20:01:28  agmsmith
66 * Added sound effects (system beep) for genuine and spam, plus config option
67 * for it.
68 *
69 * Revision 1.3  2002/09/23 19:14:13  agmsmith
70 * Added an option to have the server quit when done.
71 *
72 * Revision 1.2  2002/09/23 03:33:34  agmsmith
73 * First working version, with cutoff ratio and subject modification,
74 * and an attribute added if a patch is made to the Folder filter.
75 *
76 * Revision 1.1  2002/09/21 20:47:57  agmsmith
77 * Initial revision
78 */
79
80#include <Message.h>
81#include <List.h>
82#include <MailAddon.h>
83
84
/*!	Mail filter add-on that rates how spam-like a message is.

	As described in the header of this file, the rating itself is produced by
	a separate server that this add-on talks to (starting it when it is not
	running). The result is stored on the message as an attribute and
	optionally added to the subject, so that a later add-on in the pipeline
	can act on it.
*/
85class AGMSBayesianSpamFilter : public MailFilter {
86public:
87								AGMSBayesianSpamFilter(MailProtocol& protocol,
88									AddonSettings* settings);
89								~AGMSBayesianSpamFilter();
90
			// Hooks that receive the message's entry_ref and its file.
			// NOTE(review): presumably these override MailFilter callbacks that
			// fire once the header, respectively the body, has been downloaded
			// -- confirm against MailAddon.h.
91			void				HeaderFetched(const entry_ref& ref, BFile* file);
92			void				BodyFetched(const entry_ref& ref, BFile* file);
93
94private:
			// NOTE(review): presumably the common evaluation path used by both
			// public hooks -- confirm in the implementation file.
95			status_t			_CheckForSpam(BFile* file);
96			//! Starts the spam server if it is not already running.
97			status_t			_CheckForSpamServer();
			// NOTE(review): the changelog mentions checking whether the spam
			// database runs in headers-only mode; presumably this is that query
			// and it updates fHeaderOnly -- confirm.
98			status_t			_GetTokenizeMode();
			// Takes a buffer of dataSize bytes; "ratio" is a non-const
			// reference, presumably filled in with the server's spam rating
			// on success -- confirm.
99			status_t			_GetSpamRatio(const char* data, off_t dataSize,
100									float& ratio);
			// NOTE(review): presumably feeds the message back to the server
			// for the "self training" option named in the changelog -- confirm
			// how spamRatio selects the training class.
101			status_t			_TrainServer(const char* data, off_t dataSize,
102									float spamRatio);
			// NOTE(review): presumably implements the optional subject
			// modification described in the file header -- confirm.
103			status_t			_AddSpamToSubject(BNode* file, float spamRatio);
104
			// Configuration and state. The meanings below are taken from the
			// member names and the changelog above; where the values come from
			// (presumably the AddonSettings passed to the constructor) is not
			// visible in this header -- confirm in the implementation file.

			// Whether the rating is also added to the message subject.
105			bool				fAddSpamToSubject;
			// Whether self training is enabled.
106			bool				fAutoTraining;
			// Cutoff ratio for genuine mail; together with fSpamCutoffRatio
			// this presumably leaves room for the "uncertain" classification
			// mentioned in the changelog -- TODO confirm which side of the
			// cutoff means what.
107			float				fGenuineCutoffRatio;
			// Presumably set when the server's database works on headers only,
			// so that only headers need to be downloaded -- confirm.
108			bool				fHeaderOnly;
			// Presumably counts server launch attempts so that retries stay
			// limited ("a few times, but not too many") -- confirm.
109			int					fLaunchAttemptCount;
			// Messenger used to talk to the spam server.
110			BMessenger			fMessengerToServer;
			// Option: a message with no words is treated as spam.
111			bool				fNoWordsMeansSpam;
			// Option: ask the server to quit when done.
112			bool				fQuitServerWhenFinished;
			// Cutoff ratio for spam (see fGenuineCutoffRatio).
113			float				fSpamCutoffRatio;
114};
115
116#endif	/* AGMS_BAYESIAN_SPAM_FILTER_H */
117