1/* 2 * Copyright 2002-2011, Haiku, Inc. All rights reserved. 3 * Copyright 2002 Alexander G. M. Smith. 4 * Copyright 2011, Clemens Zeidler <haiku@clemens-zeidler.de> 5 * Distributed under the terms of the MIT License. 6 */ 7#ifndef AGMS_BAYESIAN_SPAM_FILTER_H 8#define AGMS_BAYESIAN_SPAM_FILTER_H 9/****************************************************************************** 10 * $Id: SpamFilter.h 13952 2005-08-13 19:31:42Z agmsmith $ 11 * 12 * SpamFilter - Uses Bayesian statistics to evaluate the spaminess of a 13 * message. The evaluation is done by a separate server, this add-on just gets 14 * the text and uses scripting commands to get an evaluation from the server. 15 * If the server isn't running, it will be found and started up. Once the 16 * evaluation has been received, it is added to the message as an attribute and 17 * optionally as an addition to the subject. Some other add-on later in the 18 * pipeline will use the attribute to delete the message or move it to some 19 * other folder. 20 * 21 * Public Domain 2002, by Alexander G. M. Smith, no warranty. 22 * 23 * $Log: SpamFilter.cpp,v $ (SVN doesn't support log messages so manually done) 24 * r11769 | bonefish | 2005-03-17 03:30:54 -0500 (Thu, 17 Mar 2005) | 1 line 25 * Move trunk into respective module. 26 * 27 * r9669 | brunoga | 2004-10-30 18:23:26 -0400 (Sat, 30 Oct 2004) | 2 lines 28 * AGMS Spam Filter. 29 * 30 * Revision 1.8 2004/09/20 15:57:30 nwhitehorn 31 * Mostly updated the tree to Be/Haiku style identifier naming conventions. I have a few more things to work out, mostly in mail_util.h, and then I'm proceeding to jamify the build system. Then we go into Haiku CVS. 32 * 33 * Revision 1.7 2003/05/27 17:12:59 nwhitehorn 34 * Massive refactoring of the Protocol/ChainRunner/Filter system. You can probably 35 * examine its scope by examining the number of files changed. Regardless, this is 36 * preparation for lots of new features, and REAL WORKING IMAP. Yes, you heard me. 37 * Enjoy, and prepare for bugs (although I've fixed all the ones I've found, I susp 38 * ect there are some memory leaks in ChainRunner). 39 * 40 * Revision 1.6 2003/02/08 21:54:17 agmsmith 41 * Updated the AGMSBayesianSpamServer documentation to match the current 42 * version. Also removed the Beep options from the spam filter, now they 43 * are turned on or off in the system sound preferences. 44 * 45 * Revision 1.5 2002/12/18 02:27:45 agmsmith 46 * Added uncertain classification as suggested by BiPolar. 47 * 48 * Revision 1.4 2002/12/12 00:56:28 agmsmith 49 * Added some new spam filter options - self training (not implemented yet) 50 * and a button to edit the server settings. 51 * 52 * Revision 1.3 2002/11/28 20:20:57 agmsmith 53 * Now checks if the spam database is running in headers only mode, and 54 * then only downloads headers if that is the case. 55 * 56 * Revision 1.2 2002/11/10 19:36:27 agmsmith 57 * Retry launching server a few times, but not too many. 58 * 59 * Revision 1.1 2002/11/03 02:06:15 agmsmith 60 * Added initial version. 61 * 62 * Revision 1.5 2002/10/21 16:13:59 agmsmith 63 * Added option to have no words mean spam. 64 * 65 * Revision 1.4 2002/10/11 20:01:28 agmsmith 66 * Added sound effects (system beep) for genuine and spam, plus config option 67 * for it. 68 * 69 * Revision 1.3 2002/09/23 19:14:13 agmsmith 70 * Added an option to have the server quit when done. 71 * 72 * Revision 1.2 2002/09/23 03:33:34 agmsmith 73 * First working version, with cutoff ratio and subject modification, 74 * and an attribute added if a patch is made to the Folder filter. 75 * 76 * Revision 1.1 2002/09/21 20:47:57 agmsmith 77 * Initial revision 78 */ 79 80#include <Message.h> 81#include <List.h> 82#include <MailAddon.h> 83 84 85class AGMSBayesianSpamFilter : public MailFilter { 86public: 87 AGMSBayesianSpamFilter(MailProtocol& protocol, 88 AddonSettings* settings); 89 ~AGMSBayesianSpamFilter(); 90 91 void HeaderFetched(const entry_ref& ref, BFile* file); 92 void BodyFetched(const entry_ref& ref, BFile* file); 93 94private: 95 status_t _CheckForSpam(BFile* file); 96 //! if the server is not running start it 97 status_t _CheckForSpamServer(); 98 status_t _GetTokenizeMode(); 99 status_t _GetSpamRatio(const char* data, off_t dataSize, 100 float& ratio); 101 status_t _TrainServer(const char* data, off_t dataSize, 102 float spamRatio); 103 status_t _AddSpamToSubject(BNode* file, float spamRatio); 104 105 bool fAddSpamToSubject; 106 bool fAutoTraining; 107 float fGenuineCutoffRatio; 108 bool fHeaderOnly; 109 int fLaunchAttemptCount; 110 BMessenger fMessengerToServer; 111 bool fNoWordsMeansSpam; 112 bool fQuitServerWhenFinished; 113 float fSpamCutoffRatio; 114}; 115 116#endif /* AGMS_BAYESIAN_SPAM_FILTER_H */ 117