1/******************************************************************************
2 * $Id: spamdbm.cpp 30630 2009-05-05 01:31:01Z bga $
3 *
4 * This is a BeOS program for classifying e-mail messages as spam (unwanted
5 * junk mail) or as genuine mail using a Bayesian statistical approach.  There
6 * is also a Mail Daemon Replacement add-on to filter mail using the
7 * classification statistics collected earlier.
8 *
9 * See also http://www.paulgraham.com/spam.html for a good writeup and
10 * http://www.tuxedo.org/~esr/bogofilter/ for another implementation.
11 * And more recently, Gary Robinson's write up of his improved algorithm
12 * at http://radio.weblogs.com/0101454/stories/2002/09/16/spamDetection.html
13 * which gives a better spread in spam ratios and slightly fewer
14 * misclassifications.
15 *
16 * Note that this uses the AGMS vacation coding style, not the OpenTracker one.
17 * That means no tabs, indents are two spaces, m_ is the prefix for member
18 * variables, g_ is the prefix for global names, C style comments, constants
19 * are in all capital letters and most other things are mixed case, it's word
20 * wrapped to fit in 79 characters per line to make proofreading on paper
21 * easier, and functions are listed in reverse dependency order so that forward
22 * declarations (function prototypes with no code) aren't needed.
23 *
24 * The Original Design:
25 * There is a spam database (just a file listing words and number of times they
26 * were used in spam and non-spam messages) that a BeMailDaemon input filter
27 * will use when scanning email.  It will mark the mail with the spam
28 * probability (an attribute, optionally a mail header field) and optionally do
29 * something if the probability exceeds a user defined level (delete message,
30 * change subject, file in a different folder).  Or should that be a different
31 * filter?  Outside the mail system, the probability can be used in queries to
32 * find spam.
33 *
34 * A second user application will be used to update the database.  Besides
35 * showing you the current list of words, you can drag and drop files to mark
36 * them as spam or non-spam (a balanced binary tree is used internally to make
37 * word storage fast).  It will add a second attribute to the files to show how
38 * they have been classified by the user (and won't update the database if you
39 * accidentally try to classify a file again).  Besides drag and drop, there
40 * will be a command line interface and a message passing interface.  BeMail
41 * (or other programs) will then communicate via messages to tell it when the
42 * user marks a message as spam or not (via having separate delete spam /
43 * delete genuine mail buttons and a menu item or two).
44 *
45 * Plus lots of details, like the rename swap method to update the database
46 * file (so programs with the old file open aren't affected).  A nice tab text
47 * format so you can open the database in a spreadsheet.  Startup and shutdown
48 * control of the updater from BeMail.  Automatic creation of the indices
49 * needed by the filter.  MIME types for the database file.  Icons for the app.
50 * System settings to enable tracker to display the new attributes when viewing
51 * e-mail (and maybe news articles if someone ever gets around to an NNTP as
52 * files reader).  Documentation.  Recursive directory traversal for the
53 * command line or directory drag and drop.  Options for the updater to warn or
54 * ignore non-email files.  Etc.
55 *
56 * The Actual Implementation:
57 * The spam database updates and the test for spam have been combined into one
58 * program which runs as a server.  That way there won't be as long a delay
59 * when the e-mail system wants to check for spam, because the database is
60 * already loaded by the server and in memory.  The MDR mail filter add-on
61 * simply sends scripting commands to the server (and starts it up if it isn't
62 * already running).  The filter takes care of marking the messages when it
63 * gets the rating back from the server, and then the rest of the mail system
64 * rule chain can delete the message or otherwise manipulate it.
65 *
66 * Revision History (now manually updated due to SVN's philosophy)
67 * $Log: spamdbm.cpp,v $
68 * ------------------------------------------------------------------------
69 * r15195 | agmsmith | 2005-11-27 21:07:55 -0500 (Sun, 27 Nov 2005) | 4 lines
70 * Just a few minutes after checking in, I mentioned it to Japanese expert Koki
71 * and he suggested also including the Japanese comma.  So before I forget to
72 * do it...
73 *
74 * ------------------------------------------------------------------------
75 * r15194 | agmsmith | 2005-11-27 20:37:13 -0500 (Sun, 27 Nov 2005) | 5 lines
76 * Truncate overly long URLs to the maximum word length.  Convert Japanese
77 * periods to spaces so that more "words" are found.  Fix UTF-8 comparison
78 * problems with tolower() incorrectly converting characters with the high bit
79 * set.
80 *
81 * r15098 | agmsmith | 2005-11-23 23:17:00 -0500 (Wed, 23 Nov 2005) | 5 lines
82 * Added better tokenization so that HTML is parsed and things like tags
83 * between letters of a word no longer hide that word.  After testing, the
84 * result seems to be a tighter spread of ratings when done in full text plus
85 * header mode.
86 *
87 * Revision 1.10  2005/11/24 02:08:39  agmsmith
88 * Fixed up prefix codes, Z for things that are inside other things.
89 *
90 * Revision 1.9  2005/11/21 03:28:03  agmsmith
91 * Added a function for extracting URLs.
92 *
93 * Revision 1.8  2005/11/09 03:36:18  agmsmith
94 * Removed noframes detection (doesn't show up in e-mails).  Now use
95 * just H for headers and Z for HTML tag junk.
96 *
97 * Revision 1.7  2005/10/24 00:00:08  agmsmith
98 * Adding HTML tag removal, which also affected the search function so it
 * could search for single part things like &nbsp;.
100 *
101 * Revision 1.6  2005/10/17 01:55:08  agmsmith
102 * Remove HTML comments and a few other similar things.
103 *
104 * Revision 1.5  2005/10/16 18:35:36  agmsmith
105 * Under construction - looking into HTML not being in UTF-8.
106 *
107 * Revision 1.4  2005/10/11 01:51:21  agmsmith
108 * Starting on the tokenising passes.  Still need to test asian truncation.
109 *
110 * Revision 1.3  2005/10/06 11:54:07  agmsmith
111 * Not much.
112 *
113 * Revision 1.2  2005/09/12 01:49:37  agmsmith
114 * Enable case folding for the whole file tokenizer.
115 *
116 * r13961 | agmsmith | 2005-08-13 22:25:28 -0400 (Sat, 13 Aug 2005) | 2 lines
117 * Source code changes so that mboxtobemail now compiles and is in the build
118 * system.
119 *
120 * r13959 | agmsmith | 2005-08-13 22:05:27 -0400 (Sat, 13 Aug 2005) | 2 lines
121 * Rename the directory before doing anything else, otherwise svn dies badly.
122 *
123 * r13952 | agmsmith | 2005-08-13 15:31:42 -0400 (Sat, 13 Aug 2005) | 3 lines
124 * Added the resources and file type associations, changed the application
125 * signature and otherwise made the spam detection system work properly again.
126 *
127 * r13951 | agmsmith | 2005-08-13 11:40:01 -0400 (Sat, 13 Aug 2005) | 2 lines
128 * Had to do the file rename as a separate operation due to SVN limitations.
129 *
130 * r13950 | agmsmith | 2005-08-13 11:38:44 -0400 (Sat, 13 Aug 2005) | 3 lines
131 * Oops, "spamdb" is already used for a Unix package.  And spamdatabase is
132 * already reserved by a domain name squatter.  Use "spamdbm" instead.
133 *
134 * r13949 | agmsmith | 2005-08-13 11:17:52 -0400 (Sat, 13 Aug 2005) | 3 lines
135 * Renamed spamfilter to be the more meaningful spamdb (spam database) and
136 * moved it into its own source directory in preparation for adding resources.
137 *
138 * r13628 | agmsmith | 2005-07-10 20:11:29 -0400 (Sun, 10 Jul 2005) | 3 lines
139 * Updated keyword expansion to use SVN keywords.  Also seeing if svn is
140 * working well enough for me to update files from BeOS R5.
141 *
142 * r11909 | axeld | 2005-03-18 19:09:19 -0500 (Fri, 18 Mar 2005) | 2 lines
143 * Moved bin/ directory out of apps/.
144 *
145 * r11769 | bonefish | 2005-03-17 03:30:54 -0500 (Thu, 17 Mar 2005) | 1 line
146 * Move trunk into respective module.
147 *
148 * r10362 | nwhitehorn | 2004-12-06 20:14:05 -0500 (Mon, 06 Dec 2004) | 2 lines
149 * Fixed the spam filter so it works correctly now.
150 *
151 * r9934 | nwhitehorn | 2004-11-11 21:55:05 -0500 (Thu, 11 Nov 2004) | 2 lines
152 * Added AGMS's excellent spam detection software.  Still some weirdness with
153 * the configuration interface from E-mail prefs.
154 *
155 * Revision 1.2  2004/12/07 01:14:05  nwhitehorn
156 * Fixed the spam filter so it works correctly now.
157 *
158 * Revision 1.87  2004/09/20 15:57:26  nwhitehorn
159 * Mostly updated the tree to Be/Haiku style identifier naming conventions.  I
160 * have a few more things to work out, mostly in mail_util.h, and then I'm
161 * proceeding to jamify the build system.  Then we go into Haiku CVS.
162 *
163 * Revision 1.86  2003/07/26 16:47:46  agmsmith
164 * Bug - wasn't allowing double classification if the user had turned on
165 * the option to ignore the previous classification.
166 *
167 * Revision 1.85  2003/07/08 14:52:57  agmsmith
168 * Fix bug with classification choices dialog box coming up with weird
169 * sizes due to RefsReceived message coming in before ReadyToRun had
170 * finished setting up the default sizes of the controls.
171 *
172 * Revision 1.84  2003/07/04 19:59:29  agmsmith
173 * Now with a GUI option to let you declassify messages (set them back
174 * to uncertain, rather than spam or genuine).  Required a BAlert
175 * replacement since BAlerts can't do four buttons.
176 *
177 * Revision 1.83  2003/07/03 20:40:36  agmsmith
178 * Added Uncertain option for declassifying messages.
179 *
180 * Revision 1.82  2003/06/16 14:57:13  agmsmith
181 * Detect spam which uses mislabeled text attachments, going by the file name
182 * extension.
183 *
184 * Revision 1.81  2003/04/08 20:27:04  agmsmith
185 * AGMSBayesianSpamServer now shuts down immediately and returns true if
186 * it is asked to quit by the registrar.
187 *
188 * Revision 1.80  2003/04/07 19:20:27  agmsmith
189 * Ooops, int64 doesn't exist, use long long instead.
190 *
191 * Revision 1.79  2003/04/07 19:05:22  agmsmith
192 * Now with Allen Brunson's atoll for PPC (you need the %Ld, but that
193 * becomes %lld on other systems).
194 *
195 * Revision 1.78  2003/04/04 22:43:53  agmsmith
196 * Fixed up atoll PPC processor hack so it would actually work, was just
197 * returning zero which meant that it wouldn't load in the database file
198 * (read the size as zero).
199 *
200 * Revision 1.77  2003/01/22 03:19:48  agmsmith
201 * Don't convert words to lower case, the case is important for spam.
202 * Particularly sentences which start with exciting words, which you
203 * normally won't use at the start of a sentence (and thus capitalize).
204 *
205 * Revision 1.76  2002/12/18 02:29:22  agmsmith
206 * Add space for the Uncertain display in Tracker.
207 *
208 * Revision 1.75  2002/12/18 01:54:37  agmsmith
209 * Added uncertain sound effect.
210 *
211 * Revision 1.74  2002/12/13 23:53:12  agmsmith
212 * Minimize the window before opening it so that it doesn't flash on the
213 * screen in server mode.  Also load the database when the window is
214 * displayed so that the user can see the words.
215 *
216 * Revision 1.73  2002/12/13 20:55:57  agmsmith
217 * Documentation.
218 *
219 * Revision 1.72  2002/12/13 20:26:11  agmsmith
220 * Fixed bug with adding messages in strings to database (was limited to
221 * messages at most 1K long).  Also changed default server mode to true
222 * since that's what people use most.
223 *
224 * Revision 1.71  2002/12/11 22:37:30  agmsmith
225 * Added commands to train on spam and genuine e-mail messages passed
 * in string arguments rather than via external files.
227 *
228 * Revision 1.70  2002/12/10 22:12:41  agmsmith
229 * Adding a message to the database now uses a BPositionIO rather than a
230 * file and file name (for future string rather than file additions).  Also
231 * now re-evaluate a file after reclassifying it so that the user can see
232 * the new ratio.  Also remove the [Spam 99.9%] subject prefix when doing
233 * a re-evaluation or classification (the number would be wrong).
234 *
235 * Revision 1.69  2002/12/10 01:46:04  agmsmith
236 * Added the Chi-Squared scoring method.
237 *
238 * Revision 1.68  2002/11/29 22:08:25  agmsmith
239 * Change default purge age to 2000 so that hitting the purge button
240 * doesn't erase stuff from the new sample database.
241 *
242 * Revision 1.67  2002/11/25 20:39:39  agmsmith
243 * Don't need to massage the MIME type since the mail library now does
244 * the lower case conversion and converts TEXT to text/plain too.
245 *
246 * Revision 1.66  2002/11/20 22:57:12  nwhitehorn
247 * PPC Compatibility Fixes
248 *
249 * Revision 1.65  2002/11/10 18:43:55  agmsmith
250 * Added a time delay to some quitting operations so that scripting commands
251 * from a second client (like a second e-mail account) will make the program
252 * abort the quit operation.
253 *
254 * Revision 1.64  2002/11/05 18:05:16  agmsmith
255 * Looked at Nathan's PPC changes (thanks!), modified style a bit.
256 *
257 * Revision 1.63  2002/11/04 03:30:22  nwhitehorn
258 * Now works (or compiles at least) on PowerPC.  I'll get around to testing it
259 * later.
260 *
261 * Revision 1.62  2002/11/04 01:03:33  agmsmith
262 * Fixed warnings so it compiles under the bemaildaemon system.
263 *
264 * Revision 1.61  2002/11/03 23:00:37  agmsmith
265 * Added to the bemaildaemon project on SourceForge.  Hmmmm, seems to switch to
266 * a new version if I commit and specify a message, but doesn't accept the
267 * message and puts up the text editor.  Must be a bug where cvs eats the first
268 * option after "commit".
269 *
270 * Revision 1.60.1.1  2002/10/22 14:29:27  agmsmith
271 * Needed to recompile with the original Libmail.so from Beta/1 since
272 * the current library uses a different constructor, and thus wouldn't
273 * run when used with the old library.
274 *
275 * Revision 1.60  2002/10/21 16:41:27  agmsmith
276 * Return a special error code when no words are found in a message,
277 * so that messages without text/plain parts can be recognized as
278 * spam by the mail filter.
279 *
280 * Revision 1.59  2002/10/20 21:29:47  agmsmith
281 * Watch out for MIME types of "text", treat as text/plain.
282 *
283 * Revision 1.58  2002/10/20 18:29:07  agmsmith
284 * *** empty log message ***
285 *
286 * Revision 1.57  2002/10/20 18:25:02  agmsmith
287 * Fix case sensitivity in MIME type tests, and fix text/any test.
288 *
289 * Revision 1.56  2002/10/19 17:00:10  agmsmith
290 * Added the pop-up menu for the tokenize modes.
291 *
292 * Revision 1.55  2002/10/19 14:54:06  agmsmith
293 * Fudge MIME type of body text components so that they get
294 * treated as text.
295 *
296 * Revision 1.54  2002/10/19 00:56:37  agmsmith
297 * The parsing of e-mail messages seems to be working now, just need
298 * to add some user interface stuff for the tokenizing mode.
299 *
300 * Revision 1.53  2002/10/18 23:37:56  agmsmith
301 * More mail kit usage, can now decode headers, but more to do.
302 *
303 * Revision 1.52  2002/10/16 23:52:33  agmsmith
304 * Getting ready to add more tokenizing modes, exploring Mail Kit to break
305 * apart messages into components (and decode BASE64 and other encodings).
306 *
307 * Revision 1.51  2002/10/11 20:05:31  agmsmith
308 * Added installation of sound effect names, which the filter will use.
309 *
310 * Revision 1.50  2002/10/02 16:50:02  agmsmith
311 * Forgot to add credits to the algorithm inventors.
312 *
313 * Revision 1.49  2002/10/01 00:39:29  agmsmith
314 * Added drag and drop to evaluate files or to add them to the list.
315 *
316 * Revision 1.48  2002/09/30 19:44:17  agmsmith
317 * Switched to Gary Robinson's method, removed max spam/genuine word.
318 *
319 * Revision 1.47  2002/09/23 17:08:55  agmsmith
320 * Add an attribute with the spam ratio to files which have been evaluated.
321 *
322 * Revision 1.46  2002/09/23 02:50:32  agmsmith
323 * Fiddling with display width of e-mail attributes.
324 *
325 * Revision 1.45  2002/09/23 01:13:56  agmsmith
326 * Oops, bug in string evaluation scripting.
327 *
328 * Revision 1.44  2002/09/22 21:00:55  agmsmith
329 * Added EvaluateString so that the BeMail add-on can pass the info without
330 * having to create a temporary file.
331 *
332 * Revision 1.43  2002/09/20 19:56:02  agmsmith
333 * Added about box and button for estimating the spam ratio of a file.
334 *
335 * Revision 1.42  2002/09/20 01:22:26  agmsmith
336 * More testing, decide that an extreme ratio bias point of 0.5 is good.
337 *
338 * Revision 1.41  2002/09/19 21:17:12  agmsmith
339 * Changed a few names and proofread the program.
340 *
341 * Revision 1.40  2002/09/19 14:27:17  agmsmith
342 * Rearranged execution of commands, moving them to a separate looper
343 * rather than the BApplication, so that thousands of files could be
344 * processed without worrying about the message queue filling up.
345 *
346 * Revision 1.39  2002/09/18 18:47:16  agmsmith
347 * Stop flickering when the view is partially obscured, update cached
348 * values in all situations except when app is busy.
349 *
350 * Revision 1.38  2002/09/18 18:08:11  agmsmith
351 * Add a function for evaluating the spam ratio of a message.
352 *
353 * Revision 1.37  2002/09/16 01:30:16  agmsmith
354 * Added Get Oldest command.
355 *
356 * Revision 1.36  2002/09/16 00:47:52  agmsmith
357 * Change the display to counter-weigh the spam ratio by the number of
358 * messages.
359 *
360 * Revision 1.35  2002/09/15 20:49:35  agmsmith
361 * Scrolling improved, buttons, keys and mouse wheel added.
362 *
363 * Revision 1.34  2002/09/15 03:46:10  agmsmith
364 * Up and down buttons under construction.
365 *
366 * Revision 1.33  2002/09/15 02:09:21  agmsmith
367 * Took out scroll bar.
368 *
369 * Revision 1.32  2002/09/15 02:05:30  agmsmith
370 * Trying to add a scroll bar, but it isn't very useful.
371 *
372 * Revision 1.31  2002/09/14 23:06:28  agmsmith
373 * Now has live updates of the list of words.
374 *
375 * Revision 1.30  2002/09/14 19:53:11  agmsmith
376 * Now with a better display of the words.
377 *
378 * Revision 1.29  2002/09/13 21:33:54  agmsmith
379 * Now draws the words in the word display view, but still primitive.
380 *
381 * Revision 1.28  2002/09/13 19:28:02  agmsmith
382 * Added display of most genuine and most spamiest, fixed up cursor.
383 *
384 * Revision 1.27  2002/09/13 03:08:42  agmsmith
385 * Show current word and message counts, and a busy cursor.
386 *
387 * Revision 1.26  2002/09/13 00:00:08  agmsmith
388 * Fixed up some deadlock problems, now using asynchronous message replies.
389 *
390 * Revision 1.25  2002/09/12 17:56:58  agmsmith
391 * Keep track of words which are spamiest and genuinest.
392 *
393 * Revision 1.24  2002/09/12 01:57:10  agmsmith
394 * Added server mode.
395 *
396 * Revision 1.23  2002/09/11 23:30:45  agmsmith
397 * Added Purge button and ignore classification checkbox.
398 *
399 * Revision 1.22  2002/09/11 21:23:13  agmsmith
400 * Added bulk update choice, purge button, moved to a BView container
401 * for all the controls (so background colour could be set, and Pulse
402 * works normally for it too).
403 *
404 * Revision 1.21  2002/09/10 22:52:49  agmsmith
405 * You can now change the database name in the GUI.
406 *
407 * Revision 1.20  2002/09/09 14:20:42  agmsmith
408 * Now can have multiple backups, and implemented refs received.
409 *
410 * Revision 1.19  2002/09/07 19:14:56  agmsmith
411 * Added standard GUI measurement code.
412 *
413 * Revision 1.18  2002/09/06 21:03:03  agmsmith
414 * Rearranging code to avoid forward references when adding a window class.
415 *
416 * Revision 1.17  2002/09/06 02:54:00  agmsmith
417 * Added the ability to purge old words from the database.
418 *
419 * Revision 1.16  2002/09/05 00:46:03  agmsmith
420 * Now adds spam to the database!
421 *
422 * Revision 1.15  2002/09/04 20:32:15  agmsmith
423 * Read ahead a couple of letters to decode quoted-printable better.
424 *
425 * Revision 1.14  2002/09/04 03:10:03  agmsmith
426 * Can now tokenize (break into words) a text file.
427 *
428 * Revision 1.13  2002/09/03 21:50:54  agmsmith
429 * Count database command, set up MIME type for the database file.
430 *
431 * Revision 1.12  2002/09/03 19:55:54  agmsmith
432 * Added loading and saving the database.
433 *
434 * Revision 1.11  2002/09/02 03:35:33  agmsmith
435 * Create indices and set up attribute associations with the e-mail MIME type.
436 *
437 * Revision 1.10  2002/09/01 15:52:49  agmsmith
438 * Can now delete the database.
439 *
440 * Revision 1.9  2002/08/31 21:55:32  agmsmith
441 * Yet more scripting.
442 *
443 * Revision 1.8  2002/08/31 21:41:37  agmsmith
444 * Under construction, with example code to decode a B_REPLY.
445 *
446 * Revision 1.7  2002/08/30 19:29:06  agmsmith
447 * Combined loading and saving settings into one function.
448 *
449 * Revision 1.6  2002/08/30 02:01:10  agmsmith
450 * Working on loading and saving settings.
451 *
452 * Revision 1.5  2002/08/29 23:17:42  agmsmith
453 * More scripting.
454 *
455 * Revision 1.4  2002/08/28 00:40:52  agmsmith
456 * Scripting now seems to work, at least the messages flow properly.
457 *
458 * Revision 1.3  2002/08/25 21:51:44  agmsmith
459 * Getting the about text formatting right.
460 *
461 * Revision 1.2  2002/08/25 21:28:20  agmsmith
462 * Trying out the BeOS scripting system as a way of implementing the program.
463 *
464 * Revision 1.1  2002/08/24 02:27:51  agmsmith
465 * Initial revision
466 */
467
468/* Standard C Library. */
469
470#include <stdio.h>
471#include <stdlib.h>
472#include <errno.h>
473
474/* Standard C++ library. */
475
476#include <iostream>
477
478/* STL (Standard Template Library) headers. */
479
480#include <map>
481#include <queue>
482#include <set>
483#include <string>
484#include <vector>
485
486using namespace std;
487
488/* BeOS (Be Operating System) headers. */
489
490#include <Alert.h>
491#include <Application.h>
492#include <Beep.h>
493#include <Button.h>
494#include <CheckBox.h>
495#include <Cursor.h>
496#include <Directory.h>
497#include <Entry.h>
498#include <File.h>
499#include <FilePanel.h>
500#include <FindDirectory.h>
501#include <fs_index.h>
502#include <fs_info.h>
503#include <MenuBar.h>
504#include <MenuItem.h>
505#include <Message.h>
506#include <MessageQueue.h>
507#include <MessageRunner.h>
508#include <Mime.h>
509#include <NodeInfo.h>
510#include <Path.h>
511#include <Picture.h>
512#include <PictureButton.h>
513#include <Point.h>
514#include <Polygon.h>
515#include <PopUpMenu.h>
516#include <PropertyInfo.h>
517#include <RadioButton.h>
518#include <Resources.h>
519#include <Screen.h>
520#include <ScrollBar.h>
521#include <String.h>
522#include <StringView.h>
523#include <TextControl.h>
524#include <View.h>
525
526/* Included from the Mail Daemon Replacement project (MDR) include/public
527directory, available from http://sourceforge.net/projects/bemaildaemon/ */
528
529#include <MailMessage.h>
530#include <MailAttachment.h>
531
532
533/******************************************************************************
534 * Global variables, and not-so-variable things too.  Grouped by functionality.
535 */
536
/* Cached measurements used for laying out the GUI controls.  They all start
at zero (written out explicitly here, it is the same value static storage
gave them before).  Nothing in this section assigns the real sizes;
presumably they get measured during application startup - TODO confirm. */

static float g_MarginBetweenControls = 0.0f;
  /* Space left between neighbouring controls, the width of a letter "M". */

static float g_LineOfTextHeight = 0.0f;
  /* Height of a line of text in the current font. */

static float g_StringViewHeight = 0.0f;
  /* Height of a string view text box. */

static float g_ButtonHeight = 0.0f;
  /* How many pixels tall buttons are. */

static float g_CheckBoxHeight = 0.0f;
  /* Same for check boxes. */

static float g_RadioButtonHeight = 0.0f;
  /* Also for radio buttons. */

static float g_PopUpMenuHeight = 0.0f;
  /* Again for pop-up menus. */

static float g_TextBoxHeight = 0.0f;
  /* Ditto for editable text controls. */
545
/* Fixed strings used throughout the program.  Both the characters and the
pointers themselves are constant, so none of these can be accidentally
redirected to some other string at run time. */

static const char * const g_ABSAppSignature =
  "application/x-vnd.agmsmith.spamdbm";
  /* Application signature of this program. */

static const char * const g_ABSDatabaseFileMIMEType =
  "text/x-vnd.agmsmith.spam_probability_database";
  /* MIME type for the word database file. */

static const char * const g_DefaultDatabaseFileName =
  "SpamDBM Database";
  /* File name used for the database when no other one has been set. */

static const char * const g_DatabaseRecognitionString =
  "Spam Database File";
  /* Presumably the text which marks a file as being one of our databases;
  confirm against the database load and save code. */

/* Names of the file attributes attached to e-mail messages.  The
classification one holds either g_ClassifiedSpam or g_ClassifiedGenuine. */

static const char * const g_AttributeNameClassification =
  "MAIL:classification";
static const char * const g_AttributeNameSpamRatio = "MAIL:ratio_spam";

/* Names of the sound effects, one for each kind of verdict. */

static const char * const g_BeepGenuine = "SpamFilter-Genuine";
static const char * const g_BeepSpam = "SpamFilter-Spam";
static const char * const g_BeepUncertain = "SpamFilter-Uncertain";

/* Values stored in the classification attribute. */

static const char * const g_ClassifiedSpam = "Spam";
static const char * const g_ClassifiedGenuine = "Genuine";

/* Presumably the field names used in scripting messages and their replies;
verify against the message handling code. */

static const char * const g_DataName = "data";
static const char * const g_ResultName = "result";
567
568static const char *g_SettingsDirectoryName = "Mail";
569static const char *g_SettingsFileName = "SpamDBM Settings";
570static const uint32 g_SettingsWhatCode = 'SDBM';
static const char * const g_BackupSuffix = ".backup %d";
  /* printf style pattern for the backup file name suffix, the %d takes the
  backup number. */

static const int g_MaxBackups = 10;
  /* Backups are numbered from 0 to g_MaxBackups - 1. */

static const size_t g_MaxWordLength = 50;
  /* Words longer than this aren't. */

static const int g_MaxInterestingWords = 150;
  /* Top N words are examined. */

/* Tuning constants, named after the Gary Robinson algorithm credited at the
top of this file. */

static const double g_RobinsonS = 0.45;
  /* Default weight for no data. */

static const double g_RobinsonX = 0.5;
  /* Halfway point for no data. */
577
static bool g_CommandLineMode = false;
  /* TRUE if the program was started from the command line (and thus should
  exit after processing the command), FALSE if it is running with a graphical
  user interface.  Starts out FALSE, which is what static storage gave it
  before the initialiser was written out. */

static bool g_ServerMode = false;
  /* When TRUE the program runs in server mode - error messages don't result in
  pop-up dialog boxes, but you can still see them in stderr.  Also the window
  is minimized, if it exists.  FALSE here is only the value before any
  settings get applied. */

static int g_QuitCountdown = -1;
  /* Number of pulse timing events (about one every half second) left to count
  down before the program quits.  Negative means the countdown is stopped,
  zero means quit at the next pulse event.  This keeps the program alive for a
  short while after someone asks it to quit, in case more scripting commands
  come in, which will stop the countdown.  That is needed when several e-mail
  accounts are all requesting spam identification and the first one to finish
  tells the server to quit.  It also checks that there is no more work to do
  before trying to quit. */

static volatile bool g_AppReadyToRunCompleted = false;
  /* The BApplication starts processing messages before ReadyToRun finishes,
  which can lead to initialisation problems (button heights not determined).
  So code that might run early, like RefsReceived, waits for this to turn
  TRUE.  It is volatile so that the waiting code rereads it every time. */
603
604static class CommanderLooper *g_CommanderLooperPntr = NULL;
605static BMessenger *g_CommanderMessenger = NULL;
606  /* Some globals for use with the looper which processes external commands
607  (arguments received, file references received), needed for avoiding deadlocks
608  which would happen if the BApplication sent a scripting message to itself. */
609
610static BCursor *g_BusyCursor = NULL;
611  /* The busy cursor, will be loaded from the resource file during application
612  startup. */
613
/* Index numbers for the scripting properties this program understands.  The
values are consecutive starting at zero so that they can be used directly as
subscripts into g_PropertyNames, and PN_MAX is the number of properties. */

typedef enum PropertyNumbersEnum
{
  PN_DATABASE_FILE = 0,
  PN_SPAM,
  PN_SPAM_STRING,
  PN_GENUINE,
  PN_GENUINE_STRING,
  PN_UNCERTAIN,
  PN_IGNORE_PREVIOUS_CLASSIFICATION,
  PN_SERVER_MODE,
  PN_FLUSH,
  PN_PURGE_AGE,
  PN_PURGE_POPULARITY,
  PN_PURGE,
  PN_OLDEST,
  PN_EVALUATE,
  PN_EVALUATE_STRING,
  PN_RESET_TO_DEFAULTS,
  PN_INSTALL_THINGS,
  PN_TOKENIZE_MODE,
  PN_SCORING_MODE,
  PN_MAX
} PropertyNumbers;

/* The name of each scripting property, indexed by PropertyNumbers and listed
in exactly the same order as the enumeration.  The array size is left for the
compiler to work out so that the check just below the table can catch a
forgotten or extra name, rather than having the missing entries silently
become NULL pointers. */

static const char * const g_PropertyNames [] =
{
  "DatabaseFile",                 /* PN_DATABASE_FILE */
  "Spam",                         /* PN_SPAM */
  "SpamString",                   /* PN_SPAM_STRING */
  "Genuine",                      /* PN_GENUINE */
  "GenuineString",                /* PN_GENUINE_STRING */
  "Uncertain",                    /* PN_UNCERTAIN */
  "IgnorePreviousClassification", /* PN_IGNORE_PREVIOUS_CLASSIFICATION */
  "ServerMode",                   /* PN_SERVER_MODE */
  "Flush",                        /* PN_FLUSH */
  "PurgeAge",                     /* PN_PURGE_AGE */
  "PurgePopularity",              /* PN_PURGE_POPULARITY */
  "Purge",                        /* PN_PURGE */
  "Oldest",                       /* PN_OLDEST */
  "Evaluate",                     /* PN_EVALUATE */
  "EvaluateString",               /* PN_EVALUATE_STRING */
  "ResetToDefaults",              /* PN_RESET_TO_DEFAULTS */
  "InstallThings",                /* PN_INSTALL_THINGS */
  "TokenizeMode",                 /* PN_TOKENIZE_MODE */
  "ScoringMode"                   /* PN_SCORING_MODE */
};

/* Compile time check: this array type gets a negative size (an error) if the
number of names doesn't match the number of property numbers.  Done with a
typedef so that it also works with old compilers. */

typedef char g_PropertyNamesCountCheck
  [(sizeof (g_PropertyNames) / sizeof (g_PropertyNames[0]) == PN_MAX)
  ? 1 : -1];
660
661/* This array lists the scripting commands we can handle, in a format that the
662scripting system can understand too. */
663
664static struct property_info g_ScriptingPropertyList [] =
665{
666  /* *name; commands[10]; specifiers[10]; *usage; extra_data; ... */
667  {g_PropertyNames[PN_DATABASE_FILE], {B_GET_PROPERTY, 0},
668    {B_DIRECT_SPECIFIER, 0}, "Get the pathname of the current database file.  "
669    "The default name is something like B_USER_SETTINGS_DIRECTORY / "
670    "Mail / SpamDBM Database", PN_DATABASE_FILE,
671    {}, {}, {}},
672  {g_PropertyNames[PN_DATABASE_FILE], {B_SET_PROPERTY, 0},
673    {B_DIRECT_SPECIFIER, 0}, "Change the pathname of the database file to "
674    "use.  It will automatically be converted to an absolute path name, "
675    "so make sure the parent directories exist before setting it.  If it "
676    "doesn't exist, you'll have to use the create command next.",
677    PN_DATABASE_FILE, {}, {}, {}},
678  {g_PropertyNames[PN_DATABASE_FILE], {B_CREATE_PROPERTY, 0},
679    {B_DIRECT_SPECIFIER, 0}, "Creates a new empty database, will replace "
680    "the existing database file too.", PN_DATABASE_FILE, {}, {}, {}},
681  {g_PropertyNames[PN_DATABASE_FILE], {B_DELETE_PROPERTY, 0},
682    {B_DIRECT_SPECIFIER, 0}, "Deletes the database file and all backup copies "
683    "of that file too.  Really only of use for uninstallers.",
684    PN_DATABASE_FILE, {}, {}, {}},
685  {g_PropertyNames[PN_DATABASE_FILE], {B_COUNT_PROPERTIES, 0},
686    {B_DIRECT_SPECIFIER, 0}, "Returns the number of words in the database.",
687    PN_DATABASE_FILE, {}, {}, {}},
688  {g_PropertyNames[PN_SPAM], {B_SET_PROPERTY, 0}, {B_DIRECT_SPECIFIER, 0},
689    "Adds the spam in the given file (specify full pathname to be safe) to "
690    "the database.  The words in the files will be added to the list of words "
691    "in the database that identify spam messages.  The files processed will "
692    "also have the attribute MAIL:classification added with a value of "
693    "\"Spam\" or \"Genuine\" as specified.  They also have their spam ratio "
694    "attribute updated, as if you had also used the Evaluate command on "
695    "them.  If they already have the MAIL:classification "
696    "attribute and it matches the new classification then they won't get "
697    "processed (and if it is different, they will get removed from the "
698    "statistics for the old class and added to the statistics for the new "
699    "one).  You can turn off that behaviour with the "
700    "IgnorePreviousClassification property.  The command line version lets "
701    "you specify more than one pathname.", PN_SPAM, {}, {}, {}},
702  {g_PropertyNames[PN_SPAM], {B_COUNT_PROPERTIES, 0}, {B_DIRECT_SPECIFIER, 0},
703    "Returns the number of spam messages in the database.", PN_SPAM,
704    {}, {}, {}},
705  {g_PropertyNames[PN_SPAM_STRING], {B_SET_PROPERTY, 0},
706    {B_DIRECT_SPECIFIER, 0}, "Adds the spam in the given string (assumed to "
707    "be the text of a whole e-mail message, not just a file name) to the "
708    "database.", PN_SPAM_STRING, {}, {}, {}},
709  {g_PropertyNames[PN_GENUINE], {B_SET_PROPERTY, 0}, {B_DIRECT_SPECIFIER, 0},
710    "Similar to adding spam except that the message file is added to the "
711    "genuine statistics.", PN_GENUINE, {}, {}, {}},
712  {g_PropertyNames[PN_GENUINE], {B_COUNT_PROPERTIES, 0},
713    {B_DIRECT_SPECIFIER, 0}, "Returns the number of genuine messages in the "
714    "database.", PN_GENUINE, {}, {}, {}},
715  {g_PropertyNames[PN_GENUINE_STRING], {B_SET_PROPERTY, 0},
716    {B_DIRECT_SPECIFIER, 0}, "Adds the genuine message in the given string "
717    "(assumed to be the text of a whole e-mail message, not just a file name) "
718    "to the database.", PN_GENUINE_STRING, {}, {}, {}},
719  {g_PropertyNames[PN_UNCERTAIN], {B_SET_PROPERTY, 0}, {B_DIRECT_SPECIFIER, 0},
720    "Similar to adding spam except that the message file is removed from the "
721    "database, undoing the previous classification.  Obviously, it needs to "
722    "have been classified previously (using the file attributes) so it can "
723    "tell if it is removing spam or genuine words.", PN_UNCERTAIN, {}, {}, {}},
724  {g_PropertyNames[PN_IGNORE_PREVIOUS_CLASSIFICATION], {B_SET_PROPERTY, 0},
725    {B_DIRECT_SPECIFIER, 0}, "If set to true then the previous classification "
726    "(which was saved as an attribute of the e-mail message file) will be "
727    "ignored, so that you can add the message to the database again.  If set "
728    "to false (the normal case), the attribute will be examined, and if the "
729    "message has already been classified as what you claim it is, nothing "
730    "will be done.  If it was misclassified, then the message will be removed "
731    "from the statistics for the old class and added to the stats for the "
732    "new classification you have requested.",
733    PN_IGNORE_PREVIOUS_CLASSIFICATION, {}, {}, {}},
734  {g_PropertyNames[PN_IGNORE_PREVIOUS_CLASSIFICATION], {B_GET_PROPERTY, 0},
735    {B_DIRECT_SPECIFIER, 0}, "Find out the current setting of the flag for "
736    "ignoring the previously recorded classification.",
737    PN_IGNORE_PREVIOUS_CLASSIFICATION, {}, {}, {}},
738  {g_PropertyNames[PN_SERVER_MODE], {B_SET_PROPERTY, 0},
739    {B_DIRECT_SPECIFIER, 0}, "If set to true then error messages get printed "
740    "to the standard error stream rather than showing up in an alert box.  "
741    "It also starts up with the window minimized.", PN_SERVER_MODE,
742    {}, {}, {}},
743  {g_PropertyNames[PN_SERVER_MODE], {B_GET_PROPERTY, 0},
744    {B_DIRECT_SPECIFIER, 0}, "Find out the setting of the server mode flag.",
745    PN_SERVER_MODE, {}, {}, {}},
746  {g_PropertyNames[PN_FLUSH], {B_EXECUTE_PROPERTY, 0},
747    {B_DIRECT_SPECIFIER, 0}, "Writes out the database file to disk, if it has "
748    "been updated in memory but hasn't been saved to disk.  It will "
749    "automatically get written when the program exits, so this command is "
750    "mostly useful for server mode.", PN_FLUSH, {}, {}, {}},
751  {g_PropertyNames[PN_PURGE_AGE], {B_SET_PROPERTY, 0},
752    {B_DIRECT_SPECIFIER, 0}, "Sets the old age limit.  Words which haven't "
753      "been updated since this many message additions to the database may be "
754      "deleted when you do a purge.  A good value is 1000, meaning that if a "
755      "word hasn't appeared in the last 1000 spam/genuine messages, it will "
756      "be forgotten.  Zero will purge all words, 1 will purge words not in "
757      "the last message added to the database, 2 will purge words not in the "
758      "last two messages added, and so on.  This is mostly useful for "
759      "removing those one time words which are often hunks of binary garbage, "
760      "not real words.  This acts in combination with the popularity limit; "
761      "both conditions have to be valid before the word gets deleted.",
762      PN_PURGE_AGE, {}, {}, {}},
763  {g_PropertyNames[PN_PURGE_AGE], {B_GET_PROPERTY, 0},
764    {B_DIRECT_SPECIFIER, 0}, "Gets the old age limit.", PN_PURGE_AGE,
765    {}, {}, {}},
766  {g_PropertyNames[PN_PURGE_POPULARITY], {B_SET_PROPERTY, 0},
767    {B_DIRECT_SPECIFIER, 0}, "Sets the popularity limit.  Words which aren't "
768    "this popular may be deleted when you do a purge.  A good value is 5, "
769    "which means that the word is safe from purging if it has been seen in 6 "
770    "or more e-mail messages.  If it's only in 5 or less, then it may get "
771    "purged.  The extreme is zero, where only words that haven't been seen "
772    "in any message are deleted (usually means no words).  This acts in "
773    "combination with the old age limit; both conditions have to be valid "
774    "before the word gets deleted.", PN_PURGE_POPULARITY, {}, {}, {}},
775  {g_PropertyNames[PN_PURGE_POPULARITY], {B_GET_PROPERTY, 0},
776    {B_DIRECT_SPECIFIER, 0}, "Gets the purge popularity limit.",
777    PN_PURGE_POPULARITY, {}, {}, {}},
778  {g_PropertyNames[PN_PURGE], {B_EXECUTE_PROPERTY, 0},
779    {B_DIRECT_SPECIFIER, 0}, "Purges the old obsolete words from the "
780    "database, if they are old enough according to the age limit and also "
781    "unpopular enough according to the popularity limit.", PN_PURGE,
782    {}, {}, {}},
783  {g_PropertyNames[PN_OLDEST], {B_GET_PROPERTY, 0},
784    {B_DIRECT_SPECIFIER, 0}, "Gets the age of the oldest message in the "
785    "database.  It's relative to the beginning of time, so you need to do "
786    "(total messages - age - 1) to see how many messages ago it was added.",
787    PN_OLDEST, {}, {}, {}},
788  {g_PropertyNames[PN_EVALUATE], {B_SET_PROPERTY, 0},
789    {B_DIRECT_SPECIFIER, 0}, "Evaluates a given file (by path name) to see "
790    "if it is spam or not.  Returns the ratio of spam probability vs genuine "
791    "probability, 0.0 meaning completely genuine, 1.0 for completely spam.  "
792    "Normally you should safely be able to consider it as spam if it is over "
793    "0.56 for the Robinson scoring method.  For the ChiSquared method, the "
794    "numbers are near 0 for genuine, near 1 for spam, and anywhere in the "
795    "middle means it can't decide.  The program attaches a MAIL:ratio_spam "
796    "attribute with the ratio as its "
797    "float32 value to the file.  Also returns the top few interesting words "
798    "in \"words\" and the associated per-word probability ratios in "
799    "\"ratios\".", PN_EVALUATE, {}, {}, {}},
800  {g_PropertyNames[PN_EVALUATE_STRING], {B_SET_PROPERTY, 0},
801    {B_DIRECT_SPECIFIER, 0}, "Like Evaluate, but rather than a file name, "
802    "the string argument contains the entire text of the message to be "
803    "evaluated.", PN_EVALUATE_STRING, {}, {}, {}},
804  {g_PropertyNames[PN_RESET_TO_DEFAULTS], {B_EXECUTE_PROPERTY, 0},
805    {B_DIRECT_SPECIFIER, 0}, "Resets all the configuration options to the "
806    "default values, including the database name.", PN_RESET_TO_DEFAULTS,
807    {}, {}, {}},
808  {g_PropertyNames[PN_INSTALL_THINGS], {B_EXECUTE_PROPERTY, 0},
809    {B_DIRECT_SPECIFIER, 0}, "Creates indices for the MAIL:classification and "
810    "MAIL:ratio_spam attributes on all volumes which support BeOS queries, "
811    "identifies them to the system as e-mail related attributes (modifies "
812    "the text/x-email MIME type), and sets up the new MIME type "
813    "(text/x-vnd.agmsmith.spam_probability_database) for the database file.  "
814    "Also registers names for the sound effects used by the separate filter "
815    "program (use the installsound BeOS program or the Sounds preferences "
816    "program to associate sound files with the names).", PN_INSTALL_THINGS,
817    {}, {}, {}},
818  {g_PropertyNames[PN_TOKENIZE_MODE], {B_SET_PROPERTY, 0},
819    {B_DIRECT_SPECIFIER, 0}, "Sets the method used for breaking up the "
820    "message into words.  Use \"Whole\" for the whole file (also use it for "
821    "non-email files).  The file isn't broken into parts; the whole thing is "
822    "converted into words, headers and attachments are just more raw data.  "
823    "Well, not quite raw data since it converts quoted-printable codes "
824    "(equals sign followed by hex digits or end of line) to the equivalent "
825    "single characters.  \"PlainText\" breaks the file into MIME components "
826    "and only looks at the ones which are of MIME type text/plain.  "
827    "\"AnyText\" will look for words in all text/* things, including "
828    "text/html attachments.  \"AllParts\" will decode all message components "
829    "and look for words in them, including binary attachments.  "
830    "\"JustHeader\" will only look for words in the message header.  "
831    "\"AllPartsAndHeader\", \"PlainTextAndHeader\" and \"AnyTextAndHeader\" "
832    "will also include the words from the message headers.", PN_TOKENIZE_MODE,
833    {}, {}, {}},
834  {g_PropertyNames[PN_TOKENIZE_MODE], {B_GET_PROPERTY, 0},
835    {B_DIRECT_SPECIFIER, 0}, "Gets the method used for breaking up the "
836    "message into words.", PN_TOKENIZE_MODE, {}, {}, {}},
837  {g_PropertyNames[PN_SCORING_MODE], {B_SET_PROPERTY, 0},
838    {B_DIRECT_SPECIFIER, 0}, "Sets the method used for combining the "
839    "probabilities of individual words into an overall score.  "
840    "\"Robinson\" mode will use Gary Robinson's nth root of the product "
841    "method.  It gives a nice range of values between 0 and 1 so you can "
842    "see shades of spaminess.  The cutoff point between spam and genuine "
843    "varies depending on your database of words (0.56 was one point in "
844    "some experiments).  \"ChiSquared\" mode will use chi-squared "
845    "statistics to evaluate the difference in probabilities that the lists "
846    "of word ratios are random.  The result is very close to 0 for genuine "
847    "and very close to 1 for spam, and near the middle if it is uncertain.",
848    PN_SCORING_MODE, {}, {}, {}},
849  {g_PropertyNames[PN_SCORING_MODE], {B_GET_PROPERTY, 0},
850    {B_DIRECT_SPECIFIER, 0}, "Gets the method used for combining the "
851    "individual word ratios into an overall score.", PN_SCORING_MODE,
852    {}, {}, {}},
853  {0, {0}, {0}, 0, 0, {}, {}, {}} /* End of list of property commands. */
854};
855
856
857/* The various scoring modes as text and enums.  See PN_SCORING_MODE. */
858
enum ScoringModeEnum
{
  SM_ROBINSON = 0, /* Gary Robinson's nth root of the product method. */
  SM_CHISQUARED,   /* Chi-squared statistics on the lists of word ratios. */
  SM_MAX           /* Number of modes; sizes the name table, not a mode. */
};

typedef enum ScoringModeEnum ScoringModes;

/* Text names of the scoring modes, indexed by the ScoringModes value.  They
are the strings listed in the PN_SCORING_MODE property description. */

static const char * g_ScoringModeNames [SM_MAX] =
{
  /* SM_ROBINSON   */ "Robinson",
  /* SM_CHISQUARED */ "ChiSquared"
};
871
872
873/* The various tokenizing modes as text and enums.  See PN_TOKENIZE_MODE. */
874
typedef enum TokenizeModeEnum
{
  TM_WHOLE = 0,         /* Whole file converted into words, no MIME parsing. */
  TM_PLAIN_TEXT,        /* Only the text/plain MIME components. */
  TM_PLAIN_TEXT_HEADER, /* text/plain components plus the message header. */
  TM_ANY_TEXT,          /* All text/... components, including text/html. */
  TM_ANY_TEXT_HEADER,   /* All text/... components plus the header. */
  TM_ALL_PARTS,         /* All components, including binary attachments. */
  TM_ALL_PARTS_HEADER,  /* All components plus the message header. */
  TM_JUST_HEADER,       /* Only the words in the message header. */
  TM_MAX                /* Number of modes; sizes the name table. */
} TokenizeModes;

/* Text names of the tokenize modes, indexed by the TokenizeModes value.  These
are the values which the PN_TOKENIZE_MODE property description tells the user
to specify ("Whole", "PlainText", "AnyText", "AllParts", "JustHeader" and the
"...AndHeader" variants), so they have to be spelled exactly as in that help
text: single mixed case words with no spaces. */

static const char * g_TokenizeModeNames [TM_MAX] =
{
  "Whole",
  "PlainText",
  "PlainTextAndHeader",
  "AnyText",
  "AnyTextAndHeader",
  "AllParts",
  "AllPartsAndHeader",
  "JustHeader"
};
899
900
901/* Possible message classifications. */
902
enum ClassificationTypesEnum
{
  CL_GENUINE = 0, /* A genuine (wanted) message. */
  CL_SPAM,        /* A spam message. */
  CL_UNCERTAIN,   /* Not classified, or classification being undone. */
  CL_MAX          /* Number of classifications; sizes the name table. */
};

typedef enum ClassificationTypesEnum ClassificationTypes;
910
/* Text names of the classifications, indexed by the ClassificationTypes value
(CL_GENUINE, CL_SPAM, CL_UNCERTAIN in that order).  The first two entries
reuse the g_ClassifiedGenuine and g_ClassifiedSpam strings which are defined
elsewhere in this file. */
static const char * g_ClassificationTypeNames [CL_MAX] =
{
  g_ClassifiedGenuine,
  g_ClassifiedSpam,
  "Uncertain"
};
917
918
919/* Some polygon graphics for the scroll arrows. */
920
/* Seven vertices outlining a single headed arrow.  The tip is the first point
at (8, 2), the arrow head spans x = 2 to 14 at y = 12 and the shaft spans
x = 6 to 10 from there to y = 26.  The Y values are written as 2 * (row);
g_DownLinePoints uses the same rows mirrored as 2 * (14-row). */
static BPoint g_UpLinePoints [] =
{
  BPoint (8, 2 * (1)),
  BPoint (14, 2 * (6)),
  BPoint (10, 2 * (6)),
  BPoint (10, 2 * (13)),
  BPoint (6, 2 * (13)),
  BPoint (6, 2 * (6)),
  BPoint (2, 2 * (6))
};
931
/* Seven vertices outlining a single headed arrow, the vertical mirror image
of g_UpLinePoints: same X values, with each Y row replaced by (14-row) before
doubling.  The tip is the first point at (8, 26). */
static BPoint g_DownLinePoints [] =
{
  BPoint (8, 2 * (14-1)),
  BPoint (14, 2 * (14-6)),
  BPoint (10, 2 * (14-6)),
  BPoint (10, 2 * (14-13)),
  BPoint (6, 2 * (14-13)),
  BPoint (6, 2 * (14-6)),
  BPoint (2, 2 * (14-6))
};
942
/* Eleven vertices outlining an arrow with two stacked heads.  The tip is the
first point at (8, 2), the first head spans x = 3 to 13 at y = 12, the second
head spans x = 2 to 14 at y = 20 and the shaft spans x = 6 to 10 from there to
y = 26.  The Y values are written as 2 * (row); g_DownPagePoints uses the same
rows mirrored as 2 * (14-row). */
static BPoint g_UpPagePoints [] =
{
  BPoint (8, 2 * (1)),
  BPoint (13, 2 * (6)),
  BPoint (10, 2 * (6)),
  BPoint (14, 2 * (10)),
  BPoint (10, 2 * (10)),
  BPoint (10, 2 * (13)),
  BPoint (6, 2 * (13)),
  BPoint (6, 2 * (10)),
  BPoint (2, 2 * (10)),
  BPoint (6, 2 * (6)),
  BPoint (3, 2 * (6))
};
957
/* Eleven vertices outlining an arrow with two stacked heads, the vertical
mirror image of g_UpPagePoints: same X values, with each Y row replaced by
(14-row) before doubling.  The tip is the first point at (8, 26). */
static BPoint g_DownPagePoints [] =
{
  BPoint (8, 2 * (14-1)),
  BPoint (13, 2 * (14-6)),
  BPoint (10, 2 * (14-6)),
  BPoint (14, 2 * (14-10)),
  BPoint (10, 2 * (14-10)),
  BPoint (10, 2 * (14-13)),
  BPoint (6, 2 * (14-13)),
  BPoint (6, 2 * (14-10)),
  BPoint (2, 2 * (14-10)),
  BPoint (6, 2 * (14-6)),
  BPoint (3, 2 * (14-6))
};
972
973
/* An array of flags to identify characters which are considered to be spaces.
If character code X has g_SpaceCharacters[X] set to true then it is a
space-like character.  Character codes 128 and above are always non-space since
they are UTF-8 characters, so the table only has entries for codes 0 to 127
and must not be indexed with larger values.  Initialised in the ABSApp
constructor. */

static bool g_SpaceCharacters [128];
980
981
982
983/******************************************************************************
984 * Each word in the spam database gets one of these structures.  The database
985 * has a string (the word) as the key and this structure as the value
986 * (statistics for that word).
987 */
988
989typedef struct StatisticsStruct
990{
991  uint32 age;
992    /* Sequence number for the time when this word was last updated in the
993    database, so that we can remove old words (haven't been seen in recent
994    spam).  It's zero for the first file ever added (spam or genuine) to the
995    database, 1 for all words added or updated by the second file, etc.  If a
996    later file updates an existing word, it gets the age of the later file. */
997
998  uint32 genuineCount;
999    /* Number of genuine messages that have this word. */
1000
1001  uint32 spamCount;
1002    /* A count of the number of spam e-mail messages which contain the word. */
1003
1004} StatisticsRecord, *StatisticsPointer;
1005
1006typedef map<string, StatisticsRecord> StatisticsMap;
1007  /* Define this type which will be used for our main data storage facility, so
1008  we can more conveniently specify things that are derived from it, like
1009  iterators. */
1010
1011
1012
1013/******************************************************************************
1014 * An alert box asking how the user wants to mark messages.  There are buttons
1015 * for each classification category, and a checkbox to mark all remaining N
1016 * messages the same way.  And a cancel button.  To use it, first create the
1017 * ClassificationChoicesWindow, specifying the input arguments.  Then call the
1018 * Go method which will show the window, stuff the user's answer into your
1019 * output arguments (class set to CL_MAX if the user cancels), and destroy the
1020 * window.  Implemented because BAlert only allows 3 buttons, max!
1021 */
1022
class ClassificationChoicesWindow : public BWindow
{
public:
  /* Constructor (no destructor is declared for this class). */
  ClassificationChoicesWindow (BRect FrameRect,
    const char *FileName, int NumberOfFiles);

  /* BeOS virtual functions. */
  virtual void MessageReceived (BMessage *MessagePntr);

  /* Our methods. */
  void Go (bool *BulkModeSelectedPntr,
    ClassificationTypes *ChoosenClassificationPntr);
    /* Shows the window, stores the user's answer in the two output arguments
    (the classification is set to CL_MAX if the user cancels) and destroys the
    window. */

  /* Various message codes for various buttons etc.  NOTE(review): the '0' at
  the end of MSG_CLASS_BUTTONS suggests it is a base value with a
  classification number added to it - confirm in the implementation. */
  static const uint32 MSG_CLASS_BUTTONS = 'ClB0';
  static const uint32 MSG_CANCEL_BUTTON = 'Cncl';
  static const uint32 MSG_BULK_CHECKBOX = 'BlkK';

private:
  /* Member variables.  NOTE(review): presumably these hold the output
  argument pointers passed to Go - confirm in the implementation. */
  bool *m_BulkModeSelectedPntr;
  ClassificationTypes *m_ChoosenClassificationPntr;
};
1047
/* The BView used along with ClassificationChoicesWindow; it is constructed
with the same FrameRect, FileName and NumberOfFiles arguments. */
class ClassificationChoicesView : public BView
{
public:
  /* Constructor (no destructor is declared for this class). */
  ClassificationChoicesView (BRect FrameRect,
    const char *FileName, int NumberOfFiles);

  /* BeOS virtual functions. */
  virtual void AttachedToWindow ();
  virtual void GetPreferredSize (float *width, float *height);

private:
  /* Member variables.  NOTE(review): m_FileName is a plain pointer, so
  presumably the string isn't copied and has to outlive the view - confirm in
  the constructor. */
  const char *m_FileName;
  int         m_NumberOfFiles;
  float       m_PreferredBottomY;
};
1065
1066
1067
1068/******************************************************************************
1069 * Due to deadlock problems with the BApplication posting scripting messages to
 * itself, we need to add a second Looper.  Its job is just to convert
1071 * command line arguments and arguments from the Tracker (refs received) into a
1072 * series of scripting commands sent to the main BApplication.  It also prints
1073 * out the replies received (to stdout for command line replies).  An instance
1074 * of this class will be created and run by the main() function, and shut down
1075 * by it too.
1076 */
1077
class CommanderLooper : public BLooper
{
public:
  /* Constructor and destructor. */
  CommanderLooper ();
  ~CommanderLooper ();

  /* BeOS virtual functions. */
  virtual void MessageReceived (BMessage *MessagePntr);

  /* Entry points for handing work to this looper: command line arguments, or
  file references such as those received from the Tracker.  When called
  without the optional arguments, BulkMode is false and BulkClassification is
  CL_GENUINE. */
  void CommandArguments (int argc, char **argv);
  void CommandReferences (BMessage *MessagePntr,
    bool BulkMode = false,
    ClassificationTypes BulkClassification = CL_GENUINE);
  bool IsBusy ();

private:
  /* NOTE(review): presumably these do the actual work for the two message
  codes below, in the looper's own thread - confirm in MessageReceived. */
  void ProcessArgs (BMessage *MessagePntr);
  void ProcessRefs (BMessage *MessagePntr);

  /* Message codes private to this class. */
  static const uint32 MSG_COMMAND_ARGUMENTS = 'CArg';
  static const uint32 MSG_COMMAND_FILE_REFS = 'CRef';

  /* The value behind IsBusy, presumably; confirm in the implementation. */
  bool m_IsBusy;
};
1100
1101
1102
1103/******************************************************************************
1104 * This view contains the various buttons and other controls for setting
1105 * configuration options and displaying the state of the database (but not the
1106 * actual list of words).  It will appear in the top half of the
1107 * DatabaseWindow.
1108 */
1109
class ControlsView : public BView
{
public:
  /* Constructor and destructor. */
  ControlsView (BRect NewBounds);
  ~ControlsView ();

  /* BeOS virtual functions. */
  virtual void AttachedToWindow ();
  virtual void FrameResized (float Width, float Height);
  virtual void MessageReceived (BMessage *MessagePntr);
  virtual void Pulse ();

private:
  /* Various message codes for various buttons etc. */
  static const uint32 MSG_BROWSE_BUTTON = 'Brws';
  static const uint32 MSG_DATABASE_NAME = 'DbNm';
  static const uint32 MSG_ESTIMATE_BUTTON = 'Estm';
  static const uint32 MSG_ESTIMATE_FILE_REFS = 'ERef';
  static const uint32 MSG_IGNORE_CLASSIFICATION = 'IPCl';
  static const uint32 MSG_PURGE_AGE = 'PuAg';
  static const uint32 MSG_PURGE_BUTTON = 'Purg';
  static const uint32 MSG_PURGE_POPULARITY = 'PuPo';
  static const uint32 MSG_SERVER_MODE = 'SrvM';

  /* Our member functions. */
  void BrowseForDatabaseFile ();
  void BrowseForFileToEstimate ();
  void PollServerForChanges ();

  /* Member variables.  They are listed alphabetically, which puts each
  ...CachedValue member next to the control that goes with it.
  NOTE(review): presumably the cached values record what each control last
  displayed, so that PollServerForChanges can detect changes made elsewhere -
  confirm in the implementation. */
  BButton        *m_AboutButtonPntr;
  BButton        *m_AddExampleButtonPntr;
  BButton        *m_BrowseButtonPntr;
  BFilePanel     *m_BrowseFilePanelPntr;
  BButton        *m_CreateDatabaseButtonPntr;
  char            m_DatabaseFileNameCachedValue [PATH_MAX];
  BTextControl   *m_DatabaseFileNameTextboxPntr;
  bool            m_DatabaseLoadDone;
  BButton        *m_EstimateSpamButtonPntr;
  BFilePanel     *m_EstimateSpamFilePanelPntr;
  uint32          m_GenuineCountCachedValue;
  BTextControl   *m_GenuineCountTextboxPntr;
  bool            m_IgnorePreviousClassCachedValue;
  BCheckBox      *m_IgnorePreviousClassCheckboxPntr;
  BButton        *m_InstallThingsButtonPntr;
  uint32          m_PurgeAgeCachedValue;
  BTextControl   *m_PurgeAgeTextboxPntr;
  BButton        *m_PurgeButtonPntr;
  uint32          m_PurgePopularityCachedValue;
  BTextControl   *m_PurgePopularityTextboxPntr;
  BButton        *m_ResetToDefaultsButtonPntr;
  ScoringModes    m_ScoringModeCachedValue;
  BMenuBar       *m_ScoringModeMenuBarPntr;
  BPopUpMenu     *m_ScoringModePopUpMenuPntr;
  bool            m_ServerModeCachedValue;
  BCheckBox      *m_ServerModeCheckboxPntr;
  uint32          m_SpamCountCachedValue;
  BTextControl   *m_SpamCountTextboxPntr;
  bigtime_t       m_TimeOfLastPoll;
  TokenizeModes   m_TokenizeModeCachedValue;
  BMenuBar       *m_TokenizeModeMenuBarPntr;
  BPopUpMenu     *m_TokenizeModePopUpMenuPntr;
  uint32          m_WordCountCachedValue;
  BTextControl   *m_WordCountTextboxPntr;
};
1176
1177
/* Message codes for scrolling by a line or by a page, up or down.  These are
declared at file scope rather than inside a class.  NOTE(review): presumably
they are sent by the scroll arrow buttons of the WordsView - confirm where
they are used. */
static const uint32 MSG_LINE_DOWN = 'LnDn';
static const uint32 MSG_LINE_UP = 'LnUp';
static const uint32 MSG_PAGE_DOWN = 'PgDn';
static const uint32 MSG_PAGE_UP = 'PgUp';
1183
1184/******************************************************************************
1185 * This view contains the list of words.  It displays as many as can fit in the
1186 * view rectangle, starting at a specified word (so it can simulate scrolling).
1187 * Usually it will appear in the bottom half of the DatabaseWindow.
1188 */
1189
class WordsView : public BView
{
public:
  /* Constructor (no destructor is declared for this class). */
  WordsView (BRect NewBounds);

  /* BeOS virtual functions. */
  virtual void AttachedToWindow ();
  virtual void Draw (BRect UpdateRect);
  virtual void KeyDown (const char *BufferPntr, int32 NumBytes);
  virtual void MakeFocus (bool Focused);
  virtual void MessageReceived (BMessage *MessagePntr);
  virtual void MouseDown (BPoint point);
  virtual void Pulse ();

private:
  /* Our member functions.  NOTE(review): MovementType is presumably one of
  the MSG_LINE_... or MSG_PAGE_... codes - confirm in the implementation. */
  void MoveTextUpOrDown (uint32 MovementType);
  void RefsDroppedHere (BMessage *MessagePntr);

  /* Member variables. */
  BPictureButton *m_ArrowLineDownPntr;
  BPictureButton *m_ArrowLineUpPntr;
  BPictureButton *m_ArrowPageDownPntr;
  BPictureButton *m_ArrowPageUpPntr;
    /* Various buttons for controlling scrolling, since we can't use a scroll
    bar.  To make them less obvious, their background view colour needs to be
    changed whenever the main view's colour changes. */

  float m_AscentHeight;
    /* The ascent height for the font used to draw words.  Height from the top
    of the highest letter to the base line (which is near the middle bottom of
    the letters, the line where you would align your writing of the text by
    hand, all letters have part above, some also have descenders below this
    line). */

  rgb_color m_BackgroundColour;
    /* The current background colour.  Changes when the focus changes. */

  uint32 m_CachedTotalGenuineMessages;
  uint32 m_CachedTotalSpamMessages;
  uint32 m_CachedWordCount;
    /* These are cached copies of the similar values in the BApplication.  They
    reflect what's currently displayed.  If they are different than the values
    from the BApplication then the polling loop will try to redraw the display.
    They get set to the values actually used during drawing when drawing is
    successful. */

  char m_FirstDisplayedWord [g_MaxWordLength + 1];
    /* The scrolling display starts at this word.  Since we can't use index
    numbers (word[12345] for example), we use the word itself.  The scroll
    buttons set this to the next or previous word in the database.  Typing by
    the user when the view has the focus will also change this starting word.
    The buffer has room for g_MaxWordLength characters plus one more. */

  rgb_color m_FocusedColour;
    /* The colour to use for focused mode (typing by the user is received by
    our view). */

  bigtime_t m_LastTimeAKeyWasPressed;
    /* Records the time when a key was last pressed.  Used for determining when
    the user has stopped typing a batch of letters. */

  float m_LineHeight;
    /* Height of a line of text in the font used for the word display.
    Includes the height of the letters plus a bit of extra space for between
    the lines (called leading). */

  BFont m_TextFont;
    /* The font used to draw the text in the window. */

  float m_TextHeight;
    /* Maximum total height of the letters in the text, includes the part above
    the baseline and the part below.  Doesn't include the sliver of space
    between lines. */

  rgb_color m_UnfocusedColour;
    /* The colour to use for unfocused mode, when user typing isn't active. */
};
1269
1270
1271
1272/******************************************************************************
1273 * The BWindow class for this program.  It displays the database in real time,
1274 * and has various buttons and gadgets in the top half for changing settings
1275 * (live changes, no OK button, and they reflect changes done by other programs
1276 * using the server too).  The bottom half is a scrolling view listing all the
1277 * words in the database.  A simple graphic blotch behind each word shows
1278 * whether the word is strongly or weakly related to spam or genuine messages.
1279 * Most operations go through the scripting message system, but it also peeks
1280 * at the BApplication data for examining simple things and when redrawing the
1281 * list of words.
1282 */
1283
class DatabaseWindow : public BWindow
{
public:
  /* Constructor (no destructor is declared for this class). */
  DatabaseWindow ();

  /* BeOS virtual functions. */
  virtual void MessageReceived (BMessage *MessagePntr);
  virtual bool QuitRequested ();

private:
  /* Member variables: pointers to a ControlsView and a WordsView.
  NOTE(review): presumably these are the window's own two child views (top
  and bottom halves) - confirm in the constructor. */
  ControlsView *m_ControlsViewPntr;
  WordsView    *m_WordsViewPntr;
};
1299
1300
1301
1302/******************************************************************************
1303 * ABSApp is the BApplication class for this program.  This handles messages
1304 * from the outside world (requests to load a database, or to add files to the
1305 * collection).  It responds to command line arguments (if you start up the
1306 * program a second time, the system will just send the arguments to the
1307 * existing running program).  It responds to scripting messages.  And it
1308 * responds to messages from the window.  Its thread does the main work of
1309 * updating the database and reading / writing files.
1310 */
1311
class ABSApp : public BApplication
{
public:
  /* Constructor and destructor. */
  ABSApp ();
  ~ABSApp ();

  /* BeOS virtual functions. */
  virtual void AboutRequested ();
  virtual void ArgvReceived (int32 argc, char **argv);
  virtual status_t GetSupportedSuites (BMessage *MessagePntr);
  virtual void MessageReceived (BMessage *MessagePntr);
  virtual void Pulse ();
  virtual bool QuitRequested ();
  virtual void ReadyToRun ();
  virtual void RefsReceived (BMessage *MessagePntr);
  virtual BHandler *ResolveSpecifier (BMessage *MessagePntr, int32 Index,
    BMessage *SpecifierMsgPntr, int32 SpecificationKind, const char *Property);

private:
  /* Our member functions, listed alphabetically.  Most return a status_t and
  take an ErrorMessage argument.  NOTE(review): ErrorMessage looks like a
  caller supplied buffer which receives a description of any failure, but the
  required buffer size isn't visible in this declaration - confirm against
  the implementations. */
  status_t AddFileToDatabase (ClassificationTypes IsSpamOrWhat,
    const char *FileName, char *ErrorMessage);
  status_t AddPositionIOToDatabase (ClassificationTypes IsSpamOrWhat,
    BPositionIO *MessageIOPntr, const char *OptionalFileName,
    char *ErrorMessage);
  status_t AddStringToDatabase (ClassificationTypes IsSpamOrWhat,
    const char *String, char *ErrorMessage);
  void AddWordsToSet (const char *InputString, size_t NumberOfBytes,
    char PrefixCharacter, set<string> &WordSet);
  status_t CreateDatabaseFile (char *ErrorMessage);
  void DefaultSettings ();
  status_t DeleteDatabaseFile (char *ErrorMessage);
  status_t EvaluateFile (const char *PathName, BMessage *ReplyMessagePntr,
    char *ErrorMessage);
  status_t EvaluatePositionIO (BPositionIO *PositionIOPntr,
    const char *OptionalFileName, BMessage *ReplyMessagePntr,
    char *ErrorMessage);
  status_t EvaluateString (const char *BufferPntr, ssize_t BufferSize,
    BMessage *ReplyMessagePntr, char *ErrorMessage);
  status_t GetWordsFromPositionIO (BPositionIO *PositionIOPntr,
    const char *OptionalFileName, set<string> &WordSet, char *ErrorMessage);
  status_t InstallThings (char *ErrorMessage);
  status_t LoadDatabaseIfNeeded (char *ErrorMessage);
  status_t LoadSaveDatabase (bool DoLoad, char *ErrorMessage);
public:
  /* Public, unlike its alphabetical neighbours.  NOTE(review): presumably so
  that code outside the class can load or save the settings - confirm who
  calls it. */
  status_t LoadSaveSettings (bool DoLoad);
private:
  status_t MakeBackup (char *ErrorMessage);
  void MakeDatabaseEmpty ();
  void ProcessScriptingMessage (BMessage *MessagePntr,
    struct property_info *PropInfoPntr);
  status_t PurgeOldWords (char *ErrorMessage);
  status_t RecursivelyTokenizeMailComponent (
    BMailComponent *ComponentPntr, const char *OptionalFileName,
    set<string> &WordSet, char *ErrorMessage,
    int RecursionLevel, int MaxRecursionLevel);
  status_t SaveDatabaseIfNeeded (char *ErrorMessage);
  status_t TokenizeParts (BPositionIO *PositionIOPntr,
    const char *OptionalFileName, set<string> &WordSet, char *ErrorMessage);
  status_t TokenizeWhole (BPositionIO *PositionIOPntr,
    const char *OptionalFileName, set<string> &WordSet, char *ErrorMessage);

public:
  /* Member variables.  Many are read by the window thread to see if it needs
  updating, and to draw the words.  However, the other threads will lock the
  BApplication or use scripting commands if they want to make changes. */

  bool m_DatabaseHasChanged;
    /* Set to TRUE when the in-memory database (stored in m_WordMap) has
    changed and is different from the on-disk database file.  When the
    application exits, the database will be written out if it has changed. */

  BString m_DatabaseFileName;
    /* The absolute path name to use for the database file on disk. */

  bool m_IgnorePreviousClassification;
    /* If TRUE then the previous classification of a message (stored in an
    attribute on the message file) will be ignored, and the message will be
    added to the requested spam/genuine list.  If this is FALSE then the spam
    won't be added to the list if it has already been classified as specified,
    but if it was mis-classified, it will be removed from the old list and
    added to the new list. */

  uint32 m_OldestAge;
    /* The age of the oldest word.  This will be the smallest age number in the
    database.  Mostly useful for scaling graphics representing age in the word
    display.  If the oldest word is no longer the oldest, this variable won't
    get immediately updated since it would take a lot of effort to find the
    next older age.  Since it's only used for display, we'll let it be slightly
    incorrect.  The next database load or purge will fix it. */

  uint32 m_PurgeAge;
    /* When purging old words, they have to be at least this old to be eligible
    for deletion.  Age is measured as the number of e-mails added to the
    database since the word was last updated in the database.  Zero means all
    words are old. */

  uint32 m_PurgePopularity;
    /* When purging old words, they have to be less than or equal to this
    popularity limit to be eligible for deletion.  Popularity is measured as
    the number of messages (spam and genuine) which have the word.  Zero means
    no words. */

  ScoringModes m_ScoringMode;
    /* Controls how to combine the word probabilities into an overall score.
    See the PN_SCORING_MODE comments for details. */

  BPath m_SettingsDirectoryPath;
    /* The constructor initialises this to the settings directory path.  It
    never changes after that. */

  bool m_SettingsHaveChanged;
    /* Set to TRUE when the settings are changed (different than the ones which
    were loaded).  When the application exits, the settings will be written out
    if they have changed. */

  double m_SmallestUseableDouble;
    /* When multiplying fractional numbers together, avoid using numbers
    smaller than this because the double exponent range is close to being
    exhausted.  The IEEE STANDARD 754 floating-point arithmetic (used on the
    Intel i8087 and later math processors) has 64 bit numbers with 53 bits of
    mantissa, giving it an underflow starting at 0.5**1022 = 2.2e-308 where it
    rounds off to the nearest multiple of 0.5**1074 = 4.9e-324. */

  TokenizeModes m_TokenizeMode;
    /* Controls how to convert the raw message text into words.  See the
    PN_TOKENIZE_MODE comments for details. */

  uint32 m_TotalGenuineMessages;
    /* Number of genuine messages which are in the database. */

  uint32 m_TotalSpamMessages;
    /* Number of spam messages which are in the database. */

  uint32 m_WordCount;
    /* The number of words currently in the database.  Stored separately as a
    member variable to avoid having to call m_WordMap.size() all the time,
    which other threads can't do while the database is being updated (but they
    can look at the word count variable). */

  StatisticsMap m_WordMap;
    /* The in-memory data structure holding the set of words and their
    associated statistics.  When the database isn't in use, it is an empty
    collection.  You should lock the BApplication if you are using the word
    collection (reading or writing) from another thread. */
};
1459
1460
1461
1462/******************************************************************************
1463 * Global utility function to display an error message and return.  The message
1464 * part describes the error, and if ErrorNumber is non-zero, gets the string
1465 * ", error code $X (standard description)." appended to it.  If the message
1466 * is NULL then it gets defaulted to "Something went wrong".  The title part
1467 * doesn't get displayed (no title bar in the dialog box, but you can see it in
1468 * the debugger as the window thread name), and defaults to "Error Message" if
1469 * you didn't specify one.  If running in command line mode, the error gets
1470 * printed to stderr rather than showing up in a dialog box.
1471 */
1472
1473static void
1474DisplayErrorMessage (
1475  const char *MessageString = NULL,
1476  int ErrorNumber = 0,
1477  const char *TitleString = NULL)
1478{
1479  BAlert *AlertPntr;
1480  char ErrorBuffer [PATH_MAX + 1500];
1481
1482  if (TitleString == NULL)
1483    TitleString = "SpamDBM Error Message";
1484
1485  if (MessageString == NULL)
1486  {
1487    if (ErrorNumber == 0)
1488      MessageString = "No error, no message, why bother?";
1489    else
1490      MessageString = "Something went wrong";
1491  }
1492
1493  if (ErrorNumber != 0)
1494  {
1495    sprintf (ErrorBuffer, "%s, error code $%X/%d (%s) has occured.",
1496      MessageString, ErrorNumber, ErrorNumber, strerror (ErrorNumber));
1497    MessageString = ErrorBuffer;
1498  }
1499
1500  if (g_CommandLineMode || g_ServerMode)
1501    cerr << TitleString << ": " << MessageString << endl;
1502  else
1503  {
1504    AlertPntr = new BAlert (TitleString, MessageString,
1505      "Acknowledge", NULL, NULL, B_WIDTH_AS_USUAL, B_STOP_ALERT);
1506    if (AlertPntr != NULL) {
1507      AlertPntr->SetFlags(AlertPntr->Flags() | B_CLOSE_ON_ESCAPE);
1508      AlertPntr->Go ();
1509    }
1510  }
1511}
1512
1513
1514
/******************************************************************************
 * Word wrap a long line of text into shorter lines of at most 79 columns and
 * print the result on the given output stream, with an endl after each line.
 * White space at the start of every output line is skipped.  Lines are broken
 * at the last run of white space which lets the line fit; a word longer than
 * a whole line just gets chopped into 79 character pieces.  Text which is
 * exactly 79 columns wide counts as fitting, it doesn't get broken earlier.
 * A NULL text pointer prints nothing.
 */

static void
WrapTextToStream (ostream& OutputStream, const char *TextPntr)
{
  const size_t LineLength = 79;
  size_t       ChunkLength;
  size_t       CutLength;

  if (TextPntr == NULL)
    return; /* Nothing to print. */

  while (*TextPntr != 0)
  {
    /* Skip leading spaces.  The character is converted to unsigned char since
    the ctype functions aren't defined for negative values, which is what
    plain char gives for bytes with the high bit set (UTF-8 text). */

    while (isspace ((unsigned char) *TextPntr))
      TextPntr++;
    if (*TextPntr == 0)
      break; /* It was all spaces, don't print any more. */

    /* See how much text is left, up to one full line of it. */

    ChunkLength = 0;
    while (ChunkLength < LineLength && TextPntr [ChunkLength] != 0)
      ChunkLength++;

    if (ChunkLength < LineLength || TextPntr [LineLength] == 0)
    {
      /* The remaining text fits completely on this line. */
      OutputStream.write (TextPntr, ChunkLength);
      OutputStream << endl;
      TextPntr += ChunkLength;
      continue;
    }

    /* There is more text than fits.  Find the last white space character
    which is at most one full line into the text.  The character just after a
    full line is checked too, since a space there means the full line ends
    exactly at a word boundary.  The first character is never white space
    (already skipped), so zero means that no white space was found. */

    CutLength = LineLength;
    while (CutLength > 0 && !isspace ((unsigned char) TextPntr [CutLength]))
      CutLength--;

    /* Back up over the rest of that run of white space, in case there were
    several spaces in a row, so they don't get printed at the end of the
    line. */

    while (CutLength > 0 && isspace ((unsigned char) TextPntr [CutLength - 1]))
      CutLength--;

    if (CutLength == 0)
      CutLength = LineLength; /* No spaces, don't wrap, just split a chunk. */

    /* Print the line of text and advance the text pointer too. */

    OutputStream.write (TextPntr, CutLength);
    OutputStream << endl;
    TextPntr += CutLength;
  }
}
1584
1585
1586
1587/******************************************************************************
1588 * Print the usage info to the stream.  Includes a list of all commands.
1589 */
1590ostream& PrintUsage (ostream& OutputStream);
1591
1592ostream& PrintUsage (ostream& OutputStream)
1593{
1594  struct property_info *PropInfoPntr;
1595
1596  OutputStream << "\nSpamDBM - A Spam Database Manager\n";
1597  OutputStream << "Copyright �� 2002 by Alexander G. M. Smith.  ";
1598  OutputStream << "Released to the public domain.\n\n";
1599  WrapTextToStream (OutputStream, "Compiled on " __DATE__ " at " __TIME__
1600".  $Id: spamdbm.cpp 30630 2009-05-05 01:31:01Z bga $  $HeadURL: http://svn.haiku-os.org/haiku/haiku/trunk/src/bin/mail_utils/spamdbm.cpp $");
1601  OutputStream << "\n"
1602"This is a program for classifying e-mail messages as spam (junk mail which\n"
1603"you don't want to read) and regular genuine messages.  It can learn what's\n"
1604"spam and what's genuine.  You just give it a bunch of spam messages and a\n"
1605"bunch of non-spam ones.  It uses them to make a list of the words from the\n"
1606"messages with the probability that each word is from a spam message or from\n"
1607"a genuine message.  Later on, it can use those probabilities to classify\n"
1608"new messages as spam or not spam.  If the classifier stops working well\n"
1609"(because the spammers have changed their writing style and vocabulary, or\n"
1610"your regular correspondants are writing like spammers), you can use this\n"
1611"program to update the list of words to identify the new messages\n"
1612"correctly.\n"
1613"\n"
1614"The original idea was from Paul Graham's algorithm, which has an excellent\n"
1615"writeup at: http://www.paulgraham.com/spam.html\n"
1616"\n"
1617"Gary Robinson came up with the improved algorithm, which you can read about at:\n"
1618"http://radio.weblogs.com/0101454/stories/2002/09/16/spamDetection.html\n"
1619"\n"
1620"Then he, Tim Peters and the SpamBayes mailing list developed the Chi-Squared\n"
1621"test, see http://mail.python.org/pipermail/spambayes/2002-October/001036.html\n"
1622"for one of the earlier messages leading from the central limit theorem to\n"
1623"the current chi-squared scoring method.\n"
1624"\n"
1625"Thanks go to Isaac Yonemoto for providing a better icon, which we can\n"
1626"unfortunately no longer use, since the Hormel company wants people to\n"
1627"avoid associating their meat product with junk e-mail.\n"
1628"\n"
1629"Tokenising code updated in 2005 to use some of the tricks that SpamBayes\n"
1630"uses to extract words from messages.  In particular, HTML is now handled.\n"
1631"\n"
1632"Usage: Specify the operation as the first argument followed by more\n"
1633"information as appropriate.  The program's configuration will affect the\n"
1634"actual operation (things like the name of the database file to use, or\n"
1635"whether it should allow non-email messages to be added).  In command line\n"
1636"mode it will do the operation and exit.  In GUI/server mode a command line\n"
1637"invocation will just send the command to the running server.  You can also\n"
1638"use BeOS scripting (see the \"Hey\" command which you can get from\n"
1639"http://www.bebits.com/app/2042 ) to control the Spam server.  And finally,\n"
1640"there's also a GUI interface which shows up if you start it without any\n"
1641"command line arguments.\n"
1642"\n"
1643"Commands:\n"
1644"\n"
1645"Quit\n"
1646"Stop the program.  Useful if it's running as a server.\n"
1647"\n";
1648
1649  /* Go through all our scripting commands and add a description of each one to
1650  the usage text. */
1651
1652  for (PropInfoPntr = g_ScriptingPropertyList + 0;
1653  PropInfoPntr->name != 0;
1654  PropInfoPntr++)
1655  {
1656    switch (PropInfoPntr->commands[0])
1657    {
1658      case B_GET_PROPERTY:
1659        OutputStream << "Get " << PropInfoPntr->name << endl;
1660        break;
1661
1662      case B_SET_PROPERTY:
1663        OutputStream << "Set " << PropInfoPntr->name << " NewValue" << endl;
1664        break;
1665
1666      case B_COUNT_PROPERTIES:
1667        OutputStream << "Count " << PropInfoPntr->name << endl;
1668        break;
1669
1670      case B_CREATE_PROPERTY:
1671        OutputStream << "Create " << PropInfoPntr->name << endl;
1672        break;
1673
1674      case B_DELETE_PROPERTY:
1675        OutputStream << "Delete " << PropInfoPntr->name << endl;
1676        break;
1677
1678      case B_EXECUTE_PROPERTY:
1679        OutputStream << PropInfoPntr->name << endl;
1680        break;
1681
1682      default:
1683        OutputStream << "Buggy Command: " << PropInfoPntr->name << endl;
1684        break;
1685    }
1686    WrapTextToStream (OutputStream, (char *)PropInfoPntr->usage);
1687    OutputStream << endl;
1688  }
1689
1690  return OutputStream;
1691}
1692
1693
1694
1695/******************************************************************************
1696 * A utility function to send a command to the application, will return after a
1697 * short delay if the application is busy (doesn't wait for it to be executed).
1698 * The reply from the application is also thrown away.  It used to be an
1699 * overloaded function, but the system couldn't distinguish between bool and
1700 * int, so now it has slightly different names depending on the arguments.
1701 */
1702
1703static void
1704SubmitCommand (BMessage& CommandMessage)
1705{
1706  status_t ErrorCode;
1707
1708  ErrorCode = be_app_messenger.SendMessage (&CommandMessage,
1709    be_app_messenger /* reply messenger, throw away the reply */,
1710    1000000 /* delivery timeout */);
1711
1712  if (ErrorCode != B_OK)
1713    cerr << "SubmitCommand failed to send a command, code " <<
1714    ErrorCode << " (" << strerror (ErrorCode) << ")." << endl;
1715}
1716
1717
1718static void
1719SubmitCommandString (
1720  PropertyNumbers Property,
1721  uint32 CommandCode,
1722  const char *StringArgument = NULL)
1723{
1724  BMessage CommandMessage (CommandCode);
1725
1726  if (Property < 0 || Property >= PN_MAX)
1727  {
1728    DisplayErrorMessage ("SubmitCommandString bug.");
1729    return;
1730  }
1731  CommandMessage.AddSpecifier (g_PropertyNames [Property]);
1732  if (StringArgument != NULL)
1733    CommandMessage.AddString (g_DataName, StringArgument);
1734  SubmitCommand (CommandMessage);
1735}
1736
1737
1738static void
1739SubmitCommandInt32 (
1740  PropertyNumbers Property,
1741  uint32 CommandCode,
1742  int32 Int32Argument)
1743{
1744  BMessage CommandMessage (CommandCode);
1745
1746  if (Property < 0 || Property >= PN_MAX)
1747  {
1748    DisplayErrorMessage ("SubmitCommandInt32 bug.");
1749    return;
1750  }
1751  CommandMessage.AddSpecifier (g_PropertyNames [Property]);
1752  CommandMessage.AddInt32 (g_DataName, Int32Argument);
1753  SubmitCommand (CommandMessage);
1754}
1755
1756
1757static void
1758SubmitCommandBool (
1759  PropertyNumbers Property,
1760  uint32 CommandCode,
1761  bool BoolArgument)
1762{
1763  BMessage CommandMessage (CommandCode);
1764
1765  if (Property < 0 || Property >= PN_MAX)
1766  {
1767    DisplayErrorMessage ("SubmitCommandBool bug.");
1768    return;
1769  }
1770  CommandMessage.AddSpecifier (g_PropertyNames [Property]);
1771  CommandMessage.AddBool (g_DataName, BoolArgument);
1772  SubmitCommand (CommandMessage);
1773}
1774
1775
1776
1777/******************************************************************************
1778 * A utility function which will estimate the spaminess of file(s), not
1779 * callable from the application thread since it sends a scripting command to
1780 * the application and waits for results.  For each file there will be an entry
1781 * reference in the message.  For each of those, run it through the spam
1782 * estimator and display a box with the results.  This function is used both by
1783 * the file requestor and by dragging and dropping into the middle of the words
1784 * view.
1785 */
1786
1787static void
1788EstimateRefFilesAndDisplay (BMessage *MessagePntr)
1789{
1790  BAlert     *AlertPntr;
1791  BEntry      Entry;
1792  entry_ref   EntryRef;
1793  status_t    ErrorCode;
1794  int         i, j;
1795  BPath       Path;
1796  BMessage    ReplyMessage;
1797  BMessage    ScriptingMessage;
1798  const char *StringPntr;
1799  float       TempFloat;
1800  int32       TempInt32;
1801  char        TempString [PATH_MAX + 1024 +
1802                g_MaxInterestingWords * (g_MaxWordLength + 16)];
1803
1804  for (i = 0; MessagePntr->FindRef ("refs", i, &EntryRef) == B_OK; i++)
1805  {
1806    /* See if the entry is a valid file or directory or other thing. */
1807
1808    ErrorCode = Entry.SetTo (&EntryRef, true /* traverse symbolic links */);
1809    if (ErrorCode != B_OK || !Entry.Exists () || Entry.GetPath (&Path) != B_OK)
1810      continue;
1811
1812    /* Evaluate the spaminess of the file. */
1813
1814    ScriptingMessage.MakeEmpty ();
1815    ScriptingMessage.what = B_SET_PROPERTY;
1816    ScriptingMessage.AddSpecifier (g_PropertyNames[PN_EVALUATE]);
1817    ScriptingMessage.AddString (g_DataName, Path.Path ());
1818
1819    if (be_app_messenger.SendMessage (&ScriptingMessage,&ReplyMessage) != B_OK)
1820      break; /* App has died or something is wrong. */
1821
1822    if (ReplyMessage.FindInt32 ("error", &TempInt32) != B_OK ||
1823    TempInt32 != B_OK)
1824      break; /* Error messages will be displayed elsewhere. */
1825
1826    ReplyMessage.FindFloat (g_ResultName, &TempFloat);
1827    sprintf (TempString, "%f spam ratio for \"%s\".\nThe top words are:",
1828      (double) TempFloat, Path.Path ());
1829
1830    for (j = 0; j < 20 /* Don't print too many! */; j++)
1831    {
1832      if (ReplyMessage.FindString ("words", j, &StringPntr) != B_OK ||
1833      ReplyMessage.FindFloat ("ratios", j, &TempFloat) != B_OK)
1834        break;
1835
1836      sprintf (TempString + strlen (TempString), "\n%s / %f",
1837        StringPntr, TempFloat);
1838    }
1839    if (j >= 20 && j < g_MaxInterestingWords)
1840      sprintf (TempString + strlen (TempString), "\nAnd up to %d more words.",
1841        g_MaxInterestingWords - j);
1842
1843    AlertPntr = new BAlert ("Estimate", TempString, "OK");
1844    if (AlertPntr != NULL) {
1845      AlertPntr->SetFlags(AlertPntr->Flags() | B_CLOSE_ON_ESCAPE);
1846      AlertPntr->Go ();
1847    }
1848  }
1849}
1850
1851
1852
/******************************************************************************
 * A utility function from the http://sourceforge.net/projects/spambayes
 * SpamBayes project.  Return prob(chisq >= x2, with v degrees of freedom).
 * In other words, the probability that a chi-squared value (a kind of
 * normalized error measurement) with v degrees of freedom would come out
 * larger than the given number (x2; chi is the Greek letter X thus x2).  A
 * result near zero means your measured error number is unusually large, since
 * random effects alone rarely push chi-squared that high.  A larger result
 * (over 5% is the usual rule of thumb) means chi-squared goes over your number
 * fairly often merely due to random effects.  The calculation only works for
 * even v; if v is odd then -1.0 is returned instead of a probability.
 */

static double ChiSquaredProbability (double x2, int v)
{
  if ((v % 2) != 0)
    return -1.0; /* Out of range return value as a hint v is odd. */

  const int    NumberOfTerms = v / 2;
  const double HalfX2 = x2 / 2.0;

  /* The answer is exp(-m) * (1 + m + m^2/2! + ... + m^(v/2-1)/(v/2-1)!) where
  m is half of x2.  Each term is made from the previous one.  If x2 is very
  large, exp(-m) will underflow to 0 and so will the whole sum. */

  double Term = exp (-HalfX2);
  double Total = Term;
  int    TermIndex = 1;

  while (TermIndex < NumberOfTerms)
  {
    Term *= HalfX2 / TermIndex;
    Total += Term;
    TermIndex++;
  }

  /* With small x2 and large v, accumulated roundoff error, plus error in the
  platform exp(), can make the total spill a few ULP above 1.0 (the SpamBayes
  people saw 1.0 + 2.0**-52 for x2 = 100, v = 300).  Returning a value even a
  teensy bit over 1.0 is no good, so clamp it. */

  return (Total > 1.0) ? 1.0 : Total;
}
1896
1897
1898
1899/******************************************************************************
1900 * A utility function to remove the "[Spam 99.9%] " from in front of the
1901 * MAIL:subject attribute of a file.
1902 */
1903
1904static status_t RemoveSpamPrefixFromSubjectAttribute (BNode *BNodePntr)
1905{
1906  status_t    ErrorCode;
1907  const char *MailSubjectName = "MAIL:subject";
1908  char       *StringPntr;
1909  char        SubjectString [2000];
1910
1911  ErrorCode = BNodePntr->ReadAttr (MailSubjectName,
1912    B_STRING_TYPE, 0 /* offset */, SubjectString,
1913    sizeof (SubjectString) - 1);
1914  if (ErrorCode <= 0)
1915    return 0; /* The attribute isn't there so we don't care. */
1916  if (ErrorCode >= (int) sizeof (SubjectString) - 1)
1917    return 0; /* Can't handle subjects which are too long. */
1918
1919  SubjectString [ErrorCode] = 0;
1920  ErrorCode = 0; /* So do-nothing exit returns zero. */
1921  if (strncmp (SubjectString, "[Spam ", 6) == 0)
1922  {
1923    for (StringPntr = SubjectString;
1924    *StringPntr != 0 && *StringPntr != ']'; StringPntr++)
1925      ; /* No body in this for loop. */
1926    if (StringPntr[0] == ']' && StringPntr[1] == ' ')
1927    {
1928      ErrorCode = BNodePntr->RemoveAttr (MailSubjectName);
1929      ErrorCode = BNodePntr->WriteAttr (MailSubjectName,
1930        B_STRING_TYPE, 0 /* offset */,
1931        StringPntr + 2, strlen (StringPntr + 2) + 1);
1932      if (ErrorCode > 0)
1933        ErrorCode = 0;
1934    }
1935  }
1936
1937  return ErrorCode;
1938}
1939
1940
1941
1942/******************************************************************************
1943 * The tokenizing functions.  To make tokenization of the text easier to
1944 * understand, it is broken up into several passes.  Each pass goes over the
1945 * text (can include NUL bytes) and extracts all the words it can recognise
1946 * (can be none).  The extracted words are added to the WordSet, with the
1947 * PrefixCharacter prepended (zero if none) so we can distinguish between words
1948 * found in headers and in the text body.  It also modifies the input text
1949 * buffer in-place to change the text that the next pass will see (blanking out
1950 * words that it wants to delete, but not inserting much new text since the
1951 * buffer can't be enlarged).  They all return the number of bytes remaining in
1952 * InputString after it has been modified to be input for the next pass.
1953 * Returns zero if it has exhausted the possibility of getting more words, or
1954 * if something goes wrong.
1955 */
1956
/* The lower case pass.  Converts the ASCII capital letters A to Z in the
buffer to lower case, in place.  All other bytes (including NUL bytes and bytes
with the high bit set) are left alone.  Returns NumberOfBytes, since the text
doesn't change size. */

static size_t TokenizerPassLowerCase (
  char *BufferPntr,
  size_t NumberOfBytes)
{
  const char UpperToLowerOffset = 'a' - 'A';
  size_t     Index;

  /* Do our own lower case conversion; tolower () has problems with UTF-8
  characters that have the high bit set. */

  for (Index = 0; Index < NumberOfBytes; Index++)
  {
    char &Letter = BufferPntr [Index];

    if ('A' <= Letter && Letter <= 'Z')
      Letter += UpperToLowerOffset;
  }
  return NumberOfBytes;
}
1976
1977
1978/* A utility function for some commonly repeated code.  If this was Modula-2,
1979we could use a nested procedure.  But it's not.  Adds the given word to the set
1980of words, checking for maximum word length and prepending the prefix to the
1981word, which gets modified by this function to reflect the word actually added
1982to the set. */
1983
1984static void
1985AddWordAndPrefixToSet (
1986  string &Word,
1987  const char *PrefixString,
1988  set<string> &WordSet)
1989{
1990  if (Word.empty ())
1991    return;
1992
1993  if (Word.size () > g_MaxWordLength)
1994    Word.resize (g_MaxWordLength);
1995  Word.insert (0, PrefixString);
1996  WordSet.insert (Word);
1997}
1998
1999
2000/* Hunt through the text for various URLs and extract the components as
2001separate words.  Doesn't affect the text in the buffer.  Looks for
2002protocol://user:password@computer:port/path?query=key#anchor strings.  Also
2003www.blah strings are detected and broken down.  Doesn't do HREF="" strings
2004where the string has a relative path (no host computer name).  Assumes the
2005input buffer is already in lower case. */
2006
2007static size_t TokenizerPassExtractURLs (
2008  char *BufferPntr,
2009  size_t NumberOfBytes,
2010  char PrefixCharacter,
2011  set<string> &WordSet)
2012{
2013  char   *AtSignStringPntr;
2014  char   *HostStringPntr;
2015  char   *InputStringEndPntr;
2016  char   *InputStringPntr;
2017  char   *OptionsStringPntr;
2018  char   *PathStringPntr;
2019  char    PrefixString [2];
2020  char   *ProtocolStringPntr;
2021  string  Word;
2022
2023  InputStringPntr = BufferPntr;
2024  InputStringEndPntr = BufferPntr + NumberOfBytes;
2025  PrefixString [0] = PrefixCharacter;
2026  PrefixString [1] = 0;
2027
2028  while (InputStringPntr < InputStringEndPntr - 4)
2029  {
2030    HostStringPntr = NULL;
2031    if (memcmp (InputStringPntr, "www.", 4) == 0)
2032      HostStringPntr = InputStringPntr;
2033    else if (memcmp (InputStringPntr, "://", 3) == 0)
2034    {
2035      /* Find the protocol name, and add it as a word such as "ftp:" "http:" */
2036      ProtocolStringPntr = InputStringPntr;
2037      while (ProtocolStringPntr > BufferPntr &&
2038      isalpha (ProtocolStringPntr[-1]))
2039        ProtocolStringPntr--;
2040      Word.assign (ProtocolStringPntr,
2041        (InputStringPntr - ProtocolStringPntr) + 1 /* for the colon */);
2042      AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2043      HostStringPntr = InputStringPntr + 3; /* Skip past the "://" */
2044    }
2045    if (HostStringPntr == NULL)
2046    {
2047      InputStringPntr++;
2048      continue;
2049    }
2050
2051    /* Got a host name string starting at HostStringPntr.  It's everything
2052    until the next slash or space, like "user:password@computer:port". */
2053
2054    InputStringPntr = HostStringPntr;
2055    AtSignStringPntr = NULL;
2056    while (InputStringPntr < InputStringEndPntr &&
2057    (*InputStringPntr != '/' && !isspace (*InputStringPntr)))
2058    {
2059      if (*InputStringPntr == '@')
2060        AtSignStringPntr = InputStringPntr;
2061      InputStringPntr++;
2062    }
2063    if (AtSignStringPntr != NULL)
2064    {
2065      /* Add a word with the user and password, unseparated. */
2066      Word.assign (HostStringPntr,
2067        AtSignStringPntr - HostStringPntr + 1 /* for the @ sign */);
2068      AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2069      HostStringPntr = AtSignStringPntr + 1;
2070    }
2071
2072    /* Add a word with the computer and port, unseparated. */
2073
2074    Word.assign (HostStringPntr, InputStringPntr - HostStringPntr);
2075    AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2076
2077    /* Now get the path name, not including the extra junk after ?  and #
2078    separators (they're stored as separate options).  Stops at white space or a
2079    double quote mark. */
2080
2081    PathStringPntr = InputStringPntr;
2082    OptionsStringPntr = NULL;
2083    while (InputStringPntr < InputStringEndPntr &&
2084    (*InputStringPntr != '"' && !isspace (*InputStringPntr)))
2085    {
2086      if (OptionsStringPntr == NULL &&
2087      (*InputStringPntr == '?' || *InputStringPntr == '#'))
2088        OptionsStringPntr = InputStringPntr;
2089      InputStringPntr++;
2090    }
2091
2092    if (OptionsStringPntr == NULL)
2093    {
2094      /* No options, all path. */
2095      Word.assign (PathStringPntr, InputStringPntr - PathStringPntr);
2096      AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2097    }
2098    else
2099    {
2100      /* Insert the path before the options. */
2101      Word.assign (PathStringPntr, OptionsStringPntr - PathStringPntr);
2102      AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2103
2104      /* Insert all the options as a word. */
2105      Word.assign (OptionsStringPntr, InputStringPntr - OptionsStringPntr);
2106      AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2107    }
2108  }
2109  return NumberOfBytes;
2110}
2111
2112
/* Replace long Asian words (likely to actually be sentences) with the first
character in the word.  For this pass a "word" starts at the first byte of a
UTF-8 character (a byte of 192 or more) and runs until the next ASCII byte (a
byte under 128).  It counts as long if it is more than twice g_MaxWordLength
bytes.  The text is compacted in place in the buffer and the function returns
the number of bytes of text remaining in the buffer afterwards.  Note that the
length is only checked when the ASCII byte ending the word is found, so a long
word which runs right up to the end of the buffer gets copied unchanged. */

static size_t TokenizerPassTruncateLongAsianWords (
  char *BufferPntr,
  size_t NumberOfBytes)
{
  char *EndOfStringPntr;
  char *InputStringPntr;
  int   Letter;
  char *OutputStringPntr;
  char *StartOfInputLongUnicodeWord;
  char *StartOfOutputLongUnicodeWord;

  InputStringPntr = BufferPntr;
  EndOfStringPntr = InputStringPntr + NumberOfBytes;
  OutputStringPntr = InputStringPntr;
  StartOfInputLongUnicodeWord = NULL; /* Non-NULL flags it as started. */
  StartOfOutputLongUnicodeWord = NULL;

  /* Copy the text from the input to the output (same buffer), but when we find
  a sequence of UTF-8 characters that is too long then truncate it down to one
  character and reset the output pointer to be after that character, thus
  deleting the word.  Replacing the deleted characters after it with spaces
  won't work since we need to preserve the lack of space to handle those sneaky
  HTML artificial word breakers.  So that Thelongword<blah>ing becomes
  "T<blah>ing" rather than "T <blah>ing", so the next step joins them up into
  "Ting" rather than "T" and "ing".  The first code in a UTF-8 character is
  11xxxxxx and subsequent ones are 10xxxxxx.  The output pointer never gets
  ahead of the input pointer, so the copying can't overwrite text which hasn't
  been read yet. */

  while (InputStringPntr < EndOfStringPntr)
  {
    Letter = (unsigned char) *InputStringPntr;
    if (Letter < 128) /* Got a regular ASCII letter?  It ends any word. */
    {
      if (StartOfInputLongUnicodeWord != NULL)
      {
        if (InputStringPntr - StartOfInputLongUnicodeWord >
        (int) g_MaxWordLength * 2)
        {
          /* Need to truncate the long word (more than twice g_MaxWordLength
          bytes) back down to the first UTF-8 character, so find out where the
          first character ends (skip past the 10xxxxxx bytes), and rewind the
          output pointer to be just after that (ignoring the rest of the long
          word in effect).  The bytes being examined are the copy of the word
          which was already written to the output. */

          OutputStringPntr = StartOfOutputLongUnicodeWord + 1;
          while (OutputStringPntr < InputStringPntr)
          {
            Letter = (unsigned char) *OutputStringPntr;
            if (Letter < 128 || Letter >= 192)
              break;
            ++OutputStringPntr; /* Still a UTF-8 middle of the character code. */
          }
        }
        StartOfInputLongUnicodeWord = NULL;
      }
    }
    else if (Letter >= 192 && StartOfInputLongUnicodeWord == NULL)
    {
      /* Got the start of a UTF-8 character.  Remember the spot so we can see
      if this is a too long UTF-8 word, which is often a whole sentence in
      asian languages, since they sort of use a single character per word.
      The matching spot in the output is remembered too, since that is where
      the truncation will rewind to. */

      StartOfInputLongUnicodeWord = InputStringPntr;
      StartOfOutputLongUnicodeWord = OutputStringPntr;
    }

    /* Copy the current byte, which is the ASCII byte ending the word if a
    truncation was just done. */

    *OutputStringPntr++ = *InputStringPntr++;
  }
  return OutputStringPntr - BufferPntr;
}
2184
2185
2186/* Find all the words in the string and add them to our local set of words.
2187The characters considered white space are defined by g_SpaceCharacters.  This
2188function is also used as a subroutine by other tokenizer functions when they
2189have a bunch of presumably plain text they want broken into words and added. */
2190
2191static size_t TokenizerPassGetPlainWords (
2192  char *BufferPntr,
2193  size_t NumberOfBytes,
2194  char PrefixCharacter,
2195  set<string> &WordSet)
2196{
2197  string  AccumulatedWord;
2198  char   *EndOfStringPntr;
2199  size_t  Length;
2200  int     Letter;
2201
2202  if (NumberOfBytes <= 0)
2203    return 0; /* Nothing to process. */
2204
2205  if (PrefixCharacter != 0)
2206    AccumulatedWord = PrefixCharacter;
2207  EndOfStringPntr = BufferPntr + NumberOfBytes;
2208  while (true)
2209  {
2210    if (BufferPntr >= EndOfStringPntr)
2211      Letter = EOF; // Usually a negative number.
2212    else
2213      Letter = (unsigned char) *BufferPntr++;
2214
2215    /* See if it is a letter we treat as white space.  Some word separators
2216    like dashes and periods aren't considered as space.  Note that codes above
2217    127 are UTF-8 characters, which we consider non-space. */
2218
2219    if (Letter < 0 /* EOF is -1 */ ||
2220    (Letter < 128 && g_SpaceCharacters[Letter]))
2221    {
2222      /* That space finished off a word.  Remove trailing periods... */
2223
2224      while ((Length = AccumulatedWord.size()) > 0 &&
2225      AccumulatedWord [Length-1] == '.')
2226        AccumulatedWord.resize (Length - 1);
2227
2228      /* If there's anything left in the word, add it to the set.  Also ignore
2229      words which are too big (it's probably some binary encoded data).  But
2230      leave room for supercalifragilisticexpialidoceous.  According to one web
2231      site, pneumonoultramicroscopicsilicovolcanoconiosis is the longest word
2232      currently in English.  Note that some uuencoded data was seen with a 60
2233      character line length. */
2234
2235      if (PrefixCharacter != 0)
2236        Length--; // Don't count prefix when judging size or emptiness.
2237      if (Length > 0 && Length <= g_MaxWordLength)
2238        WordSet.insert (AccumulatedWord);
2239
2240      /* Empty out the string to get ready for the next word.  Not quite empty,
2241      start it off with the prefix character if any. */
2242
2243      if (PrefixCharacter != 0)
2244        AccumulatedWord = PrefixCharacter;
2245      else
2246        AccumulatedWord.resize (0);
2247    }
2248    else /* Not a space-like character, add it to the word. */
2249      AccumulatedWord.append (1 /* one copy of the char */, (char) Letter);
2250
2251    if (Letter < 0)
2252      break; /* End of data.  Exit here so that last word got processed. */
2253  }
2254  return NumberOfBytes;
2255}
2256
2257
2258/* Delete Things from the text.  The Thing is marked by a start string and an
2259end string, such as "<!--" and "--> for HTML comment things.  All the text
2260between the markers will be added to the word list before it gets deleted from
2261the buffer.  The markers must be prepared in lower case and the buffer is
2262assumed to have already been converted to lower case.  You can specify an empty
2263string for the end marker if you're just matching a string constant like
2264"&nbsp;", which you would put in the starting marker.  This is a utility
2265function used by other tokenizer functions. */
2266
2267static size_t TokenizerUtilRemoveStartEndThing (
2268  char *BufferPntr,
2269  size_t NumberOfBytes,
2270  char PrefixCharacter,
2271  set<string> &WordSet,
2272  const char *ThingStartCode,
2273  const char *ThingEndCode,
2274  bool ReplaceWithSpace)
2275{
2276  char *EndOfStringPntr;
2277  bool  FoundAndDeletedThing;
2278  char *InputStringPntr;
2279  char *OutputStringPntr;
2280  int   ThingEndLength;
2281  char *ThingEndPntr;
2282  int   ThingStartLength;
2283
2284  InputStringPntr = BufferPntr;
2285  EndOfStringPntr = InputStringPntr + NumberOfBytes;
2286  OutputStringPntr = InputStringPntr;
2287  ThingStartLength = strlen (ThingStartCode);
2288  ThingEndLength = strlen (ThingEndCode);
2289
2290  if (ThingStartLength <= 0)
2291    return NumberOfBytes; /* Need some things to look for first! */
2292
2293  while (InputStringPntr < EndOfStringPntr)
2294  {
2295    /* Search for the starting marker. */
2296
2297    FoundAndDeletedThing = false;
2298    if (EndOfStringPntr - InputStringPntr >=
2299    ThingStartLength + ThingEndLength /* space remains for start + end */ &&
2300    *InputStringPntr == *ThingStartCode &&
2301    memcmp (InputStringPntr, ThingStartCode, ThingStartLength) == 0)
2302    {
2303      /* Found the start marker.  Look for the terminating string.  If it is an
2304      empty string, then we've found it right now! */
2305
2306      ThingEndPntr = InputStringPntr + ThingStartLength;
2307      while (EndOfStringPntr - ThingEndPntr >= ThingEndLength)
2308      {
2309        if (ThingEndLength == 0 ||
2310        (*ThingEndPntr == *ThingEndCode &&
2311        memcmp (ThingEndPntr, ThingEndCode, ThingEndLength) == 0))
2312        {
2313          /* Got the end of the Thing.  First dump the text inbetween the start
2314          and end markers into the words list. */
2315
2316          TokenizerPassGetPlainWords (InputStringPntr + ThingStartLength,
2317            ThingEndPntr - (InputStringPntr + ThingStartLength),
2318            PrefixCharacter, WordSet);
2319
2320          /* Delete by not updating the output pointer while moving the input
2321          pointer to just after the ending tag. */
2322
2323          InputStringPntr = ThingEndPntr + ThingEndLength;
2324          if (ReplaceWithSpace)
2325            *OutputStringPntr++ = ' ';
2326          FoundAndDeletedThing = true;
2327          break;
2328        }
2329        ThingEndPntr++;
2330      } /* End while ThingEndPntr */
2331    }
2332    if (!FoundAndDeletedThing)
2333      *OutputStringPntr++ = *InputStringPntr++;
2334  } /* End while InputStringPntr */
2335
2336  return OutputStringPntr - BufferPntr;
2337}
2338
2339
2340static size_t TokenizerPassRemoveHTMLComments (
2341  char *BufferPntr,
2342  size_t NumberOfBytes,
2343  char PrefixCharacter,
2344  set<string> &WordSet)
2345{
2346  return TokenizerUtilRemoveStartEndThing (BufferPntr, NumberOfBytes,
2347    PrefixCharacter, WordSet, "<!--", "-->", false);
2348}
2349
2350
2351static size_t TokenizerPassRemoveHTMLStyle (
2352  char *BufferPntr,
2353  size_t NumberOfBytes,
2354  char PrefixCharacter,
2355  set<string> &WordSet)
2356{
2357  return TokenizerUtilRemoveStartEndThing (BufferPntr, NumberOfBytes,
2358    PrefixCharacter, WordSet,
2359    "<style", "/style>", false /* replace with space if true */);
2360}
2361
2362
2363/* Convert Japanese periods (a round hollow dot symbol) to spaces so that the
2364start of the next sentence is recognised at least as the start of a very long
2365word.  The Japanese comma also does the same job. */
2366
2367static size_t TokenizerPassJapanesePeriodsToSpaces (
2368  char *BufferPntr,
2369  size_t NumberOfBytes,
2370  char PrefixCharacter,
2371  set<string> &WordSet)
2372{
2373  size_t BytesRemaining = NumberOfBytes;
2374
2375  BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2376    BytesRemaining, PrefixCharacter, WordSet, "���" /* period */, "", true);
2377  BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2378    BytesRemaining, PrefixCharacter, WordSet, "���" /* comma */, "", true);
2379  return BytesRemaining;
2380}
2381
2382
2383/* Delete HTML tags from the text.  The contents of the tag are added as words
2384before being deleted.  <P>, <BR> and &nbsp; are replaced by spaces at this
2385stage while other HTML things get replaced by nothing. */
2386
2387static size_t TokenizerPassRemoveHTMLTags (
2388  char *BufferPntr,
2389  size_t NumberOfBytes,
2390  char PrefixCharacter,
2391  set<string> &WordSet)
2392{
2393  size_t BytesRemaining = NumberOfBytes;
2394
2395  BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2396    BytesRemaining, PrefixCharacter, WordSet, "&nbsp;", "", true);
2397  BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2398    BytesRemaining, PrefixCharacter, WordSet, "<p", ">", true);
2399  BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2400    BytesRemaining, PrefixCharacter, WordSet, "<br", ">", true);
2401  BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2402    BytesRemaining, PrefixCharacter, WordSet, "<", ">", false);
2403  return BytesRemaining;
2404}
2405
2406
2407
2408/******************************************************************************
2409 * Implementation of the ABSApp class, constructor, destructor and the rest of
2410 * the member functions in mostly alphabetical order.
2411 */
2412
2413ABSApp::ABSApp ()
2414: BApplication (g_ABSAppSignature),
2415  m_DatabaseHasChanged (false),
2416  m_SettingsHaveChanged (false)
2417{
2418  status_t    ErrorCode;
2419  int         HalvingCount;
2420  int         i;
2421  const void *ResourceData;
2422  size_t      ResourceSize;
2423  BResources *ResourcesPntr;
2424
2425  MakeDatabaseEmpty ();
2426
2427  /* Set up the pathname which identifies our settings directory.  Note that
2428  the actual settings are loaded later on (or set to defaults) by the main()
2429  function, before this BApplication starts running.  So we don't bother
2430  initialising the other setting related variables here. */
2431
2432  ErrorCode =
2433    find_directory (B_USER_SETTINGS_DIRECTORY, &m_SettingsDirectoryPath);
2434  if (ErrorCode == B_OK)
2435    ErrorCode = m_SettingsDirectoryPath.Append (g_SettingsDirectoryName);
2436  if (ErrorCode != B_OK)
2437    m_SettingsDirectoryPath.SetTo (".");
2438
2439  /* Set up the table which identifies which characters are spaces and which
2440  are not.  Spaces are all control characters and all punctuation except for:
2441  apostrophe (so "it's" and possessive versions of words get stored), dash (for
2442  hyphenated words), dollar sign (for cash amounts), period (for IP addresses,
2443  we later remove trailing periods). */
2444
2445  memset (g_SpaceCharacters, 1, sizeof (g_SpaceCharacters));
2446  g_SpaceCharacters['\''] = false;
2447  g_SpaceCharacters['-'] = false;
2448  g_SpaceCharacters['$'] = false;
2449  g_SpaceCharacters['.'] = false;
2450  for (i = '0'; i <= '9'; i++)
2451    g_SpaceCharacters[i] = false;
2452  for (i = 'A'; i <= 'Z'; i++)
2453    g_SpaceCharacters[i] = false;
2454  for (i = 'a'; i <= 'z'; i++)
2455    g_SpaceCharacters[i] = false;
2456
2457  /* Initialise the busy cursor from data in the application's resources. */
2458
2459  if ((ResourcesPntr = AppResources ()) != NULL && (ResourceData =
2460  ResourcesPntr->LoadResource ('CURS', "Busy Cursor", &ResourceSize)) != NULL
2461  && ResourceSize >= 68 /* Size of a raw 2x16x16x8+4 cursor is 68 bytes */)
2462    g_BusyCursor = new BCursor (ResourceData);
2463
2464  /* Find out the smallest usable double by seeing how small we can make it. */
2465
2466  m_SmallestUseableDouble = 1.0;
2467  HalvingCount = 0;
2468  while (HalvingCount < 10000 && m_SmallestUseableDouble > 0.0)
2469  {
2470    HalvingCount++;
2471    m_SmallestUseableDouble /= 2;
2472  }
2473
2474  /* Recreate the number.  But don't make quite as small, we want to allow some
2475  precision bits and a bit of extra margin for intermediate results in future
2476  calculations. */
2477
2478  HalvingCount -= 50 + sizeof (double) * 8;
2479
2480  m_SmallestUseableDouble = 1.0;
2481  while (HalvingCount > 0)
2482  {
2483    HalvingCount--;
2484    m_SmallestUseableDouble /= 2;
2485  }
2486}
2487
2488
2489ABSApp::~ABSApp ()
2490{
2491  status_t ErrorCode;
2492  char     ErrorMessage [PATH_MAX + 1024];
2493
2494  if (m_SettingsHaveChanged)
2495    LoadSaveSettings (false /* DoLoad */);
2496  if ((ErrorCode = SaveDatabaseIfNeeded (ErrorMessage)) != B_OK)
2497    DisplayErrorMessage (ErrorMessage, ErrorCode, "Exiting Error");
2498  delete g_BusyCursor;
2499  g_BusyCursor = NULL;
2500}
2501
2502
2503/* Display a box showing information about this program. */
2504
2505void
2506ABSApp::AboutRequested ()
2507{
2508  BAlert *AboutAlertPntr;
2509
2510  AboutAlertPntr = new BAlert ("About",
2511"SpamDBM - Spam Database Manager\n\n"
2512
2513"This is a BeOS program for classifying e-mail messages as spam (unwanted \
2514junk mail) or as genuine mail using a Bayesian statistical approach.  There \
2515is also a Mail Daemon Replacement add-on to filter mail using the \
2516classification statistics collected earlier.\n\n"
2517
2518"Written by Alexander G. M. Smith, fall 2002.\n\n"
2519
2520"The original idea was from Paul Graham's algorithm, which has an excellent \
2521writeup at: http://www.paulgraham.com/spam.html\n\n"
2522
2523"Gary Robinson came up with the improved algorithm, which you can read about \
2524at: http://radio.weblogs.com/0101454/stories/2002/09/16/spamDetection.html\n\n"
2525
2526"Mr. Robinson, Tim Peters and the SpamBayes mailing list people then \
2527developed the even better chi-squared scoring method.\n\n"
2528
2529"Icon courtesy of Isaac Yonemoto, though it is no longer used since Hormel \
2530doesn't want their meat product associated with junk e-mail.\n\n"
2531
2532"Tokenising code updated in 2005 to use some of the tricks that SpamBayes \
2533uses to extract words from messages.  In particular, HTML is now handled.\n\n"
2534
2535"Released to the public domain, with no warranty.\n"
2536"$Revision: 30630 $\n"
2537"Compiled on " __DATE__ " at " __TIME__ ".", "Done");
2538  if (AboutAlertPntr != NULL)
2539  {
2540    AboutAlertPntr->SetFlags(AboutAlertPntr->Flags() | B_CLOSE_ON_ESCAPE);
2541    AboutAlertPntr->Go ();
2542  }
2543}
2544
2545
2546/* Add the text in the given file to the database as an example of a spam or
2547genuine message, or removes it from the database if you claim it is
2548CL_UNCERTAIN.  Also resets the spam ratio attribute to show the effect of the
2549database change. */
2550
2551status_t ABSApp::AddFileToDatabase (
2552  ClassificationTypes IsSpamOrWhat,
2553  const char *FileName,
2554  char *ErrorMessage)
2555{
2556  status_t ErrorCode;
2557  BFile    MessageFile;
2558  BMessage TempBMessage;
2559
2560  ErrorCode = MessageFile.SetTo (FileName, B_READ_ONLY);
2561  if (ErrorCode != B_OK)
2562  {
2563    sprintf (ErrorMessage, "Unable to open file \"%s\" for reading", FileName);
2564    return ErrorCode;
2565  }
2566
2567  ErrorCode = AddPositionIOToDatabase (IsSpamOrWhat,
2568    &MessageFile, FileName, ErrorMessage);
2569  MessageFile.Unset ();
2570  if (ErrorCode != B_OK)
2571    return ErrorCode;
2572
2573  /* Re-evaluate the file so that the user sees the new ratio attribute. */
2574  return EvaluateFile (FileName, &TempBMessage, ErrorMessage);
2575}
2576
2577
2578/* Add the given text to the database.  The unique words found in MessageIOPntr
2579will be added to the database (incrementing the count for the number of
2580messages using each word, either the spam or genuine count depending on
2581IsSpamOrWhat).  It will remove the message (decrement the word counts) if you
2582specify CL_UNCERTAIN as the new classification.  And if it switches from spam
2583to genuine or vice versa, it will do both - decrement the counts for the old
2584class and increment the counts for the new one.  An attribute will be added to
2585MessageIOPntr (if it is a file) to record that it has been marked as Spam or
2586Genuine (so that it doesn't get added to the database a second time).  If it is
2587being removed from the database, the classification attribute gets removed too.
2588If things go wrong, a non-zero error code will be returned and an explanation
2589written to ErrorMessage (assumed to be at least PATH_MAX + 1024 bytes long).
2590OptionalFileName is just used in the error message to identify the file to the
2591user. */
2592
2593status_t ABSApp::AddPositionIOToDatabase (
2594  ClassificationTypes IsSpamOrWhat,
2595  BPositionIO *MessageIOPntr,
2596  const char *OptionalFileName,
2597  char *ErrorMessage)
2598{
2599  BNode                             *BNodePntr;
2600  char                               ClassificationString [NAME_MAX];
2601  StatisticsMap::iterator            DataIter;
2602  status_t                           ErrorCode = 0;
2603  pair<StatisticsMap::iterator,bool> InsertResult;
2604  uint32                             NewAge;
2605  StatisticsRecord                   NewStatistics;
2606  ClassificationTypes                PreviousClassification;
2607  StatisticsPointer                  StatisticsPntr;
2608  set<string>::iterator              WordEndIter;
2609  set<string>::iterator              WordIter;
2610  set<string>                        WordSet;
2611
2612  NewAge = m_TotalGenuineMessages + m_TotalSpamMessages;
2613  if (NewAge >= 0xFFFFFFF0UL)
2614  {
2615    sprintf (ErrorMessage, "The database is full!  There are %lu messages in "
2616      "it and we can't add any more without overflowing the maximum integer "
2617      "representation in 32 bits", NewAge);
2618    return B_NO_MEMORY;
2619  }
2620
2621  /* Check that this file hasn't already been added to the database. */
2622
2623  PreviousClassification = CL_UNCERTAIN;
2624  BNodePntr = dynamic_cast<BNode *> (MessageIOPntr);
2625  if (BNodePntr != NULL) /* If this thing might have attributes. */
2626  {
2627    ErrorCode = BNodePntr->ReadAttr (g_AttributeNameClassification,
2628      B_STRING_TYPE, 0 /* offset */, ClassificationString,
2629      sizeof (ClassificationString) - 1);
2630    if (ErrorCode <= 0) /* Positive values for the number of bytes read */
2631      strcpy (ClassificationString, "none");
2632    else /* Just in case it needs a NUL at the end. */
2633      ClassificationString [ErrorCode] = 0;
2634
2635    if (strcasecmp (ClassificationString, g_ClassifiedSpam) == 0)
2636      PreviousClassification = CL_SPAM;
2637    else if (strcasecmp (ClassificationString, g_ClassifiedGenuine) == 0)
2638      PreviousClassification = CL_GENUINE;
2639  }
2640
2641  if (!m_IgnorePreviousClassification &&
2642  PreviousClassification != CL_UNCERTAIN)
2643  {
2644    if (IsSpamOrWhat == PreviousClassification)
2645    {
2646      sprintf (ErrorMessage, "Ignoring file \"%s\" since it seems to have "
2647        "already been classified as %s.", OptionalFileName,
2648        g_ClassificationTypeNames [IsSpamOrWhat]);
2649    }
2650    else
2651    {
2652      sprintf (ErrorMessage, "Changing existing classification of file \"%s\" "
2653        "from %s to %s.", OptionalFileName,
2654        g_ClassificationTypeNames [PreviousClassification],
2655        g_ClassificationTypeNames [IsSpamOrWhat]);
2656    }
2657    DisplayErrorMessage (ErrorMessage, 0, "Note");
2658  }
2659
2660  if (!m_IgnorePreviousClassification &&
2661  IsSpamOrWhat == PreviousClassification)
2662    /* Nothing to do if it is already classified correctly and the user doesn't
2663    want double classification. */
2664    return B_OK;
2665
2666  /* Get the list of unique words in the file. */
2667
2668  ErrorCode = GetWordsFromPositionIO (MessageIOPntr, OptionalFileName,
2669    WordSet, ErrorMessage);
2670  if (ErrorCode != B_OK)
2671    return ErrorCode;
2672
2673  /* Update the count of the number of messages processed, with corrections if
2674  reclassifying a message. */
2675
2676  m_DatabaseHasChanged = true;
2677
2678  if (!m_IgnorePreviousClassification &&
2679  PreviousClassification == CL_SPAM && m_TotalSpamMessages > 0)
2680    m_TotalSpamMessages--;
2681
2682  if (IsSpamOrWhat == CL_SPAM)
2683    m_TotalSpamMessages++;
2684
2685  if (!m_IgnorePreviousClassification &&
2686  PreviousClassification == CL_GENUINE && m_TotalGenuineMessages > 0)
2687      m_TotalGenuineMessages--;
2688
2689  if (IsSpamOrWhat == CL_GENUINE)
2690    m_TotalGenuineMessages++;
2691
2692  /* Mark the file's attributes with the new classification.  Don't care if it
2693  fails. */
2694
2695  if (BNodePntr != NULL) /* If this thing might have attributes. */
2696  {
2697    ErrorCode = BNodePntr->RemoveAttr (g_AttributeNameClassification);
2698    if (IsSpamOrWhat != CL_UNCERTAIN)
2699    {
2700      strcpy (ClassificationString, g_ClassificationTypeNames [IsSpamOrWhat]);
2701      ErrorCode = BNodePntr->WriteAttr (g_AttributeNameClassification,
2702        B_STRING_TYPE, 0 /* offset */,
2703        ClassificationString, strlen (ClassificationString) + 1);
2704    }
2705  }
2706
2707  /* Add the words to the database by incrementing or decrementing the counts
2708  for each word as appropriate. */
2709
2710  WordEndIter = WordSet.end ();
2711  for (WordIter = WordSet.begin (); WordIter != WordEndIter; WordIter++)
2712  {
2713    if ((DataIter = m_WordMap.find (*WordIter)) == m_WordMap.end ())
2714    {
2715      /* No record in the database for the word. */
2716
2717      if (IsSpamOrWhat == CL_UNCERTAIN)
2718        continue; /* Not adding words, don't have to subtract from nothing. */
2719
2720      /* Create a new one record in the database for the new word. */
2721
2722      memset (&NewStatistics, 0, sizeof (NewStatistics));
2723      InsertResult = m_WordMap.insert (
2724        StatisticsMap::value_type (*WordIter, NewStatistics));
2725      if (!InsertResult.second)
2726      {
2727        sprintf (ErrorMessage, "Failed to insert new database entry for "
2728          "word \"%s\", while processing file \"%s\"",
2729          WordIter->c_str (), OptionalFileName);
2730        return B_NO_MEMORY;
2731      }
2732      DataIter = InsertResult.first;
2733      m_WordCount++;
2734    }
2735
2736    /* Got the database record for the word, update the statistics. */
2737
2738    StatisticsPntr = &DataIter->second;
2739
2740    StatisticsPntr->age = NewAge;
2741
2742    /* Can't update m_OldestAge here, since it would take a lot of effort to
2743    find the next older age.  Since it's only used for display, we'll let it be
2744    slightly incorrect.  The next database load or purge will fix it. */
2745
2746    if (IsSpamOrWhat == CL_SPAM)
2747      StatisticsPntr->spamCount++;
2748
2749    if (IsSpamOrWhat == CL_GENUINE)
2750      StatisticsPntr->genuineCount++;
2751
2752    if (!m_IgnorePreviousClassification &&
2753    PreviousClassification == CL_SPAM && StatisticsPntr->spamCount > 0)
2754      StatisticsPntr->spamCount--;
2755
2756    if (!m_IgnorePreviousClassification &&
2757    PreviousClassification == CL_GENUINE && StatisticsPntr->genuineCount > 0)
2758      StatisticsPntr->genuineCount--;
2759  }
2760
2761  return B_OK;
2762}
2763
2764
2765/* Add the text in the string to the database as an example of a spam or
2766genuine message. */
2767
2768status_t ABSApp::AddStringToDatabase (
2769  ClassificationTypes IsSpamOrWhat,
2770  const char *String,
2771  char *ErrorMessage)
2772{
2773  BMemoryIO MemoryIO (String, strlen (String));
2774
2775  return AddPositionIOToDatabase (IsSpamOrWhat, &MemoryIO,
2776   "Memory Buffer" /* OptionalFileName */, ErrorMessage);
2777}
2778
2779
2780/* Given a bunch of text, find the words within it (doing special tricks to
2781extract words from HTML), and add them to the set.  Allow NULs in the text.  If
2782the PrefixCharacter isn't zero then it is prepended to all words found (so you
2783can distinguish words as being from a header or from the body text).  See also
2784TokenizeWhole which does something similar. */
2785
2786void
2787ABSApp::AddWordsToSet (
2788  const char *InputString,
2789  size_t NumberOfBytes,
2790  char PrefixCharacter,
2791  set<string> &WordSet)
2792{
2793  char   *BufferPntr;
2794  size_t  CurrentSize;
2795  int     PassNumber;
2796
2797  /* Copy the input buffer.  The code will be modifying it in-place as HTML
2798  fragments and other junk are deleted. */
2799
2800  BufferPntr = new char [NumberOfBytes];
2801  if (BufferPntr == NULL)
2802    return;
2803  memcpy (BufferPntr, InputString, NumberOfBytes);
2804
2805  /* Do the tokenization.  Each pass does something to the text in the buffer,
2806  and may add words to the word set. */
2807
2808  CurrentSize = NumberOfBytes;
2809  for (PassNumber = 1; PassNumber <= 8 && CurrentSize > 0 ; PassNumber++)
2810  {
2811    switch (PassNumber)
2812    {
2813      case 1: /* Lowercase first, rest of them assume lower case inputs. */
2814        CurrentSize = TokenizerPassLowerCase (BufferPntr, CurrentSize);
2815        break;
2816      case 2: CurrentSize = TokenizerPassJapanesePeriodsToSpaces (
2817        BufferPntr, CurrentSize, PrefixCharacter, WordSet); break;
2818      case 3: CurrentSize = TokenizerPassTruncateLongAsianWords (
2819        BufferPntr, CurrentSize); break;
2820      case 4: CurrentSize = TokenizerPassRemoveHTMLComments (
2821        BufferPntr, CurrentSize, 'Z', WordSet); break;
2822      case 5: CurrentSize = TokenizerPassRemoveHTMLStyle (
2823        BufferPntr, CurrentSize, 'Z', WordSet); break;
2824      case 6: CurrentSize = TokenizerPassExtractURLs (
2825        BufferPntr, CurrentSize, 'Z', WordSet); break;
2826      case 7: CurrentSize = TokenizerPassRemoveHTMLTags (
2827        BufferPntr, CurrentSize, 'Z', WordSet); break;
2828      case 8: CurrentSize = TokenizerPassGetPlainWords (
2829        BufferPntr, CurrentSize, PrefixCharacter, WordSet); break;
2830      default: break;
2831    }
2832  }
2833
2834  delete [] BufferPntr;
2835}
2836
2837
2838/* The user has provided a command line.  This could actually be from a
2839separate attempt to invoke the program (this application's resource/attributes
2840have the launch flags set to "single launch", so the shell doesn't start the
2841program but instead sends the arguments to the already running instance).  In
2842either case, the command is sent to an intermediary thread where it is
2843asynchronously converted into a scripting message(s) that are sent back to this
2844BApplication.  The intermediary is needed since we can't recursively execute
2845scripting messages while processing a message (this ArgsReceived one). */
2846
2847void
2848ABSApp::ArgvReceived (int32 argc, char **argv)
2849{
2850  if (g_CommanderLooperPntr != NULL)
2851    g_CommanderLooperPntr->CommandArguments (argc, argv);
2852}
2853
2854
2855/* Create a new empty database.  Note that we have to write out the new file
2856immediately, otherwise other operations will see the empty database and then
2857try to load the file, and complain that it doesn't exist.  Now they will see
2858the empty database and redundantly load the empty file. */
2859
2860status_t ABSApp::CreateDatabaseFile (char *ErrorMessage)
2861{
2862  MakeDatabaseEmpty ();
2863  m_DatabaseHasChanged = true;
2864  return SaveDatabaseIfNeeded (ErrorMessage); /* Make it now. */
2865}
2866
2867
2868/* Set the settings to the defaults.  Needed in case there isn't a settings
2869file or it is obsolete. */
2870
2871void
2872ABSApp::DefaultSettings ()
2873{
2874  status_t ErrorCode;
2875  BPath    DatabasePath (m_SettingsDirectoryPath);
2876  char     TempString [PATH_MAX];
2877
2878  /* The default database file is in the settings directory. */
2879
2880  ErrorCode = DatabasePath.Append (g_DefaultDatabaseFileName);
2881  if (ErrorCode != B_OK)
2882    strcpy (TempString, g_DefaultDatabaseFileName); /* Unlikely to happen. */
2883  else
2884    strcpy (TempString, DatabasePath.Path ());
2885  m_DatabaseFileName.SetTo (TempString);
2886
2887  // Users need to be allowed to undo their mistakes...
2888  m_IgnorePreviousClassification = true;
2889  g_ServerMode = true;
2890  m_PurgeAge = 2000;
2891  m_PurgePopularity = 2;
2892  m_ScoringMode = SM_CHISQUARED;
2893  m_TokenizeMode = TM_ANY_TEXT_HEADER;
2894
2895  m_SettingsHaveChanged = true;
2896}
2897
2898
2899/* Deletes the database file, and the backup file, and clears the database but
2900marks it as not changed so that it doesn't get written out when the program
2901exits. */
2902
2903status_t ABSApp::DeleteDatabaseFile (char *ErrorMessage)
2904{
2905  BEntry   FileEntry;
2906  status_t ErrorCode;
2907  int      i;
2908  char     TempString [PATH_MAX+20];
2909
2910  /* Clear the in-memory database. */
2911
2912  MakeDatabaseEmpty ();
2913  m_DatabaseHasChanged = false;
2914
2915  /* Delete the backup files first.  Don't care if it fails. */
2916
2917  for (i = 0; i < g_MaxBackups; i++)
2918  {
2919    strcpy (TempString, m_DatabaseFileName.String ());
2920    sprintf (TempString + strlen (TempString), g_BackupSuffix, i);
2921    ErrorCode = FileEntry.SetTo (TempString);
2922    if (ErrorCode == B_OK)
2923      FileEntry.Remove ();
2924  }
2925
2926  /* Delete the main database file. */
2927
2928  strcpy (TempString, m_DatabaseFileName.String ());
2929  ErrorCode = FileEntry.SetTo (TempString);
2930  if (ErrorCode != B_OK)
2931  {
2932    sprintf (ErrorMessage, "While deleting, failed to make BEntry for "
2933      "\"%s\" (does the directory exist?)", TempString);
2934    return ErrorCode;
2935  }
2936
2937  ErrorCode = FileEntry.Remove ();
2938  if (ErrorCode != B_OK)
2939    sprintf (ErrorMessage, "While deleting, failed to remove file "
2940      "\"%s\"", TempString);
2941
2942  return ErrorCode;
2943}
2944
2945
2946/* Evaluate the given file as being a spam message, and tag it with the
2947resulting spam probability ratio.  If it also has an e-mail subject attribute,
2948remove the [Spam 99.9%] prefix since the number usually changes. */
2949
2950status_t ABSApp::EvaluateFile (
2951  const char *PathName,
2952  BMessage *ReplyMessagePntr,
2953  char *ErrorMessage)
2954{
2955  status_t ErrorCode;
2956  float    TempFloat;
2957  BFile    TextFile;
2958
2959  /* Open the specified file. */
2960
2961  ErrorCode = TextFile.SetTo (PathName, B_READ_ONLY);
2962  if (ErrorCode != B_OK)
2963  {
2964    sprintf (ErrorMessage, "Problems opening file \"%s\" for evaluating",
2965      PathName);
2966    return ErrorCode;
2967  }
2968
2969  ErrorCode =
2970    EvaluatePositionIO (&TextFile, PathName, ReplyMessagePntr, ErrorMessage);
2971
2972  if (ErrorCode == B_OK &&
2973  ReplyMessagePntr->FindFloat (g_ResultName, &TempFloat) == B_OK)
2974  {
2975    TextFile.WriteAttr (g_AttributeNameSpamRatio, B_FLOAT_TYPE,
2976      0 /* offset */, &TempFloat, sizeof (TempFloat));
2977    /* Don't know the spam cutoff ratio, that's in the e-mail filter, so just
2978    blindly remove the prefix, which would have the wrong percentage. */
2979    RemoveSpamPrefixFromSubjectAttribute (&TextFile);
2980  }
2981
2982  return ErrorCode;
2983}
2984
2985
2986/* Evaluate a given file or memory buffer (a BPositionIO handles both cases)
2987for spaminess.  The output is added to the ReplyMessagePntr message, with the
2988probability ratio stored in "result" (0.0 means genuine and 1.0 means spam).
2989It also adds the most significant words (used in the ratio calculation) to the
2990array "words" and the associated per-word probability ratios in "ratios".  If
2991it fails, an error code is returned and an error message written to the
ErrorMessage string (which is at least PATH_MAX + 1024 bytes long).
2993OptionalFileName is only used in the error message.
2994
2995The math used for combining the individual word probabilities in my method is
2996based on Gary Robinson's method (formerly it was a variation of Paul Graham's
method) or the Chi-Squared method.  Its input is the database of words that
2998has a count of the number of spam and number of genuine messages each word
2999appears in (doesn't matter if it appears more than once in a message, it still
3000counts as 1).
3001
The spam word count is divided by the total number of spam e-mail messages
3003in the database to get the probability of spam and probability of genuineness
3004is similarly computed for a particular word.  The spam probability is divided
3005by the sum of the spam and genuine probabilities to get the Raw Spam Ratio for
3006the word.  It's nearer to 0.0 for genuine and nearer to 1.0 for spam, and can
3007be exactly zero or one too.
3008
3009To avoid multiplying later results by zero, and to compensate for a lack of
3010data points, the Raw Spam Ratio is adjusted towards the 0.5 halfway point.  The
30110.5 is combined with the raw spam ratio, with a weight of 0.45 (determined to
3012be a good value by the "spambayes" mailing list tests) messages applied to the
3013half way point and a weight of the number of spam + genuine messages applied to
3014the raw spam ratio.  This gives you the compensated spam ratio for the word.
3015
3016The top N (150 was good in the spambayes tests) extreme words are selected by
3017the distance of each word's compensated spam ratio from 0.5.  Then the ratios
3018of the words are combined.
3019
3020The Gary Robinson combining (scoring) method gets one value from the Nth root
3021of the product of all the word ratios.  The other is the Nth root of the
3022product of (1 - ratio) for all the words.  The final result is the first value
3023divided by the sum of the two values.  The Nth root helps spread the resulting
3024range of values more evenly between 0.0 and 1.0, otherwise the values all clump
3025together at 0 or 1.  Also you can think of the Nth root as a kind of average
3026for products; it's like a generic word probability which when multiplied by
3027itself N times gives you the same result as the N separate actual word
3028probabilities multiplied together.
3029
3030The Chi-Squared combining (scoring) method assumes that the spam word
3031probabilities are uniformly distributed and computes an error measurement
3032(called chi squared - see http://bmj.com/collections/statsbk/8.shtml for a good
3033tutorial) and then sees how likely that error value would be observed in
3034practice.  If it's rare to observe, then the words are likely not just randomly
occurring and it's spammy.  The same is done for genuine words.  The two
3036resulting unlikelynesses are compared to see which is more unlikely, if neither
3037is, then the method says it can't decide.  The SpamBayes notes (see the
3038classifier.py file in CVS in http://sourceforge.net/projects/spambayes) say:
3039
3040"Across vectors of length n, containing random uniformly-distributed
3041probabilities, -2*sum(ln(p_i)) follows the chi-squared distribution with 2*n
3042degrees of freedom.  This has been proven (in some appropriate sense) to be the
3043most sensitive possible test for rejecting the hypothesis that a vector of
3044probabilities is uniformly distributed.  Gary Robinson's original scheme was
3045monotonic *with* this test, but skipped the details.  Turns out that getting
3046closer to the theoretical roots gives a much sharper classification, with a
3047very small (in # of msgs), but also very broad (in range of scores), "middle
3048ground", where most of the mistakes live.  In particular, this scheme seems
3049immune to all forms of "cancellation disease": if there are many strong ham
3050*and* spam clues, this reliably scores close to 0.5.  Most other schemes are
3051extremely certain then -- and often wrong."
3052
3053I did a test with 448 example genuine messages including personal mail (some
3054with HTML attachments) and mailing lists, and 267 spam messages for 27471 words
3055total.  Test messages were more recent messages in the same groups.  Out of 100
3056test genuine messages, with Gary Robinson (0.56 cutoff limit), 1 (1%) was
3057falsely identified as spam and 8 of 73 (11%) spam messages were incorrectly
3058classified as genuine.  With my variation of Paul Graham's scheme (0.90 cutoff)
3059I got 6 of 100 (6%) genuine messages incorrectly marked as spam and 2 of 73
3060(3%) spam messages were incorrectly classified as genuine.  Pretty close, but
3061Robinson's values are more evenly spread out so you can tell just how spammy it
3062is by looking at the number. */
3063
struct WordAndRatioStruct
{
  double        probabilityRatio; /* Actually the compensated ratio. */
  const string *wordPntr;         /* Points at the word, stored elsewhere. */

  /* Ordering function for sorting: ItemA counts as less than ItemB when its
  ratio is closer to the neutral 0.5 value.  This struct doubles as its own
  comparison object, which is why the operator takes both items as
  arguments rather than comparing against "this". */

  bool operator() (
    const WordAndRatioStruct &ItemA,
    const WordAndRatioStruct &ItemB) const
  {
    const double DistanceA = fabs (ItemA.probabilityRatio - 0.5);
    const double DistanceB = fabs (ItemB.probabilityRatio - 0.5);

    return DistanceA < DistanceB;
  }
};
3078
3079status_t ABSApp::EvaluatePositionIO (
3080  BPositionIO *PositionIOPntr,
3081  const char *OptionalFileName,
3082  BMessage *ReplyMessagePntr,
3083  char *ErrorMessage)
3084{
3085  StatisticsMap::iterator            DataEndIter;
3086  StatisticsMap::iterator            DataIter;
3087  status_t                           ErrorCode;
3088  double                             GenuineProbability;
3089  uint32                             GenuineSpamSum;
3090  int                                i;
3091  priority_queue<
3092    WordAndRatioStruct /* Data type stored in the queue */,
3093    vector<WordAndRatioStruct> /* Underlying container */,
3094    WordAndRatioStruct /* Function for comparing elements */>
3095                                     PriorityQueue;
3096  double                             ProductGenuine;
3097  double                             ProductLogGenuine;
3098  double                             ProductLogSpam;
3099  double                             ProductSpam;
3100  double                             RawProbabilityRatio;
3101  float                              ResultRatio;
3102  double                             SpamProbability;
3103  StatisticsPointer                  StatisticsPntr;
3104  double                             TempDouble;
3105  double                             TotalGenuine;
3106  double                             TotalSpam;
3107  WordAndRatioStruct                 WordAndRatio;
3108  set<string>::iterator              WordEndIter;
3109  set<string>::iterator              WordIter;
3110  const WordAndRatioStruct          *WordRatioPntr;
3111  set<string>                        WordSet;
3112
3113  /* Get the list of unique words in the file / memory buffer. */
3114
3115  ErrorCode = GetWordsFromPositionIO (PositionIOPntr, OptionalFileName,
3116    WordSet, ErrorMessage);
3117  if (ErrorCode != B_OK)
3118    return ErrorCode;
3119
3120  /* Prepare a few variables.  Mostly these are stored double values of some of
3121  the numbers involved (to avoid the overhead of multiple conversions from
3122  integer to double), with extra precautions to avoid divide by zero. */
3123
3124  if (m_TotalGenuineMessages <= 0)
3125    TotalGenuine = 1.0;
3126  else
3127    TotalGenuine = m_TotalGenuineMessages;
3128
3129  if (m_TotalSpamMessages <= 0)
3130    TotalSpam = 1.0;
3131  else
3132    TotalSpam = m_TotalSpamMessages;
3133
3134  /* Look up the words in the database and calculate their compensated spam
3135  ratio.  The results are stored in a priority queue so that we can later find
3136  the top g_MaxInterestingWords for doing the actual determination. */
3137
3138  WordEndIter = WordSet.end ();
3139  DataEndIter = m_WordMap.end ();
3140  for (WordIter = WordSet.begin (); WordIter != WordEndIter; WordIter++)
3141  {
3142    WordAndRatio.wordPntr = &(*WordIter);
3143
3144    if ((DataIter = m_WordMap.find (*WordIter)) != DataEndIter)
3145    {
3146      StatisticsPntr = &DataIter->second;
3147
3148      /* Calculate the probability the word is spam and the probability it is
3149      genuine.  Then the raw probability ratio. */
3150
3151      SpamProbability = StatisticsPntr->spamCount / TotalSpam;
3152      GenuineProbability = StatisticsPntr->genuineCount / TotalGenuine;
3153
3154      if (SpamProbability + GenuineProbability > 0)
3155        RawProbabilityRatio =
3156        SpamProbability / (SpamProbability + GenuineProbability);
3157      else /* Word with zero statistics, perhaps due to reclassification. */
3158        RawProbabilityRatio = 0.5;
3159
3160      /* The compensated ratio leans towards 0.5 (g_RobinsonX) more for fewer
3161      data points, with a weight of 0.45 (g_RobinsonS). */
3162
3163      GenuineSpamSum =
3164        StatisticsPntr->spamCount + StatisticsPntr->genuineCount;
3165
3166      WordAndRatio.probabilityRatio =
3167        (g_RobinsonS * g_RobinsonX + GenuineSpamSum * RawProbabilityRatio) /
3168        (g_RobinsonS + GenuineSpamSum);
3169    }
3170    else /* Unknown word. With N=0, compensated ratio equation is RobinsonX. */
3171      WordAndRatio.probabilityRatio = g_RobinsonX;
3172
3173     PriorityQueue.push (WordAndRatio);
3174  }
3175
3176  /* Compute the combined probability (multiply them together) of the top few
3177  words.  To avoid numeric underflow (doubles can only get as small as 1E-300),
3178  logarithms are also used.  But avoid the logarithms (sum of logs of numbers
3179  is the same as the product of numbers) as much as possible due to reduced
3180  accuracy and slowness. */
3181
3182  ProductGenuine = 1.0;
3183  ProductLogGenuine = 0.0;
3184  ProductSpam = 1.0;
3185  ProductLogSpam = 0.0;
3186  for (i = 0;
3187  i < g_MaxInterestingWords && !PriorityQueue.empty();
3188  i++, PriorityQueue.pop())
3189  {
3190    WordRatioPntr = &PriorityQueue.top();
3191    ProductSpam *= WordRatioPntr->probabilityRatio;
3192    ProductGenuine *= 1.0 - WordRatioPntr->probabilityRatio;
3193
3194    /* Check for the numbers getting dangerously small, close to underflowing.
3195    If they are, move the value into the logarithm storage part. */
3196
3197    if (ProductSpam < m_SmallestUseableDouble)
3198    {
3199      ProductLogSpam += log (ProductSpam);
3200      ProductSpam = 1.0;
3201    }
3202
3203    if (ProductGenuine < m_SmallestUseableDouble)
3204    {
3205      ProductLogGenuine += log (ProductGenuine);
3206      ProductGenuine = 1.0;
3207    }
3208
3209    ReplyMessagePntr->AddString ("words", WordRatioPntr->wordPntr->c_str ());
3210    ReplyMessagePntr->AddFloat ("ratios", WordRatioPntr->probabilityRatio);
3211  }
3212
3213  /* Get the resulting log of the complete products. */
3214
3215  if (i > 0)
3216  {
3217    ProductLogSpam += log (ProductSpam);
3218    ProductLogGenuine += log (ProductGenuine);
3219  }
3220
3221  if (m_ScoringMode == SM_ROBINSON)
3222  {
3223    /* Apply Gary Robinson's scoring method where we take the Nth root of the
3224    products.  This is easiest in logarithm form. */
3225
3226    if (i > 0)
3227    {
3228      ProductSpam = exp (ProductLogSpam / i);
3229      ProductGenuine = exp (ProductLogGenuine / i);
3230      ResultRatio = ProductSpam / (ProductGenuine + ProductSpam);
3231    }
3232    else /* Somehow got no words! */
3233      ResultRatio = g_RobinsonX;
3234  }
3235  else if (m_ScoringMode == SM_CHISQUARED)
3236  {
3237    /* From the SpamBayes notes: "We compute two chi-squared statistics, one
3238    for ham and one for spam.  The sum-of-the-logs business is more sensitive
3239    to probs near 0 than to probs near 1, so the spam measure uses 1-p (so that
3240    high-spamprob words have greatest effect), and the ham measure uses p
3241    directly (so that lo-spamprob words have greatest effect)."  That means we
3242    just reversed the meaning of the previously calculated spam and genuine
3243    products!  Oh well. */
3244
3245    TempDouble = ProductLogSpam;
3246    ProductLogSpam = ProductLogGenuine;
3247    ProductLogGenuine = TempDouble;
3248
3249    if (i > 0)
3250    {
3251      ProductSpam =
3252        1.0 - ChiSquaredProbability (-2.0 * ProductLogSpam, 2 * i);
3253      ProductGenuine =
3254        1.0 - ChiSquaredProbability (-2.0 * ProductLogGenuine, 2 * i);
3255
3256      /* The SpamBayes notes say: "How to combine these into a single spam
3257      score?  We originally used (S-H)/(S+H) scaled into [0., 1.], which equals
3258      S/(S+H).  A systematic problem is that we could end up being near-certain
3259      a thing was (for example) spam, even if S was small, provided that H was
3260      much smaller.  Rob Hooft stared at these problems and invented the
3261      measure we use now, the simpler S-H, scaled into [0., 1.]." */
3262
3263      ResultRatio = (ProductSpam - ProductGenuine + 1.0) / 2.0;
3264    }
3265    else /* No words to analyse. */
3266      ResultRatio = 0.5;
3267  }
3268  else /* Unknown scoring mode. */
3269  {
3270    strcpy (ErrorMessage, "Unknown scoring mode specified in settings");
3271    return B_BAD_VALUE;
3272  }
3273
3274  ReplyMessagePntr->AddFloat (g_ResultName, ResultRatio);
3275  return B_OK;
3276}
3277
3278
3279/* Just evaluate the given string as being spam text. */
3280
3281status_t ABSApp::EvaluateString (
3282  const char *BufferPntr,
3283  ssize_t BufferSize,
3284  BMessage *ReplyMessagePntr,
3285  char *ErrorMessage)
3286{
3287  BMemoryIO MemoryIO (BufferPntr, BufferSize);
3288
3289  return EvaluatePositionIO (&MemoryIO, "Memory Buffer",
3290    ReplyMessagePntr, ErrorMessage);
3291}
3292
3293
3294/* Tell other programs about the scripting commands we support.  Try this
3295command: "hey application/x-vnd.agmsmith.spamdbm getsuites" to
3296see it in action (this program has to be already running for it to work). */
3297
3298status_t ABSApp::GetSupportedSuites (BMessage *MessagePntr)
3299{
3300  BPropertyInfo TempPropInfo (g_ScriptingPropertyList);
3301
3302  MessagePntr->AddString ("suites", "suite/x-vnd.agmsmith.spamdbm");
3303  MessagePntr->AddFlat ("messages", &TempPropInfo);
3304  return BApplication::GetSupportedSuites (MessagePntr);
3305}
3306
3307
3308/* Add all the words in the given file or memory buffer to the supplied set.
3309The file name is only there for error messages, it assumes you have already
3310opened the PositionIO to the right file.  If things go wrong, a non-zero error
3311code will be returned and an explanation written to ErrorMessage (assumed to be
3312at least PATH_MAX + 1024 bytes long). */
3313
3314status_t ABSApp::GetWordsFromPositionIO (
3315  BPositionIO *PositionIOPntr,
3316  const char *OptionalFileName,
3317  set<string> &WordSet,
3318  char *ErrorMessage)
3319{
3320  status_t ErrorCode;
3321
3322  if (m_TokenizeMode == TM_WHOLE)
3323    ErrorCode = TokenizeWhole (PositionIOPntr, OptionalFileName,
3324      WordSet, ErrorMessage);
3325  else
3326    ErrorCode = TokenizeParts (PositionIOPntr, OptionalFileName,
3327      WordSet, ErrorMessage);
3328
3329  if (ErrorCode == B_OK && WordSet.empty ())
3330  {
3331    /* ENOMSG usually means no message found in queue, but I'm using it to show
3332    no words, a good indicator of spam which is pure HTML. */
3333
3334    sprintf (ErrorMessage, "No words were found in \"%s\"", OptionalFileName);
3335    ErrorCode = ENOMSG;
3336  }
3337
3338  return ErrorCode;
3339}
3340
3341
3342/* Set up indices for attributes MAIL:classification (string) and
3343MAIL:ratio_spam (float) on all mounted disk volumes that support queries.  Also
3344tell the system to make those attributes visible to the user (so they can see
3345them in Tracker) and associate them with e-mail messages.  Also set up the
3346database file MIME type (provide a description and associate it with this
3347program so that it picks up the right icon).  And register the names for our
3348sound effects. */
3349
3350status_t ABSApp::InstallThings (char *ErrorMessage)
3351{
3352  int32       Cookie;
3353  dev_t       DeviceID;
3354  status_t    ErrorCode = B_OK;
3355  fs_info     FSInfo;
3356  int32       i;
3357  int32       iClassification;
3358  int32       iProbability;
3359  int32       j;
3360  index_info  IndexInfo;
3361  BMimeType   MimeType;
3362  BMessage    Parameters;
3363  const char *StringPntr;
3364  bool        TempBool;
3365  int32       TempInt32;
3366
3367  /* Iterate through all mounted devices and try to make the indices on each
3368  one.  Don't bother if the index exists or the device doesn't support indices
3369  (actually queries). */
3370
3371  Cookie = 0;
3372  while ((DeviceID = next_dev (&Cookie)) >= 0)
3373  {
3374    if (!fs_stat_dev (DeviceID, &FSInfo) && (FSInfo.flags & B_FS_HAS_QUERY))
3375    {
3376      if (fs_stat_index (DeviceID, g_AttributeNameClassification, &IndexInfo)
3377      && errno == B_ENTRY_NOT_FOUND)
3378      {
3379        if (fs_create_index (DeviceID, g_AttributeNameClassification,
3380        B_STRING_TYPE, 0 /* flags */))
3381        {
3382          ErrorCode = errno;
3383          sprintf (ErrorMessage, "Unable to make string index %s on "
3384            "volume #%d, volume name \"%s\", file system type \"%s\", "
3385            "on device \"%s\"", g_AttributeNameClassification,
3386            (int) DeviceID, FSInfo.volume_name, FSInfo.fsh_name,
3387            FSInfo.device_name);
3388        }
3389      }
3390
3391      if (fs_stat_index (DeviceID, g_AttributeNameSpamRatio,
3392      &IndexInfo) && errno == B_ENTRY_NOT_FOUND)
3393      {
3394        if (fs_create_index (DeviceID, g_AttributeNameSpamRatio,
3395        B_FLOAT_TYPE, 0 /* flags */))
3396        {
3397          ErrorCode = errno;
3398          sprintf (ErrorMessage, "Unable to make float index %s on "
3399            "volume #%d, volume name \"%s\", file system type \"%s\", "
3400            "on device \"%s\"", g_AttributeNameSpamRatio,
3401            (int) DeviceID, FSInfo.volume_name, FSInfo.fsh_name,
3402            FSInfo.device_name);
3403        }
3404      }
3405    }
3406  }
3407  if (ErrorCode != B_OK)
3408    return ErrorCode;
3409
3410  /* Set up the MIME types for the classification attributes, associate them
3411  with e-mail and make them visible to the user (but not editable).  First need
3412  to get the existing MIME settings, then add ours to them (otherwise the
3413  existing ones get wiped out). */
3414
3415  ErrorCode = MimeType.SetTo ("text/x-email");
3416  if (ErrorCode != B_OK || !MimeType.IsInstalled ())
3417  {
3418    sprintf (ErrorMessage, "No e-mail MIME type (%s) in the system, can't "
3419      "update it to add our special attributes, and without e-mail this "
3420      "program is useless!", MimeType.Type ());
3421    if (ErrorCode == B_OK)
3422      ErrorCode = -1;
3423    return ErrorCode;
3424  }
3425
3426  ErrorCode = MimeType.GetAttrInfo (&Parameters);
3427  if (ErrorCode != B_OK)
3428  {
3429    sprintf (ErrorMessage, "Unable to retrieve list of attributes "
3430      "associated with e-mail messages in the MIME database");
3431    return ErrorCode;
3432  }
3433
3434  for (i = 0, iClassification = -1, iProbability = -1;
3435  i < 1000 && (iClassification < 0 || iProbability < 0);
3436  i++)
3437  {
3438    ErrorCode = Parameters.FindString ("attr:name", i, &StringPntr);
3439    if (ErrorCode != B_OK)
3440      break; /* Reached the end of the attributes. */
3441    if (strcmp (StringPntr, g_AttributeNameClassification) == 0)
3442      iClassification = i;
3443    else if (strcmp (StringPntr, g_AttributeNameSpamRatio) == 0)
3444      iProbability = i;
3445  }
3446
3447  /* Add extra default settings for those programs which previously didn't
3448  update the MIME database with all the attributes that exist (so our new
3449  additions don't show up at the wrong index). */
3450
3451  i--; /* Set i to index of last valid attribute. */
3452
3453  for (j = 0; j <= i; j++)
3454  {
3455    if (Parameters.FindString ("attr:public_name", j, &StringPntr) ==
3456    B_BAD_INDEX)
3457    {
3458      if (Parameters.FindString ("attr:name", j, &StringPntr) != B_OK)
3459        StringPntr = "None!";
3460      Parameters.AddString ("attr:public_name", StringPntr);
3461    }
3462  }
3463
3464  while (Parameters.FindInt32 ("attr:type", i, &TempInt32) == B_BAD_INDEX)
3465    Parameters.AddInt32 ("attr:type", B_STRING_TYPE);
3466
3467  while (Parameters.FindBool ("attr:viewable", i, &TempBool) == B_BAD_INDEX)
3468    Parameters.AddBool ("attr:viewable", true);
3469
3470  while (Parameters.FindBool ("attr:editable", i, &TempBool) == B_BAD_INDEX)
3471    Parameters.AddBool ("attr:editable", false);
3472
3473  while (Parameters.FindInt32 ("attr:width", i, &TempInt32) == B_BAD_INDEX)
3474    Parameters.AddInt32 ("attr:width", 60);
3475
3476  while (Parameters.FindInt32 ("attr:alignment", i, &TempInt32) == B_BAD_INDEX)
3477    Parameters.AddInt32 ("attr:alignment", B_ALIGN_LEFT);
3478
3479  while (Parameters.FindBool ("attr:extra", i, &TempBool) == B_BAD_INDEX)
3480    Parameters.AddBool ("attr:extra", false);
3481
3482  /* Add our new attributes to e-mail related things, if not already there. */
3483
3484  if (iClassification < 0)
3485  {
3486    Parameters.AddString ("attr:name", g_AttributeNameClassification);
3487    Parameters.AddString ("attr:public_name", "Classification Group");
3488    Parameters.AddInt32 ("attr:type", B_STRING_TYPE);
3489    Parameters.AddBool ("attr:viewable", true);
3490    Parameters.AddBool ("attr:editable", false);
3491    Parameters.AddInt32 ("attr:width", 45);
3492    Parameters.AddInt32 ("attr:alignment", B_ALIGN_LEFT);
3493    Parameters.AddBool ("attr:extra", false);
3494  }
3495
3496  if (iProbability < 0)
3497  {
3498    Parameters.AddString ("attr:name", g_AttributeNameSpamRatio);
3499    Parameters.AddString ("attr:public_name", "Spam/Genuine Estimate");
3500    Parameters.AddInt32 ("attr:type", B_FLOAT_TYPE);
3501    Parameters.AddBool ("attr:viewable", true);
3502    Parameters.AddBool ("attr:editable", false);
3503    Parameters.AddInt32 ("attr:width", 50);
3504    Parameters.AddInt32 ("attr:alignment", B_ALIGN_LEFT);
3505    Parameters.AddBool ("attr:extra", false);
3506  }
3507
3508  if (iClassification < 0 || iProbability < 0)
3509  {
3510    ErrorCode = MimeType.SetAttrInfo (&Parameters);
3511    if (ErrorCode != B_OK)
3512    {
3513      sprintf (ErrorMessage, "Unable to associate the classification "
3514        "attributes with e-mail messages in the MIME database");
3515      return ErrorCode;
3516    }
3517  }
3518
3519  /* Set up the MIME type for the database file. */
3520
3521  sprintf (ErrorMessage, "Problems with setting up MIME type (%s) for "
3522    "the database files", g_ABSDatabaseFileMIMEType); /* A generic message. */
3523
3524  ErrorCode = MimeType.SetTo (g_ABSDatabaseFileMIMEType);
3525  if (ErrorCode != B_OK)
3526    return ErrorCode;
3527
3528  MimeType.Delete ();
3529  ErrorCode = MimeType.Install ();
3530  if (ErrorCode != B_OK)
3531  {
3532    sprintf (ErrorMessage, "Failed to install MIME type (%s) in the system",
3533      MimeType.Type ());
3534    return ErrorCode;
3535  }
3536
3537  MimeType.SetShortDescription ("Spam Database");
3538  MimeType.SetLongDescription ("Bayesian Statistical Database for "
3539    "Classifying Junk E-Mail");
3540  sprintf (ErrorMessage, "1.0 ('%s')", g_DatabaseRecognitionString);
3541  MimeType.SetSnifferRule (ErrorMessage);
3542  MimeType.SetPreferredApp (g_ABSAppSignature);
3543
3544  /* Set up the names of the sound effects.  Later on the user can associate
3545  sound files with the names by using the Sounds preferences panel or the
3546  installsound command.  The MDR add-on filter will trigger these sounds. */
3547
3548  add_system_beep_event (g_BeepGenuine);
3549  add_system_beep_event (g_BeepSpam);
3550  add_system_beep_event (g_BeepUncertain);
3551
3552  return B_OK;
3553}
3554
3555
3556/* Load the database if it hasn't been loaded yet.  Otherwise do nothing. */
3557
3558status_t ABSApp::LoadDatabaseIfNeeded (char *ErrorMessage)
3559{
3560  if (m_WordMap.empty ())
3561    return LoadSaveDatabase (true /* DoLoad */, ErrorMessage);
3562
3563  return B_OK;
3564}
3565
3566
3567/* Either load the database of spam words (DoLoad is TRUE) from the file
3568specified in the settings, or write (DoLoad is FALSE) the database to it.  If
3569it doesn't exist (and its parent directories do exist) then it will be created
3570when saving.  If it doesn't exist when loading, the in-memory database will be
3571set to an empty one and an error will be returned with an explanation put into
3572ErrorMessage (should be big enough for a path name and a couple of lines of
3573text).
3574
3575The database file format is a UTF-8 text file (well, there could be some
3576latin-1 characters and other junk in there - it just copies the bytes from the
3577e-mail messages directly), with tab characters to separate fields (so that you
3578can also load it into a spreadsheet).  The first line identifies the overall
3579file type.  The second lists pairs of classifications plus the number of
3580messages in each class.  Currently it is just Genuine and Spam, but for future
3581compatability, that could be followed by more classification pairs.  The
3582remaining lines each contain a word, the date it was last updated (actually
3583it's the number of messages in the database when the word was added, smaller
3584numbers mean it was updated longer ago), the genuine count and the spam count.
3585*/
3586
3587status_t ABSApp::LoadSaveDatabase (bool DoLoad, char *ErrorMessage)
3588{
3589  time_t                             CurrentTime;
3590  FILE                              *DatabaseFile = NULL;
3591  BNode                              DatabaseNode;
3592  BNodeInfo                          DatabaseNodeInfo;
3593  StatisticsMap::iterator            DataIter;
3594  StatisticsMap::iterator            EndIter;
3595  status_t                           ErrorCode;
3596  int                                i;
3597  pair<StatisticsMap::iterator,bool> InsertResult;
3598  char                               LineString [10240];
3599  StatisticsRecord                   Statistics;
3600  const char                        *StringPntr;
3601  char                              *TabPntr;
3602  const char                        *WordPntr;
3603
3604  if (DoLoad)
3605  {
3606    MakeDatabaseEmpty ();
3607    m_DatabaseHasChanged = false; /* In case of early error exit. */
3608  }
3609  else /* Saving the database, backup the old version on disk. */
3610  {
3611    ErrorCode = MakeBackup (ErrorMessage);
3612    if (ErrorCode != B_OK) /* Usually because the directory isn't there. */
3613      return ErrorCode;
3614  }
3615
3616  DatabaseFile = fopen (m_DatabaseFileName.String (), DoLoad ? "rb" : "wb");
3617  if (DatabaseFile == NULL)
3618  {
3619    ErrorCode = errno;
3620    sprintf (ErrorMessage, "Can't open database file \"%s\" for %s",
3621      m_DatabaseFileName.String (), DoLoad ? "reading" : "writing");
3622    goto ErrorExit;
3623  }
3624
3625  /* Process the first line, which identifies the file. */
3626
3627  if (DoLoad)
3628  {
3629    sprintf (ErrorMessage, "Can't read first line of database file \"%s\", "
3630      "expected it to start with \"%s\"",
3631      m_DatabaseFileName.String (), g_DatabaseRecognitionString);
3632    ErrorCode = -1;
3633
3634    if (fgets (LineString, sizeof (LineString), DatabaseFile) == NULL)
3635      goto ErrorExit;
3636    if (strncmp (LineString, g_DatabaseRecognitionString,
3637    strlen (g_DatabaseRecognitionString)) != 0)
3638      goto ErrorExit;
3639  }
3640  else /* Saving */
3641  {
3642    CurrentTime = time (NULL);
3643    if (fprintf (DatabaseFile, "%s V1 (word, age, genuine count, spam count)\t"
3644    "Written by SpamDBM $Revision: 30630 $\t"
3645    "Compiled on " __DATE__ " at " __TIME__ "\tThis file saved on %s",
3646    g_DatabaseRecognitionString, ctime (&CurrentTime)) <= 0)
3647    {
3648      ErrorCode = errno;
3649      sprintf (ErrorMessage, "Problems when writing to database file \"%s\"",
3650        m_DatabaseFileName.String ());
3651      goto ErrorExit;
3652    }
3653  }
3654
3655  /* The second line lists the different classifications.  We just check to see
3656  that the first two are Genuine and Spam.  If there are others, they'll be
3657  ignored and lost when the database is saved. */
3658
3659  if (DoLoad)
3660  {
3661    sprintf (ErrorMessage, "Can't read second line of database file \"%s\", "
3662      "expected it to list classifications %s and %s along with their totals",
3663      m_DatabaseFileName.String (), g_ClassifiedGenuine, g_ClassifiedSpam);
3664    ErrorCode = B_BAD_VALUE;
3665
3666    if (fgets (LineString, sizeof (LineString), DatabaseFile) == NULL)
3667      goto ErrorExit;
3668    i = strlen (LineString);
3669    if (i > 0 && LineString[i-1] == '\n')
3670      LineString[i-1] = 0; /* Remove trailing line feed character. */
3671
3672    /* Look for the title word at the start of the line. */
3673
3674    TabPntr = LineString;
3675    for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3676      ; if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3677
3678    if (strncmp (StringPntr, "Classifications", 15) != 0)
3679      goto ErrorExit;
3680
3681    /* Look for the Genuine class and count. */
3682
3683    for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3684      ; if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3685
3686    if (strcmp (StringPntr, g_ClassifiedGenuine) != 0)
3687      goto ErrorExit;
3688
3689    for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3690      ; if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3691
3692    m_TotalGenuineMessages = atoll (StringPntr);
3693
3694    /* Look for the Spam class and count. */
3695
3696    for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3697      ; if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3698
3699    if (strcmp (StringPntr, g_ClassifiedSpam) != 0)
3700      goto ErrorExit;
3701
3702    for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3703      ; if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3704
3705    m_TotalSpamMessages = atoll (StringPntr);
3706  }
3707  else /* Saving */
3708  {
3709    fprintf (DatabaseFile,
3710      "Classifications and total messages:\t%s\t%lu\t%s\t%lu\n",
3711      g_ClassifiedGenuine, m_TotalGenuineMessages,
3712      g_ClassifiedSpam, m_TotalSpamMessages);
3713  }
3714
3715  /* The remainder of the file is the list of words and statistics.  Each line
3716  has a word, a tab, the time when the word was last changed in the database
3717  (sequence number of message addition, starts at 0 and goes up by one for each
3718  message added to the database), a tab then the number of messages in the
3719  first class (genuine) that had that word, then a tab, then the number of
3720  messages in the second class (spam) with that word, and so on. */
3721
3722  if (DoLoad)
3723  {
3724    while (!feof (DatabaseFile))
3725    {
3726      if (fgets (LineString, sizeof (LineString), DatabaseFile) == NULL)
3727      {
3728        ErrorCode = errno;
3729        if (feof (DatabaseFile))
3730          break;
3731        if (ErrorCode == B_OK)
3732          ErrorCode = -1;
3733        sprintf (ErrorMessage, "Error while reading words and statistics "
3734          "from database file \"%s\"", m_DatabaseFileName.String ());
3735        goto ErrorExit;
3736      }
3737
3738      i = strlen (LineString);
3739      if (i > 0 && LineString[i-1] == '\n')
3740        LineString[i-1] = 0; /* Remove trailing line feed character. */
3741
3742      /* Get the word at the start of the line, save in WordPntr. */
3743
3744      TabPntr = LineString;
3745      for (WordPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3746        ; if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3747
3748      /* Get the date stamp.  Actually a sequence number, not a date. */
3749
3750      for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3751        ; if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3752
3753      Statistics.age = atoll (StringPntr);
3754
3755      /* Get the Genuine count. */
3756
3757      for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3758        ; if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3759
3760      Statistics.genuineCount = atoll (StringPntr);
3761
3762      /* Get the Spam count. */
3763
3764      for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3765        ; if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3766
3767      Statistics.spamCount = atoll (StringPntr);
3768
3769      /* Ignore empty words, totally unused words and ones which are too long
3770      (avoids lots of length checking everywhere). */
3771
3772      if (WordPntr[0] == 0 || strlen (WordPntr) > g_MaxWordLength ||
3773      (Statistics.genuineCount <= 0 && Statistics.spamCount <= 0))
3774        continue; /* Ignore this line of text, start on next one. */
3775
3776      /* Add the combination to the database. */
3777
3778      InsertResult = m_WordMap.insert (
3779        StatisticsMap::value_type (WordPntr, Statistics));
3780      if (InsertResult.second == false)
3781      {
3782        ErrorCode = B_BAD_VALUE;
3783        sprintf (ErrorMessage, "Error while inserting word \"%s\" from "
3784          "database \"%s\", perhaps it is a duplicate",
3785          WordPntr, m_DatabaseFileName.String ());
3786        goto ErrorExit;
3787      }
3788      m_WordCount++;
3789
3790      /* And the hunt for the oldest word. */
3791
3792      if (Statistics.age < m_OldestAge)
3793        m_OldestAge = Statistics.age;
3794    }
3795  }
3796  else /* Saving, dump all words and statistics to the file. */
3797  {
3798    EndIter = m_WordMap.end ();
3799    for (DataIter = m_WordMap.begin (); DataIter != EndIter; DataIter++)
3800    {
3801      if (fprintf (DatabaseFile, "%s\t%lu\t%lu\t%lu\n",
3802      DataIter->first.c_str (), DataIter->second.age,
3803      DataIter->second.genuineCount, DataIter->second.spamCount) <= 0)
3804      {
3805        ErrorCode = errno;
3806        sprintf (ErrorMessage, "Error while writing word \"%s\" to "
3807          "database \"%s\"",
3808          DataIter->first.c_str(), m_DatabaseFileName.String ());
3809        goto ErrorExit;
3810      }
3811    }
3812  }
3813
3814  /* Set the file type so that the new file gets associated with this program,
3815  and picks up the right icon. */
3816
3817  if (!DoLoad)
3818  {
3819    sprintf (ErrorMessage, "Unable to set attributes (file type) of database "
3820      "file \"%s\"", m_DatabaseFileName.String ());
3821    ErrorCode = DatabaseNode.SetTo (m_DatabaseFileName.String ());
3822    if (ErrorCode != B_OK)
3823      goto ErrorExit;
3824    DatabaseNodeInfo.SetTo (&DatabaseNode);
3825    ErrorCode = DatabaseNodeInfo.SetType (g_ABSDatabaseFileMIMEType);
3826    if (ErrorCode != B_OK)
3827      goto ErrorExit;
3828  }
3829
3830  /* Success! */
3831  m_DatabaseHasChanged = false;
3832  ErrorCode = B_OK;
3833
3834ErrorExit:
3835  if (DatabaseFile != NULL)
3836    fclose (DatabaseFile);
3837  return ErrorCode;
3838}
3839
3840
3841/* Either load the settings (DoLoad is TRUE) from the configuration file or
3842write them (DoLoad is FALSE) to it.  The configuration file is a flattened
3843BMessage containing the various program settings.  If it doesn't exist (and its
3844parent directories don't exist) then it will be created when saving.  If it
3845doesn't exist when loading, the settings will be set to default values. */
3846
3847status_t ABSApp::LoadSaveSettings (bool DoLoad)
3848{
3849  status_t    ErrorCode;
3850  const char *NamePntr;
3851  BMessage    Settings;
3852  BDirectory  SettingsDirectory;
3853  BFile       SettingsFile;
3854  const char *StringPntr;
3855  bool        TempBool;
3856  int32       TempInt32;
3857  char        TempString [PATH_MAX + 100];
3858
3859  /* Preset things to default values if loading, in case of an error or it's an
3860  older version of the settings file which doesn't have every field defined. */
3861
3862  if (DoLoad)
3863    DefaultSettings ();
3864
3865  /* Look for our settings directory.  When saving we can try to create it. */
3866
3867  ErrorCode = SettingsDirectory.SetTo (m_SettingsDirectoryPath.Path ());
3868  if (ErrorCode != B_OK)
3869  {
3870    if (DoLoad || ErrorCode != B_ENTRY_NOT_FOUND)
3871    {
3872      sprintf (TempString, "Can't find settings directory \"%s\"",
3873        m_SettingsDirectoryPath.Path ());
3874      goto ErrorExit;
3875    }
3876    ErrorCode = create_directory (m_SettingsDirectoryPath.Path (), 0755);
3877    if (ErrorCode == B_OK)
3878      ErrorCode = SettingsDirectory.SetTo (m_SettingsDirectoryPath.Path ());
3879    if (ErrorCode != B_OK)
3880    {
3881      sprintf (TempString, "Can't create settings directory \"%s\"",
3882        m_SettingsDirectoryPath.Path ());
3883      goto ErrorExit;
3884    }
3885  }
3886
3887  ErrorCode = SettingsFile.SetTo (&SettingsDirectory, g_SettingsFileName,
3888    DoLoad ? B_READ_ONLY : B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
3889  if (ErrorCode != B_OK)
3890  {
3891    sprintf (TempString, "Can't open settings file \"%s\" in directory \"%s\" "
3892      "for %s", g_SettingsFileName, m_SettingsDirectoryPath.Path(),
3893      DoLoad ? "reading" : "writing");
3894    goto ErrorExit;
3895  }
3896
3897  if (DoLoad)
3898  {
3899    ErrorCode = Settings.Unflatten (&SettingsFile);
3900    if (ErrorCode != 0 || Settings.what != g_SettingsWhatCode)
3901    {
3902      sprintf (TempString, "Corrupt data detected while reading settings "
3903        "file \"%s\" in directory \"%s\", will revert to defaults",
3904        g_SettingsFileName, m_SettingsDirectoryPath.Path());
3905      goto ErrorExit;
3906    }
3907  }
3908
3909  /* Transfer the settings between the BMessage and our various global
3910  variables.  For loading, if the setting isn't present, leave it at the
3911  default value.  Note that loading and saving are intermingled here to make
3912  code maintenance easier (less chance of forgetting to update it if load and
3913  save were separate functions). */
3914
3915  ErrorCode = B_OK; /* So that saving settings can record an error. */
3916
3917  NamePntr = "DatabaseFileName";
3918  if (DoLoad)
3919  {
3920    if (Settings.FindString (NamePntr, &StringPntr) == B_OK)
3921      m_DatabaseFileName.SetTo (StringPntr);
3922  }
3923  else if (ErrorCode == B_OK)
3924    ErrorCode = Settings.AddString (NamePntr, m_DatabaseFileName);
3925
3926  NamePntr = "ServerMode";
3927  if (DoLoad)
3928  {
3929    if (Settings.FindBool (NamePntr, &TempBool) == B_OK)
3930      g_ServerMode = TempBool;
3931  }
3932  else if (ErrorCode == B_OK)
3933    ErrorCode = Settings.AddBool (NamePntr, g_ServerMode);
3934
3935  NamePntr = "IgnorePreviousClassification";
3936  if (DoLoad)
3937  {
3938    if (Settings.FindBool (NamePntr, &TempBool) == B_OK)
3939      m_IgnorePreviousClassification = TempBool;
3940  }
3941  else if (ErrorCode == B_OK)
3942    ErrorCode = Settings.AddBool (NamePntr, m_IgnorePreviousClassification);
3943
3944  NamePntr = "PurgeAge";
3945  if (DoLoad)
3946  {
3947    if (Settings.FindInt32 (NamePntr, &TempInt32) == B_OK)
3948      m_PurgeAge = TempInt32;
3949  }
3950  else if (ErrorCode == B_OK)
3951    ErrorCode = Settings.AddInt32 (NamePntr, m_PurgeAge);
3952
3953  NamePntr = "PurgePopularity";
3954  if (DoLoad)
3955  {
3956    if (Settings.FindInt32 (NamePntr, &TempInt32) == B_OK)
3957      m_PurgePopularity = TempInt32;
3958  }
3959  else if (ErrorCode == B_OK)
3960    ErrorCode = Settings.AddInt32 (NamePntr, m_PurgePopularity);
3961
3962  NamePntr = "ScoringMode";
3963  if (DoLoad)
3964  {
3965    if (Settings.FindInt32 (NamePntr, &TempInt32) == B_OK)
3966      m_ScoringMode = (ScoringModes) TempInt32;
3967    if (m_ScoringMode < 0 || m_ScoringMode >= SM_MAX)
3968      m_ScoringMode = (ScoringModes) 0;
3969  }
3970  else if (ErrorCode == B_OK)
3971    ErrorCode = Settings.AddInt32 (NamePntr, m_ScoringMode);
3972
3973  NamePntr = "TokenizeMode";
3974  if (DoLoad)
3975  {
3976    if (Settings.FindInt32 (NamePntr, &TempInt32) == B_OK)
3977      m_TokenizeMode = (TokenizeModes) TempInt32;
3978    if (m_TokenizeMode < 0 || m_TokenizeMode >= TM_MAX)
3979      m_TokenizeMode = (TokenizeModes) 0;
3980  }
3981  else if (ErrorCode == B_OK)
3982    ErrorCode = Settings.AddInt32 (NamePntr, m_TokenizeMode);
3983
3984  if (ErrorCode != B_OK)
3985  {
3986    strcpy (TempString, "Unable to stuff the program settings into a "
3987      "temporary BMessage, settings not saved");
3988    goto ErrorExit;
3989  }
3990
3991  /* Save the settings BMessage to the settings file. */
3992
3993  if (!DoLoad)
3994  {
3995    Settings.what = g_SettingsWhatCode;
3996    ErrorCode = Settings.Flatten (&SettingsFile);
3997    if (ErrorCode != 0)
3998    {
3999      sprintf (TempString, "Problems while writing settings file \"%s\" in "
4000        "directory \"%s\"", g_SettingsFileName,
4001        m_SettingsDirectoryPath.Path ());
4002      goto ErrorExit;
4003    }
4004  }
4005
4006  m_SettingsHaveChanged = false;
4007  return B_OK;
4008
4009ErrorExit: /* Error message in TempString, code in ErrorCode. */
4010  DisplayErrorMessage (TempString, ErrorCode, DoLoad ?
4011    "Loading Settings Error" : "Saving Settings Error");
4012  return ErrorCode;
4013}
4014
4015
4016void
4017ABSApp::MessageReceived (BMessage *MessagePntr)
4018{
4019  const char           *PropertyName;
4020  struct property_info *PropInfoPntr;
4021  int32                 SpecifierIndex;
4022  int32                 SpecifierKind;
4023  BMessage              SpecifierMessage;
4024
4025  /* See if it is a scripting message that applies to the database or one of
4026  the other operations this program supports.  Pass on other scripting messages
4027  to the inherited parent MessageReceived function (they're usually scripting
4028  messages for the BApplication). */
4029
4030  switch (MessagePntr->what)
4031  {
4032    case B_GET_PROPERTY:
4033    case B_SET_PROPERTY:
4034    case B_COUNT_PROPERTIES:
4035    case B_CREATE_PROPERTY:
4036    case B_DELETE_PROPERTY:
4037    case B_EXECUTE_PROPERTY:
4038      if (MessagePntr->GetCurrentSpecifier (&SpecifierIndex, &SpecifierMessage,
4039      &SpecifierKind, &PropertyName) == B_OK &&
4040      SpecifierKind == B_DIRECT_SPECIFIER)
4041      {
4042        for (PropInfoPntr = g_ScriptingPropertyList + 0; true; PropInfoPntr++)
4043        {
4044          if (PropInfoPntr->name == 0)
4045            break; /* Ran out of commands. */
4046
4047          if (PropInfoPntr->commands[0] == MessagePntr->what &&
4048          strcasecmp (PropInfoPntr->name, PropertyName) == 0)
4049          {
4050            ProcessScriptingMessage (MessagePntr, PropInfoPntr);
4051            return;
4052          }
4053        }
4054      }
4055      break;
4056  }
4057
4058  /* Pass the unprocessed message to the inherited function, maybe it knows
4059  what to do.  This includes replies to messages we sent ourselves. */
4060
4061  BApplication::MessageReceived (MessagePntr);
4062}
4063
4064
4065/* Rename the existing database file to a backup file name, potentially
4066replacing an older backup.  If something goes wrong, returns an error code and
4067puts an explanation in ErrorMessage. */
4068
4069status_t ABSApp::MakeBackup (char *ErrorMessage)
4070{
4071  BEntry   Entry;
4072  status_t ErrorCode;
4073  int      i;
4074  char     LeafName [NAME_MAX];
4075  char     NewName [PATH_MAX+20];
4076  char     OldName [PATH_MAX+20];
4077
4078  ErrorCode = Entry.SetTo (m_DatabaseFileName.String ());
4079  if (ErrorCode != B_OK)
4080  {
4081    sprintf (ErrorMessage, "While making backup, failed to make a BEntry for "
4082      "\"%s\" (maybe the directory doesn't exist?)",
4083      m_DatabaseFileName.String ());
4084    return ErrorCode;
4085  }
4086  if (!Entry.Exists ())
4087    return B_OK; /* No existing file to worry about overwriting. */
4088  Entry.GetName (LeafName);
4089
4090  /* Find the first hole (no file) where we will stop the renaming chain. */
4091
4092  for (i = 0; i < g_MaxBackups - 1; i++)
4093  {
4094    strcpy (OldName, m_DatabaseFileName.String ());
4095    sprintf (OldName + strlen (OldName), g_BackupSuffix, i);
4096    Entry.SetTo (OldName);
4097    if (!Entry.Exists ())
4098      break;
4099  }
4100
4101  /* Move the files down by one to fill in the hole in the name series. */
4102
4103  for (i--; i >= 0; i--)
4104  {
4105    strcpy (OldName, m_DatabaseFileName.String ());
4106    sprintf (OldName + strlen (OldName), g_BackupSuffix, i);
4107    Entry.SetTo (OldName);
4108    strcpy (NewName, LeafName);
4109    sprintf (NewName + strlen (NewName), g_BackupSuffix, i + 1);
4110    ErrorCode = Entry.Rename (NewName, true /* clobber */);
4111  }
4112
4113  Entry.SetTo (m_DatabaseFileName.String ());
4114  strcpy (NewName, LeafName);
4115  sprintf (NewName + strlen (NewName), g_BackupSuffix, 0);
4116  ErrorCode = Entry.Rename (NewName, true /* clobber */);
4117  if (ErrorCode != B_OK)
4118    sprintf (ErrorMessage, "While making backup, failed to rename "
4119      "\"%s\" to \"%s\"", m_DatabaseFileName.String (), NewName);
4120
4121  return ErrorCode;
4122}
4123
4124
4125void
4126ABSApp::MakeDatabaseEmpty ()
4127{
4128  m_WordMap.clear (); /* Sets the map to empty, deallocating any old data. */
4129  m_WordCount = 0;
4130  m_TotalGenuineMessages = 0;
4131  m_TotalSpamMessages = 0;
4132  m_OldestAge = (uint32) -1 /* makes largest number possible */;
4133}
4134
4135
4136/* Do what the scripting command says.  A reply message will be sent back with
4137several fields: "error" containing the numerical error code (0 for success),
4138"CommandText" with a text representation of the command, "result" with the
4139resulting data for a get or count command.  If it isn't understood, then rather
4140than a B_REPLY kind of message, it will be a B_MESSAGE_NOT_UNDERSTOOD message
4141with an "error" number and an "message" string with a description. */
4142
4143void
4144ABSApp::ProcessScriptingMessage (
4145  BMessage *MessagePntr,
4146  struct property_info *PropInfoPntr)
4147{
4148  bool        ArgumentBool = false;
4149  bool        ArgumentGotBool = false;
4150  bool        ArgumentGotInt32 = false;
4151  bool        ArgumentGotString = false;
4152  int32       ArgumentInt32 = 0;
4153  const char *ArgumentString = NULL;
4154  BString     CommandText;
4155  status_t    ErrorCode;
4156  int         i;
4157  BMessage    ReplyMessage (B_MESSAGE_NOT_UNDERSTOOD);
4158  ssize_t     StringBufferSize;
4159  BMessage    TempBMessage;
4160  BPath       TempPath;
4161  char        TempString [PATH_MAX + 1024];
4162
4163  if (g_QuitCountdown >= 0 && !g_CommandLineMode)
4164  {
4165    g_QuitCountdown = -1;
4166    cerr << "Quit countdown aborted due to a scripting command arriving.\n";
4167  }
4168
4169  if (g_BusyCursor != NULL)
4170    SetCursor (g_BusyCursor);
4171
4172  ErrorCode = MessagePntr->FindData (g_DataName, B_STRING_TYPE,
4173    (const void **) &ArgumentString, &StringBufferSize);
4174  if (ErrorCode == B_OK)
4175  {
4176    if (PropInfoPntr->extra_data != PN_EVALUATE_STRING &&
4177    PropInfoPntr->extra_data != PN_SPAM_STRING &&
4178    PropInfoPntr->extra_data != PN_GENUINE_STRING &&
4179    strlen (ArgumentString) >= PATH_MAX)
4180    {
4181      sprintf (TempString, "\"data\" string of a scripting message is too "
4182        "long, for SET %s action", PropInfoPntr->name);
4183      ErrorCode = B_NAME_TOO_LONG;
4184      goto ErrorExit;
4185    }
4186    ArgumentGotString = true;
4187  }
4188  else if (MessagePntr->FindBool (g_DataName, &ArgumentBool) == B_OK)
4189    ArgumentGotBool = true;
4190  else if (MessagePntr->FindInt32 (g_DataName, &ArgumentInt32) == B_OK)
4191    ArgumentGotInt32 = true;
4192
4193  /* Prepare a Human readable description of the scripting command. */
4194
4195  switch (PropInfoPntr->commands[0])
4196  {
4197    case B_SET_PROPERTY:
4198      CommandText.SetTo ("Set ");
4199      break;
4200
4201    case B_GET_PROPERTY:
4202      CommandText.SetTo ("Get ");
4203      break;
4204
4205    case B_COUNT_PROPERTIES:
4206      CommandText.SetTo ("Count ");
4207      break;
4208
4209    case B_CREATE_PROPERTY:
4210      CommandText.SetTo ("Create ");
4211      break;
4212
4213    case B_DELETE_PROPERTY:
4214      CommandText.SetTo ("Delete ");
4215      break;
4216
4217    case B_EXECUTE_PROPERTY:
4218      CommandText.SetTo ("Execute ");
4219      break;
4220
4221    default:
4222      sprintf (TempString, "Bug: scripting command for \"%s\" has an unknown "
4223        "action code %d", PropInfoPntr->name,
4224        (int) PropInfoPntr->commands[0]);
4225      ErrorCode = -1;
4226      goto ErrorExit;
4227  }
4228  CommandText.Append (PropInfoPntr->name);
4229
4230  /* Add on the argument value to our readable command, if there is one. */
4231
4232  if (ArgumentGotString)
4233  {
4234    CommandText.Append (" \"");
4235    CommandText.Append (ArgumentString);
4236    CommandText.Append ("\"");
4237  }
4238  if (ArgumentGotBool)
4239    CommandText.Append (ArgumentBool ? " true" : " false");
4240  if (ArgumentGotInt32)
4241  {
4242    sprintf (TempString, " %ld", ArgumentInt32);
4243    CommandText.Append (TempString);
4244  }
4245
4246  /* From now on the scripting command has been recognized and is in the
4247  correct format, so it always returns a B_REPLY message.  A readable version
4248  of the command is also added to make debugging easier. */
4249
4250  ReplyMessage.what = B_REPLY;
4251  ReplyMessage.AddString ("CommandText", CommandText);
4252
4253  /* Now actually do the command.  First prepare a default error message. */
4254
4255  sprintf (TempString, "Operation code %d (get, set, count, etc) "
4256    "unsupported for property %s",
4257    (int) PropInfoPntr->commands[0], PropInfoPntr->name);
4258  ErrorCode = B_BAD_INDEX;
4259
4260  switch (PropInfoPntr->extra_data)
4261  {
4262    case PN_DATABASE_FILE:
4263      switch (PropInfoPntr->commands[0])
4264      {
4265        case B_GET_PROPERTY: /* Get the database file name. */
4266          ReplyMessage.AddString (g_ResultName, m_DatabaseFileName);
4267          break;
4268
4269        case B_SET_PROPERTY: /* Set the database file name to a new one. */
4270          if (!ArgumentGotString)
4271          {
4272            ErrorCode = B_BAD_TYPE;
4273            sprintf (TempString, "You need to specify a string for the "
4274              "SET %s command", PropInfoPntr->name);
4275            goto ErrorExit;
4276          }
4277          ErrorCode = TempPath.SetTo (ArgumentString, NULL /* leaf */,
4278            true /* normalize - verifies parent directories exist */);
4279          if (ErrorCode != B_OK)
4280          {
4281            sprintf (TempString, "New database path name of \"%s\" is invalid "
4282              "(parent directories must exist)", ArgumentString);
4283            goto ErrorExit;
4284          }
4285          if ((ErrorCode = SaveDatabaseIfNeeded (TempString)) != B_OK)
4286            goto ErrorExit;
4287          MakeDatabaseEmpty (); /* So that the new one gets loaded if used. */
4288
4289          if (strlen (TempPath.Leaf ()) > NAME_MAX-strlen(g_BackupSuffix)-1)
4290          {
4291            /* Truncate the name so that there is enough space for the backup
4292            extension.  Approximately. */
4293            strcpy (TempString, TempPath.Leaf ());
4294            TempString [NAME_MAX - strlen (g_BackupSuffix) - 1] = 0;
4295            TempPath.GetParent (&TempPath);
4296            TempPath.Append (TempString);
4297          }
4298          m_DatabaseFileName.SetTo (TempPath.Path ());
4299          m_SettingsHaveChanged = true;
4300          break;
4301
4302        case B_CREATE_PROPERTY: /* Make a new database file plus more. */
4303          if ((ErrorCode = CreateDatabaseFile (TempString)) != B_OK)
4304            goto ErrorExit;
4305          break;
4306
4307        case B_DELETE_PROPERTY: /* Delete the file and its backups too. */
4308          if ((ErrorCode = DeleteDatabaseFile (TempString)) != B_OK)
4309            goto ErrorExit;
4310          break;
4311
4312        case B_COUNT_PROPERTIES:
4313          if ((ErrorCode = LoadDatabaseIfNeeded (TempString)) != B_OK)
4314            goto ErrorExit;
4315          ReplyMessage.AddInt32 (g_ResultName, m_WordCount);
4316          break;
4317
4318        default: /* Unknown operation code, error message already set. */
4319          goto ErrorExit;
4320      }
4321      break;
4322
4323    case PN_SPAM:
4324    case PN_SPAM_STRING:
4325    case PN_GENUINE:
4326    case PN_GENUINE_STRING:
4327    case PN_UNCERTAIN:
4328      switch (PropInfoPntr->commands[0])
4329      {
4330        case B_COUNT_PROPERTIES: /* Get the number of spam/genuine messages. */
4331          if ((ErrorCode = LoadDatabaseIfNeeded (TempString)) != B_OK)
4332            goto ErrorExit;
4333          if (PropInfoPntr->extra_data == PN_SPAM ||
4334          PropInfoPntr->extra_data == PN_SPAM_STRING)
4335            ReplyMessage.AddInt32 (g_ResultName, m_TotalSpamMessages);
4336          else
4337            ReplyMessage.AddInt32 (g_ResultName, m_TotalGenuineMessages);
4338          break;
4339
4340        case B_SET_PROPERTY: /* Add spam/genuine/uncertain to database. */
4341          if (!ArgumentGotString)
4342          {
4343            ErrorCode = B_BAD_TYPE;
4344            sprintf (TempString, "You need to specify a string (%s) "
4345              "for the SET %s command",
4346              (PropInfoPntr->extra_data == PN_GENUINE_STRING ||
4347              PropInfoPntr->extra_data == PN_SPAM_STRING)
4348              ? "text of the message to be added"
4349              : "pathname of the file containing the text to be added",
4350              PropInfoPntr->name);
4351            goto ErrorExit;
4352          }
4353          if ((ErrorCode = LoadDatabaseIfNeeded (TempString)) != B_OK)
4354            goto ErrorExit;
4355          if (PropInfoPntr->extra_data == PN_GENUINE ||
4356          PropInfoPntr->extra_data == PN_SPAM ||
4357          PropInfoPntr->extra_data == PN_UNCERTAIN)
4358            ErrorCode = AddFileToDatabase (
4359              (PropInfoPntr->extra_data == PN_SPAM) ? CL_SPAM :
4360              ((PropInfoPntr->extra_data == PN_GENUINE) ? CL_GENUINE :
4361              CL_UNCERTAIN),
4362              ArgumentString, TempString /* ErrorMessage */);
4363          else
4364            ErrorCode = AddStringToDatabase (
4365              (PropInfoPntr->extra_data == PN_SPAM_STRING) ?
4366              CL_SPAM : CL_GENUINE,
4367              ArgumentString, TempString /* ErrorMessage */);
4368          if (ErrorCode != B_OK)
4369            goto ErrorExit;
4370          break;
4371
4372        default: /* Unknown operation code, error message already set. */
4373          goto ErrorExit;
4374      }
4375      break;
4376
4377    case PN_IGNORE_PREVIOUS_CLASSIFICATION:
4378      switch (PropInfoPntr->commands[0])
4379      {
4380        case B_GET_PROPERTY:
4381          ReplyMessage.AddBool (g_ResultName, m_IgnorePreviousClassification);
4382          break;
4383
4384        case B_SET_PROPERTY:
4385          if (!ArgumentGotBool)
4386          {
4387            ErrorCode = B_BAD_TYPE;
4388            sprintf (TempString, "You need to specify a boolean (true/yes, "
4389              "false/no) for the SET %s command", PropInfoPntr->name);
4390            goto ErrorExit;
4391          }
4392          m_IgnorePreviousClassification = ArgumentBool;
4393          m_SettingsHaveChanged = true;
4394          break;
4395
4396        default: /* Unknown operation code, error message already set. */
4397          goto ErrorExit;
4398      }
4399      break;
4400
4401    case PN_SERVER_MODE:
4402      switch (PropInfoPntr->commands[0])
4403      {
4404        case B_GET_PROPERTY:
4405          ReplyMessage.AddBool (g_ResultName, g_ServerMode);
4406          break;
4407
4408        case B_SET_PROPERTY:
4409          if (!ArgumentGotBool)
4410          {
4411            ErrorCode = B_BAD_TYPE;
4412            sprintf (TempString, "You need to specify a boolean (true/yes, "
4413              "false/no) for the SET %s command", PropInfoPntr->name);
4414            goto ErrorExit;
4415          }
4416          g_ServerMode = ArgumentBool;
4417          m_SettingsHaveChanged = true;
4418          break;
4419
4420        default: /* Unknown operation code, error message already set. */
4421          goto ErrorExit;
4422      }
4423      break;
4424
4425    case PN_FLUSH:
4426      if (PropInfoPntr->commands[0] == B_EXECUTE_PROPERTY &&
4427      (ErrorCode = SaveDatabaseIfNeeded (TempString)) == B_OK)
4428        break;
4429      goto ErrorExit;
4430
4431    case PN_PURGE_AGE:
4432      switch (PropInfoPntr->commands[0])
4433      {
4434        case B_GET_PROPERTY:
4435          ReplyMessage.AddInt32 (g_ResultName, m_PurgeAge);
4436          break;
4437
4438        case B_SET_PROPERTY:
4439          if (!ArgumentGotInt32)
4440          {
4441            ErrorCode = B_BAD_TYPE;
4442            sprintf (TempString, "You need to specify a 32 bit integer "
4443              "for the SET %s command", PropInfoPntr->name);
4444            goto ErrorExit;
4445          }
4446          m_PurgeAge = ArgumentInt32;
4447          m_SettingsHaveChanged = true;
4448          break;
4449
4450        default: /* Unknown operation code, error message already set. */
4451          goto ErrorExit;
4452      }
4453      break;
4454
4455    case PN_PURGE_POPULARITY:
4456      switch (PropInfoPntr->commands[0])
4457      {
4458        case B_GET_PROPERTY:
4459          ReplyMessage.AddInt32 (g_ResultName, m_PurgePopularity);
4460          break;
4461
4462        case B_SET_PROPERTY:
4463          if (!ArgumentGotInt32)
4464          {
4465            ErrorCode = B_BAD_TYPE;
4466            sprintf (TempString, "You need to specify a 32 bit integer "
4467              "for the SET %s command", PropInfoPntr->name);
4468            goto ErrorExit;
4469          }
4470          m_PurgePopularity = ArgumentInt32;
4471          m_SettingsHaveChanged = true;
4472          break;
4473
4474        default: /* Unknown operation code, error message already set. */
4475          goto ErrorExit;
4476      }
4477      break;
4478
4479    case PN_PURGE:
4480      if (PropInfoPntr->commands[0] == B_EXECUTE_PROPERTY &&
4481      (ErrorCode = LoadDatabaseIfNeeded (TempString)) == B_OK &&
4482      (ErrorCode = PurgeOldWords (TempString)) == B_OK)
4483        break;
4484      goto ErrorExit;
4485
4486    case PN_OLDEST:
4487      if (PropInfoPntr->commands[0] == B_GET_PROPERTY &&
4488      (ErrorCode = LoadDatabaseIfNeeded (TempString)) == B_OK)
4489      {
4490        ReplyMessage.AddInt32 (g_ResultName, m_OldestAge);
4491        break;
4492      }
4493      goto ErrorExit;
4494
4495    case PN_EVALUATE:
4496    case PN_EVALUATE_STRING:
4497      if (PropInfoPntr->commands[0] == B_SET_PROPERTY)
4498      {
4499        if (!ArgumentGotString)
4500        {
4501          ErrorCode = B_BAD_TYPE;
4502          sprintf (TempString, "You need to specify a string for the "
4503            "SET %s command", PropInfoPntr->name);
4504          goto ErrorExit;
4505        }
4506        if ((ErrorCode = LoadDatabaseIfNeeded (TempString)) == B_OK)
4507        {
4508          if (PropInfoPntr->extra_data == PN_EVALUATE)
4509          {
4510            if ((ErrorCode = EvaluateFile (ArgumentString, &ReplyMessage,
4511            TempString)) == B_OK)
4512              break;
4513          }
4514          else /* PN_EVALUATE_STRING */
4515          {
4516            if ((ErrorCode = EvaluateString (ArgumentString, StringBufferSize,
4517            &ReplyMessage, TempString)) == B_OK)
4518              break;
4519          }
4520        }
4521      }
4522      goto ErrorExit;
4523
4524    case PN_RESET_TO_DEFAULTS:
4525      if (PropInfoPntr->commands[0] == B_EXECUTE_PROPERTY)
4526      {
4527        DefaultSettings ();
4528        break;
4529      }
4530      goto ErrorExit;
4531
4532    case PN_INSTALL_THINGS:
4533      if (PropInfoPntr->commands[0] == B_EXECUTE_PROPERTY &&
4534      (ErrorCode = InstallThings (TempString)) == B_OK)
4535        break;
4536      goto ErrorExit;
4537
4538    case PN_SCORING_MODE:
4539      switch (PropInfoPntr->commands[0])
4540      {
4541        case B_GET_PROPERTY:
4542          ReplyMessage.AddString (g_ResultName,
4543            g_ScoringModeNames[m_ScoringMode]);
4544          break;
4545
4546        case B_SET_PROPERTY:
4547          i = SM_MAX;
4548          if (ArgumentGotString)
4549            for (i = 0; i < SM_MAX; i++)
4550            {
4551              if (strcasecmp (ArgumentString, g_ScoringModeNames [i]) == 0)
4552              {
4553                m_ScoringMode = (ScoringModes) i;
4554                m_SettingsHaveChanged = true;
4555                break;
4556              }
4557            }
4558          if (i >= SM_MAX) /* Didn't find a valid scoring mode word. */
4559          {
4560            ErrorCode = B_BAD_TYPE;
4561            sprintf (TempString, "You used the unrecognized \"%s\" as "
4562              "a scoring mode for the SET %s command.  Should be one of: ",
4563              ArgumentGotString ? ArgumentString : "not specified",
4564              PropInfoPntr->name);
4565            for (i = 0; i < SM_MAX; i++)
4566            {
4567              strcat (TempString, g_ScoringModeNames [i]);
4568              if (i < SM_MAX - 1)
4569                strcat (TempString, ", ");
4570            }
4571            goto ErrorExit;
4572          }
4573          break;
4574
4575        default: /* Unknown operation code, error message already set. */
4576          goto ErrorExit;
4577      }
4578      break;
4579
4580    case PN_TOKENIZE_MODE:
4581      switch (PropInfoPntr->commands[0])
4582      {
4583        case B_GET_PROPERTY:
4584          ReplyMessage.AddString (g_ResultName,
4585            g_TokenizeModeNames[m_TokenizeMode]);
4586          break;
4587
4588        case B_SET_PROPERTY:
4589          i = TM_MAX;
4590          if (ArgumentGotString)
4591            for (i = 0; i < TM_MAX; i++)
4592            {
4593              if (strcasecmp (ArgumentString, g_TokenizeModeNames [i]) == 0)
4594              {
4595                m_TokenizeMode = (TokenizeModes) i;
4596                m_SettingsHaveChanged = true;
4597                break;
4598              }
4599            }
4600          if (i >= TM_MAX) /* Didn't find a valid tokenize mode word. */
4601          {
4602            ErrorCode = B_BAD_TYPE;
4603            sprintf (TempString, "You used the unrecognized \"%s\" as "
4604              "a tokenize mode for the SET %s command.  Should be one of: ",
4605              ArgumentGotString ? ArgumentString : "not specified",
4606              PropInfoPntr->name);
4607            for (i = 0; i < TM_MAX; i++)
4608            {
4609              strcat (TempString, g_TokenizeModeNames [i]);
4610              if (i < TM_MAX - 1)
4611                strcat (TempString, ", ");
4612            }
4613            goto ErrorExit;
4614          }
4615          break;
4616
4617        default: /* Unknown operation code, error message already set. */
4618          goto ErrorExit;
4619      }
4620      break;
4621
4622    default:
4623      sprintf (TempString, "Bug!  Unrecognized property identification "
4624        "number %d (should be between 0 and %d).  Fix the entry in "
4625        "the g_ScriptingPropertyList array!",
4626        (int) PropInfoPntr->extra_data, PN_MAX - 1);
4627      goto ErrorExit;
4628  }
4629
4630  /* Success. */
4631
4632  ReplyMessage.AddInt32 ("error", B_OK);
4633  ErrorCode = MessagePntr->SendReply (&ReplyMessage,
4634    this /* Reply's reply handler */, 500000 /* send timeout */);
4635  if (ErrorCode != B_OK)
4636    cerr << "ProcessScriptingMessage failed to send a reply message, code " <<
4637    ErrorCode << " (" << strerror (ErrorCode) << ")" << " for " <<
4638    CommandText.String () << endl;
4639  SetCursor (B_CURSOR_SYSTEM_DEFAULT);
4640  return;
4641
4642ErrorExit: /* Error message in TempString, return code in ErrorCode. */
4643  ReplyMessage.AddInt32 ("error", ErrorCode);
4644  ReplyMessage.AddString ("message", TempString);
4645  DisplayErrorMessage (TempString, ErrorCode);
4646  ErrorCode = MessagePntr->SendReply (&ReplyMessage,
4647    this /* Reply's reply handler */, 500000 /* send timeout */);
4648  if (ErrorCode != B_OK)
4649    cerr << "ProcessScriptingMessage failed to send an error message, code " <<
4650    ErrorCode << " (" << strerror (ErrorCode) << ")" << " for " <<
4651    CommandText.String () << endl;
4652  SetCursor (B_CURSOR_SYSTEM_DEFAULT);
4653}
4654
4655
4656/* Since quitting stops the program before the results of a script command are
4657received, we use a time delay to do the quit and make sure there are no pending
4658commands being processed by the auxiliary looper which is sending us commands.
4659Also, we have a countdown which can be interrupted by an incoming scripting
4660message in case one client tells us to quit while another one is still using us
4661(happens when you have two or more e-mail accounts).  But if the system is
4662shutting down, quit immediately! */
4663
4664void
4665ABSApp::Pulse ()
4666{
4667  if (g_QuitCountdown == 0)
4668  {
4669    if (g_CommanderLooperPntr == NULL ||
4670    !g_CommanderLooperPntr->IsBusy ())
4671      PostMessage (B_QUIT_REQUESTED);
4672  }
4673  else if (g_QuitCountdown > 0)
4674  {
4675    cerr << "SpamDBM quitting in " << g_QuitCountdown << ".\n";
4676    g_QuitCountdown--;
4677  }
4678}
4679
4680
4681/* A quit request message has come in.  If the quit countdown has reached zero,
4682allow the request, otherwise reject it (and start the countdown if it hasn't
4683been started). */
4684
4685bool
4686ABSApp::QuitRequested ()
4687{
4688  BMessage  *QuitMessage;
4689  team_info  RemoteInfo;
4690  BMessenger RemoteMessenger;
4691  team_id    RemoteTeam;
4692
4693  /* See if the quit is from the system shutdown command (which goes through
4694  the registrar server), if so, quit immediately. */
4695
4696  QuitMessage = CurrentMessage ();
4697  if (QuitMessage != NULL && QuitMessage->IsSourceRemote ())
4698  {
4699    RemoteMessenger = QuitMessage->ReturnAddress ();
4700    RemoteTeam = RemoteMessenger.Team ();
4701    if (get_team_info (RemoteTeam, &RemoteInfo) == B_OK &&
4702    strstr (RemoteInfo.args, "registrar") != NULL)
4703      g_QuitCountdown = 0;
4704  }
4705
4706  if (g_QuitCountdown == 0)
4707    return BApplication::QuitRequested ();
4708
4709  if (g_QuitCountdown < 0)
4710//    g_QuitCountdown = 10; /* Start the countdown. */
4711    g_QuitCountdown = 5; /* Quit more quickly */
4712
4713  return false;
4714}
4715
4716
4717/* Go through the current database and delete words which are too old (time is
4718equivalent to the number of messages added to the database) and too unpopular
4719(words not used by many messages).  Hopefully this will get rid of words which
4720are just hunks of binary or other garbage.  The database has been loaded
4721elsewhere. */
4722
4723status_t
4724ABSApp::PurgeOldWords (char *ErrorMessage)
4725{
4726  uint32                  CurrentTime;
4727  StatisticsMap::iterator CurrentIter;
4728  StatisticsMap::iterator EndIter;
4729  StatisticsMap::iterator NextIter;
4730  char                    TempString [80];
4731
4732  strcpy (ErrorMessage, "Purge can't fail"); /* So argument gets used. */
4733  CurrentTime = m_TotalGenuineMessages + m_TotalSpamMessages - 1;
4734  m_OldestAge = (uint32) -1 /* makes largest number possible */;
4735
4736  EndIter = m_WordMap.end ();
4737  NextIter = m_WordMap.begin ();
4738  while (NextIter != EndIter) {
4739    CurrentIter = NextIter++;
4740
4741    if (CurrentTime - CurrentIter->second.age >= m_PurgeAge &&
4742    CurrentIter->second.genuineCount + CurrentIter->second.spamCount <=
4743    m_PurgePopularity) {
4744      /* Delete this word, it is unpopular and old.  Sob. */
4745
4746      m_WordMap.erase (CurrentIter);
4747      if (m_WordCount > 0)
4748        m_WordCount--;
4749
4750      m_DatabaseHasChanged = true;
4751    }
4752    else /* This word is still in the database.  Update oldest age. */
4753    {
4754      if (CurrentIter->second.age < m_OldestAge)
4755        m_OldestAge = CurrentIter->second.age;
4756    }
4757  }
4758
4759  /* Just a little bug check here.  Just in case. */
4760
4761  if (m_WordCount != m_WordMap.size ()) {
4762    sprintf (TempString, "Our word count of %lu doesn't match the "
4763      "size of the database, %lu", m_WordCount, m_WordMap.size());
4764    DisplayErrorMessage (TempString, -1, "Bug!");
4765    m_WordCount = m_WordMap.size ();
4766  }
4767
4768  return B_OK;
4769}
4770
4771
4772void
4773ABSApp::ReadyToRun ()
4774{
4775  DatabaseWindow *DatabaseWindowPntr;
4776  float           JunkFloat;
4777  BButton        *TempButtonPntr;
4778  BCheckBox      *TempCheckBoxPntr;
4779  font_height     TempFontHeight;
4780  BMenuBar       *TempMenuBarPntr;
4781  BMenuItem      *TempMenuItemPntr;
4782  BPopUpMenu     *TempPopUpMenuPntr;
4783  BRadioButton   *TempRadioButtonPntr;
4784  BRect           TempRect;
4785  const char     *TempString = "Testing My Things";
4786  BStringView    *TempStringViewPntr;
4787  BTextControl   *TempTextPntr;
4788  BWindow        *TempWindowPntr;
4789
4790  /* This batch of code gets some measurements which will be used for laying
4791  out controls and other GUI elements.  Set the spacing between buttons and
4792  other controls to the width of the letter "M" in the user's desired font. */
4793
4794 g_MarginBetweenControls = (int) be_plain_font->StringWidth ("M");
4795
4796  /* Also find out how much space a line of text uses. */
4797
4798  be_plain_font->GetHeight (&TempFontHeight);
4799  g_LineOfTextHeight = ceilf (
4800    TempFontHeight.ascent + TempFontHeight.descent + TempFontHeight.leading);
4801
4802  /* Start finding out the height of various user interface gadgets, which can
4803  vary based on the current font size.  Make a temporary gadget, which is
4804  attached to our window, then resize it to its prefered size so that it
4805  accomodates the font size and other frills it needs. */
4806
4807  TempWindowPntr = new (std::nothrow) BWindow (BRect (10, 20, 200, 200),
4808	"Temporary Window", B_DOCUMENT_WINDOW,
4809	B_NO_WORKSPACE_ACTIVATION | B_ASYNCHRONOUS_CONTROLS);
4810  if (TempWindowPntr == NULL) {
4811    DisplayErrorMessage ("Unable to create temporary window for finding "
4812      "sizes of controls.");
4813    g_QuitCountdown = 0;
4814    return;
4815  }
4816
4817  TempRect = TempWindowPntr->Bounds ();
4818
4819  /* Find the height of a single line of text in a BStringView. */
4820
4821  TempStringViewPntr = new (std::nothrow) BStringView (TempRect, TempString, TempString);
4822  if (TempStringViewPntr != NULL) {
4823    TempWindowPntr->Lock();
4824    TempWindowPntr->AddChild (TempStringViewPntr);
4825    TempStringViewPntr->GetPreferredSize (&JunkFloat, &g_StringViewHeight);
4826    TempWindowPntr->RemoveChild (TempStringViewPntr);
4827    TempWindowPntr->Unlock();
4828    delete TempStringViewPntr;
4829  }
4830
4831  /* Find the height of a button, which seems to be larger than a text
4832  control and can make life difficult.  Make a temporary button, which
4833  is attached to our window so that it resizes to accomodate the font size. */
4834
4835  TempButtonPntr = new (std::nothrow) BButton (TempRect, TempString, TempString, NULL);
4836  if (TempButtonPntr != NULL) {
4837    TempWindowPntr->Lock();
4838    TempWindowPntr->AddChild (TempButtonPntr);
4839    TempButtonPntr->GetPreferredSize (&JunkFloat, &g_ButtonHeight);
4840    TempWindowPntr->RemoveChild (TempButtonPntr);
4841    TempWindowPntr->Unlock();
4842    delete TempButtonPntr;
4843  }
4844
4845  /* Find the height of a text box. */
4846
4847  TempTextPntr = new (std::nothrow) BTextControl (TempRect, TempString, NULL /* label */,
4848    TempString, NULL);
4849  if (TempTextPntr != NULL) {
4850    TempWindowPntr->Lock ();
4851    TempWindowPntr->AddChild (TempTextPntr);
4852    TempTextPntr->GetPreferredSize (&JunkFloat, &g_TextBoxHeight);
4853    TempWindowPntr->RemoveChild (TempTextPntr);
4854    TempWindowPntr->Unlock ();
4855    delete TempTextPntr;
4856  }
4857
4858  /* Find the height of a checkbox control. */
4859
4860  TempCheckBoxPntr = new (std::nothrow) BCheckBox (TempRect, TempString, TempString, NULL);
4861  if (TempCheckBoxPntr != NULL) {
4862    TempWindowPntr->Lock ();
4863    TempWindowPntr->AddChild (TempCheckBoxPntr);
4864    TempCheckBoxPntr->GetPreferredSize (&JunkFloat, &g_CheckBoxHeight);
4865    TempWindowPntr->RemoveChild (TempCheckBoxPntr);
4866    TempWindowPntr->Unlock ();
4867    delete TempCheckBoxPntr;
4868  }
4869
4870  /* Find the height of a radio button control. */
4871
4872  TempRadioButtonPntr =
4873    new (std::nothrow) BRadioButton (TempRect, TempString, TempString, NULL);
4874  if (TempRadioButtonPntr != NULL) {
4875    TempWindowPntr->Lock ();
4876    TempWindowPntr->AddChild (TempRadioButtonPntr);
4877    TempRadioButtonPntr->GetPreferredSize (&JunkFloat, &g_RadioButtonHeight);
4878    TempWindowPntr->RemoveChild (TempRadioButtonPntr);
4879    TempWindowPntr->Unlock ();
4880    delete TempRadioButtonPntr;
4881  }
4882
4883  /* Find the height of a pop-up menu. */
4884
4885  TempMenuBarPntr = new (std::nothrow) BMenuBar (TempRect, TempString,
4886    B_FOLLOW_LEFT | B_FOLLOW_TOP, B_ITEMS_IN_COLUMN,
4887    true /* resize to fit items */);
4888  TempPopUpMenuPntr = new (std::nothrow) BPopUpMenu (TempString);
4889  TempMenuItemPntr = new (std::nothrow) BMenuItem (TempString, new BMessage (12345), 'g');
4890
4891  if (TempMenuBarPntr != NULL && TempPopUpMenuPntr != NULL &&
4892  TempMenuItemPntr != NULL)
4893  {
4894    TempPopUpMenuPntr->AddItem (TempMenuItemPntr);
4895    TempMenuBarPntr->AddItem (TempPopUpMenuPntr);
4896
4897    TempWindowPntr->Lock ();
4898    TempWindowPntr->AddChild (TempMenuBarPntr);
4899    TempMenuBarPntr->GetPreferredSize (&JunkFloat, &g_PopUpMenuHeight);
4900    TempWindowPntr->RemoveChild (TempMenuBarPntr);
4901    TempWindowPntr->Unlock ();
4902    delete TempMenuBarPntr; // It will delete contents too.
4903  }
4904
4905  TempWindowPntr->Lock ();
4906  TempWindowPntr->Quit ();
4907
4908  SetPulseRate (500000);
4909
4910  if (g_CommandLineMode)
4911    g_QuitCountdown = 0; /* Quit as soon as queued up commands done. */
4912  else /* GUI mode, make a window. */
4913  {
4914    DatabaseWindowPntr = new (std::nothrow) DatabaseWindow ();
4915    if (DatabaseWindowPntr == NULL) {
4916      DisplayErrorMessage ("Unable to create window.");
4917      g_QuitCountdown = 0;
4918    } else {
4919      DatabaseWindowPntr->Show (); /* Starts the window's message loop. */
4920    }
4921  }
4922
4923  g_AppReadyToRunCompleted = true;
4924}
4925
4926
4927/* Given a mail component (body text, attachment, whatever), look for words in
4928it.  If the tokenize mode specifies that it isn't one of the ones we are
4929looking for, just skip it.  For container type components, recursively examine
4930their contents, up to the maximum depth specified. */
4931
4932status_t
4933ABSApp::RecursivelyTokenizeMailComponent (
4934  BMailComponent *ComponentPntr,
4935  const char *OptionalFileName,
4936  set<string> &WordSet,
4937  char *ErrorMessage,
4938  int RecursionLevel,
4939  int MaxRecursionLevel)
4940{
4941  char                        AttachmentName [B_FILE_NAME_LENGTH];
4942  BMailAttachment            *AttachmentPntr;
4943  BMimeType                   ComponentMIMEType;
4944  BMailContainer             *ContainerPntr;
4945  BMallocIO                   ContentsIO;
4946  const char                 *ContentsBufferPntr;
4947  size_t                      ContentsBufferSize;
4948  status_t                    ErrorCode;
4949  bool                        ExamineComponent;
4950  const char                 *HeaderKeyPntr;
4951  const char                 *HeaderValuePntr;
4952  int                         i;
4953  int                         j;
4954  const char                 *NameExtension;
4955  int                         NumComponents;
4956  BMimeType                   TextAnyMIMEType ("text");
4957  BMimeType                   TextPlainMIMEType ("text/plain");
4958
4959  if (ComponentPntr == NULL)
4960    return B_OK;
4961
4962  /* Add things in the sub-headers that might be useful.  Things like the file
4963  name of attachments, the encoding type, etc. */
4964
4965  if (m_TokenizeMode == TM_PLAIN_TEXT_HEADER ||
4966  m_TokenizeMode == TM_ANY_TEXT_HEADER ||
4967  m_TokenizeMode == TM_ALL_PARTS_HEADER ||
4968  m_TokenizeMode == TM_JUST_HEADER)
4969  {
4970    for (i = 0; i < 1000; i++)
4971    {
4972      HeaderKeyPntr = ComponentPntr->HeaderAt (i);
4973      if (HeaderKeyPntr == NULL)
4974        break;
4975      AddWordsToSet (HeaderKeyPntr, strlen (HeaderKeyPntr),
4976        'H' /* Prefix for Headers, uppercase unlike normal words. */, WordSet);
4977      for (j = 0; j < 1000; j++)
4978      {
4979        HeaderValuePntr = ComponentPntr->HeaderField (HeaderKeyPntr, j);
4980        if (HeaderValuePntr == NULL)
4981          break;
4982        AddWordsToSet (HeaderValuePntr, strlen (HeaderValuePntr),
4983          'H', WordSet);
4984      }
4985    }
4986  }
4987
4988  /* Check the MIME type of the thing.  It's used to decide if the contents are
4989  worth examining for words. */
4990
4991  ErrorCode = ComponentPntr->MIMEType (&ComponentMIMEType);
4992  if (ErrorCode != B_OK)
4993  {
4994    sprintf (ErrorMessage, "ABSApp::RecursivelyTokenizeMailComponent: "
4995      "Unable to get MIME type at level %d in \"%s\"",
4996      RecursionLevel, OptionalFileName);
4997    return ErrorCode;
4998  }
4999  if (ComponentMIMEType.Type() == NULL)
5000  {
5001    /* Have to make up a MIME type for things which don't have them, such as
5002    the main body text, otherwise it would get ignored. */
5003
5004    if (NULL != dynamic_cast<BTextMailComponent *>(ComponentPntr))
5005      ComponentMIMEType.SetType ("text/plain");
5006  }
5007  if (!TextAnyMIMEType.Contains (&ComponentMIMEType) &&
5008  NULL != (AttachmentPntr = dynamic_cast<BMailAttachment *>(ComponentPntr)))
5009  {
5010    /* Sometimes spam doesn't give a text MIME type for text when they do an
5011    attachment (which is often base64 encoded).  Use the file name extension to
5012    see if it really is text. */
5013    NameExtension = NULL;
5014    if (AttachmentPntr->FileName (AttachmentName) >= 0)
5015      NameExtension = strrchr (AttachmentName, '.');
5016    if (NameExtension != NULL)
5017    {
5018      if (strcasecmp (NameExtension, ".txt") == 0)
5019        ComponentMIMEType.SetType ("text/plain");
5020      else if (strcasecmp (NameExtension, ".htm") == 0 ||
5021      strcasecmp (NameExtension, ".html") == 0)
5022        ComponentMIMEType.SetType ("text/html");
5023    }
5024  }
5025
5026  switch (m_TokenizeMode)
5027  {
5028    case TM_PLAIN_TEXT:
5029    case TM_PLAIN_TEXT_HEADER:
5030      ExamineComponent = TextPlainMIMEType.Contains (&ComponentMIMEType);
5031      break;
5032
5033    case TM_ANY_TEXT:
5034    case TM_ANY_TEXT_HEADER:
5035      ExamineComponent = TextAnyMIMEType.Contains (&ComponentMIMEType);
5036      break;
5037
5038    case TM_ALL_PARTS:
5039    case TM_ALL_PARTS_HEADER:
5040      ExamineComponent = true;
5041      break;
5042
5043    default:
5044      ExamineComponent = false;
5045      break;
5046  }
5047
5048  if (ExamineComponent)
5049  {
5050    /* Get the contents of the component.  This will be UTF-8 text (converted
5051    from whatever encoding was used) for text attachments.  For other ones,
5052    it's just the raw data, or perhaps decoded from base64 encoding. */
5053
5054    ContentsIO.SetBlockSize (16 * 1024);
5055    ErrorCode = ComponentPntr->GetDecodedData (&ContentsIO);
5056    if (ErrorCode == B_OK) /* Can fail for container components: no data. */
5057    {
5058      /* Look for words in the decoded data. */
5059
5060      ContentsBufferPntr = (const char *) ContentsIO.Buffer ();
5061      ContentsBufferSize = ContentsIO.BufferLength ();
5062      if (ContentsBufferPntr != NULL /* can be empty */)
5063        AddWordsToSet (ContentsBufferPntr, ContentsBufferSize,
5064          0 /* no prefix character, this is body text */, WordSet);
5065    }
5066  }
5067
5068  /* Examine any sub-components in the message. */
5069
5070  if (RecursionLevel + 1 <= MaxRecursionLevel &&
5071  NULL != (ContainerPntr = dynamic_cast<BMailContainer *>(ComponentPntr)))
5072  {
5073    NumComponents = ContainerPntr->CountComponents ();
5074
5075    for (i = 0; i < NumComponents; i++)
5076    {
5077      ComponentPntr = ContainerPntr->GetComponent (i);
5078
5079      ErrorCode = RecursivelyTokenizeMailComponent (ComponentPntr,
5080        OptionalFileName, WordSet, ErrorMessage, RecursionLevel + 1,
5081        MaxRecursionLevel);
5082      if (ErrorCode != B_OK)
5083        break;
5084    }
5085  }
5086
5087  return ErrorCode;
5088}
5089
5090
5091/* The user has tried to open a file or several files with this application,
5092via Tracker's open-with menu item.  If it is a database type file, then change
5093the database file name to it.  Otherwise, ask the user whether they want to
5094classify it as spam or non-spam.  There will be at most around 100 files, BeOS
5095R5.0.3's Tracker crashes if it tries to pass on more than that many using Open
5096With... etc.  The command is sent to an intermediary thread where it is
5097asynchronously converted into a scripting message(s) that are sent back to this
5098BApplication.  The intermediary is needed since we can't recursively execute
5099scripting messages while processing a message (this RefsReceived one). */
5100
5101void
5102ABSApp::RefsReceived (BMessage *MessagePntr)
5103{
5104  if (g_CommanderLooperPntr != NULL)
5105    g_CommanderLooperPntr->CommandReferences (MessagePntr);
5106}
5107
5108
/* A scripting command is looking for something to execute it.  See if it is
targeted at our database. */
5111
5112BHandler * ABSApp::ResolveSpecifier (
5113  BMessage *MessagePntr,
5114  int32 Index,
5115  BMessage *SpecifierMsgPntr,
5116  int32 SpecificationKind,
5117  const char *PropertyPntr)
5118{
5119  int i;
5120
5121  /* See if it is one of our commands. */
5122
5123  if (SpecificationKind == B_DIRECT_SPECIFIER)
5124  {
5125    for (i = PN_MAX - 1; i >= 0; i--)
5126    {
5127      if (strcasecmp (PropertyPntr, g_PropertyNames [i]) == 0)
5128        return this; /* Found it!  Return the Handler (which is us). */
5129    }
5130  }
5131
5132  /* Handle an unrecognized scripting command, let the parent figure it out. */
5133
5134  return BApplication::ResolveSpecifier (
5135    MessagePntr, Index, SpecifierMsgPntr, SpecificationKind, PropertyPntr);
5136}
5137
5138
5139/* Save the database if it hasn't been saved yet.  Otherwise do nothing. */
5140
5141status_t ABSApp::SaveDatabaseIfNeeded (char *ErrorMessage)
5142{
5143  if (m_DatabaseHasChanged)
5144    return LoadSaveDatabase (false /* DoLoad */, ErrorMessage);
5145
5146  return B_OK;
5147}
5148
5149
5150/* Presumably the file is an e-mail message (or at least the header portion of
5151one).  Break it into parts: header, body and MIME components.  Then add the
5152words in the portions that match the current tokenization settings to the set
5153of words. */
5154
5155status_t ABSApp::TokenizeParts (
5156  BPositionIO *PositionIOPntr,
5157  const char *OptionalFileName,
5158  set<string> &WordSet,
5159  char *ErrorMessage)
5160{
5161  status_t        ErrorCode = B_OK;
5162  BEmailMessage   WholeEMail;
5163
5164  sprintf (ErrorMessage, "ABSApp::TokenizeParts: While getting e-mail "
5165    "headers, had problems with \"%s\"", OptionalFileName);
5166
5167  ErrorCode = WholeEMail.SetToRFC822 (
5168    PositionIOPntr /* it does its own seeking to the start */,
5169    -1 /* length */, true /* parse_now */);
5170  if (ErrorCode < 0) goto ErrorExit;
5171
5172  ErrorCode = RecursivelyTokenizeMailComponent (&WholeEMail,
5173    OptionalFileName, WordSet, ErrorMessage, 0 /* Initial recursion level */,
5174    (m_TokenizeMode == TM_JUST_HEADER) ? 0 : 500 /* Max recursion level */);
5175
5176ErrorExit:
5177  return ErrorCode;
5178}
5179
5180
5181/* Add all the words in the whole file or memory buffer to the supplied set.
5182The file doesn't have to be an e-mail message since it isn't parsed for e-mail
5183headers or MIME headers or anything.  It blindly adds everything that looks
5184like a word, though it does convert quoted printable codes to the characters
5185they represent.  See also AddWordsToSet which does something more advanced. */
5186
5187status_t ABSApp::TokenizeWhole (
5188  BPositionIO *PositionIOPntr,
5189  const char *OptionalFileName,
5190  set<string> &WordSet,
5191  char *ErrorMessage)
5192{
5193  string                AccumulatedWord;
5194  uint8                 Buffer [16 * 1024];
5195  uint8                *BufferCurrentPntr = Buffer + 0;
5196  uint8                *BufferEndPntr = Buffer + 0;
5197  const char           *IOErrorString =
5198                          "TokenizeWhole: Error %ld while reading \"%s\"";
5199  size_t                Length;
5200  int                   Letter = ' ';
5201  char                  HexString [4];
5202  int                   NextLetter = ' ';
5203  int                   NextNextLetter = ' ';
5204
5205  /* Use a buffer since reading single characters from a BFile is so slow.
5206  BufferCurrentPntr is the position of the next character to be read.  When it
5207  reaches BufferEndPntr, it is time to fill the buffer again. */
5208
5209#define ReadChar(CharVar) \
5210  { \
5211    if (BufferCurrentPntr < BufferEndPntr) \
5212      CharVar = *BufferCurrentPntr++; \
5213    else /* Try to fill the buffer. */ \
5214    { \
5215      ssize_t AmountRead; \
5216      AmountRead = PositionIOPntr->Read (Buffer, sizeof (Buffer)); \
5217      if (AmountRead < 0) \
5218      { \
5219        sprintf (ErrorMessage, IOErrorString, AmountRead, OptionalFileName); \
5220        return AmountRead; \
5221      } \
5222      else if (AmountRead == 0) \
5223        CharVar = EOF; \
5224      else \
5225      { \
5226        BufferEndPntr = Buffer + AmountRead; \
5227        BufferCurrentPntr = Buffer + 0; \
5228        CharVar = *BufferCurrentPntr++; \
5229      } \
5230    } \
5231  }
5232
5233  /* Read all the words in the file and add them to our local set of words.  A
5234  set is used since we don't care how many times a word occurs. */
5235
5236  while (true)
5237  {
5238    /* We read two letters ahead so that we can decode quoted printable
5239    characters (an equals sign followed by two hex digits or a new line).  Note
5240    that Letter can become EOF (-1) when end of file is reached. */
5241
5242    Letter = NextLetter;
5243    NextLetter = NextNextLetter;
5244    ReadChar (NextNextLetter);
5245
5246    /* Decode quoted printable codes first, so that the rest of the code just
5247    sees an ordinary character.  Or even nothing, if it is the hidden line
5248    break combination.  This may falsely corrupt stuff following an equals
5249    sign, but usually won't. */
5250
5251    if (Letter == '=')
5252    {
5253      if ((NextLetter == '\r' && NextNextLetter == '\n') ||
5254      (NextLetter == '\n' && NextNextLetter == '\r'))
5255      {
5256        /* Make the "=\r\n" pair disappear.  It's not even white space. */
5257        ReadChar (NextLetter);
5258        ReadChar (NextNextLetter);
5259        continue;
5260      }
5261      if (NextLetter == '\n' || NextLetter == '\r')
5262      {
5263        /* Make the "=\n" pair disappear.  It's not even white space. */
5264        NextLetter = NextNextLetter;
5265        ReadChar (NextNextLetter);
5266        continue;
5267      }
5268      if (NextNextLetter != EOF &&
5269      isxdigit (NextLetter) && isxdigit (NextNextLetter))
5270      {
5271        /* Convert the hex code to a letter. */
5272        HexString[0] = NextLetter;
5273        HexString[1] = NextNextLetter;
5274        HexString[2] = 0;
5275        Letter = strtoul (HexString, NULL, 16 /* number system base */);
5276        ReadChar (NextLetter);
5277        ReadChar (NextNextLetter);
5278      }
5279    }
5280
5281    /* Convert to lower case to improve word matches.  Of course this loses a
5282    bit of information, such as MONEY vs Money, an indicator of spam.  Well,
5283    apparently that isn't all that useful a distinction, so do it. */
5284
5285    if (Letter >= 'A' && Letter < 'Z')
5286      Letter = Letter + ('a' - 'A');
5287
5288    /* See if it is a letter we treat as white space - all control characters
5289    and all punctuation except for: apostrophe (so "it's" and possessive
5290    versions of words get stored), dash (for hyphenated words), dollar sign
5291    (for cash amounts), period (for IP addresses, we later remove trailing
5292    (periods).  Note that codes above 127 are UTF-8 characters, which we
5293    consider non-space. */
5294
5295    if (Letter < 0 /* EOF */ || (Letter < 128 && g_SpaceCharacters[Letter]))
5296    {
5297      /* That space finished off a word.  Remove trailing periods... */
5298
5299      while ((Length = AccumulatedWord.size()) > 0 &&
5300      AccumulatedWord [Length-1] == '.')
5301        AccumulatedWord.resize (Length - 1);
5302
5303      /* If there's anything left in the word, add it to the set.  Also ignore
5304      words which are too big (it's probably some binary encoded data).  But
5305      leave room for supercalifragilisticexpialidoceous.  According to one web
5306      site, pneumonoultramicroscopicsilicovolcanoconiosis is the longest word
5307      currently in English.  Note that some uuencoded data was seen with a 60
5308      character line length. */
5309
5310      if (Length > 0 && Length <= g_MaxWordLength)
5311        WordSet.insert (AccumulatedWord);
5312
5313      /* Empty out the string to get ready for the next word. */
5314
5315      AccumulatedWord.resize (0);
5316    }
5317    else /* Not a space-like character, add it to the word. */
5318      AccumulatedWord.append (1 /* one copy of the char */, (char) Letter);
5319
5320    /* Stop at end of file or error.  Don't care which.  Exit here so that last
5321    word got processed. */
5322
5323    if (Letter == EOF)
5324      break;
5325  }
5326
5327  return B_OK;
5328}
5329
5330
5331
/******************************************************************************
 * Implementation of the ClassificationChoicesWindow class, constructor,
 * destructor and the rest of the member functions in mostly alphabetical
 * order.
 */
5337
5338ClassificationChoicesWindow::ClassificationChoicesWindow (
5339  BRect FrameRect,
5340  const char *FileName,
5341  int NumberOfFiles)
5342: BWindow (FrameRect, "Classification Choices", B_TITLED_WINDOW,
5343    B_NOT_ZOOMABLE | B_NOT_RESIZABLE | B_ASYNCHRONOUS_CONTROLS),
5344  m_BulkModeSelectedPntr (NULL),
5345  m_ChoosenClassificationPntr (NULL)
5346{
5347  ClassificationChoicesView *SubViewPntr;
5348
5349  SubViewPntr = new ClassificationChoicesView (Bounds(),
5350    FileName, NumberOfFiles);
5351  AddChild (SubViewPntr);
5352  SubViewPntr->ResizeToPreferred ();
5353  ResizeTo (SubViewPntr->Frame().Width(), SubViewPntr->Frame().Height());
5354}
5355
5356
5357void
5358ClassificationChoicesWindow::MessageReceived (BMessage *MessagePntr)
5359{
5360  BControl *ControlPntr;
5361
5362  if (MessagePntr->what >= MSG_CLASS_BUTTONS &&
5363  MessagePntr->what < MSG_CLASS_BUTTONS + CL_MAX)
5364  {
5365    if (m_ChoosenClassificationPntr != NULL)
5366      *m_ChoosenClassificationPntr =
5367        (ClassificationTypes) (MessagePntr->what - MSG_CLASS_BUTTONS);
5368    PostMessage (B_QUIT_REQUESTED); // Close and destroy the window.
5369    return;
5370  }
5371
5372  if (MessagePntr->what == MSG_BULK_CHECKBOX)
5373  {
5374    if (m_BulkModeSelectedPntr != NULL &&
5375    MessagePntr->FindPointer ("source", (void **) &ControlPntr) == B_OK)
5376      *m_BulkModeSelectedPntr = (ControlPntr->Value() == B_CONTROL_ON);
5377    return;
5378  }
5379
5380  if (MessagePntr->what == MSG_CANCEL_BUTTON)
5381  {
5382    PostMessage (B_QUIT_REQUESTED); // Close and destroy the window.
5383    return;
5384  }
5385
5386  BWindow::MessageReceived (MessagePntr);
5387}
5388
5389
5390void
5391ClassificationChoicesWindow::Go (
5392  bool *BulkModeSelectedPntr,
5393  ClassificationTypes *ChoosenClassificationPntr)
5394{
5395  status_t  ErrorCode = 0;
5396  BView    *MainViewPntr;
5397  thread_id WindowThreadID;
5398
5399  m_BulkModeSelectedPntr = BulkModeSelectedPntr;
5400  m_ChoosenClassificationPntr = ChoosenClassificationPntr;
5401  if (m_ChoosenClassificationPntr != NULL)
5402    *m_ChoosenClassificationPntr = CL_MAX;
5403
5404  Show (); // Starts the window thread running.
5405
5406  /* Move the window to the center of the screen it is now being displayed on
5407  (have to wait for it to be showing). */
5408
5409  Lock ();
5410  MainViewPntr = FindView ("ClassificationChoicesView");
5411  if (MainViewPntr != NULL)
5412  {
5413    BRect   TempRect;
5414    BScreen TempScreen (this);
5415    float   X;
5416    float   Y;
5417
5418    TempRect = TempScreen.Frame ();
5419    X = TempRect.Width() / 2;
5420    Y = TempRect.Height() / 2;
5421    TempRect = MainViewPntr->Frame();
5422    X -= TempRect.Width() / 2;
5423    Y -= TempRect.Height() / 2;
5424    MoveTo (ceilf (X), ceilf (Y));
5425  }
5426  Unlock ();
5427
5428  /* Wait for the window to go away. */
5429
5430  WindowThreadID = Thread ();
5431  if (WindowThreadID >= 0)
5432    // Delay until the window thread has died, presumably window deleted now.
5433    wait_for_thread (WindowThreadID, &ErrorCode);
5434}
5435
5436
5437
5438/******************************************************************************
5439 * Implementation of the ClassificationChoicesView class, constructor,
5440 * destructor and the rest of the member functions in mostly alphabetical
5441 * order.
5442 */
5443
5444ClassificationChoicesView::ClassificationChoicesView (
5445  BRect FrameRect,
5446  const char *FileName,
5447  int NumberOfFiles)
5448: BView (FrameRect, "ClassificationChoicesView",
5449    B_FOLLOW_TOP | B_FOLLOW_LEFT, B_WILL_DRAW | B_NAVIGABLE_JUMP),
5450  m_FileName (FileName),
5451  m_NumberOfFiles (NumberOfFiles),
5452  m_PreferredBottomY (ceilf (g_ButtonHeight * 10))
5453{
5454}
5455
5456
5457void
5458ClassificationChoicesView::AttachedToWindow ()
5459{
5460  BButton            *ButtonPntr;
5461  BCheckBox          *CheckBoxPntr;
5462  ClassificationTypes Classification;
5463  float               Margin;
5464  float               RowHeight;
5465  float               RowTop;
5466  BTextView          *TextViewPntr;
5467  BRect               TempRect;
5468  char                TempString [2048];
5469  BRect               TextRect;
5470  float               X;
5471
5472  SetViewColor (ui_color (B_PANEL_BACKGROUND_COLOR));
5473
5474  RowHeight = g_ButtonHeight;
5475  if (g_CheckBoxHeight > RowHeight)
5476    RowHeight = g_CheckBoxHeight;
5477  RowHeight = ceilf (RowHeight * 1.1);
5478
5479  TempRect = Bounds ();
5480  RowTop = TempRect.top;
5481
5482  /* Show the file name text. */
5483
5484  Margin = ceilf ((RowHeight - g_StringViewHeight) / 2);
5485  TempRect = Bounds ();
5486  TempRect.top = RowTop + Margin;
5487  TextRect = TempRect;
5488  TextRect.OffsetTo (0, 0);
5489  TextRect.InsetBy (g_MarginBetweenControls, 2);
5490  sprintf (TempString, "How do you want to classify the file named \"%s\"?",
5491    m_FileName);
5492  TextViewPntr = new BTextView (TempRect, "FileText", TextRect,
5493    B_FOLLOW_TOP | B_FOLLOW_LEFT, B_WILL_DRAW | B_FULL_UPDATE_ON_RESIZE);
5494  AddChild (TextViewPntr);
5495  TextViewPntr->SetText (TempString);
5496  TextViewPntr->MakeEditable (false);
5497  TextViewPntr->SetViewColor (ui_color (B_PANEL_BACKGROUND_COLOR));
5498  TextViewPntr->ResizeTo (TempRect.Width (),
5499    3 + TextViewPntr->TextHeight (0, sizeof (TempString)));
5500  RowTop = TextViewPntr->Frame().bottom + Margin;
5501
5502  /* Make the classification buttons. */
5503
5504  Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
5505  TempRect = Bounds ();
5506  TempRect.top = RowTop + Margin;
5507  X = Bounds().left + g_MarginBetweenControls;
5508  for (Classification = (ClassificationTypes) 0; Classification < CL_MAX;
5509  Classification = (ClassificationTypes) ((int) Classification + 1))
5510  {
5511    TempRect = Bounds ();
5512    TempRect.top = RowTop + Margin;
5513    TempRect.left = X;
5514    sprintf (TempString, "%s Button",
5515      g_ClassificationTypeNames [Classification]);
5516    ButtonPntr = new BButton (TempRect, TempString,
5517      g_ClassificationTypeNames [Classification], new BMessage (
5518      ClassificationChoicesWindow::MSG_CLASS_BUTTONS + Classification));
5519    AddChild (ButtonPntr);
5520    ButtonPntr->ResizeToPreferred ();
5521    X = ButtonPntr->Frame().right + 3 * g_MarginBetweenControls;
5522  }
5523  RowTop += ceilf (RowHeight * 1.2);
5524
5525  /* Make the Cancel button. */
5526
5527  Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
5528  TempRect = Bounds ();
5529  TempRect.top = RowTop + Margin;
5530  TempRect.left += g_MarginBetweenControls;
5531  ButtonPntr = new BButton (TempRect, "Cancel Button",
5532    "Cancel", new BMessage (ClassificationChoicesWindow::MSG_CANCEL_BUTTON));
5533  AddChild (ButtonPntr);
5534  ButtonPntr->ResizeToPreferred ();
5535  X = ButtonPntr->Frame().right + g_MarginBetweenControls;
5536
5537  /* Make the checkbox for bulk operations. */
5538
5539  if (m_NumberOfFiles > 1)
5540  {
5541    Margin = ceilf ((RowHeight - g_CheckBoxHeight) / 2);
5542    TempRect = Bounds ();
5543    TempRect.top = RowTop + Margin;
5544    TempRect.left = X;
5545    sprintf (TempString, "Mark all %d remaining messages the same way.",
5546      m_NumberOfFiles - 1);
5547    CheckBoxPntr = new BCheckBox (TempRect, "BulkBox", TempString,
5548      new BMessage (ClassificationChoicesWindow::MSG_BULK_CHECKBOX));
5549    AddChild (CheckBoxPntr);
5550    CheckBoxPntr->ResizeToPreferred ();
5551  }
5552  RowTop += RowHeight;
5553
5554  m_PreferredBottomY = RowTop;
5555}
5556
5557
5558void
5559ClassificationChoicesView::GetPreferredSize (float *width, float *height)
5560{
5561  if (width != NULL)
5562    *width = Bounds().Width();
5563  if (height != NULL)
5564    *height = m_PreferredBottomY;
5565}
5566
5567
5568
5569/******************************************************************************
5570 * Implementation of the CommanderLooper class, constructor, destructor and the
5571 * rest of the member functions in mostly alphabetical order.
5572 */
5573
5574CommanderLooper::CommanderLooper ()
5575: BLooper ("CommanderLooper", B_NORMAL_PRIORITY),
5576  m_IsBusy (false)
5577{
5578}
5579
5580
5581CommanderLooper::~CommanderLooper ()
5582{
5583  g_CommanderLooperPntr = NULL;
5584  delete g_CommanderMessenger;
5585  g_CommanderMessenger = NULL;
5586}
5587
5588
5589/* Process some command line arguments.  Basically just send a message to this
5590looper itself to do the work later.  That way the caller can continue doing
5591whatever they're doing, particularly if it's the BApplication. */
5592
5593void
5594CommanderLooper::CommandArguments (int argc, char **argv)
5595{
5596  int      i;
5597  BMessage InternalMessage;
5598
5599  InternalMessage.what = MSG_COMMAND_ARGUMENTS;
5600  for (i = 0; i < argc; i++)
5601    InternalMessage.AddString ("arg", argv[i]);
5602
5603  PostMessage (&InternalMessage);
5604}
5605
5606
5607/* Copy the refs out of the given message and stuff them into an internal
5608message to ourself (so that the original message can be returned to the caller,
5609and if it is Tracker, it can close the file handles it has open).  Optionally
5610allow preset classification rather than asking the user (set BulkMode to TRUE
5611and specify the class with BulkClassification). */
5612
5613void
5614CommanderLooper::CommandReferences (
5615  BMessage *MessagePntr,
5616  bool BulkMode,
5617  ClassificationTypes BulkClassification)
5618{
5619  entry_ref EntryRef;
5620  int       i;
5621  BMessage  InternalMessage;
5622
5623  InternalMessage.what = MSG_COMMAND_FILE_REFS;
5624  for (i = 0; MessagePntr->FindRef ("refs", i, &EntryRef) == B_OK; i++)
5625    InternalMessage.AddRef ("refs", &EntryRef);
5626  InternalMessage.AddBool ("BulkMode", BulkMode);
5627  InternalMessage.AddInt32 ("BulkClassification", BulkClassification);
5628
5629  PostMessage (&InternalMessage);
5630}
5631
5632
5633/* This function is called by other threads to see if the CommanderLooper is
5634busy working on something. */
5635
5636bool
5637CommanderLooper::IsBusy ()
5638{
5639  if (m_IsBusy)
5640    return true;
5641
5642  if (IsLocked () || !MessageQueue()->IsEmpty ())
5643    return true;
5644
5645  return false;
5646}
5647
5648
5649void
5650
5651CommanderLooper::MessageReceived (BMessage *MessagePntr)
5652{
5653  m_IsBusy = true;
5654
5655  if (MessagePntr->what == MSG_COMMAND_ARGUMENTS)
5656    ProcessArgs (MessagePntr);
5657  else if (MessagePntr->what == MSG_COMMAND_FILE_REFS)
5658    ProcessRefs (MessagePntr);
5659  else
5660    BLooper::MessageReceived (MessagePntr);
5661
5662  m_IsBusy = false;
5663}
5664
5665
5666/* Process the command line by converting it into a series of scripting
5667messages (possibly thousands) and sent them to the BApplication synchronously
5668(so we can print the result). */
5669
5670void
5671CommanderLooper::ProcessArgs (BMessage *MessagePntr)
5672{
5673  int32                 argc = 0;
5674  const char          **argv = NULL;
5675  int                   ArgumentIndex;
5676  uint32                CommandCode;
5677  const char           *CommandWord;
5678  status_t              ErrorCode;
5679  const char           *ErrorTitle = "ProcessArgs";
5680  char                 *EndPntr;
5681  int32                 i;
5682  BMessage              ReplyMessage;
5683  BMessage              ScriptMessage;
5684  struct property_info *PropInfoPntr;
5685  const char           *PropertyName;
5686  bool                  TempBool;
5687  float                 TempFloat;
5688  int32                 TempInt32;
5689  const char           *TempStringPntr;
5690  type_code             TypeCode;
5691  const char           *ValuePntr;
5692
5693  /* Get the argument count and pointers to arguments out of the message and
5694  into our argc and argv. */
5695
5696  ErrorCode = MessagePntr->GetInfo ("arg", &TypeCode, &argc);
5697  if (ErrorCode != B_OK || TypeCode != B_STRING_TYPE)
5698  {
5699    DisplayErrorMessage ("Unable to find argument strings in message",
5700      ErrorCode, ErrorTitle);
5701    goto ErrorExit;
5702  }
5703
5704  if (argc < 2)
5705  {
5706    cerr << PrintUsage;
5707    DisplayErrorMessage ("You need to specify a command word, like GET, SET "
5708      "and so on followed by a property, like DatabaseFile, and maybe "
5709      "followed by a value of some sort", -1, ErrorTitle);
5710    goto ErrorExit;
5711  }
5712
5713  argv = (const char **) malloc (sizeof (char *) * argc);
5714  if (argv == NULL)
5715  {
5716    DisplayErrorMessage ("Out of memory when allocating argv array",
5717      ENOMEM, ErrorTitle);
5718    goto ErrorExit;
5719  }
5720
5721  for (i = 0; i < argc; i++)
5722  {
5723    if ((ErrorCode = MessagePntr->FindString ("arg", i, &argv[i])) != B_OK)
5724    {
5725      DisplayErrorMessage ("Unable to find argument in the BMessage",
5726        ErrorCode, ErrorTitle);
5727      goto ErrorExit;
5728    }
5729  }
5730
5731  CommandWord = argv[1];
5732
5733  /* Special case for the Quit command since it isn't a scripting command. */
5734
5735  if (strcasecmp (CommandWord, "quit") == 0)
5736  {
5737    g_QuitCountdown = 10;
5738    goto ErrorExit;
5739  }
5740
5741  /* Find the corresponding scripting command. */
5742
5743  if (strcasecmp (CommandWord, "set") == 0)
5744    CommandCode = B_SET_PROPERTY;
5745  else if (strcasecmp (CommandWord, "get") == 0)
5746    CommandCode = B_GET_PROPERTY;
5747  else if (strcasecmp (CommandWord, "count") == 0)
5748    CommandCode = B_COUNT_PROPERTIES;
5749  else if (strcasecmp (CommandWord, "create") == 0)
5750    CommandCode = B_CREATE_PROPERTY;
5751  else if (strcasecmp (CommandWord, "delete") == 0)
5752    CommandCode = B_DELETE_PROPERTY;
5753  else
5754    CommandCode = B_EXECUTE_PROPERTY;
5755
5756  if (CommandCode == B_EXECUTE_PROPERTY)
5757  {
5758    PropertyName = CommandWord;
5759    ArgumentIndex = 2; /* Arguments to the command start at this index. */
5760  }
5761  else
5762  {
5763    if (CommandCode == B_SET_PROPERTY)
5764    {
5765      /* SET commands require at least one argument value. */
5766      if (argc < 4)
5767      {
5768        cerr << PrintUsage;
5769        DisplayErrorMessage ("SET commands require at least one "
5770          "argument value after the property name", -1, ErrorTitle);
5771        goto ErrorExit;
5772      }
5773    }
5774    else
5775      if (argc < 3)
5776      {
5777        cerr << PrintUsage;
5778        DisplayErrorMessage ("You need to specify a property to act on",
5779          -1, ErrorTitle);
5780        goto ErrorExit;
5781      }
5782    PropertyName = argv[2];
5783    ArgumentIndex = 3;
5784  }
5785
5786  /* See if it is one of our commands. */
5787
5788  for (PropInfoPntr = g_ScriptingPropertyList + 0; true; PropInfoPntr++)
5789  {
5790    if (PropInfoPntr->name == 0)
5791    {
5792      cerr << PrintUsage;
5793      DisplayErrorMessage ("The property specified isn't known or "
5794        "doesn't support the requested action (usually means it is an "
5795        "unknown command)", -1, ErrorTitle);
5796      goto ErrorExit; /* Unrecognized command. */
5797    }
5798
5799    if (PropInfoPntr->commands[0] == CommandCode &&
5800    strcasecmp (PropertyName, PropInfoPntr->name) == 0)
5801      break;
5802  }
5803
5804  /* Make the equivalent command message.  For commands with multiple
5805  arguments, repeat the message for each single argument and just change the
5806  data portion for each extra argument.  Send the command and wait for a reply,
5807  which we'll print out. */
5808
5809  ScriptMessage.MakeEmpty ();
5810  ScriptMessage.what = CommandCode;
5811  ScriptMessage.AddSpecifier (PropertyName);
5812  while (true)
5813  {
5814    if (ArgumentIndex < argc) /* If there are arguments to be added. */
5815    {
5816      ValuePntr = argv[ArgumentIndex];
5817
5818      /* Convert the value into the likely kind of data. */
5819
5820      if (strcasecmp (ValuePntr, "yes") == 0 ||
5821      strcasecmp (ValuePntr, "true") == 0)
5822        ScriptMessage.AddBool (g_DataName, true);
5823      else if (strcasecmp (ValuePntr, "no") == 0 ||
5824      strcasecmp (ValuePntr, "false") == 0)
5825        ScriptMessage.AddBool (g_DataName, false);
5826      else
5827      {
5828        /* See if it is a number. */
5829        i = strtol (ValuePntr, &EndPntr, 0);
5830        if (*EndPntr == 0)
5831          ScriptMessage.AddInt32 (g_DataName, i);
5832        else /* Nope, it's just a string. */
5833          ScriptMessage.AddString (g_DataName, ValuePntr);
5834      }
5835    }
5836
5837    ErrorCode = be_app_messenger.SendMessage (&ScriptMessage, &ReplyMessage);
5838    if (ErrorCode != B_OK)
5839    {
5840      DisplayErrorMessage ("Unable to send scripting command",
5841        ErrorCode, ErrorTitle);
5842      goto ErrorExit;
5843    }
5844
5845    /* Print the reply to the scripting command.  Even in server mode.  To
5846    standard output. */
5847
5848    if (ReplyMessage.FindString ("CommandText", &TempStringPntr) == B_OK)
5849    {
5850      TempInt32 = -1;
5851      if (ReplyMessage.FindInt32 ("error", &TempInt32) == B_OK &&
5852      TempInt32 == B_OK)
5853      {
5854        /* It's a successful reply to one of our scripting messages.  Print out
5855        the returned values code for command line users to see. */
5856
5857        cout << "Result of command to " << TempStringPntr << " is:\t";
5858        if (ReplyMessage.FindString (g_ResultName, &TempStringPntr) == B_OK)
5859          cout << "\"" << TempStringPntr << "\"";
5860        else if (ReplyMessage.FindInt32 (g_ResultName, &TempInt32) == B_OK)
5861          cout << TempInt32;
5862        else if (ReplyMessage.FindFloat (g_ResultName, &TempFloat) == B_OK)
5863          cout << TempFloat;
5864        else if (ReplyMessage.FindBool (g_ResultName, &TempBool) == B_OK)
5865          cout << (TempBool ? "true" : "false");
5866        else
5867          cout << "just plain success";
5868        if (ReplyMessage.FindInt32 ("count", &TempInt32) == B_OK)
5869          cout << "\t(count " << TempInt32 << ")";
5870        for (i = 0; (i < 50) &&
5871        ReplyMessage.FindString ("words", i, &TempStringPntr) == B_OK &&
5872        ReplyMessage.FindFloat ("ratios", i, &TempFloat) == B_OK;
5873        i++)
5874        {
5875          if (i == 0)
5876            cout << "\twith top words:\t";
5877          else
5878            cout << "\t";
5879          cout << TempStringPntr << "/" << TempFloat;
5880        }
5881        cout << endl;
5882      }
5883      else /* An error reply, print out the error, even in server mode. */
5884      {
5885        cout << "Failure of command " << TempStringPntr << ", error ";
5886        cout << TempInt32 << " (" << strerror (TempInt32) << ")";
5887        if (ReplyMessage.FindString ("message", &TempStringPntr) == B_OK)
5888          cout << ", message: " << TempStringPntr;
5889        cout << "." << endl;
5890      }
5891    }
5892
5893    /* Advance to the next argument and its scripting message. */
5894
5895    ScriptMessage.RemoveName (g_DataName);
5896    if (++ArgumentIndex >= argc)
5897      break;
5898  }
5899
5900ErrorExit:
5901  free (argv);
5902}
5903
5904
5905/* Given a bunch of references to files, open the files.  If it's a database
5906file, switch to using it as a database.  Otherwise, treat them as text files
5907and add them to the database.  Prompt the user for the spam or genuine or
5908uncertain (declassification) choice, with the option to bulk mark many files at
5909once. */
5910
5911void
5912CommanderLooper::ProcessRefs (BMessage *MessagePntr)
5913{
5914  bool                         BulkMode = false;
5915  ClassificationTypes          BulkClassification = CL_GENUINE;
5916  ClassificationChoicesWindow *ChoiceWindowPntr;
5917  BEntry                       Entry;
5918  entry_ref                    EntryRef;
5919  status_t                     ErrorCode;
5920  const char                  *ErrorTitle = "CommanderLooper::ProcessRefs";
5921  int32                        NumberOfRefs = 0;
5922  BPath                        Path;
5923  int                          RefIndex;
5924  BMessage                     ReplyMessage;
5925  BMessage                     ScriptingMessage;
5926  bool                         TempBool;
5927  BFile                        TempFile;
5928  int32                        TempInt32;
5929  char                         TempString [PATH_MAX + 1024];
5930  type_code                    TypeCode;
5931
5932  // Wait for ReadyToRun to finish initializing the globals with the sizes of
5933  // the controls, since they are needed when we show the custom alert box for
5934  // choosing the message type.
5935
5936  TempInt32 = 0;
5937  while (!g_AppReadyToRunCompleted && TempInt32++ < 10)
5938    snooze (200000);
5939
5940  ErrorCode = MessagePntr->GetInfo ("refs", &TypeCode, &NumberOfRefs);
5941  if (ErrorCode != B_OK || TypeCode != B_REF_TYPE || NumberOfRefs <= 0)
5942  {
5943    DisplayErrorMessage ("Unable to get refs from the message",
5944      ErrorCode, ErrorTitle);
5945    return;
5946  }
5947
5948  if (MessagePntr->FindBool ("BulkMode", &TempBool) == B_OK)
5949    BulkMode = TempBool;
5950  if (MessagePntr->FindInt32 ("BulkClassification", &TempInt32) == B_OK &&
5951  TempInt32 >= 0 && TempInt32 < CL_MAX)
5952    BulkClassification = (ClassificationTypes) TempInt32;
5953
5954  for (RefIndex = 0;
5955  MessagePntr->FindRef ("refs", RefIndex, &EntryRef) == B_OK;
5956  RefIndex++)
5957  {
5958    ScriptingMessage.MakeEmpty ();
5959    ScriptingMessage.what = 0; /* Haven't figured out what to do yet. */
5960
5961    /* See if the entry is a valid file or directory or other thing. */
5962
5963    ErrorCode = Entry.SetTo (&EntryRef, true /* traverse symbolic links */);
5964    if (ErrorCode != B_OK ||
5965    ((ErrorCode = /* assignment */ B_ENTRY_NOT_FOUND) != 0 /* this pacifies
5966    mwcc -nwhitehorn */ && !Entry.Exists ()) ||
5967    ((ErrorCode = Entry.GetPath (&Path)) != B_OK))
5968    {
5969      DisplayErrorMessage ("Bad entry reference encountered, will skip it",
5970        ErrorCode, ErrorTitle);
5971      BulkMode = false;
5972      continue; /* Bad file reference, try the next one. */
5973    }
5974
5975    /* If it's a file, check if it is a spam database file.  Go by the magic
5976    text at the start of the file, in case someone has edited the file with a
5977    spreadsheet or other tool and lost the MIME type. */
5978
5979    if (Entry.IsFile ())
5980    {
5981      ErrorCode = TempFile.SetTo (&Entry, B_READ_ONLY);
5982      if (ErrorCode != B_OK)
5983      {
5984        sprintf (TempString, "Unable to open file \"%s\" for reading, will "
5985          "skip it", Path.Path ());
5986        DisplayErrorMessage (TempString, ErrorCode, ErrorTitle);
5987        BulkMode = false;
5988        continue;
5989      }
5990      if (TempFile.Read (TempString, strlen (g_DatabaseRecognitionString)) ==
5991      (int) strlen (g_DatabaseRecognitionString) && strncmp (TempString,
5992      g_DatabaseRecognitionString, strlen (g_DatabaseRecognitionString)) == 0)
5993      {
5994        ScriptingMessage.what = B_SET_PROPERTY;
5995        ScriptingMessage.AddSpecifier (g_PropertyNames[PN_DATABASE_FILE]);
5996        ScriptingMessage.AddString (g_DataName, Path.Path ());
5997      }
5998      TempFile.Unset ();
5999    }
6000
6001    /* Not a database file.  Could be a directory or a file.  Submit it as
6002    something to be marked spam or genuine. */
6003
6004    if (ScriptingMessage.what == 0)
6005    {
6006      if (!Entry.IsFile ())
6007      {
6008        sprintf (TempString, "\"%s\" is not a file, can't do anything with it",
6009          Path.Path ());
6010        DisplayErrorMessage (TempString, -1, ErrorTitle);
6011        BulkMode = false;
6012        continue;
6013      }
6014
6015      if (!BulkMode) /* Have to ask the user. */
6016      {
6017        ChoiceWindowPntr = new ClassificationChoicesWindow (
6018          BRect (40, 40, 40 + 50 * g_MarginBetweenControls,
6019          40 + g_ButtonHeight * 5), Path.Path (), NumberOfRefs - RefIndex);
6020        ChoiceWindowPntr->Go (&BulkMode, &BulkClassification);
6021        if (BulkClassification == CL_MAX)
6022          break; /* Cancel was picked. */
6023      }
6024
6025      /* Format the command for classifying the file. */
6026
6027      ScriptingMessage.what = B_SET_PROPERTY;
6028
6029      if (BulkClassification == CL_GENUINE)
6030        ScriptingMessage.AddSpecifier (g_PropertyNames[PN_GENUINE]);
6031      else if (BulkClassification == CL_SPAM)
6032        ScriptingMessage.AddSpecifier (g_PropertyNames[PN_SPAM]);
6033      else if (BulkClassification == CL_UNCERTAIN)
6034        ScriptingMessage.AddSpecifier (g_PropertyNames[PN_UNCERTAIN]);
6035      else /* Broken code */
6036        break;
6037      ScriptingMessage.AddString (g_DataName, Path.Path ());
6038    }
6039
6040    /* Tell the BApplication to do the work, and wait for it to finish.  The
6041    BApplication will display any error messages for us. */
6042
6043    ErrorCode =
6044      be_app_messenger.SendMessage (&ScriptingMessage, &ReplyMessage);
6045    if (ErrorCode != B_OK)
6046    {
6047      DisplayErrorMessage ("Unable to send scripting command",
6048        ErrorCode, ErrorTitle);
6049      return;
6050    }
6051
6052    /* If there was an error, allow the user to stop by switching off bulk
6053    mode.  The message will already have been displayed in an alert box, if
6054    server mode is off. */
6055
6056    if (ReplyMessage.FindInt32 ("error", &TempInt32) != B_OK ||
6057    TempInt32 != B_OK)
6058      BulkMode = false;
6059  }
6060}
6061
6062
6063
6064/******************************************************************************
6065 * Implementation of the ControlsView class, constructor, destructor and the
6066 * rest of the member functions in mostly alphabetical order.
6067 */
6068
6069ControlsView::ControlsView (BRect NewBounds)
6070: BView (NewBounds, "ControlsView", B_FOLLOW_TOP | B_FOLLOW_LEFT_RIGHT,
6071    B_WILL_DRAW | B_PULSE_NEEDED | B_NAVIGABLE_JUMP | B_FRAME_EVENTS),
6072  m_AboutButtonPntr (NULL),
6073  m_AddExampleButtonPntr (NULL),
6074  m_BrowseButtonPntr (NULL),
6075  m_BrowseFilePanelPntr (NULL),
6076  m_CreateDatabaseButtonPntr (NULL),
6077  m_DatabaseFileNameTextboxPntr (NULL),
6078  m_DatabaseLoadDone (false),
6079  m_EstimateSpamButtonPntr (NULL),
6080  m_EstimateSpamFilePanelPntr (NULL),
6081  m_GenuineCountTextboxPntr (NULL),
6082  m_IgnorePreviousClassCheckboxPntr (NULL),
6083  m_InstallThingsButtonPntr (NULL),
6084  m_PurgeAgeTextboxPntr (NULL),
6085  m_PurgeButtonPntr (NULL),
6086  m_PurgePopularityTextboxPntr (NULL),
6087  m_ResetToDefaultsButtonPntr (NULL),
6088  m_ScoringModeMenuBarPntr (NULL),
6089  m_ScoringModePopUpMenuPntr (NULL),
6090  m_ServerModeCheckboxPntr (NULL),
6091  m_SpamCountTextboxPntr (NULL),
6092  m_TimeOfLastPoll (0),
6093  m_TokenizeModeMenuBarPntr (NULL),
6094  m_TokenizeModePopUpMenuPntr (NULL),
6095  m_WordCountTextboxPntr (NULL)
6096{
6097}
6098
6099
6100ControlsView::~ControlsView ()
6101{
6102  if (m_BrowseFilePanelPntr != NULL)
6103  {
6104    delete m_BrowseFilePanelPntr;
6105    m_BrowseFilePanelPntr = NULL;
6106  }
6107
6108  if (m_EstimateSpamFilePanelPntr != NULL)
6109  {
6110    delete m_EstimateSpamFilePanelPntr;
6111    m_EstimateSpamFilePanelPntr = NULL;
6112  }
6113}
6114
6115
6116void
6117ControlsView::AttachedToWindow ()
6118{
6119  float         BigPurgeButtonTop;
6120  BMessage      CommandMessage;
6121  const char   *EightDigitsString = " 12345678 ";
6122  float         Height;
6123  float         Margin;
6124  float         RowHeight;
6125  float         RowTop;
6126  ScoringModes  ScoringMode;
6127  const char   *StringPntr;
6128  BMenuItem    *TempMenuItemPntr;
6129  BRect         TempRect;
6130  char          TempString [PATH_MAX];
6131  TokenizeModes TokenizeMode;
6132  float         Width;
6133  float         X;
6134
6135  SetViewColor (ui_color (B_PANEL_BACKGROUND_COLOR));
6136
6137  TempRect = Bounds ();
6138  X = TempRect.right;
6139  RowTop = TempRect.top;
6140  RowHeight = g_ButtonHeight;
6141  if (g_TextBoxHeight > RowHeight)
6142    RowHeight = g_TextBoxHeight;
6143  RowHeight = ceilf (RowHeight * 1.1);
6144
6145  /* Make the Create button at the far right of the first row of controls,
6146  which are all database file related. */
6147
6148  Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6149  TempRect = Bounds ();
6150  TempRect.top = RowTop + Margin;
6151  TempRect.bottom = TempRect.top + g_ButtonHeight;
6152
6153  CommandMessage.MakeEmpty ();
6154  CommandMessage.what = B_CREATE_PROPERTY;
6155  CommandMessage.AddSpecifier (g_PropertyNames[PN_DATABASE_FILE]);
6156  m_CreateDatabaseButtonPntr = new BButton (TempRect, "Create Button",
6157    "Create", new BMessage (CommandMessage), B_FOLLOW_RIGHT | B_FOLLOW_TOP);
6158  if (m_CreateDatabaseButtonPntr == NULL) goto ErrorExit;
6159  AddChild (m_CreateDatabaseButtonPntr);
6160  m_CreateDatabaseButtonPntr->SetTarget (be_app);
6161  m_CreateDatabaseButtonPntr->ResizeToPreferred ();
6162  m_CreateDatabaseButtonPntr->GetPreferredSize (&Width, &Height);
6163  m_CreateDatabaseButtonPntr->MoveTo (X - Width, TempRect.top);
6164  X -= Width + g_MarginBetweenControls;
6165
6166  /* Make the Browse button, middle of the first row. */
6167
6168  Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6169  TempRect = Bounds ();
6170  TempRect.top = RowTop + Margin;
6171  TempRect.bottom = TempRect.top + g_ButtonHeight;
6172
6173  m_BrowseButtonPntr = new BButton (TempRect, "Browse Button",
6174    "Browse���", new BMessage (MSG_BROWSE_BUTTON), B_FOLLOW_RIGHT | B_FOLLOW_TOP);
6175  if (m_BrowseButtonPntr == NULL) goto ErrorExit;
6176  AddChild (m_BrowseButtonPntr);
6177  m_BrowseButtonPntr->SetTarget (this);
6178  m_BrowseButtonPntr->ResizeToPreferred ();
6179  m_BrowseButtonPntr->GetPreferredSize (&Width, &Height);
6180  m_BrowseButtonPntr->MoveTo (X - Width, TempRect.top);
6181  X -= Width + g_MarginBetweenControls;
6182
6183  /* Fill the rest of the space on the first row with the file name box. */
6184
6185  Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6186  TempRect = Bounds ();
6187  TempRect.top = RowTop + Margin;
6188  TempRect.bottom = TempRect.top + g_TextBoxHeight;
6189  TempRect.right = X;
6190
6191  StringPntr = "Word Database:";
6192  strcpy (m_DatabaseFileNameCachedValue, "Unknown...");
6193  m_DatabaseFileNameTextboxPntr = new BTextControl (TempRect,
6194    "File Name",
6195    StringPntr /* label */,
6196    m_DatabaseFileNameCachedValue /* text */,
6197    new BMessage (MSG_DATABASE_NAME),
6198    B_FOLLOW_LEFT_RIGHT | B_FOLLOW_TOP,
6199    B_WILL_DRAW | B_NAVIGABLE | B_NAVIGABLE_JUMP);
6200  AddChild (m_DatabaseFileNameTextboxPntr);
6201  m_DatabaseFileNameTextboxPntr->SetTarget (this);
6202  m_DatabaseFileNameTextboxPntr->SetDivider (
6203    be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6204
6205  /* Second row contains the purge age, and a long line explaining it.  There
6206  is space to the right where the top half of the big purge button will go. */
6207
6208  RowTop += RowHeight /* previous row's RowHeight */;
6209  BigPurgeButtonTop = RowTop;
6210  TempRect = Bounds ();
6211  X = TempRect.left;
6212  RowHeight = g_TextBoxHeight;
6213  RowHeight = ceilf (RowHeight * 1.1);
6214
6215  StringPntr = "Number of occurrences needed to store a word:";
6216  m_PurgeAgeCachedValue = 12345678;
6217
6218  Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6219  TempRect.top = RowTop + Margin;
6220  TempRect.bottom = TempRect.top + g_TextBoxHeight;
6221  TempRect.left = X;
6222  TempRect.right = TempRect.left +
6223    be_plain_font->StringWidth (StringPntr) +
6224    be_plain_font->StringWidth (EightDigitsString) +
6225    3 * g_MarginBetweenControls;
6226
6227  sprintf (TempString, "%d", (int) m_PurgeAgeCachedValue);
6228  m_PurgeAgeTextboxPntr = new BTextControl (TempRect,
6229    "Purge Age",
6230    StringPntr /* label */,
6231    TempString /* text */,
6232    new BMessage (MSG_PURGE_AGE),
6233    B_FOLLOW_LEFT | B_FOLLOW_TOP,
6234    B_WILL_DRAW | B_NAVIGABLE);
6235  AddChild (m_PurgeAgeTextboxPntr);
6236  m_PurgeAgeTextboxPntr->SetTarget (this);
6237  m_PurgeAgeTextboxPntr->SetDivider (
6238    be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6239
6240  /* Third row contains the purge popularity and bottom half of the purge
6241  button. */
6242
6243  RowTop += RowHeight /* previous row's RowHeight */;
6244  TempRect = Bounds ();
6245  X = TempRect.left;
6246  RowHeight = g_TextBoxHeight;
6247  RowHeight = ceilf (RowHeight * 1.1);
6248
6249  StringPntr = "Number of messages to store words from:";
6250  m_PurgePopularityCachedValue = 87654321;
6251  Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6252  TempRect.top = RowTop + Margin;
6253  TempRect.bottom = TempRect.top + g_TextBoxHeight;
6254  TempRect.left = X;
6255  TempRect.right = TempRect.left +
6256    be_plain_font->StringWidth (StringPntr) +
6257    be_plain_font->StringWidth (EightDigitsString) +
6258    3 * g_MarginBetweenControls;
6259  X = TempRect.right + g_MarginBetweenControls;
6260
6261  sprintf (TempString, "%d", (int) m_PurgePopularityCachedValue);
6262  m_PurgePopularityTextboxPntr = new BTextControl (TempRect,
6263    "Purge Popularity",
6264    StringPntr /* label */,
6265    TempString /* text */,
6266    new BMessage (MSG_PURGE_POPULARITY),
6267    B_FOLLOW_LEFT | B_FOLLOW_TOP,
6268    B_WILL_DRAW | B_NAVIGABLE);
6269  AddChild (m_PurgePopularityTextboxPntr);
6270  m_PurgePopularityTextboxPntr->SetTarget (this);
6271  m_PurgePopularityTextboxPntr->SetDivider (
6272    be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6273
6274  /* Make the purge button, which will take up space in the 2nd and 3rd rows,
6275  on the right side.  Twice as tall as a regular button too. */
6276
6277  StringPntr = "Remove Old Words";
6278  Margin = ceilf ((((RowTop + RowHeight) - BigPurgeButtonTop) -
6279    2 * g_TextBoxHeight) / 2);
6280  TempRect.top = BigPurgeButtonTop + Margin;
6281  TempRect.bottom = TempRect.top + 2 * g_TextBoxHeight;
6282  TempRect.left = X;
6283  TempRect.right = X + ceilf (2 * be_plain_font->StringWidth (StringPntr));
6284
6285  CommandMessage.MakeEmpty ();
6286  CommandMessage.what = B_EXECUTE_PROPERTY;
6287  CommandMessage.AddSpecifier (g_PropertyNames[PN_PURGE]);
6288  m_PurgeButtonPntr = new BButton (TempRect, "Purge Button",
6289    StringPntr, new BMessage (CommandMessage), B_FOLLOW_LEFT | B_FOLLOW_TOP);
6290  if (m_PurgeButtonPntr == NULL) goto ErrorExit;
6291  m_PurgeButtonPntr->ResizeToPreferred();
6292  AddChild (m_PurgeButtonPntr);
6293  m_PurgeButtonPntr->SetTarget (be_app);
6294
6295  /* The fourth row contains the ignore previous classification checkbox. */
6296
6297  RowTop += RowHeight /* previous row's RowHeight */;
6298  TempRect = Bounds ();
6299  X = TempRect.left;
6300  RowHeight = g_CheckBoxHeight;
6301  RowHeight = ceilf (RowHeight * 1.1);
6302
6303  StringPntr = "Allow Retraining on a Message";
6304  m_IgnorePreviousClassCachedValue = false;
6305
6306  Margin = ceilf ((RowHeight - g_CheckBoxHeight) / 2);
6307  TempRect.top = RowTop + Margin;
6308  TempRect.bottom = TempRect.top + g_CheckBoxHeight;
6309  TempRect.left = X;
6310  m_IgnorePreviousClassCheckboxPntr = new BCheckBox (TempRect,
6311    "Ignore Check",
6312    StringPntr,
6313    new BMessage (MSG_IGNORE_CLASSIFICATION),
6314    B_FOLLOW_TOP | B_FOLLOW_LEFT);
6315  if (m_IgnorePreviousClassCheckboxPntr == NULL) goto ErrorExit;
6316  AddChild (m_IgnorePreviousClassCheckboxPntr);
6317  m_IgnorePreviousClassCheckboxPntr->SetTarget (this);
6318  m_IgnorePreviousClassCheckboxPntr->ResizeToPreferred ();
6319  m_IgnorePreviousClassCheckboxPntr->GetPreferredSize (&Width, &Height);
6320  X += Width + g_MarginBetweenControls;
6321
6322  /* The fifth row contains the server mode checkbox. */
6323
6324  RowTop += RowHeight /* previous row's RowHeight */;
6325  TempRect = Bounds ();
6326  RowHeight = g_CheckBoxHeight;
6327  RowHeight = ceilf (RowHeight * 1.1);
6328
6329  StringPntr = "Print errors to Terminal";
6330  m_ServerModeCachedValue = false;
6331
6332  Margin = ceilf ((RowHeight - g_CheckBoxHeight) / 2);
6333  TempRect.top = RowTop + Margin;
6334  TempRect.bottom = TempRect.top + g_CheckBoxHeight;
6335  m_ServerModeCheckboxPntr = new BCheckBox (TempRect,
6336    "ServerMode Check",
6337    StringPntr,
6338    new BMessage (MSG_SERVER_MODE),
6339    B_FOLLOW_TOP | B_FOLLOW_LEFT);
6340  if (m_ServerModeCheckboxPntr == NULL) goto ErrorExit;
6341  AddChild (m_ServerModeCheckboxPntr);
6342  m_ServerModeCheckboxPntr->SetTarget (this);
6343  m_ServerModeCheckboxPntr->ResizeToPreferred ();
6344  m_ServerModeCheckboxPntr->GetPreferredSize (&Width, &Height);
6345
6346  /* This row just contains a huge pop-up menu which shows the tokenize mode
6347  and an explanation of what each mode does. */
6348
6349  RowTop += RowHeight /* previous row's RowHeight */;
6350  TempRect = Bounds ();
6351  RowHeight = g_PopUpMenuHeight;
6352  RowHeight = ceilf (RowHeight * 1.1);
6353
6354  Margin = ceilf ((RowHeight - g_PopUpMenuHeight) / 2);
6355  TempRect.top = RowTop + Margin;
6356  TempRect.bottom = TempRect.top + g_PopUpMenuHeight;
6357
6358  m_TokenizeModeCachedValue = TM_MAX; /* Illegal value will force redraw. */
6359  m_TokenizeModeMenuBarPntr = new BMenuBar (TempRect, "TokenizeModeMenuBar",
6360    B_FOLLOW_LEFT_RIGHT | B_FOLLOW_TOP, B_ITEMS_IN_COLUMN,
6361    false /* resize to fit items */);
6362  if (m_TokenizeModeMenuBarPntr == NULL) goto ErrorExit;
6363  m_TokenizeModePopUpMenuPntr = new BPopUpMenu ("TokenizeModePopUpMenu");
6364  if (m_TokenizeModePopUpMenuPntr == NULL) goto ErrorExit;
6365
6366  for (TokenizeMode = (TokenizeModes) 0;
6367  TokenizeMode < TM_MAX;
6368  TokenizeMode = (TokenizeModes) ((int) TokenizeMode + 1))
6369  {
6370    /* Each different tokenize mode gets its own menu item.  Selecting the item
6371    will send a canned command to the application to switch to the appropriate
6372    tokenize mode.  An optional explanation of each mode is added to the mode
6373    name string. */
6374
6375    CommandMessage.MakeEmpty ();
6376    CommandMessage.what = B_SET_PROPERTY;
6377    CommandMessage.AddSpecifier (g_PropertyNames[PN_TOKENIZE_MODE]);
6378    CommandMessage.AddString (g_DataName, g_TokenizeModeNames[TokenizeMode]);
6379    strcpy (TempString, g_TokenizeModeNames[TokenizeMode]);
6380    switch (TokenizeMode)
6381    {
6382      case TM_WHOLE:
6383        strcat (TempString, " - Scan everything");
6384        break;
6385
6386      case TM_PLAIN_TEXT:
6387        strcat (TempString, " - Scan e-mail body text except rich text");
6388        break;
6389
6390      case TM_PLAIN_TEXT_HEADER:
6391        strcat (TempString, " - Scan entire e-mail text except rich text");
6392        break;
6393
6394      case TM_ANY_TEXT:
6395        strcat (TempString, " - Scan e-mail body text and text attachments");
6396        break;
6397
6398      case TM_ANY_TEXT_HEADER:
6399       strcat (TempString, " - Scan entire e-mail text and text attachments (recommended)");
6400        break;
6401
6402      case TM_ALL_PARTS:
6403        strcat (TempString, " - Scan e-mail body and all attachments");
6404        break;
6405
6406      case TM_ALL_PARTS_HEADER:
6407        strcat (TempString, " - Scan all parts of the e-mail");
6408        break;
6409
6410      case TM_JUST_HEADER:
6411        strcat (TempString, " - Scan just the header (mail routing information)");
6412        break;
6413
6414      default:
6415        break;
6416    }
6417    TempMenuItemPntr =
6418      new BMenuItem (TempString, new BMessage (CommandMessage));
6419    if (TempMenuItemPntr == NULL) goto ErrorExit;
6420    TempMenuItemPntr->SetTarget (be_app);
6421    m_TokenizeModePopUpMenuPntr->AddItem (TempMenuItemPntr);
6422  }
6423  m_TokenizeModeMenuBarPntr->AddItem (m_TokenizeModePopUpMenuPntr);
6424  AddChild (m_TokenizeModeMenuBarPntr);
6425
6426  /* This row just contains a huge pop-up menu which shows the scoring mode
6427  and an explanation of what each mode does. */
6428
6429  RowTop += RowHeight /* previous row's RowHeight */;
6430  TempRect = Bounds ();
6431  RowHeight = g_PopUpMenuHeight;
6432  RowHeight = ceilf (RowHeight * 1.1);
6433
6434  Margin = ceilf ((RowHeight - g_PopUpMenuHeight) / 2);
6435  TempRect.top = RowTop + Margin;
6436  TempRect.bottom = TempRect.top + g_PopUpMenuHeight;
6437
6438  m_ScoringModeCachedValue = SM_MAX; /* Illegal value will force redraw. */
6439  m_ScoringModeMenuBarPntr = new BMenuBar (TempRect, "ScoringModeMenuBar",
6440    B_FOLLOW_LEFT_RIGHT | B_FOLLOW_TOP, B_ITEMS_IN_COLUMN,
6441    false /* resize to fit items */);
6442  if (m_ScoringModeMenuBarPntr == NULL) goto ErrorExit;
6443  m_ScoringModePopUpMenuPntr = new BPopUpMenu ("ScoringModePopUpMenu");
6444  if (m_ScoringModePopUpMenuPntr == NULL) goto ErrorExit;
6445
6446  for (ScoringMode = (ScoringModes) 0;
6447  ScoringMode < SM_MAX;
6448  ScoringMode = (ScoringModes) ((int) ScoringMode + 1))
6449  {
6450    /* Each different scoring mode gets its own menu item.  Selecting the item
6451    will send a canned command to the application to switch to the appropriate
6452    scoring mode.  An optional explanation of each mode is added to the mode
6453    name string. */
6454
6455    CommandMessage.MakeEmpty ();
6456    CommandMessage.what = B_SET_PROPERTY;
6457    CommandMessage.AddSpecifier (g_PropertyNames[PN_SCORING_MODE]);
6458    CommandMessage.AddString (g_DataName, g_ScoringModeNames[ScoringMode]);
6459/*
6460    strcpy (TempString, g_ScoringModeNames[ScoringMode]);
6461    switch (ScoringMode)
6462    {
6463      case SM_ROBINSON:
6464        strcat (TempString, " - Learning Method 1: Naive Bayesian");
6465        break;
6466
6467      case SM_CHISQUARED:
6468        strcat (TempString, " - Learning Method 2: Chi-Squared");
6469        break;
6470
6471      default:
6472        break;
6473    }
6474*/
6475    switch (ScoringMode)
6476    {
6477      case SM_ROBINSON:
6478        strcpy (TempString, "Learning method 1: Naive Bayesian");
6479        break;
6480
6481      case SM_CHISQUARED:
6482        strcpy (TempString, "Learning method 2: Chi-Squared");
6483        break;
6484
6485      default:
6486        break;
6487    }
6488    TempMenuItemPntr =
6489      new BMenuItem (TempString, new BMessage (CommandMessage));
6490    if (TempMenuItemPntr == NULL) goto ErrorExit;
6491    TempMenuItemPntr->SetTarget (be_app);
6492    m_ScoringModePopUpMenuPntr->AddItem (TempMenuItemPntr);
6493  }
6494  m_ScoringModeMenuBarPntr->AddItem (m_ScoringModePopUpMenuPntr);
6495  AddChild (m_ScoringModeMenuBarPntr);
6496
6497  /* The next row has the install MIME types button and the reset to defaults
6498  button, one on the left and the other on the right. */
6499
6500  RowTop += RowHeight /* previous row's RowHeight */;
6501  TempRect = Bounds ();
6502  RowHeight = g_ButtonHeight;
6503  RowHeight = ceilf (RowHeight * 1.1);
6504
6505  Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6506  TempRect.top = RowTop + Margin;
6507  TempRect.bottom = TempRect.top + g_ButtonHeight;
6508
6509  CommandMessage.MakeEmpty ();
6510  CommandMessage.what = B_EXECUTE_PROPERTY;
6511  CommandMessage.AddSpecifier (g_PropertyNames[PN_INSTALL_THINGS]);
6512  m_InstallThingsButtonPntr = new BButton (TempRect, "Install Button",
6513    "Install spam types",
6514    new BMessage (CommandMessage),
6515    B_FOLLOW_LEFT | B_FOLLOW_TOP);
6516  if (m_InstallThingsButtonPntr == NULL) goto ErrorExit;
6517  AddChild (m_InstallThingsButtonPntr);
6518  m_InstallThingsButtonPntr->SetTarget (be_app);
6519  m_InstallThingsButtonPntr->ResizeToPreferred ();
6520
6521  /* The Reset to Defaults button.  On the right side of the row. */
6522
6523  Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6524  TempRect = Bounds ();
6525  TempRect.top = RowTop + Margin;
6526  TempRect.bottom = TempRect.top + g_ButtonHeight;
6527
6528  CommandMessage.MakeEmpty ();
6529  CommandMessage.what = B_EXECUTE_PROPERTY;
6530  CommandMessage.AddSpecifier (g_PropertyNames[PN_RESET_TO_DEFAULTS]);
6531  m_ResetToDefaultsButtonPntr = new BButton (TempRect, "Reset Button",
6532    "Default settings", new BMessage (CommandMessage),
6533    B_FOLLOW_RIGHT | B_FOLLOW_TOP);
6534  if (m_ResetToDefaultsButtonPntr == NULL) goto ErrorExit;
6535  AddChild (m_ResetToDefaultsButtonPntr);
6536  m_ResetToDefaultsButtonPntr->SetTarget (be_app);
6537  m_ResetToDefaultsButtonPntr->ResizeToPreferred ();
6538  m_ResetToDefaultsButtonPntr->GetPreferredSize (&Width, &Height);
6539  m_ResetToDefaultsButtonPntr->MoveTo (TempRect.right - Width, TempRect.top);
6540
6541  /* The next row contains the Estimate, Add Examples and About buttons. */
6542
6543  RowTop += RowHeight /* previous row's RowHeight */;
6544  TempRect = Bounds ();
6545  X = TempRect.left;
6546  RowHeight = g_ButtonHeight;
6547  RowHeight = ceilf (RowHeight * 1.1);
6548
6549  Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6550  TempRect.top = RowTop + Margin;
6551  TempRect.bottom = TempRect.top + g_ButtonHeight;
6552  TempRect.left = X;
6553
6554  m_EstimateSpamButtonPntr = new BButton (TempRect, "Estimate Button",
6555    "Scan a message",
6556    new BMessage (MSG_ESTIMATE_BUTTON),
6557    B_FOLLOW_LEFT | B_FOLLOW_TOP);
6558  if (m_EstimateSpamButtonPntr == NULL) goto ErrorExit;
6559  AddChild (m_EstimateSpamButtonPntr);
6560  m_EstimateSpamButtonPntr->SetTarget (this);
6561  m_EstimateSpamButtonPntr->ResizeToPreferred ();
6562  X = m_EstimateSpamButtonPntr->Frame().right + g_MarginBetweenControls;
6563
6564  /* The Add Example button in the middle.  Does the same as the browse button,
6565  but don't tell anyone that! */
6566
6567  Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6568  TempRect.top = RowTop + Margin;
6569  TempRect.bottom = TempRect.top + g_ButtonHeight;
6570  TempRect.left = X;
6571
6572  m_AddExampleButtonPntr = new BButton (TempRect, "Example Button",
6573    "Train spam filter on a message",
6574    new BMessage (MSG_BROWSE_BUTTON),
6575    B_FOLLOW_LEFT_RIGHT | B_FOLLOW_TOP,
6576    B_WILL_DRAW | B_NAVIGABLE | B_FULL_UPDATE_ON_RESIZE);
6577  if (m_AddExampleButtonPntr == NULL) goto ErrorExit;
6578  AddChild (m_AddExampleButtonPntr);
6579  m_AddExampleButtonPntr->SetTarget (this);
6580  m_AddExampleButtonPntr->ResizeToPreferred ();
6581  X = m_AddExampleButtonPntr->Frame().right + g_MarginBetweenControls;
6582
6583  /* Add the About button on the right. */
6584
6585  Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6586  TempRect = Bounds ();
6587  TempRect.top = RowTop + Margin;
6588  TempRect.bottom = TempRect.top + g_ButtonHeight;
6589  TempRect.left = X;
6590
6591  m_AboutButtonPntr = new BButton (TempRect, "About Button",
6592    "About���",
6593    new BMessage (B_ABOUT_REQUESTED),
6594    B_FOLLOW_RIGHT | B_FOLLOW_TOP);
6595  if (m_AboutButtonPntr == NULL) goto ErrorExit;
6596  AddChild (m_AboutButtonPntr);
6597  m_AboutButtonPntr->SetTarget (be_app);
6598
6599  /* This row displays various counters.  Starting with the genuine messages
6600  count on the left. */
6601
6602  RowTop += RowHeight /* previous row's RowHeight */;
6603  TempRect = Bounds ();
6604  RowHeight = g_TextBoxHeight;
6605  RowHeight = ceilf (RowHeight * 1.1);
6606
6607  StringPntr = "Genuine messages:";
6608  m_GenuineCountCachedValue = 87654321;
6609  sprintf (TempString, "%d", (int) m_GenuineCountCachedValue);
6610
6611  Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6612  TempRect = Bounds ();
6613  TempRect.top = RowTop + Margin;
6614  TempRect.bottom = TempRect.top + g_TextBoxHeight;
6615  TempRect.right = TempRect.left +
6616    be_plain_font->StringWidth (StringPntr) +
6617    be_plain_font->StringWidth (TempString) +
6618    3 * g_MarginBetweenControls;
6619
6620  m_GenuineCountTextboxPntr = new BTextControl (TempRect,
6621    "Genuine count",
6622    StringPntr /* label */,
6623    TempString /* text */,
6624    NULL /* no message */,
6625    B_FOLLOW_LEFT | B_FOLLOW_TOP,
6626    B_WILL_DRAW /* not B_NAVIGABLE */);
6627  AddChild (m_GenuineCountTextboxPntr);
6628  m_GenuineCountTextboxPntr->SetTarget (this); /* Not that it matters. */
6629  m_GenuineCountTextboxPntr->SetDivider (
6630    be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6631  m_GenuineCountTextboxPntr->SetEnabled (false); /* For display only. */
6632
6633  /* The word count in the center. */
6634
6635  StringPntr = "Word count:";
6636  m_WordCountCachedValue = 87654321;
6637  sprintf (TempString, "%d", (int) m_WordCountCachedValue);
6638
6639  Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6640  TempRect = Bounds ();
6641  TempRect.top = RowTop + Margin;
6642  TempRect.bottom = TempRect.top + g_TextBoxHeight;
6643  Width = be_plain_font->StringWidth (StringPntr) +
6644    be_plain_font->StringWidth (TempString) +
6645    3 * g_MarginBetweenControls;
6646  TempRect.left = ceilf ((TempRect.right - TempRect.left) / 2 - Width / 2);
6647  TempRect.right = TempRect.left + Width;
6648
6649  m_WordCountTextboxPntr = new BTextControl (TempRect,
6650    "Word count",
6651    StringPntr /* label */,
6652    TempString /* text */,
6653    NULL /* no message */,
6654    B_FOLLOW_H_CENTER | B_FOLLOW_TOP,
6655    B_WILL_DRAW /* not B_NAVIGABLE */);
6656  AddChild (m_WordCountTextboxPntr);
6657  m_WordCountTextboxPntr->SetTarget (this); /* Not that it matters. */
6658  m_WordCountTextboxPntr->SetDivider (
6659    be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6660  m_WordCountTextboxPntr->SetEnabled (false); /* For display only. */
6661
6662  /* The spam count on the far right. */
6663
6664  StringPntr = "Spam messages:";
6665  m_SpamCountCachedValue = 87654321;
6666  sprintf (TempString, "%d", (int) m_SpamCountCachedValue);
6667
6668  Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6669  TempRect = Bounds ();
6670  TempRect.top = RowTop + Margin;
6671  TempRect.bottom = TempRect.top + g_TextBoxHeight;
6672  TempRect.left = TempRect.right -
6673    be_plain_font->StringWidth (StringPntr) -
6674    be_plain_font->StringWidth (TempString) -
6675    3 * g_MarginBetweenControls;
6676
6677  m_SpamCountTextboxPntr = new BTextControl (TempRect,
6678    "Spam count",
6679    StringPntr /* label */,
6680    TempString /* text */,
6681    NULL /* no message */,
6682    B_FOLLOW_RIGHT | B_FOLLOW_TOP,
6683    B_WILL_DRAW /* not B_NAVIGABLE */);
6684  AddChild (m_SpamCountTextboxPntr);
6685  m_SpamCountTextboxPntr->SetTarget (this); /* Not that it matters. */
6686  m_SpamCountTextboxPntr->SetDivider (
6687    be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6688  m_SpamCountTextboxPntr->SetEnabled (false); /* For display only. */
6689
6690  /* Change the size of our view so it only takes up the space needed by the
6691  buttons. */
6692
6693  RowTop += RowHeight /* previous row's RowHeight */;
6694  ResizeTo (Bounds().Width(), RowTop - Bounds().top + 1);
6695
6696  return; /* Successful. */
6697
6698ErrorExit:
6699  DisplayErrorMessage ("Unable to initialise the controls view.");
6700}
6701
6702
6703void
6704ControlsView::BrowseForDatabaseFile ()
6705{
6706  if (m_BrowseFilePanelPntr == NULL)
6707  {
6708    BEntry      DirectoryEntry;
6709    entry_ref   DirectoryEntryRef;
6710    BMessage    GetDatabasePathCommand;
6711    BMessage    GetDatabasePathResult;
6712    const char *StringPntr = NULL;
6713
6714    /* Create a new file panel.  First set up the entry ref stuff so that the
6715    file panel can open to show the initial directory (the one where the
6716    database file currently is).  Note that we have to create it after the
6717    window and view are up and running, otherwise the BMessenger won't point to
6718    a valid looper/handler.  First find out the current database file name to
6719    use as a starting point. */
6720
6721    GetDatabasePathCommand.what = B_GET_PROPERTY;
6722    GetDatabasePathCommand.AddSpecifier (g_PropertyNames[PN_DATABASE_FILE]);
6723    be_app_messenger.SendMessage (&GetDatabasePathCommand,
6724      &GetDatabasePathResult, 5000000 /* delivery timeout */,
6725      5000000 /* reply timeout */);
6726    if (GetDatabasePathResult.FindString (g_ResultName, &StringPntr) != B_OK ||
6727    DirectoryEntry.SetTo (StringPntr) != B_OK ||
6728    DirectoryEntry.GetParent (&DirectoryEntry) != B_OK)
6729      DirectoryEntry.SetTo ("."); /* Default directory if we can't find it. */
6730    if (DirectoryEntry.GetRef (&DirectoryEntryRef) != B_OK)
6731    {
6732      DisplayErrorMessage (
6733        "Unable to set up the file requestor starting directory.  Sorry.");
6734      return;
6735    }
6736
6737    m_BrowseFilePanelPntr = new BFilePanel (
6738      B_OPEN_PANEL /* mode */,
6739      &be_app_messenger /* target for event messages */,
6740      &DirectoryEntryRef /* starting directory */,
6741      B_FILE_NODE,
6742      true /* true for multiple selections */,
6743      NULL /* canned message */,
6744      NULL /* ref filter */,
6745      false /* true for modal */,
6746      true /* true to hide when done */);
6747  }
6748
6749  if (m_BrowseFilePanelPntr != NULL)
6750    m_BrowseFilePanelPntr->Show (); /* Answer returned later in RefsReceived. */
6751}
6752
6753
6754void
6755ControlsView::BrowseForFileToEstimate ()
6756{
6757  if (m_EstimateSpamFilePanelPntr == NULL)
6758  {
6759    BEntry      DirectoryEntry;
6760    entry_ref   DirectoryEntryRef;
6761    status_t    ErrorCode;
6762    BMessenger  MessengerToSelf (this);
6763    BPath       PathToMailDirectory;
6764
6765    /* Create a new file panel.  First set up the entry ref stuff so that the
6766    file panel can open to show the initial directory (the user's mail
6767    directory).  Note that we have to create the panel after the window and
6768    view are up and running, otherwise the BMessenger won't point to a valid
6769    looper/handler. */
6770
6771    ErrorCode = find_directory (B_USER_DIRECTORY, &PathToMailDirectory);
6772    if (ErrorCode == B_OK)
6773    {
6774      PathToMailDirectory.Append ("mail");
6775      ErrorCode = DirectoryEntry.SetTo (PathToMailDirectory.Path(),
6776        true /* traverse symbolic links*/);
6777      if (ErrorCode != B_OK || !DirectoryEntry.Exists ())
6778      {
6779        /* If no mail directory, try home directory. */
6780        find_directory (B_USER_DIRECTORY, &PathToMailDirectory);
6781        ErrorCode = DirectoryEntry.SetTo (PathToMailDirectory.Path(), true);
6782      }
6783    }
6784    if (ErrorCode != B_OK)
6785      PathToMailDirectory.SetTo (".");
6786
6787    DirectoryEntry.SetTo (PathToMailDirectory.Path(), true);
6788    if (DirectoryEntry.GetRef (&DirectoryEntryRef) != B_OK)
6789    {
6790      DisplayErrorMessage (
6791        "Unable to set up the file requestor starting directory.  Sorry.");
6792      return;
6793    }
6794
6795    m_EstimateSpamFilePanelPntr = new BFilePanel (
6796      B_OPEN_PANEL /* mode */,
6797      &MessengerToSelf /* target for event messages */,
6798      &DirectoryEntryRef /* starting directory */,
6799      B_FILE_NODE,
6800      true /* true for multiple selections */,
6801      new BMessage (MSG_ESTIMATE_FILE_REFS) /* canned message */,
6802      NULL /* ref filter */,
6803      false /* true for modal */,
6804      true /* true to hide when done */);
6805  }
6806
6807  if (m_EstimateSpamFilePanelPntr != NULL)
6808    m_EstimateSpamFilePanelPntr->Show (); /* Answer sent via a message. */
6809}
6810
6811
6812/* The display has been resized.  Have to manually adjust the popup menu bar to
6813show the new size (the sub-items need to be resized too).  Then make it redraw.
6814Well, actually just resetting the mark on the current item will resize it
6815properly. */
6816
6817void
6818ControlsView::FrameResized (float, float)
6819{
6820  m_ScoringModeCachedValue = SM_MAX; /* Force it to reset the mark. */
6821  m_TokenizeModeCachedValue = TM_MAX; /* Force it to reset the mark. */
6822}
6823
6824
6825void
6826ControlsView::MessageReceived (BMessage *MessagePntr)
6827{
6828  BMessage CommandMessage;
6829  bool     TempBool;
6830  uint32   TempUint32;
6831
6832  switch (MessagePntr->what)
6833  {
6834    case MSG_BROWSE_BUTTON:
6835      BrowseForDatabaseFile ();
6836      break;
6837
6838    case MSG_DATABASE_NAME:
6839      if (strcmp (m_DatabaseFileNameCachedValue,
6840      m_DatabaseFileNameTextboxPntr->Text ()) != 0)
6841        SubmitCommandString (PN_DATABASE_FILE, B_SET_PROPERTY,
6842        m_DatabaseFileNameTextboxPntr->Text ());
6843      break;
6844
6845    case MSG_ESTIMATE_BUTTON:
6846      BrowseForFileToEstimate ();
6847      break;
6848
6849    case MSG_ESTIMATE_FILE_REFS:
6850      EstimateRefFilesAndDisplay (MessagePntr);
6851      break;
6852
6853    case MSG_IGNORE_CLASSIFICATION:
6854      TempBool = (m_IgnorePreviousClassCheckboxPntr->Value() == B_CONTROL_ON);
6855      if (m_IgnorePreviousClassCachedValue != TempBool)
6856        SubmitCommandBool (PN_IGNORE_PREVIOUS_CLASSIFICATION,
6857        B_SET_PROPERTY, TempBool);
6858      break;
6859
6860    case MSG_PURGE_AGE:
6861      TempUint32 = strtoul (m_PurgeAgeTextboxPntr->Text (), NULL, 10);
6862      if (m_PurgeAgeCachedValue != TempUint32)
6863        SubmitCommandInt32 (PN_PURGE_AGE, B_SET_PROPERTY, TempUint32);
6864      break;
6865
6866    case MSG_PURGE_POPULARITY:
6867      TempUint32 = strtoul (m_PurgePopularityTextboxPntr->Text (), NULL, 10);
6868      if (m_PurgePopularityCachedValue != TempUint32)
6869        SubmitCommandInt32 (PN_PURGE_POPULARITY, B_SET_PROPERTY, TempUint32);
6870      break;
6871
6872    case MSG_SERVER_MODE:
6873      TempBool = (m_ServerModeCheckboxPntr->Value() == B_CONTROL_ON);
6874      if (m_ServerModeCachedValue != TempBool)
6875        SubmitCommandBool (PN_SERVER_MODE, B_SET_PROPERTY, TempBool);
6876      break;
6877
6878    default:
6879      BView::MessageReceived (MessagePntr);
6880  }
6881}
6882
6883
6884/* Check the server for changes in the state of the database, and if there are
6885any changes, update the displayed values.  Since this is a read only
6886examination of the server, we go directly to the application rather than
6887sending it messages.  Also, when sending messages, we can't find out what it is
6888doing while it is busy with a batch of spam additions (all the spam add
6889commands will be in the queue ahead of our requests for info).  Instead, we
6890lock the BApplication (so it isn't changing things while we're looking) and
6891retrieve our values. */
6892
6893void
6894ControlsView::PollServerForChanges ()
6895{
6896  ABSApp     *MyAppPntr;
6897  BMenuItem  *TempMenuItemPntr;
6898  char        TempString [PATH_MAX];
6899  BWindow    *WindowPntr;
6900
6901  /* We need a pointer to our window, for changing the title etc. */
6902
6903  WindowPntr = Window ();
6904  if (WindowPntr == NULL)
6905    return; /* No window, no point in updating the display! */
6906
6907  /* Check the server mode flag.  If the mode is off, then the window has to be
6908  minimized.  Similarly, if it gets turned on, maximize the window.  Note that
6909  the user can maximize the window manually, even while still in server mode.
6910  */
6911
6912  if (g_ServerMode != m_ServerModeCachedValue &&
6913  m_ServerModeCheckboxPntr != NULL)
6914  {
6915    m_ServerModeCachedValue = g_ServerMode;
6916    m_ServerModeCheckboxPntr->SetValue (
6917      m_ServerModeCachedValue ? B_CONTROL_ON : B_CONTROL_OFF);
6918    WindowPntr->Minimize (m_ServerModeCachedValue);
6919  }
6920
6921  if (WindowPntr->IsMinimized ())
6922    return; /* Window isn't visible, don't waste time updating it. */
6923
6924  /* So that people don't stare at a blank screen, request a database load if
6925  nothing is there.  But only do it once, so the user doesn't get a lot of
6926  invalid database messages if one doesn't exist yet.  In server mode, we never
6927  get this far so it is only loaded when the user wants to see something. */
6928
6929  if (!m_DatabaseLoadDone)
6930  {
6931    m_DatabaseLoadDone = true;
6932    /* Counting the number of words will load the database. */
6933    SubmitCommandString (PN_DATABASE_FILE, B_COUNT_PROPERTIES, "");
6934  }
6935
6936  /* Check various read only values, which can be read from the BApplication
6937  without having to lock it.  This is useful for displaying the number of words
6938  as it is changing.  First up is the purge age setting. */
6939
6940  MyAppPntr = dynamic_cast<ABSApp *> (be_app);
6941  if (MyAppPntr == NULL)
6942    return; /* Doesn't exist or is the wrong class.  Not likely! */
6943
6944  if (MyAppPntr->m_PurgeAge != m_PurgeAgeCachedValue &&
6945  m_PurgeAgeTextboxPntr != NULL)
6946  {
6947    m_PurgeAgeCachedValue = MyAppPntr->m_PurgeAge;
6948    sprintf (TempString, "%lu", m_PurgeAgeCachedValue);
6949    m_PurgeAgeTextboxPntr->SetText (TempString);
6950  }
6951
6952  /* Check the purge popularity. */
6953
6954  if (MyAppPntr->m_PurgePopularity != m_PurgePopularityCachedValue &&
6955  m_PurgePopularityTextboxPntr != NULL)
6956  {
6957    m_PurgePopularityCachedValue = MyAppPntr->m_PurgePopularity;
6958    sprintf (TempString, "%lu", m_PurgePopularityCachedValue);
6959    m_PurgePopularityTextboxPntr->SetText (TempString);
6960  }
6961
6962  /* Check the Ignore Previous Classification flag. */
6963
6964  if (MyAppPntr->m_IgnorePreviousClassification !=
6965  m_IgnorePreviousClassCachedValue &&
6966  m_IgnorePreviousClassCheckboxPntr != NULL)
6967  {
6968    m_IgnorePreviousClassCachedValue =
6969      MyAppPntr->m_IgnorePreviousClassification;
6970    m_IgnorePreviousClassCheckboxPntr->SetValue (
6971      m_IgnorePreviousClassCachedValue ? B_CONTROL_ON : B_CONTROL_OFF);
6972  }
6973
6974  /* Update the genuine count. */
6975
6976  if (MyAppPntr->m_TotalGenuineMessages != m_GenuineCountCachedValue &&
6977  m_GenuineCountTextboxPntr != NULL)
6978  {
6979    m_GenuineCountCachedValue = MyAppPntr->m_TotalGenuineMessages;
6980    sprintf (TempString, "%lu", m_GenuineCountCachedValue);
6981    m_GenuineCountTextboxPntr->SetText (TempString);
6982  }
6983
6984  /* Update the spam count. */
6985
6986  if (MyAppPntr->m_TotalSpamMessages != m_SpamCountCachedValue &&
6987  m_SpamCountTextboxPntr != NULL)
6988  {
6989    m_SpamCountCachedValue = MyAppPntr->m_TotalSpamMessages;
6990    sprintf (TempString, "%lu", m_SpamCountCachedValue);
6991    m_SpamCountTextboxPntr->SetText (TempString);
6992  }
6993
6994  /* Update the word count. */
6995
6996  if (MyAppPntr->m_WordCount != m_WordCountCachedValue &&
6997  m_WordCountTextboxPntr != NULL)
6998  {
6999    m_WordCountCachedValue = MyAppPntr->m_WordCount;
7000    sprintf (TempString, "%lu", m_WordCountCachedValue);
7001    m_WordCountTextboxPntr->SetText (TempString);
7002  }
7003
7004  /* Update the tokenize mode pop-up menu. */
7005
7006  if (MyAppPntr->m_TokenizeMode != m_TokenizeModeCachedValue &&
7007  m_TokenizeModePopUpMenuPntr != NULL)
7008  {
7009    m_TokenizeModeCachedValue = MyAppPntr->m_TokenizeMode;
7010    TempMenuItemPntr =
7011      m_TokenizeModePopUpMenuPntr->ItemAt ((int) m_TokenizeModeCachedValue);
7012    if (TempMenuItemPntr != NULL)
7013      TempMenuItemPntr->SetMarked (true);
7014  }
7015
7016  /* Update the scoring mode pop-up menu. */
7017
7018  if (MyAppPntr->m_ScoringMode != m_ScoringModeCachedValue &&
7019  m_ScoringModePopUpMenuPntr != NULL)
7020  {
7021    m_ScoringModeCachedValue = MyAppPntr->m_ScoringMode;
7022    TempMenuItemPntr =
7023      m_ScoringModePopUpMenuPntr->ItemAt ((int) m_ScoringModeCachedValue);
7024    if (TempMenuItemPntr != NULL)
7025      TempMenuItemPntr->SetMarked (true);
7026  }
7027
7028  /* Lock the application.  This will stop it from processing any further
7029  messages until we are done.  Or if it is busy, the lock will fail. */
7030
7031  if (MyAppPntr->LockWithTimeout (100000) != B_OK)
7032    return; /* It's probably busy doing something. */
7033
7034  /* See if the database file name has changed. */
7035
7036  if (strcmp (MyAppPntr->m_DatabaseFileName.String (),
7037  m_DatabaseFileNameCachedValue) != 0 &&
7038  m_DatabaseFileNameTextboxPntr != NULL)
7039  {
7040    strcpy (m_DatabaseFileNameCachedValue,
7041      MyAppPntr->m_DatabaseFileName.String ());
7042    m_DatabaseFileNameTextboxPntr->SetText (m_DatabaseFileNameCachedValue);
7043    WindowPntr->SetTitle (m_DatabaseFileNameCachedValue);
7044  }
7045
7046  /* Done.  Let the BApplication continue processing messages. */
7047
7048  MyAppPntr->Unlock ();
7049}
7050
7051
7052void
7053ControlsView::Pulse ()
7054{
7055  if (system_time () > m_TimeOfLastPoll + 200000)
7056  {
7057    PollServerForChanges ();
7058    m_TimeOfLastPoll = system_time ();
7059  }
7060}
7061
7062
7063
7064/******************************************************************************
7065 * Implementation of the DatabaseWindow class, constructor, destructor and the
7066 * rest of the member functions in mostly alphabetical order.
7067 */
7068
7069DatabaseWindow::DatabaseWindow ()
7070: BWindow (BRect (30, 30, 620, 400),
7071    "Haiku spam filter server",
7072    B_DOCUMENT_WINDOW, B_ASYNCHRONOUS_CONTROLS)
7073{
7074  BRect TempRect;
7075
7076  /* Add the controls view. */
7077
7078  m_ControlsViewPntr = new ControlsView (Bounds ());
7079  if (m_ControlsViewPntr == NULL)
7080    goto ErrorExit;
7081  AddChild (m_ControlsViewPntr);
7082
7083  /* Add the word view in the remaining space under the controls view. */
7084
7085
7086  TempRect = Bounds ();
7087  TempRect.top = m_ControlsViewPntr->Frame().bottom + 1;
7088  m_WordsViewPntr = new WordsView (TempRect);
7089  if (m_WordsViewPntr == NULL)
7090    goto ErrorExit;
7091  AddChild (m_WordsViewPntr);
7092
7093 /* Minimize the window if we are starting up in server mode.  This is done
7094	before the window is open so it doesn't flash onto the screen, and possibly
7095	steal a keystroke or two.  The ControlsView will further update the minimize
7096	mode when it detects changes in the server mode. */
7097  Minimize (g_ServerMode);
7098
7099  return;
7100
7101ErrorExit:
7102  DisplayErrorMessage ("Unable to initialise the window contents.");
7103}
7104
7105
7106void
7107DatabaseWindow::MessageReceived (BMessage *MessagePntr)
7108{
7109  if (MessagePntr->what == B_MOUSE_WHEEL_CHANGED)
7110  {
7111    /* Pass the mouse wheel stuff down to the words view, since that's the only
7112    one which does scrolling so we don't need to worry about whether it has
7113    focus or not. */
7114
7115    if (m_WordsViewPntr != NULL)
7116      m_WordsViewPntr->MessageReceived (MessagePntr);
7117  }
7118  else
7119    BWindow::MessageReceived (MessagePntr);
7120}
7121
7122
7123bool
7124DatabaseWindow::QuitRequested ()
7125{
7126  be_app->PostMessage (B_QUIT_REQUESTED);
7127  return true;
7128}
7129
7130
7131
7132/******************************************************************************
7133 * Implementation of the word display view.
7134 */
7135
7136WordsView::WordsView (BRect NewBounds)
7137: BView (NewBounds, "WordsView", B_FOLLOW_ALL_SIDES,
7138    B_WILL_DRAW | B_FULL_UPDATE_ON_RESIZE | B_NAVIGABLE | B_PULSE_NEEDED),
7139  m_ArrowLineDownPntr (NULL),
7140  m_ArrowLineUpPntr (NULL),
7141  m_ArrowPageDownPntr (NULL),
7142  m_ArrowPageUpPntr (NULL),
7143  m_LastTimeAKeyWasPressed (0)
7144{
7145  font_height TempFontHeight;
7146
7147  GetFont (&m_TextFont); /* Modify the default font to be our own. */
7148  m_TextFont.SetSize (ceilf (m_TextFont.Size() * 1.1));
7149  m_TextFont.GetHeight (&TempFontHeight);
7150  SetFont (&m_TextFont);
7151
7152  m_LineHeight = ceilf (TempFontHeight.ascent +
7153    TempFontHeight.descent + TempFontHeight.leading);
7154  m_AscentHeight = ceilf (TempFontHeight.ascent);
7155  m_TextHeight = ceilf (TempFontHeight.ascent +
7156    TempFontHeight.descent);
7157
7158  m_FocusedColour.red = 255;
7159  m_FocusedColour.green = 255;
7160  m_FocusedColour.blue = 255;
7161  m_FocusedColour.alpha = 255;
7162
7163  m_UnfocusedColour.red = 245;
7164  m_UnfocusedColour.green = 245;
7165  m_UnfocusedColour.blue = 255;
7166  m_UnfocusedColour.alpha = 255;
7167
7168  m_BackgroundColour = m_UnfocusedColour;
7169  SetViewColor (m_BackgroundColour);
7170  SetLowColor (m_BackgroundColour);
7171  SetHighColor (0, 0, 0);
7172
7173  strcpy (m_FirstDisplayedWord, "a");
7174}
7175
7176
7177void
7178WordsView::AttachedToWindow ()
7179{
7180  BPolygon        DownLinePolygon (g_DownLinePoints,
7181                    sizeof (g_DownLinePoints) /
7182                    sizeof (g_DownLinePoints[0]));
7183
7184  BPolygon        DownPagePolygon (g_DownPagePoints,
7185                    sizeof (g_DownPagePoints) /
7186                    sizeof (g_DownPagePoints[0]));
7187
7188  BPolygon        UpLinePolygon (g_UpLinePoints,
7189                    sizeof (g_UpLinePoints) /
7190                    sizeof (g_UpLinePoints[0]));
7191
7192  BPolygon        UpPagePolygon (g_UpPagePoints,
7193                    sizeof (g_UpPagePoints) /
7194                    sizeof (g_UpPagePoints[0]));
7195
7196  BPicture        TempOffPicture;
7197  BPicture        TempOnPicture;
7198  BRect           TempRect;
7199
7200  /* Make the buttons and associated polygon images for the forward and
7201  backwards a word or a page of words buttons.  They're the width of the scroll
7202  bar area on the right, but twice as tall as usual, since there is no scroll
7203  bar and that will make it easier to use them.  First the up a line button. */
7204
7205  SetHighColor (0, 0, 0);
7206  BeginPicture (&TempOffPicture);
7207  FillPolygon (&UpLinePolygon);
7208  SetHighColor (180, 180, 180);
7209  StrokePolygon (&UpLinePolygon);
7210  EndPicture ();
7211
7212  SetHighColor (128, 128, 128);
7213  BeginPicture (&TempOnPicture);
7214  FillPolygon (&UpLinePolygon);
7215  EndPicture ();
7216
7217  TempRect = Bounds ();
7218  TempRect.bottom = TempRect.top + 2 * B_H_SCROLL_BAR_HEIGHT;
7219  TempRect.left = TempRect.right - B_V_SCROLL_BAR_WIDTH;
7220  m_ArrowLineUpPntr = new BPictureButton (TempRect, "Up Line",
7221    &TempOffPicture, &TempOnPicture,
7222    new BMessage (MSG_LINE_UP), B_ONE_STATE_BUTTON,
7223    B_FOLLOW_RIGHT | B_FOLLOW_TOP, B_WILL_DRAW | B_NAVIGABLE);
7224  if (m_ArrowLineUpPntr == NULL) goto ErrorExit;
7225  AddChild (m_ArrowLineUpPntr);
7226  m_ArrowLineUpPntr->SetTarget (this);
7227
7228  /* Up a page button. */
7229
7230  SetHighColor (0, 0, 0);
7231  BeginPicture (&TempOffPicture);
7232  FillPolygon (&UpPagePolygon);
7233  SetHighColor (180, 180, 180);
7234  StrokePolygon (&UpPagePolygon);
7235  EndPicture ();
7236
7237  SetHighColor (128, 128, 128);
7238  BeginPicture (&TempOnPicture);
7239  FillPolygon (&UpPagePolygon);
7240  EndPicture ();
7241
7242  TempRect = Bounds ();
7243  TempRect.top += 2 * B_H_SCROLL_BAR_HEIGHT + 1;
7244  TempRect.bottom = TempRect.top + 2 * B_H_SCROLL_BAR_HEIGHT;
7245  TempRect.left = TempRect.right - B_V_SCROLL_BAR_WIDTH;
7246  m_ArrowPageUpPntr = new BPictureButton (TempRect, "Up Page",
7247    &TempOffPicture, &TempOnPicture,
7248    new BMessage (MSG_PAGE_UP), B_ONE_STATE_BUTTON,
7249    B_FOLLOW_RIGHT | B_FOLLOW_TOP, B_WILL_DRAW | B_NAVIGABLE);
7250  if (m_ArrowPageUpPntr == NULL) goto ErrorExit;
7251  AddChild (m_ArrowPageUpPntr);
7252  m_ArrowPageUpPntr->SetTarget (this);
7253
7254  /* Down a page button. */
7255
7256  SetHighColor (0, 0, 0);
7257  BeginPicture (&TempOffPicture);
7258  FillPolygon (&DownPagePolygon);
7259  SetHighColor (180, 180, 180);
7260  StrokePolygon (&DownPagePolygon);
7261  EndPicture ();
7262
7263  SetHighColor (128, 128, 128);
7264  BeginPicture (&TempOnPicture);
7265  FillPolygon (&DownPagePolygon);
7266  EndPicture ();
7267
7268  TempRect = Bounds ();
7269  TempRect.bottom -= 3 * B_H_SCROLL_BAR_HEIGHT + 1;
7270  TempRect.top = TempRect.bottom - 2 * B_H_SCROLL_BAR_HEIGHT;
7271  TempRect.left = TempRect.right - B_V_SCROLL_BAR_WIDTH;
7272  m_ArrowPageDownPntr = new BPictureButton (TempRect, "Down Page",
7273    &TempOffPicture, &TempOnPicture,
7274    new BMessage (MSG_PAGE_DOWN), B_ONE_STATE_BUTTON,
7275    B_FOLLOW_RIGHT | B_FOLLOW_BOTTOM, B_WILL_DRAW | B_NAVIGABLE);
7276  if (m_ArrowPageDownPntr == NULL) goto ErrorExit;
7277  AddChild (m_ArrowPageDownPntr);
7278  m_ArrowPageDownPntr->SetTarget (this);
7279
7280  /* Down a line button. */
7281
7282  SetHighColor (0, 0, 0);
7283  BeginPicture (&TempOffPicture);
7284  FillPolygon (&DownLinePolygon);
7285  SetHighColor (180, 180, 180);
7286  StrokePolygon (&DownLinePolygon);
7287  EndPicture ();
7288
7289  SetHighColor (128, 128, 128);
7290  BeginPicture (&TempOnPicture);
7291  FillPolygon (&DownLinePolygon);
7292  EndPicture ();
7293
7294  TempRect = Bounds ();
7295  TempRect.bottom -= B_H_SCROLL_BAR_HEIGHT;
7296  TempRect.top = TempRect.bottom - 2 * B_H_SCROLL_BAR_HEIGHT;
7297  TempRect.left = TempRect.right - B_V_SCROLL_BAR_WIDTH;
7298  m_ArrowLineDownPntr = new BPictureButton (TempRect, "Down Line",
7299    &TempOffPicture, &TempOnPicture,
7300    new BMessage (MSG_LINE_DOWN), B_ONE_STATE_BUTTON,
7301    B_FOLLOW_RIGHT | B_FOLLOW_BOTTOM, B_WILL_DRAW | B_NAVIGABLE);
7302  if (m_ArrowLineDownPntr == NULL) goto ErrorExit;
7303  AddChild (m_ArrowLineDownPntr);
7304  m_ArrowLineDownPntr->SetTarget (this);
7305
7306  return;
7307
7308ErrorExit:
7309  DisplayErrorMessage ("Problems while making view displaying the words.");
7310}
7311
7312
7313/* Draw the words starting with the one at or after m_FirstDisplayedWord.  This
7314requires looking at the database in the BApplication, which may or may not be
7315available (if it isn't, don't draw, a redraw will usually be requested by the
7316Pulse member function when it keeps on noticing that the stuff on the display
7317doesn't match the database). */
7318
7319void
7320WordsView::Draw (BRect UpdateRect)
7321{
7322  float                   AgeDifference;
7323  float                   AgeProportion;
7324  float                   CenterX;
7325  float                   ColumnLeftCenterX;
7326  float                   ColumnMiddleCenterX;
7327  float                   ColumnRightCenterX;
7328  float                   CompensatedRatio;
7329  StatisticsMap::iterator DataIter;
7330  StatisticsMap::iterator EndIter;
7331  rgb_color               FillColour;
7332  float                   GenuineProportion;
7333  uint32                  GenuineSpamSum;
7334  float                   HeightPixels;
7335  float                   HeightProportion;
7336  float                   LeftBounds;
7337  ABSApp                 *MyAppPntr;
7338  uint32                  NewestAge;
7339  uint32                  OldestAge;
7340  float                   OneFifthTotalGenuine;
7341  float                   OneFifthTotalSpam;
7342  double                  RawProbabilityRatio;
7343  float                   RightBounds;
7344  float                   SpamProportion;
7345  StatisticsPointer       StatisticsPntr;
7346  BRect                   TempRect;
7347  char                    TempString [PATH_MAX];
7348  float                   TotalGenuineMessages = 1.0; /* Avoid divide by 0. */
7349  float                   TotalSpamMessages = 1.0;
7350  float                   Width;
7351  float                   Y;
7352
7353  /* Lock the application.  This will stop it from processing any further
7354  messages until we are done.  Or if it is busy, the lock will fail. */
7355
7356  MyAppPntr = dynamic_cast<ABSApp *> (be_app);
7357  if (MyAppPntr == NULL || MyAppPntr->LockWithTimeout (100000) != B_OK)
7358    return; /* It's probably busy doing something. */
7359
7360  /* Set up various loop invariant variables. */
7361
7362  if (MyAppPntr->m_TotalGenuineMessages > 0)
7363    TotalGenuineMessages = MyAppPntr->m_TotalGenuineMessages;
7364  OneFifthTotalGenuine = TotalGenuineMessages / 5;
7365
7366  if (MyAppPntr->m_TotalSpamMessages > 0)
7367    TotalSpamMessages = MyAppPntr->m_TotalSpamMessages;
7368  OneFifthTotalSpam = TotalSpamMessages / 5;
7369
7370  EndIter = MyAppPntr->m_WordMap.end ();
7371
7372  OldestAge = MyAppPntr->m_OldestAge;
7373  NewestAge = /* actually newest age plus one */
7374    MyAppPntr->m_TotalGenuineMessages + MyAppPntr->m_TotalSpamMessages;
7375
7376  if (NewestAge == 0)
7377    goto NormalExit; /* No words to display, or something is badly wrong. */
7378
7379  NewestAge--; /* The newest message has age NewestAge. */
7380  AgeDifference = NewestAge - OldestAge; /* Can be zero if just one message. */
7381
7382  LeftBounds = Bounds().left;
7383  RightBounds = Bounds().right - B_V_SCROLL_BAR_WIDTH;
7384  Width = RightBounds - LeftBounds;
7385  FillColour.alpha = 255;
7386
7387  CenterX = ceilf (LeftBounds + Width * 0.5);
7388  ColumnLeftCenterX = ceilf (LeftBounds + Width * 0.05);
7389  ColumnMiddleCenterX = CenterX;
7390  ColumnRightCenterX = ceilf (LeftBounds + Width * 0.95);
7391
7392  for (DataIter = MyAppPntr->m_WordMap.lower_bound (m_FirstDisplayedWord),
7393  Y = Bounds().top;
7394  DataIter != EndIter && Y < UpdateRect.bottom;
7395  DataIter++, Y += m_LineHeight)
7396  {
7397    if (Y + m_LineHeight < UpdateRect.top)
7398      continue; /* Not in the visible area yet, don't actually draw. */
7399
7400    /* Draw the colour bar behind the word.  It reflects the spamness or
7401    genuineness of that particular word, plus the importance of the word and
7402    the age of the word.
7403
7404    First calculate the compensated spam ratio (described elsewhere).  It is
7405    close to 0.0 for genuine words and close to 1.0 for pure spam.  It is drawn
7406    as a blue bar to the left of center if it is less than 0.5, and a red bar
7407    on the right of center if it is greater than 0.5.  At exactly 0.5 nothing
7408    is drawn; the word is worthless as an indicator.
7409
7410    The height of the bar corresponds to the number of messages the word was
7411    found in.  Make the height proportional to the total of spam and genuine
7412    messages for the word divided by the sum of the most extreme spam and
7413    genuine counts in the database.
7414
7415    The staturation of the colour corresponds to the age of the word, with old
7416    words being almost white rather than solid blue or red. */
7417
7418    StatisticsPntr = &DataIter->second;
7419
7420    SpamProportion = StatisticsPntr->spamCount / TotalSpamMessages;
7421    GenuineProportion = StatisticsPntr->genuineCount / TotalGenuineMessages;
7422    if (SpamProportion + GenuineProportion > 0.0f)
7423      RawProbabilityRatio =
7424      SpamProportion / (SpamProportion + GenuineProportion);
7425    else
7426      RawProbabilityRatio = g_RobinsonX;
7427
7428    /* The compensated ratio leans towards 0.5 (RobinsonX) more for fewer
7429    data points, with a weight of 0.45 (RobinsonS). */
7430
7431    GenuineSpamSum =
7432      StatisticsPntr->spamCount + StatisticsPntr->genuineCount;
7433    CompensatedRatio =
7434      (g_RobinsonS * g_RobinsonX + GenuineSpamSum * RawProbabilityRatio) /
7435      (g_RobinsonS + GenuineSpamSum);
7436
7437    /* Used to use the height based on the most frequent word, but some words,
7438    like "From", show up in all messages which made most other words just
7439    appear as a thin line.  I did a histogram plot of the sizes in my test
7440    database, and figured that you get better coverage of 90% of the messages
7441    if you use 1/5 of the total number as the count which gives you 100%
7442    height.  The other 10% get a full height bar, but most people wouldn't care
7443    that they're super frequently used. */
7444
7445    HeightProportion = 0.5f * (StatisticsPntr->genuineCount /
7446      OneFifthTotalGenuine + StatisticsPntr->spamCount / OneFifthTotalSpam);
7447
7448    if (HeightProportion > 1.0f)
7449      HeightProportion = 1.0f;
7450    HeightPixels = ceilf (HeightProportion * m_TextHeight);
7451
7452    if (AgeDifference <= 0.0f)
7453      AgeProportion = 1.0; /* New is 1.0, old is 0.0 */
7454    else
7455      AgeProportion = (StatisticsPntr->age - OldestAge) / AgeDifference;
7456
7457    TempRect.top = ceilf (Y + m_TextHeight / 2 - HeightPixels / 2);
7458    TempRect.bottom = TempRect.top + HeightPixels;
7459
7460    if (CompensatedRatio < 0.5f)
7461    {
7462      TempRect.left = ceilf (
7463        CenterX - 1.6f * (0.5f - CompensatedRatio) * (CenterX - LeftBounds));
7464      TempRect.right = CenterX;
7465      FillColour.red = 230 - (int) (AgeProportion * 230.0f);
7466      FillColour.green = FillColour.red;
7467      FillColour.blue = 255;
7468    }
7469    else /* Ratio >= 0.5, red spam block. */
7470    {
7471      TempRect.left = CenterX;
7472      TempRect.right = ceilf (
7473        CenterX + 1.6f * (CompensatedRatio - 0.5f) * (RightBounds - CenterX));
7474      FillColour.blue = 230 - (int) (AgeProportion * 230.0f);
7475      FillColour.green = FillColour.blue;
7476      FillColour.red = 255;
7477    }
7478    SetHighColor (FillColour);
7479    SetDrawingMode (B_OP_COPY);
7480    FillRect (TempRect);
7481
7482    /* Print the text centered in columns of various widths.  The number of
7483    genuine messages in the left 10% of the width, the word in the middle 80%,
7484    and the number of spam messages using the word in the right 10%. */
7485
7486    SetHighColor (0, 0, 0);
7487    SetDrawingMode (B_OP_OVER); /* So that antialiased text mixes better. */
7488
7489    sprintf (TempString, "%lu", StatisticsPntr->genuineCount);
7490    Width = m_TextFont.StringWidth (TempString);
7491    MovePenTo (ceilf (ColumnLeftCenterX - Width / 2), Y + m_AscentHeight);
7492    DrawString (TempString);
7493
7494    strcpy (TempString, DataIter->first.c_str ());
7495    Width = m_TextFont.StringWidth (TempString);
7496    MovePenTo (ceilf (ColumnMiddleCenterX - Width / 2), Y + m_AscentHeight);
7497    DrawString (TempString);
7498
7499    sprintf (TempString, "%lu", StatisticsPntr->spamCount);
7500    Width = m_TextFont.StringWidth (TempString);
7501    MovePenTo (ceilf (ColumnRightCenterX - Width / 2), Y + m_AscentHeight);
7502    DrawString (TempString);
7503  }
7504
7505  /* Draw the first word (the one which the user types in to select the first
7506  displayed word) on the right, in the scroll bar margin, rotated 90 degrees to
7507  fit between the page up and page down buttons. */
7508
7509  Width = m_TextFont.StringWidth (m_FirstDisplayedWord);
7510  if (Width > 0)
7511  {
7512    TempRect = Bounds ();
7513    TempRect.top += 4 * B_H_SCROLL_BAR_HEIGHT + 1;
7514    TempRect.bottom -= 5 * B_H_SCROLL_BAR_HEIGHT + 1;
7515
7516    MovePenTo (TempRect.right - m_TextHeight + m_AscentHeight - 1,
7517      ceilf ((TempRect.bottom + TempRect.top) / 2 + Width / 2));
7518    m_TextFont.SetRotation (90);
7519    SetFont (&m_TextFont, B_FONT_ROTATION);
7520    DrawString (m_FirstDisplayedWord);
7521    m_TextFont.SetRotation (0);
7522    SetFont (&m_TextFont, B_FONT_ROTATION);
7523  }
7524
7525NormalExit:
7526
7527  /* Successfully finished drawing.  Update the cached values to match what we
7528  have drawn. */
7529  m_CachedTotalGenuineMessages = MyAppPntr->m_TotalGenuineMessages;
7530  m_CachedTotalSpamMessages = MyAppPntr->m_TotalSpamMessages;
7531  m_CachedWordCount = MyAppPntr->m_WordCount;
7532
7533  /* Done.  Let the BApplication continue processing messages. */
7534  MyAppPntr->Unlock ();
7535}
7536
7537
7538/* When the user presses keys, they select the first word to be displayed in
7539the view (it's the word at or lexicographically after the word typed in).  The
7540keys are appended to the starting word, until the user stops typing for a
7541while, then the next key will be the first letter of a new starting word. */
7542
7543void
7544WordsView::KeyDown (const char *BufferPntr, int32 NumBytes)
7545{
7546  int32          CharLength;
7547  bigtime_t      CurrentTime;
7548  char           TempString [40];
7549
7550  CurrentTime = system_time ();
7551
7552  if (NumBytes < (int32) sizeof (TempString))
7553  {
7554    memcpy (TempString, BufferPntr, NumBytes);
7555    TempString [NumBytes] = 0;
7556    CharLength = strlen (TempString); /* So NUL bytes don't get through. */
7557
7558    /* Check for arrow keys, which move the view up and down. */
7559
7560    if (CharLength == 1 &&
7561    (TempString[0] == B_UP_ARROW ||
7562    TempString[0] == B_DOWN_ARROW ||
7563    TempString[0] == B_PAGE_UP ||
7564    TempString[0] == B_PAGE_DOWN))
7565    {
7566      MoveTextUpOrDown ((TempString[0] == B_UP_ARROW) ? MSG_LINE_UP :
7567        ((TempString[0] == B_DOWN_ARROW) ? MSG_LINE_DOWN :
7568        ((TempString[0] == B_PAGE_UP) ? MSG_PAGE_UP : MSG_PAGE_DOWN)));
7569    }
7570    else if (CharLength > 1 ||
7571    (CharLength == 1 && 32 <= (uint8) TempString[0]))
7572    {
7573      /* Have a non-control character, or some sort of multibyte char.  Add it
7574      to the word and mark things for redisplay starting at the resulting word.
7575      */
7576
7577      if (CurrentTime - m_LastTimeAKeyWasPressed >= 1000000 /* microseconds */)
7578        strcpy (m_FirstDisplayedWord, TempString); /* Starting a new word. */
7579      else if (strlen (m_FirstDisplayedWord) + CharLength <= g_MaxWordLength)
7580        strcat (m_FirstDisplayedWord, TempString); /* Append to existing. */
7581
7582      Invalidate ();
7583    }
7584  }
7585
7586  m_LastTimeAKeyWasPressed = CurrentTime;
7587  BView::KeyDown (BufferPntr, NumBytes);
7588}
7589
7590
7591/* Change the background colour to show that we have the focus.  When we have
7592it, keystrokes will select the word to be displayed at the top of the list. */
7593
7594void
7595WordsView::MakeFocus (bool Focused)
7596{
7597  if (Focused)
7598    m_BackgroundColour = m_FocusedColour;
7599  else
7600    m_BackgroundColour = m_UnfocusedColour;
7601  SetViewColor (m_BackgroundColour);
7602  SetLowColor (m_BackgroundColour);
7603
7604  /* Also need to set the background colour for the scroll buttons, since they
7605  can't be made transparent. */
7606
7607  if (m_ArrowLineDownPntr != NULL)
7608  {
7609    m_ArrowLineDownPntr->SetViewColor (m_BackgroundColour);
7610    m_ArrowLineDownPntr->Invalidate ();
7611  }
7612
7613  if (m_ArrowLineUpPntr != NULL)
7614  {
7615    m_ArrowLineUpPntr->SetViewColor (m_BackgroundColour);
7616    m_ArrowLineUpPntr->Invalidate ();
7617  }
7618
7619  if (m_ArrowPageDownPntr != NULL)
7620  {
7621    m_ArrowPageDownPntr->SetViewColor (m_BackgroundColour);
7622    m_ArrowPageDownPntr->Invalidate ();
7623  }
7624
7625  if (m_ArrowPageUpPntr != NULL)
7626  {
7627    m_ArrowPageUpPntr->SetViewColor (m_BackgroundColour);
7628    m_ArrowPageUpPntr->Invalidate ();
7629  }
7630
7631  Invalidate ();
7632
7633  BView::MakeFocus (Focused);
7634}
7635
7636
7637void
7638WordsView::MessageReceived (BMessage *MessagePntr)
7639{
7640  int32     CountFound;
7641  float     DeltaY; /* Usually -1.0, 0.0 or +1.0. */
7642  type_code TypeFound;
7643
7644  switch (MessagePntr->what)
7645  {
7646    case B_MOUSE_WHEEL_CHANGED:
7647      if (MessagePntr->FindFloat ("be:wheel_delta_y", &DeltaY) != 0) break;
7648      if (DeltaY < 0)
7649        MoveTextUpOrDown (MSG_LINE_UP);
7650      else if (DeltaY > 0)
7651        MoveTextUpOrDown (MSG_LINE_DOWN);
7652      break;
7653
7654    case MSG_LINE_DOWN:
7655    case MSG_LINE_UP:
7656    case MSG_PAGE_DOWN:
7657    case MSG_PAGE_UP:
7658      MoveTextUpOrDown (MessagePntr->what);
7659      break;
7660
7661    case B_SIMPLE_DATA: /* Something has been dropped in our view. */
7662      if (MessagePntr->GetInfo ("refs", &TypeFound, &CountFound) == B_OK &&
7663      CountFound > 0 && TypeFound == B_REF_TYPE)
7664      {
7665        RefsDroppedHere (MessagePntr);
7666        break;
7667      }
7668      /* Else fall through to the default case, in case it is something else
7669      dropped that the system knows about. */
7670
7671    default:
7672      BView::MessageReceived (MessagePntr);
7673  }
7674}
7675
7676
7677/* If the user clicks on our view, take over the focus. */
7678
7679void
7680WordsView::MouseDown (BPoint)
7681{
7682  if (!IsFocus ())
7683    MakeFocus (true);
7684}
7685
7686
7687void
7688WordsView::MoveTextUpOrDown (uint32 MovementType)
7689{
7690  StatisticsMap::iterator  DataIter;
7691  int                      i;
7692  ABSApp                  *MyAppPntr;
7693  int                      PageSize;
7694
7695  /* Lock the application.  This will stop it from processing any further
7696  messages until we are done (we need to look at the word list directly).  Or
7697  if it is busy, the lock will fail. */
7698
7699  MyAppPntr = dynamic_cast<ABSApp *> (be_app);
7700  if (MyAppPntr == NULL || MyAppPntr->LockWithTimeout (2000000) != B_OK)
7701    return; /* It's probably busy doing something. */
7702
7703  PageSize = (int) (Bounds().Height() / m_LineHeight - 1);
7704  if (PageSize < 1)
7705    PageSize = 1;
7706
7707  DataIter = MyAppPntr->m_WordMap.lower_bound (m_FirstDisplayedWord);
7708
7709  switch (MovementType)
7710  {
7711    case MSG_LINE_UP:
7712      if (DataIter != MyAppPntr->m_WordMap.begin ())
7713        DataIter--;
7714      break;
7715
7716    case MSG_LINE_DOWN:
7717      if (DataIter != MyAppPntr->m_WordMap.end ())
7718        DataIter++;
7719      break;
7720
7721    case MSG_PAGE_UP:
7722      for (i = 0; i < PageSize; i++)
7723      {
7724        if (DataIter == MyAppPntr->m_WordMap.begin ())
7725          break;
7726        DataIter--;
7727      }
7728      break;
7729
7730    case MSG_PAGE_DOWN:
7731      for (i = 0; i < PageSize; i++)
7732      {
7733        if (DataIter == MyAppPntr->m_WordMap.end ())
7734          break;
7735        DataIter++;
7736      }
7737      break;
7738  }
7739
7740  if (DataIter != MyAppPntr->m_WordMap.end ())
7741    strcpy (m_FirstDisplayedWord, DataIter->first.c_str ());
7742
7743  Invalidate ();
7744
7745  MyAppPntr->Unlock ();
7746}
7747
7748
7749/* This function periodically polls the BApplication to see if anything has
7750changed.  If the word list is different or the display has changed in some
7751other way, it will then try to refresh the display, repeating the attempt until
7752it gets successfully drawn. */
7753
7754void
7755WordsView::Pulse ()
7756{
7757  ABSApp *MyAppPntr;
7758
7759  /* Probe the BApplication to see if it has changed. */
7760
7761  MyAppPntr = dynamic_cast<ABSApp *> (be_app);
7762  if (MyAppPntr == NULL)
7763    return; /* Something is wrong, give up. */
7764
7765  if (MyAppPntr->m_TotalGenuineMessages != m_CachedTotalGenuineMessages ||
7766  MyAppPntr->m_TotalSpamMessages != m_CachedTotalSpamMessages ||
7767  MyAppPntr->m_WordCount != m_CachedWordCount)
7768    Invalidate ();
7769}
7770
7771
7772/* The user has dragged and dropped some file references on the words view.  If
7773it is in the left third, add the file(s) as examples of genuine messages, right
7774third for spam messages and if it is in the middle third then evaluate the
7775file(s) for spaminess. */
7776
7777void
7778WordsView::RefsDroppedHere (BMessage *MessagePntr)
7779{
7780  float  Left;
7781  bool   SpamExample = true; /* TRUE if example is of spam, FALSE genuine. */
7782  float  Third;
7783  BPoint WhereDropped;
7784
7785  /* Find out which third of the view it was dropped into. */
7786
7787  if (MessagePntr->FindPoint ("_drop_point_", &WhereDropped) != B_OK)
7788    return;  /* Need to know where it was dropped. */
7789  ConvertFromScreen (&WhereDropped);
7790  Third = Bounds().Width() / 3;
7791  Left = Bounds().left;
7792  if (WhereDropped.x < Left + Third)
7793    SpamExample = false;
7794  else if (WhereDropped.x < Left + 2 * Third)
7795  {
7796    /* In the middle third, evaluate all files for spaminess. */
7797    EstimateRefFilesAndDisplay (MessagePntr);
7798    return;
7799  }
7800
7801  if (g_CommanderLooperPntr != NULL)
7802    g_CommanderLooperPntr->CommandReferences (
7803    MessagePntr, true /* BulkMode */, SpamExample ? CL_SPAM : CL_GENUINE);
7804}
7805
7806
7807
7808/******************************************************************************
7809 * Finally, the main program which drives it all.
7810 */
7811
7812int main (int argc, char**)
7813{
7814  g_CommandLineMode = (argc > 1);
7815  if (!g_CommandLineMode)
7816    cout << PrintUsage; /* In case no arguments specified. */
7817
7818  g_CommanderLooperPntr = new CommanderLooper;
7819  if (g_CommanderLooperPntr != NULL)
7820  {
7821    g_CommanderMessenger = new BMessenger (NULL, g_CommanderLooperPntr);
7822    g_CommanderLooperPntr->Run ();
7823  }
7824
7825  ABSApp MyApp;
7826
7827  if (MyApp.InitCheck () == 0)
7828  {
7829    MyApp.LoadSaveSettings (true /* DoLoad */);
7830    MyApp.Run ();
7831  }
7832
7833  if (g_CommanderLooperPntr != NULL)
7834  {
7835    g_CommanderLooperPntr->PostMessage (B_QUIT_REQUESTED);
7836    snooze (100000); /* Let the CommanderLooper thread run so it quits. */
7837  }
7838
7839  cerr << "SpamDBM shutting down..." << endl;
7840  return 0; /* And implicitly destroys MyApp, which writes out the database. */
7841}
7842