1/*
2Open Tracker License
3
4Terms and Conditions
5
6Copyright (c) 1991-2001, Be Incorporated. All rights reserved.
7
8Permission is hereby granted, free of charge, to any person obtaining a copy of
9this software and associated documentation files (the "Software"), to deal in
10the Software without restriction, including without limitation the rights to
11use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12of the Software, and to permit persons to whom the Software is furnished to do
13so, subject to the following conditions:
14
15The above copyright notice and this permission notice applies to all licensees
16and shall be included in all copies or substantial portions of the Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
25Except as contained in this notice, the name of Be Incorporated shall not be
26used in advertising or otherwise to promote the sale, use or other dealings in
27this Software without prior written authorization from Be Incorporated.
28
29BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30of Be Incorporated in the United States and other countries. Other brand product
31names are registered trademarks or trademarks of their respective holders.
32All rights reserved.
33*/
34
35
36#include "WIndex.h"
37
38#include <File.h>
39#include <fs_attr.h>
40#include <Message.h>
41#include <Node.h>
42
43#include <ctype.h>
44#include <stdlib.h>
45#include <stdio.h>
46
47
48#define IVERSION	1
49
50
51static int32 kCRCTable = 0;
52
53
54int32 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2);
55void gen_crc_table();
56unsigned long update_crc(unsigned long crc_accum, const char *data_blk_ptr,
57	int data_blk_size);
58
59
60FileEntry::FileEntry(void)
61{
62
63}
64
65
66FileEntry::FileEntry(const char *entryString)
67	:
68	BString(entryString)
69{
70
71}
72
73
74status_t
75WIndex::SetTo(const char *dataPath, const char *indexPath)
76{
77	BFile* dataFile;
78	BFile indexFile;
79
80	dataFile = new BFile();
81
82	if (dataFile->SetTo(dataPath, B_READ_ONLY) != B_OK) {
83		return B_ERROR;
84	} else {
85		bool buildIndex = true;
86		SetTo(dataFile);
87
88		time_t mtime;
89		time_t modified;
90
91		dataFile->GetModificationTime(&mtime);
92
93		if (indexFile.SetTo(indexPath, B_READ_ONLY) == B_OK) {
94			attr_info info;
95			if ((indexFile.GetAttrInfo("WINDEX:version", &info) == B_OK)) {
96				uint32 version = 0;
97				indexFile.ReadAttr("WINDEX:version", B_UINT32_TYPE, 0,
98					&version, 4);
99				if (IVERSION == version) {
100					if ((indexFile.GetAttrInfo("WINDEX:modified", &info)
101						== B_OK)) {
102						indexFile.ReadAttr("WINDEX:modified", B_UINT32_TYPE, 0,
103							&modified, 4);
104
105						if (mtime == modified) {
106							if (UnflattenIndex(&indexFile) == B_OK)
107								buildIndex = false;
108						}
109					}
110				}
111			}
112			indexFile.Unset();
113		}
114		if (buildIndex) {
115			InitIndex();
116			BuildIndex();
117			if (indexFile.SetTo(indexPath,
118				B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE) == B_OK) {
119				FlattenIndex(&indexFile);
120				indexFile.WriteAttr("WINDEX:modified", B_UINT32_TYPE, 0,
121					&mtime, 4);
122				uint32 version = IVERSION;
123				indexFile.WriteAttr("WINDEX:version", B_UINT32_TYPE, 0,
124					&version, 4);
125			}
126		}
127	}
128	return B_OK;
129}
130
131
132FileEntry::~FileEntry(void)
133{
134
135}
136
137
138WIndex::WIndex(int32 count)
139{
140	fEntryList = NULL;
141	fDataFile = NULL;
142	fEntriesPerBlock = count;
143	fEntrySize = sizeof(WIndexEntry);
144	if (!atomic_or(&kCRCTable, 1))
145		gen_crc_table();
146}
147
148
149WIndex::WIndex(BPositionIO *dataFile, int32 count)
150{
151	fEntryList = NULL;
152	fDataFile = dataFile;
153	fEntriesPerBlock = count;
154	fEntrySize = sizeof(WIndexEntry);
155	if (!atomic_or(&kCRCTable, 1))
156		gen_crc_table();
157}
158
159
160WIndex::~WIndex(void)
161{
162	if (fEntryList)
163		free(fEntryList);
164	delete fDataFile;
165}
166
167
168status_t
169WIndex::UnflattenIndex(BPositionIO *io)
170{
171	if (fEntryList)
172		free(fEntryList);
173	WIndexHead		head;
174
175	io->Seek(0, SEEK_SET);
176	io->Read(&head, sizeof(head));
177	io->Seek(head.offset, SEEK_SET);
178
179	fEntrySize = head.entrySize;
180	fEntries = head.entries;
181	fMaxEntries = fEntriesPerBlock;
182	fBlockSize = fEntriesPerBlock * fEntrySize;
183	fBlocks = fEntries / fEntriesPerBlock + 1;;
184	fIsSorted = true;
185
186	int32 size = (head.entries + 1) * head.entrySize;
187	if (!(fEntryList = (uint8 *)malloc(size)))
188		return B_ERROR;
189
190	if (fEntries)
191		io->Read(fEntryList, size);
192
193	return B_OK;
194}
195
196
197status_t
198WIndex::FlattenIndex(BPositionIO *io)
199{
200	if (fEntries && !fIsSorted)
201		SortItems();
202	WIndexHead		head;
203
204	head.entries = fEntries;
205	head.entrySize = fEntrySize;
206	head.offset = sizeof(WIndexHead);
207	io->Seek(0, SEEK_SET);
208	io->Write(&head, sizeof(head));
209	if (fEntries)
210		io->Write(fEntryList, head.entries * head.entrySize);
211
212	return B_OK;
213}
214
215
216int32
217WIndex::Lookup(int32 key)
218{
219	if (!fEntries)
220		return -1;
221	if (!fIsSorted)
222		SortItems();
223
224	// Binary Search
225	int32	M, Lb, Ub;
226	Lb = 0;
227	Ub = fEntries - 1;
228	while (true) {
229		M = (Lb + Ub) / 2;
230		if (key < ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
231			Ub = M - 1;
232		else if (key > ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
233			Lb = M + 1;
234		else
235			return M;
236		if (Lb > Ub)
237			return -1;
238	}
239}
240
241
242status_t
243WIndex::AddItem(WIndexEntry *entry)
244{
245	if (_BlockCheck() == B_ERROR)
246		return B_ERROR;
247	memcpy(((WIndexEntry *)(fEntryList + (fEntries * fEntrySize))), entry,
248		fEntrySize);
249	fEntries++;
250	fIsSorted = false;
251	return B_OK;
252}
253
254
255void
256WIndex::SortItems(void)
257{
258	qsort(fEntryList, fEntries, fEntrySize,
259		(int(*)(const void *, const void *))cmp_i_entries);
260
261	fIsSorted = true;
262}
263
264
265status_t
266WIndex::_BlockCheck(void)
267{
268	if (fEntries < fMaxEntries)
269		return B_OK;
270	fBlocks = fEntries / fEntriesPerBlock + 1;
271	fEntryList = (uint8 *)realloc(fEntryList, fBlockSize * fBlocks);
272	if (!fEntryList)
273		return B_ERROR;
274	return B_OK;
275}
276
277
278status_t
279WIndex::InitIndex(void)
280{
281	if (fEntryList)
282		free(fEntryList);
283	fIsSorted = 0;
284	fEntries = 0;
285	fMaxEntries = fEntriesPerBlock;
286	fBlockSize = fEntriesPerBlock * fEntrySize;
287	fBlocks = 1;
288	fEntryList = (uint8 *)malloc(fBlockSize);
289	if (!fEntryList)
290		return B_ERROR;
291	return B_OK;
292}
293
294
295int32
296WIndex::GetKey(const char *s)
297{
298
299	int32	key = 0;
300	/*int32	x;
301	int32	a = 84589;
302	int32	b = 45989;
303	int32	m = 217728;
304	while (*s) {
305		x = *s++ - 'a';
306
307		key ^= (a * x + b) % m;
308		key <<= 1;
309	}*/
310
311	key = update_crc(0, s, strlen(s));
312
313	if (key < 0) // No negative values!
314		key = ~key;
315
316	return key;
317}
318
319
320int32
321cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2)
322{
323	return e1->key - e2->key;
324}
325
326
327status_t
328WIndex::SetTo(BPositionIO *dataFile)
329{
330	fDataFile = dataFile;
331	return B_OK;
332}
333
334
335void
336WIndex::Unset(void)
337{
338	fDataFile = NULL;
339}
340
341
342int32
343WIndex::FindFirst(const char *word)
344{
345	if (!fEntries)
346		return -1;
347
348	int32			index;
349	char			nword[256];
350	int32			key;
351
352	NormalizeWord(word, nword);
353	key = GetKey(nword);
354
355	if ((index = Lookup(key)) < 0)
356		return -1;
357	// Find first instance of key
358	while ((ItemAt(index - 1))->key == key)
359		index--;
360	return index;
361}
362
363
364FileEntry*
365WIndex::GetEntry(int32 index)
366{
367	if ((index >= fEntries)||(index < 0))
368		return NULL;
369	WIndexEntry		*ientry;
370	FileEntry		*dentry;
371	char			*buffer;
372
373	dentry = new FileEntry();
374
375	ientry = ItemAt(index);
376
377	int32 size;
378
379	fDataFile->Seek(ientry->offset, SEEK_SET);
380	buffer = dentry->LockBuffer(256);
381	fDataFile->Read(buffer, 256);
382	size = _GetEntrySize(ientry, buffer);
383	//buffer[256] = 0;
384	//printf("Entry: = %s\n", buffer);
385	dentry->UnlockBuffer(size);
386	return dentry;
387}
388
389
390size_t
391WIndex::_GetEntrySize(WIndexEntry *entry, const char *entryData)
392{
393	// eliminate unused parameter warning
394	(void)entry;
395
396	return strcspn(entryData, "\n\r");
397}
398
399
400FileEntry*
401WIndex::GetEntry(const char *word)
402{
403	return GetEntry(FindFirst(word));
404}
405
406
407char*
408WIndex::NormalizeWord(const char *word, char *dest)
409{
410	const char 	*src;
411	char		*dst;
412
413	// remove dots and copy
414	src = word;
415	dst = dest;
416	while (*src) {
417		if (*src != '.')
418			*dst++ = *src;
419		src++;
420	}
421	*dst = 0;
422
423	// convert to lower-case
424	for (dst = dest; *dst; dst++)
425		*dst = tolower(*dst);
426	return dest;
427}
428
429
430/* crc32h.c -- package to compute 32-bit CRC one byte at a time using   */
431/*             the high-bit first (Big-Endian) bit ordering convention  */
432/*                                                                      */
433/* Synopsis:                                                            */
434/*  gen_crc_table() -- generates a 256-word table containing all CRC    */
435/*                     remainders for every possible 8-bit byte.  It    */
436/*                     must be executed (once) before any CRC updates.  */
437/*                                                                      */
438/*  unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size)         */
439/*           unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */
440/*           Returns the updated value of the CRC accumulator after     */
441/*           processing each byte in the addressed block of data.       */
442/*                                                                      */
443/*  It is assumed that an unsigned long is at least 32 bits wide and    */
444/*  that the predefined type char occupies one 8-bit byte of storage.   */
445/*                                                                      */
446/*  The generator polynomial used for this version of the package is    */
447/*  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */
448/*  as specified in the Autodin/Ethernet/ADCCP protocol standards.      */
449/*  Other degree 32 polynomials may be substituted by re-defining the   */
450/*  symbol POLYNOMIAL below.  Lower degree polynomials must first be    */
451/*  multiplied by an appropriate power of x.  The representation used   */
452/*  is that the coefficient of x^0 is stored in the LSB of the 32-bit   */
453/*  word and the coefficient of x^31 is stored in the most significant  */
454/*  bit.  The CRC is to be appended to the data most significant byte   */
455/*  first.  For those protocols in which bytes are transmitted MSB      */
456/*  first and in the same order as they are encountered in the block    */
457/*  this convention results in the CRC remainder being transmitted with */
458/*  the coefficient of x^31 first and with that of x^0 last (just as    */
459/*  would be done by a hardware shift register mechanization).          */
460/*                                                                      */
461/*  The table lookup technique was adapted from the algorithm described */
462/*  by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/
463
464
465#define POLYNOMIAL 0x04c11db7L
466
467
468static unsigned long crc_table[256];
469
470
471void
472gen_crc_table()
473{
474	// generate the table of CRC remainders for all possible bytes
475
476	register int i, j;
477	register unsigned long crc_accum;
478
479	for (i = 0;  i < 256;  i++) {
480		crc_accum = ((unsigned long) i << 24);
481		for (j = 0;  j < 8;  j++) {
482			if (crc_accum & 0x80000000L)
483				crc_accum = (crc_accum << 1) ^ POLYNOMIAL;
484			else
485				crc_accum = (crc_accum << 1);
486		}
487		crc_table[i] = crc_accum;
488	}
489
490	return;
491}
492
493
494unsigned long
495update_crc(unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size)
496{
497	// update the CRC on the data block one byte at a time
498
499	register int i, j;
500
501	for (j = 0;  j < data_blk_size;  j++) {
502		i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xff;
503		crc_accum = (crc_accum << 8) ^ crc_table[i];
504	}
505
506	return crc_accum;
507}
508
509