1/* 2Open Tracker License 3 4Terms and Conditions 5 6Copyright (c) 1991-2001, Be Incorporated. All rights reserved. 7 8Permission is hereby granted, free of charge, to any person obtaining a copy of 9this software and associated documentation files (the "Software"), to deal in 10the Software without restriction, including without limitation the rights to 11use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12of the Software, and to permit persons to whom the Software is furnished to do 13so, subject to the following conditions: 14 15The above copyright notice and this permission notice applies to all licensees 16and shall be included in all copies or substantial portions of the Software. 17 18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, 20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION 23WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25Except as contained in this notice, the name of Be Incorporated shall not be 26used in advertising or otherwise to promote the sale, use or other dealings in 27this Software without prior written authorization from Be Incorporated. 28 29BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks 30of Be Incorporated in the United States and other countries. Other brand product 31names are registered trademarks or trademarks of their respective holders. 32All rights reserved. 33*/ 34 35 36#include "WIndex.h" 37 38#include <File.h> 39#include <fs_attr.h> 40#include <Message.h> 41#include <Node.h> 42 43#include <ctype.h> 44#include <stdlib.h> 45#include <stdio.h> 46 47 48#define IVERSION 1 49 50 51static int32 kCRCTable = 0; 52 53 54int32 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2); 55void gen_crc_table(); 56unsigned long update_crc(unsigned long crc_accum, const char *data_blk_ptr, 57 int data_blk_size); 58 59 60FileEntry::FileEntry(void) 61{ 62 63} 64 65 66FileEntry::FileEntry(const char *entryString) 67 : 68 BString(entryString) 69{ 70 71} 72 73 74status_t 75WIndex::SetTo(const char *dataPath, const char *indexPath) 76{ 77 BFile* dataFile; 78 BFile indexFile; 79 80 dataFile = new BFile(); 81 82 if (dataFile->SetTo(dataPath, B_READ_ONLY) != B_OK) { 83 return B_ERROR; 84 } else { 85 bool buildIndex = true; 86 SetTo(dataFile); 87 88 time_t mtime; 89 time_t modified; 90 91 dataFile->GetModificationTime(&mtime); 92 93 if (indexFile.SetTo(indexPath, B_READ_ONLY) == B_OK) { 94 attr_info info; 95 if ((indexFile.GetAttrInfo("WINDEX:version", &info) == B_OK)) { 96 uint32 version = 0; 97 indexFile.ReadAttr("WINDEX:version", B_UINT32_TYPE, 0, 98 &version, 4); 99 if (IVERSION == version) { 100 if ((indexFile.GetAttrInfo("WINDEX:modified", &info) 101 == B_OK)) { 102 indexFile.ReadAttr("WINDEX:modified", B_UINT32_TYPE, 0, 103 &modified, 4); 104 105 if (mtime == modified) { 106 if (UnflattenIndex(&indexFile) == B_OK) 107 buildIndex = false; 108 } 109 } 110 } 111 } 112 indexFile.Unset(); 113 } 114 if (buildIndex) { 115 InitIndex(); 116 BuildIndex(); 117 if (indexFile.SetTo(indexPath, 118 B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE) == B_OK) { 119 FlattenIndex(&indexFile); 120 indexFile.WriteAttr("WINDEX:modified", B_UINT32_TYPE, 0, 121 &mtime, 4); 122 uint32 version = IVERSION; 123 indexFile.WriteAttr("WINDEX:version", B_UINT32_TYPE, 0, 124 &version, 4); 125 } 126 } 127 } 128 return B_OK; 129} 130 131 132FileEntry::~FileEntry(void) 133{ 134 135} 136 137 138WIndex::WIndex(int32 count) 139{ 140 fEntryList = NULL; 141 fDataFile = NULL; 142 fEntriesPerBlock = count; 143 fEntrySize = sizeof(WIndexEntry); 144 if (!atomic_or(&kCRCTable, 1)) 145 gen_crc_table(); 146} 147 148 149WIndex::WIndex(BPositionIO *dataFile, int32 count) 150{ 151 fEntryList = NULL; 152 fDataFile = dataFile; 153 fEntriesPerBlock = count; 154 fEntrySize = sizeof(WIndexEntry); 155 if (!atomic_or(&kCRCTable, 1)) 156 gen_crc_table(); 157} 158 159 160WIndex::~WIndex(void) 161{ 162 if (fEntryList) 163 free(fEntryList); 164 delete fDataFile; 165} 166 167 168status_t 169WIndex::UnflattenIndex(BPositionIO *io) 170{ 171 if (fEntryList) 172 free(fEntryList); 173 WIndexHead head; 174 175 io->Seek(0, SEEK_SET); 176 io->Read(&head, sizeof(head)); 177 io->Seek(head.offset, SEEK_SET); 178 179 fEntrySize = head.entrySize; 180 fEntries = head.entries; 181 fMaxEntries = fEntriesPerBlock; 182 fBlockSize = fEntriesPerBlock * fEntrySize; 183 fBlocks = fEntries / fEntriesPerBlock + 1;; 184 fIsSorted = true; 185 186 int32 size = (head.entries + 1) * head.entrySize; 187 if (!(fEntryList = (uint8 *)malloc(size))) 188 return B_ERROR; 189 190 if (fEntries) 191 io->Read(fEntryList, size); 192 193 return B_OK; 194} 195 196 197status_t 198WIndex::FlattenIndex(BPositionIO *io) 199{ 200 if (fEntries && !fIsSorted) 201 SortItems(); 202 WIndexHead head; 203 204 head.entries = fEntries; 205 head.entrySize = fEntrySize; 206 head.offset = sizeof(WIndexHead); 207 io->Seek(0, SEEK_SET); 208 io->Write(&head, sizeof(head)); 209 if (fEntries) 210 io->Write(fEntryList, head.entries * head.entrySize); 211 212 return B_OK; 213} 214 215 216int32 217WIndex::Lookup(int32 key) 218{ 219 if (!fEntries) 220 return -1; 221 if (!fIsSorted) 222 SortItems(); 223 224 // Binary Search 225 int32 M, Lb, Ub; 226 Lb = 0; 227 Ub = fEntries - 1; 228 while (true) { 229 M = (Lb + Ub) / 2; 230 if (key < ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key) 231 Ub = M - 1; 232 else if (key > ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key) 233 Lb = M + 1; 234 else 235 return M; 236 if (Lb > Ub) 237 return -1; 238 } 239} 240 241 242status_t 243WIndex::AddItem(WIndexEntry *entry) 244{ 245 if (_BlockCheck() == B_ERROR) 246 return B_ERROR; 247 memcpy(((WIndexEntry *)(fEntryList + (fEntries * fEntrySize))), entry, 248 fEntrySize); 249 fEntries++; 250 fIsSorted = false; 251 return B_OK; 252} 253 254 255void 256WIndex::SortItems(void) 257{ 258 qsort(fEntryList, fEntries, fEntrySize, 259 (int(*)(const void *, const void *))cmp_i_entries); 260 261 fIsSorted = true; 262} 263 264 265status_t 266WIndex::_BlockCheck(void) 267{ 268 if (fEntries < fMaxEntries) 269 return B_OK; 270 fBlocks = fEntries / fEntriesPerBlock + 1; 271 fEntryList = (uint8 *)realloc(fEntryList, fBlockSize * fBlocks); 272 if (!fEntryList) 273 return B_ERROR; 274 return B_OK; 275} 276 277 278status_t 279WIndex::InitIndex(void) 280{ 281 if (fEntryList) 282 free(fEntryList); 283 fIsSorted = 0; 284 fEntries = 0; 285 fMaxEntries = fEntriesPerBlock; 286 fBlockSize = fEntriesPerBlock * fEntrySize; 287 fBlocks = 1; 288 fEntryList = (uint8 *)malloc(fBlockSize); 289 if (!fEntryList) 290 return B_ERROR; 291 return B_OK; 292} 293 294 295int32 296WIndex::GetKey(const char *s) 297{ 298 299 int32 key = 0; 300 /*int32 x; 301 int32 a = 84589; 302 int32 b = 45989; 303 int32 m = 217728; 304 while (*s) { 305 x = *s++ - 'a'; 306 307 key ^= (a * x + b) % m; 308 key <<= 1; 309 }*/ 310 311 key = update_crc(0, s, strlen(s)); 312 313 if (key < 0) // No negative values! 314 key = ~key; 315 316 return key; 317} 318 319 320int32 321cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2) 322{ 323 return e1->key - e2->key; 324} 325 326 327status_t 328WIndex::SetTo(BPositionIO *dataFile) 329{ 330 fDataFile = dataFile; 331 return B_OK; 332} 333 334 335void 336WIndex::Unset(void) 337{ 338 fDataFile = NULL; 339} 340 341 342int32 343WIndex::FindFirst(const char *word) 344{ 345 if (!fEntries) 346 return -1; 347 348 int32 index; 349 char nword[256]; 350 int32 key; 351 352 NormalizeWord(word, nword); 353 key = GetKey(nword); 354 355 if ((index = Lookup(key)) < 0) 356 return -1; 357 // Find first instance of key 358 while ((ItemAt(index - 1))->key == key) 359 index--; 360 return index; 361} 362 363 364FileEntry* 365WIndex::GetEntry(int32 index) 366{ 367 if ((index >= fEntries)||(index < 0)) 368 return NULL; 369 WIndexEntry *ientry; 370 FileEntry *dentry; 371 char *buffer; 372 373 dentry = new FileEntry(); 374 375 ientry = ItemAt(index); 376 377 int32 size; 378 379 fDataFile->Seek(ientry->offset, SEEK_SET); 380 buffer = dentry->LockBuffer(256); 381 fDataFile->Read(buffer, 256); 382 size = _GetEntrySize(ientry, buffer); 383 //buffer[256] = 0; 384 //printf("Entry: = %s\n", buffer); 385 dentry->UnlockBuffer(size); 386 return dentry; 387} 388 389 390size_t 391WIndex::_GetEntrySize(WIndexEntry *entry, const char *entryData) 392{ 393 // eliminate unused parameter warning 394 (void)entry; 395 396 return strcspn(entryData, "\n\r"); 397} 398 399 400FileEntry* 401WIndex::GetEntry(const char *word) 402{ 403 return GetEntry(FindFirst(word)); 404} 405 406 407char* 408WIndex::NormalizeWord(const char *word, char *dest) 409{ 410 const char *src; 411 char *dst; 412 413 // remove dots and copy 414 src = word; 415 dst = dest; 416 while (*src) { 417 if (*src != '.') 418 *dst++ = *src; 419 src++; 420 } 421 *dst = 0; 422 423 // convert to lower-case 424 for (dst = dest; *dst; dst++) 425 *dst = tolower(*dst); 426 return dest; 427} 428 429 430/* crc32h.c -- package to compute 32-bit CRC one byte at a time using */ 431/* the high-bit first (Big-Endian) bit ordering convention */ 432/* */ 433/* Synopsis: */ 434/* gen_crc_table() -- generates a 256-word table containing all CRC */ 435/* remainders for every possible 8-bit byte. It */ 436/* must be executed (once) before any CRC updates. */ 437/* */ 438/* unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size) */ 439/* unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */ 440/* Returns the updated value of the CRC accumulator after */ 441/* processing each byte in the addressed block of data. */ 442/* */ 443/* It is assumed that an unsigned long is at least 32 bits wide and */ 444/* that the predefined type char occupies one 8-bit byte of storage. */ 445/* */ 446/* The generator polynomial used for this version of the package is */ 447/* x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */ 448/* as specified in the Autodin/Ethernet/ADCCP protocol standards. */ 449/* Other degree 32 polynomials may be substituted by re-defining the */ 450/* symbol POLYNOMIAL below. Lower degree polynomials must first be */ 451/* multiplied by an appropriate power of x. The representation used */ 452/* is that the coefficient of x^0 is stored in the LSB of the 32-bit */ 453/* word and the coefficient of x^31 is stored in the most significant */ 454/* bit. The CRC is to be appended to the data most significant byte */ 455/* first. For those protocols in which bytes are transmitted MSB */ 456/* first and in the same order as they are encountered in the block */ 457/* this convention results in the CRC remainder being transmitted with */ 458/* the coefficient of x^31 first and with that of x^0 last (just as */ 459/* would be done by a hardware shift register mechanization). */ 460/* */ 461/* The table lookup technique was adapted from the algorithm described */ 462/* by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/ 463 464 465#define POLYNOMIAL 0x04c11db7L 466 467 468static unsigned long crc_table[256]; 469 470 471void 472gen_crc_table() 473{ 474 // generate the table of CRC remainders for all possible bytes 475 476 register int i, j; 477 register unsigned long crc_accum; 478 479 for (i = 0; i < 256; i++) { 480 crc_accum = ((unsigned long) i << 24); 481 for (j = 0; j < 8; j++) { 482 if (crc_accum & 0x80000000L) 483 crc_accum = (crc_accum << 1) ^ POLYNOMIAL; 484 else 485 crc_accum = (crc_accum << 1); 486 } 487 crc_table[i] = crc_accum; 488 } 489 490 return; 491} 492 493 494unsigned long 495update_crc(unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size) 496{ 497 // update the CRC on the data block one byte at a time 498 499 register int i, j; 500 501 for (j = 0; j < data_blk_size; j++) { 502 i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xff; 503 crc_accum = (crc_accum << 8) ^ crc_table[i]; 504 } 505 506 return crc_accum; 507} 508 509