1252690Sadrian/* 2252690Sadrian * Copyright 2010, Haiku. 3252690Sadrian * Distributed under the terms of the MIT License. 4252690Sadrian * 5252690Sadrian * Authors: 6252690Sadrian * based on previous work of Ankur Sethi 7252690Sadrian * Clemens Zeidler <haiku@clemens-zeidler.de> 8252690Sadrian */ 9252690Sadrian 10252690Sadrian#include "CLuceneDataBase.h" 11252690Sadrian 12252690Sadrian#include <Directory.h> 13252690Sadrian#include <File.h> 14252690Sadrian#include <TranslatorRoster.h> 15252690Sadrian 16252690Sadrian 17252690Sadrian#define DEBUG_CLUCENE_DATABASE 18252690Sadrian#ifdef DEBUG_CLUCENE_DATABASE 19252690Sadrian#include <stdio.h> 20252690Sadrian# define STRACE(x...) printf("FT: " x) 21252690Sadrian#else 22252690Sadrian# define STRACE(x...) ; 23252690Sadrian#endif 24252690Sadrian 25252690Sadrian 26252690Sadrianusing namespace lucene::document; 27252690Sadrianusing namespace lucene::util; 28252690Sadrian 29252690Sadrian 30252690Sadrianconst uint8 kCluceneTries = 10; 31252690Sadrian 32252690Sadrian 33252690Sadrianwchar_t* to_wchar(const char *str) 34252690Sadrian{ 35252690Sadrian int size = strlen(str) * sizeof(wchar_t) ; 36252690Sadrian wchar_t *wStr = new wchar_t[size] ; 37252690Sadrian 38252690Sadrian if (mbstowcs(wStr, str, size) == -1) { 39252690Sadrian delete[] wStr ; 40252690Sadrian return NULL ; 41252690Sadrian } else 42252690Sadrian return wStr ; 43252690Sadrian} 44252690Sadrian 45252690Sadrian 46252690SadrianCLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath) 47252690Sadrian : 48252690Sadrian fDataBasePath(databasePath), 49252690Sadrian fTempPath(databasePath), 50252690Sadrian fIndexWriter(NULL) 51252690Sadrian{ 52252690Sadrian printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path()); 53252690Sadrian create_directory(fDataBasePath.Path(), 0755); 54252690Sadrian 55252690Sadrian fTempPath.Append("temp_file"); 56252690Sadrian} 57252690Sadrian 58252690Sadrian 59252690SadrianCLuceneWriteDataBase::~CLuceneWriteDataBase() 60252690Sadrian{ 61252690Sadrian // TODO: delete fTempPath file 62252690Sadrian} 63252690Sadrian 64252690Sadrian 65252690Sadrianstatus_t 66252690SadrianCLuceneWriteDataBase::InitCheck() 67252690Sadrian{ 68252690Sadrian 69252690Sadrian return B_OK; 70252690Sadrian} 71252690Sadrian 72252690Sadrian 73252690Sadrianstatus_t 74252690SadrianCLuceneWriteDataBase::AddDocument(const entry_ref& ref) 75252690Sadrian{ 76252690Sadrian // check if already in the queue 77252690Sadrian for (unsigned int i = 0; i < fAddQueue.size(); i++) { 78252690Sadrian if (fAddQueue.at(i) == ref) 79252690Sadrian return B_OK; 80252690Sadrian } 81252690Sadrian fAddQueue.push_back(ref); 82252690Sadrian 83252690Sadrian return B_OK; 84252690Sadrian} 85252690Sadrian 86252690Sadrian 87252690Sadrianstatus_t 88252690SadrianCLuceneWriteDataBase::RemoveDocument(const entry_ref& ref) 89252690Sadrian{ 90252690Sadrian // check if already in the queue 91252690Sadrian for (unsigned int i = 0; i < fAddQueue.size(); i++) { 92252690Sadrian if (fDeleteQueue.at(i) == ref) 93252690Sadrian return B_OK; 94252690Sadrian } 95252690Sadrian fDeleteQueue.push_back(ref); 96252690Sadrian return B_OK; 97252690Sadrian} 98252690Sadrian 99252690Sadrian 100252690Sadrianstatus_t 101252690SadrianCLuceneWriteDataBase::Commit() 102252690Sadrian{ 103252690Sadrian if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0) 104252690Sadrian return B_OK; 105252690Sadrian STRACE("Commit\n"); 106252690Sadrian 107252690Sadrian _RemoveDocuments(fAddQueue); 108252690Sadrian _RemoveDocuments(fDeleteQueue); 109252690Sadrian fDeleteQueue.clear(); 110252690Sadrian 111252690Sadrian if (fAddQueue.size() == 0) 112252690Sadrian return B_OK; 113252690Sadrian 114252690Sadrian fIndexWriter = _OpenIndexWriter(); 115252690Sadrian if (fIndexWriter == NULL) 116252690Sadrian return B_ERROR; 117252690Sadrian 118252690Sadrian status_t status = B_OK; 119252690Sadrian for (unsigned int i = 0; i < fAddQueue.size(); i++) { 120252691Sadrian if (!_IndexDocument(fAddQueue.at(i))) { 121252691Sadrian status = B_ERROR; 122252691Sadrian break; 123252691Sadrian } 124252691Sadrian } 125 126 fAddQueue.clear(); 127 fIndexWriter->close(); 128 delete fIndexWriter; 129 fIndexWriter = NULL; 130 131 return status; 132} 133 134 135IndexWriter* 136CLuceneWriteDataBase::_OpenIndexWriter() 137{ 138 IndexWriter* writer = NULL; 139 for (int i = 0; i < kCluceneTries; i++) { 140 try { 141 bool createIndex = true; 142 if (IndexReader::indexExists(fDataBasePath.Path())) 143 createIndex = false; 144 145 writer = new IndexWriter(fDataBasePath.Path(), 146 &fStandardAnalyzer, createIndex); 147 if (writer) 148 break; 149 } catch (CLuceneError &error) { 150 STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what()); 151 delete writer; 152 writer = NULL; 153 } 154 } 155 return writer; 156} 157 158 159IndexReader* 160CLuceneWriteDataBase::_OpenIndexReader() 161{ 162 IndexReader* reader = NULL; 163 164 BEntry entry(fDataBasePath.Path(), NULL); 165 if (!entry.Exists()) 166 return NULL; 167 168 for (int i = 0; i < kCluceneTries; i++) { 169 try { 170 if (!IndexReader::indexExists(fDataBasePath.Path())) 171 return NULL; 172 173 reader = IndexReader::open(fDataBasePath.Path()); 174 if (reader) 175 break; 176 } catch (CLuceneError &error) { 177 STRACE("CLuceneError: _OpenIndexReader %s\n", error.what()); 178 delete reader; 179 reader = NULL; 180 } 181 } 182 183 return reader; 184} 185 186 187bool 188CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs) 189{ 190 IndexReader *reader = NULL; 191 reader = _OpenIndexReader(); 192 if (!reader) 193 return false; 194 bool status = false; 195 196 for (unsigned int i = 0; i < docs.size(); i++) { 197 BPath path(&docs.at(i)); 198 wchar_t* wPath = to_wchar(path.Path()); 199 if (wPath == NULL) 200 continue; 201 202 for (int i = 0; i < kCluceneTries; i++) { 203 status = _RemoveDocument(wPath, reader); 204 if (status) 205 break; 206 reader->close(); 207 delete reader; 208 reader = _OpenIndexReader(); 209 if (!reader) { 210 status = false; 211 break; 212 } 213 } 214 delete[] wPath; 215 216 if (!status) 217 break; 218 } 219 220 reader->close(); 221 delete reader; 222 223 return status; 224} 225 226 227bool 228CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader) 229{ 230 try { 231 Term term(_T("path"), wPath); 232 reader->deleteDocuments(&term); 233 } catch (CLuceneError &error) { 234 STRACE("CLuceneError: deleteDocuments %s\n", error.what()); 235 return false; 236 } 237 return true; 238} 239 240 241bool 242CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref) 243{ 244 BPath path(&ref); 245 246 BFile inFile, outFile; 247 inFile.SetTo(path.Path(), B_READ_ONLY); 248 if (inFile.InitCheck() != B_OK) { 249 STRACE("Can't open inFile %s\n", path.Path()); 250 return false; 251 } 252 outFile.SetTo(fTempPath.Path(), 253 B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE); 254 if (outFile.InitCheck() != B_OK) { 255 STRACE("Can't open outFile %s\n", fTempPath.Path()); 256 return false; 257 } 258 259 BTranslatorRoster* translatorRoster = BTranslatorRoster::Default(); 260 if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT') 261 != B_OK) 262 return false; 263 264 inFile.Unset(); 265 outFile.Unset(); 266 267 FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8"); 268 wchar_t* wPath = to_wchar(path.Path()); 269 if (wPath == NULL) 270 return false; 271 272 Document *document = new Document; 273 Field contentField(_T("contents"), fileReader, 274 Field::STORE_NO | Field::INDEX_TOKENIZED); 275 document->add(contentField); 276 Field pathField(_T("path"), wPath, 277 Field::STORE_YES | Field::INDEX_UNTOKENIZED); 278 document->add(pathField); 279 280 bool status = true; 281 for (int i = 0; i < kCluceneTries; i++) { 282 try { 283 fIndexWriter->addDocument(document); 284 STRACE("document added, retries: %i\n", i); 285 break; 286 } catch (CLuceneError &error) { 287 STRACE("CLuceneError addDocument %s\n", error.what()); 288 fIndexWriter->close(); 289 delete fIndexWriter; 290 fIndexWriter = _OpenIndexWriter(); 291 if (fIndexWriter == NULL) { 292 status = false; 293 break; 294 } 295 } 296 } 297 298 if (!status) 299 delete document; 300 delete[] wPath; 301 return status; 302} 303