1/*
2 * Copyright 2010, Haiku.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *		based on previous work of Ankur Sethi
7 *		Clemens Zeidler <haiku@clemens-zeidler.de>
8 */
9
10#include "CLuceneDataBase.h"
11
12#include <Directory.h>
13#include <File.h>
14#include <TranslatorRoster.h>
15
16
// Debug tracing: while DEBUG_CLUCENE_DATABASE is defined, STRACE() prints its
// printf() style arguments to stdout, prefixed with "FT: ". Otherwise it
// compiles to nothing.
#define DEBUG_CLUCENE_DATABASE
#ifdef DEBUG_CLUCENE_DATABASE
#include <stdio.h>
#	define STRACE(x...) printf("FT: " x)
#else
	// Expand to exactly one statement (instead of a stray ';'), so that
	// "if (...) STRACE(...); else ..." still compiles with tracing disabled.
#	define STRACE(x...) do { } while (0)
#endif
24
25
26using namespace lucene::document;
27using namespace lucene::util;
28
29
30const uint8 kCluceneTries = 10;
31
32
/*!	Converts the multibyte string \a str into a newly allocated,
	null-terminated wide character string, using mbstowcs() (and therefore
	the current locale).

	\param str The null-terminated multibyte string to convert.
	\return The converted string, which the caller has to free with
		\c delete[], or \c NULL if \a str is \c NULL or contains an invalid
		multibyte sequence.
*/
wchar_t* to_wchar(const char *str)
{
	if (str == NULL)
		return NULL;

	// A multibyte string never yields more wide characters than it has
	// bytes, so strlen(str) characters plus the terminator always suffice.
	// This also covers the empty string, which needs room for the terminator.
	const size_t capacity = strlen(str) + 1;
	wchar_t* wStr = new wchar_t[capacity];

	const size_t converted = mbstowcs(wStr, str, capacity);
	if (converted == (size_t)-1 || converted >= capacity) {
		delete[] wStr;
		return NULL;
	}

	// mbstowcs() only writes the terminator when there is room left for it;
	// terminate explicitly so the result is a valid string in any case.
	wStr[converted] = L'\0';
	return wStr;
}
44
45
46CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath)
47	:
48	fDataBasePath(databasePath),
49	fTempPath(databasePath),
50	fIndexWriter(NULL)
51{
52	printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path());
53	create_directory(fDataBasePath.Path(), 0755);
54
55	fTempPath.Append("temp_file");
56}
57
58
59CLuceneWriteDataBase::~CLuceneWriteDataBase()
60{
61	// TODO: delete fTempPath file
62}
63
64
65status_t
66CLuceneWriteDataBase::InitCheck()
67{
68
69	return B_OK;
70}
71
72
73status_t
74CLuceneWriteDataBase::AddDocument(const entry_ref& ref)
75{
76	// check if already in the queue
77	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
78		if (fAddQueue.at(i) == ref)
79			return B_OK;
80	}
81	fAddQueue.push_back(ref);
82
83	return B_OK;
84}
85
86
87status_t
88CLuceneWriteDataBase::RemoveDocument(const entry_ref& ref)
89{
90	// check if already in the queue
91	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
92		if (fDeleteQueue.at(i) == ref)
93			return B_OK;
94	}
95	fDeleteQueue.push_back(ref);
96	return B_OK;
97}
98
99
100status_t
101CLuceneWriteDataBase::Commit()
102{
103	if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0)
104		return B_OK;
105	STRACE("Commit\n");
106
107	_RemoveDocuments(fAddQueue);
108	_RemoveDocuments(fDeleteQueue);
109	fDeleteQueue.clear();
110
111	if (fAddQueue.size() == 0)
112		return B_OK;
113
114	fIndexWriter = _OpenIndexWriter();
115	if (fIndexWriter == NULL)
116		return B_ERROR;
117
118	status_t status = B_OK;
119	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
120		if (!_IndexDocument(fAddQueue.at(i))) {
121			status = B_ERROR;
122			break;
123		}
124	}
125
126	fAddQueue.clear();
127	fIndexWriter->close();
128	delete fIndexWriter;
129	fIndexWriter = NULL;
130
131	return status;
132}
133
134
135IndexWriter*
136CLuceneWriteDataBase::_OpenIndexWriter()
137{
138	IndexWriter* writer = NULL;
139	for (int i = 0; i < kCluceneTries; i++) {
140		try {
141			bool createIndex = true;
142			if (IndexReader::indexExists(fDataBasePath.Path()))
143				createIndex = false;
144
145			writer = new IndexWriter(fDataBasePath.Path(),
146				&fStandardAnalyzer, createIndex);
147			if (writer)
148				break;
149		} catch (CLuceneError &error) {
150			STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what());
151			delete writer;
152			writer = NULL;
153		}
154	}
155	return writer;
156}
157
158
159IndexReader*
160CLuceneWriteDataBase::_OpenIndexReader()
161{
162	IndexReader* reader = NULL;
163
164	BEntry entry(fDataBasePath.Path(), NULL);
165	if (!entry.Exists())
166		return NULL;
167
168	for (int i = 0; i < kCluceneTries; i++) {
169		try {
170			if (!IndexReader::indexExists(fDataBasePath.Path()))
171				return NULL;
172
173			reader = IndexReader::open(fDataBasePath.Path());
174			if (reader)
175				break;
176		} catch (CLuceneError &error) {
177			STRACE("CLuceneError: _OpenIndexReader %s\n", error.what());
178			delete reader;
179			reader = NULL;
180		}
181	}
182
183	return reader;
184}
185
186
187bool
188CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs)
189{
190	IndexReader *reader = NULL;
191	reader = _OpenIndexReader();
192	if (!reader)
193		return false;
194	bool status = false;
195
196	for (unsigned int i = 0; i < docs.size(); i++) {
197		BPath path(&docs.at(i));
198		wchar_t* wPath = to_wchar(path.Path());
199		if (wPath == NULL)
200			continue;
201
202		for (int i = 0; i < kCluceneTries; i++) {
203			status = _RemoveDocument(wPath, reader);
204			if (status)
205				break;
206			reader->close();
207			delete reader;
208			reader = _OpenIndexReader();
209			if (!reader) {
210				status = false;
211				break;
212			}
213		}
214		delete[] wPath;
215
216		if (!status)
217			break;
218	}
219
220	reader->close();
221	delete reader;
222
223	return status;
224}
225
226
227bool
228CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader)
229{
230	try {
231		Term term(_T("path"), wPath);
232		reader->deleteDocuments(&term);
233	} catch (CLuceneError &error) {
234		STRACE("CLuceneError: deleteDocuments %s\n", error.what());
235		return false;
236	}
237	return true;
238}
239
240
241bool
242CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref)
243{
244	BPath path(&ref);
245
246	BFile inFile, outFile;
247	inFile.SetTo(path.Path(), B_READ_ONLY);
248	if (inFile.InitCheck() != B_OK) {
249		STRACE("Can't open inFile %s\n", path.Path());
250		return false;
251	}
252	outFile.SetTo(fTempPath.Path(),
253		B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
254	if (outFile.InitCheck() != B_OK) {
255		STRACE("Can't open outFile %s\n", fTempPath.Path());
256		return false;
257	}
258
259	BTranslatorRoster* translatorRoster = BTranslatorRoster::Default();
260	if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT')
261		!= B_OK)
262		return false;
263
264	inFile.Unset();
265	outFile.Unset();
266
267	FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8");
268	wchar_t* wPath = to_wchar(path.Path());
269	if (wPath == NULL)
270		return false;
271
272	Document *document = new Document;
273	Field contentField(_T("contents"), fileReader,
274		Field::STORE_NO | Field::INDEX_TOKENIZED);
275	document->add(contentField);
276	Field pathField(_T("path"), wPath,
277		Field::STORE_YES | Field::INDEX_UNTOKENIZED);
278	document->add(pathField);
279
280	bool status = true;
281	for (int i = 0; i < kCluceneTries; i++) {
282		try {
283			fIndexWriter->addDocument(document);
284			STRACE("document added, retries: %i\n", i);
285			break;
286		} catch (CLuceneError &error) {
287			STRACE("CLuceneError addDocument %s\n", error.what());
288			fIndexWriter->close();
289			delete fIndexWriter;
290			fIndexWriter = _OpenIndexWriter();
291			if (fIndexWriter == NULL) {
292				status = false;
293				break;
294			}
295		}
296	}
297
298	if (!status)
299		delete document;
300	delete[] wPath;
301	return status;
302}
303