1252690Sadrian/*
2252690Sadrian * Copyright 2010, Haiku.
3252690Sadrian * Distributed under the terms of the MIT License.
4252690Sadrian *
5252690Sadrian * Authors:
6252690Sadrian *		based on previous work of Ankur Sethi
7252690Sadrian *		Clemens Zeidler <haiku@clemens-zeidler.de>
8252690Sadrian */
9252690Sadrian
10252690Sadrian#include "CLuceneDataBase.h"
11252690Sadrian
12252690Sadrian#include <Directory.h>
13252690Sadrian#include <File.h>
14252690Sadrian#include <TranslatorRoster.h>
15252690Sadrian
16252690Sadrian
// Debug tracing. While DEBUG_CLUCENE_DATABASE is defined, STRACE() forwards
// its arguments to printf() with an "FT: " prefix put in front of the format
// string; otherwise it expands to an empty statement.
#define DEBUG_CLUCENE_DATABASE
#if defined(DEBUG_CLUCENE_DATABASE)
#	include <stdio.h>
#	define STRACE(x...) printf("FT: " x)
#else
#	define STRACE(x...) ;
#endif
24252690Sadrian
25252690Sadrian
26252690Sadrianusing namespace lucene::document;
27252690Sadrianusing namespace lucene::util;
28252690Sadrian
29252690Sadrian
30252690Sadrianconst uint8 kCluceneTries = 10;
31252690Sadrian
32252690Sadrian
/*!	Converts the multibyte string \a str into a newly allocated,
	NUL-terminated wide character string using mbstowcs().

	\param str The multibyte string to convert. May be NULL.
	\return A buffer allocated with new[] that the caller has to release
		with delete[], or NULL if \a str is NULL or mbstowcs() reports an
		invalid multibyte sequence.
*/
wchar_t* to_wchar(const char *str)
{
	if (str == NULL)
		return NULL;

	// Every wide character is produced from at least one byte, so
	// strlen(str) characters plus one slot for the terminator always
	// suffice. This also gives the empty string a one element buffer.
	const size_t capacity = strlen(str) + 1;
	wchar_t* wStr = new wchar_t[capacity];

	const size_t converted = mbstowcs(wStr, str, capacity);
	if (converted == static_cast<size_t>(-1)) {
		delete[] wStr;
		return NULL;
	}

	// Terminate explicitly; the index is clamped so that the write can never
	// leave the buffer.
	wStr[converted < capacity ? converted : capacity - 1] = L'\0';
	return wStr;
}
44252690Sadrian
45252690Sadrian
46252690SadrianCLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath)
47252690Sadrian	:
48252690Sadrian	fDataBasePath(databasePath),
49252690Sadrian	fTempPath(databasePath),
50252690Sadrian	fIndexWriter(NULL)
51252690Sadrian{
52252690Sadrian	printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path());
53252690Sadrian	create_directory(fDataBasePath.Path(), 0755);
54252690Sadrian
55252690Sadrian	fTempPath.Append("temp_file");
56252690Sadrian}
57252690Sadrian
58252690Sadrian
59252690SadrianCLuceneWriteDataBase::~CLuceneWriteDataBase()
60252690Sadrian{
61252690Sadrian	// TODO: delete fTempPath file
62252690Sadrian}
63252690Sadrian
64252690Sadrian
65252690Sadrianstatus_t
66252690SadrianCLuceneWriteDataBase::InitCheck()
67252690Sadrian{
68252690Sadrian
69252690Sadrian	return B_OK;
70252690Sadrian}
71252690Sadrian
72252690Sadrian
73252690Sadrianstatus_t
74252690SadrianCLuceneWriteDataBase::AddDocument(const entry_ref& ref)
75252690Sadrian{
76252690Sadrian	// check if already in the queue
77252690Sadrian	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
78252690Sadrian		if (fAddQueue.at(i) == ref)
79252690Sadrian			return B_OK;
80252690Sadrian	}
81252690Sadrian	fAddQueue.push_back(ref);
82252690Sadrian
83252690Sadrian	return B_OK;
84252690Sadrian}
85252690Sadrian
86252690Sadrian
87252690Sadrianstatus_t
88252690SadrianCLuceneWriteDataBase::RemoveDocument(const entry_ref& ref)
89252690Sadrian{
90252690Sadrian	// check if already in the queue
91252690Sadrian	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
92252690Sadrian		if (fDeleteQueue.at(i) == ref)
93252690Sadrian			return B_OK;
94252690Sadrian	}
95252690Sadrian	fDeleteQueue.push_back(ref);
96252690Sadrian	return B_OK;
97252690Sadrian}
98252690Sadrian
99252690Sadrian
100252690Sadrianstatus_t
101252690SadrianCLuceneWriteDataBase::Commit()
102252690Sadrian{
103252690Sadrian	if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0)
104252690Sadrian		return B_OK;
105252690Sadrian	STRACE("Commit\n");
106252690Sadrian
107252690Sadrian	_RemoveDocuments(fAddQueue);
108252690Sadrian	_RemoveDocuments(fDeleteQueue);
109252690Sadrian	fDeleteQueue.clear();
110252690Sadrian
111252690Sadrian	if (fAddQueue.size() == 0)
112252690Sadrian		return B_OK;
113252690Sadrian
114252690Sadrian	fIndexWriter = _OpenIndexWriter();
115252690Sadrian	if (fIndexWriter == NULL)
116252690Sadrian		return B_ERROR;
117252690Sadrian
118252690Sadrian	status_t status = B_OK;
119252690Sadrian	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
120252691Sadrian		if (!_IndexDocument(fAddQueue.at(i))) {
121252691Sadrian			status = B_ERROR;
122252691Sadrian			break;
123252691Sadrian		}
124252691Sadrian	}
125
126	fAddQueue.clear();
127	fIndexWriter->close();
128	delete fIndexWriter;
129	fIndexWriter = NULL;
130
131	return status;
132}
133
134
135IndexWriter*
136CLuceneWriteDataBase::_OpenIndexWriter()
137{
138	IndexWriter* writer = NULL;
139	for (int i = 0; i < kCluceneTries; i++) {
140		try {
141			bool createIndex = true;
142			if (IndexReader::indexExists(fDataBasePath.Path()))
143				createIndex = false;
144
145			writer = new IndexWriter(fDataBasePath.Path(),
146				&fStandardAnalyzer, createIndex);
147			if (writer)
148				break;
149		} catch (CLuceneError &error) {
150			STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what());
151			delete writer;
152			writer = NULL;
153		}
154	}
155	return writer;
156}
157
158
159IndexReader*
160CLuceneWriteDataBase::_OpenIndexReader()
161{
162	IndexReader* reader = NULL;
163
164	BEntry entry(fDataBasePath.Path(), NULL);
165	if (!entry.Exists())
166		return NULL;
167
168	for (int i = 0; i < kCluceneTries; i++) {
169		try {
170			if (!IndexReader::indexExists(fDataBasePath.Path()))
171				return NULL;
172
173			reader = IndexReader::open(fDataBasePath.Path());
174			if (reader)
175				break;
176		} catch (CLuceneError &error) {
177			STRACE("CLuceneError: _OpenIndexReader %s\n", error.what());
178			delete reader;
179			reader = NULL;
180		}
181	}
182
183	return reader;
184}
185
186
187bool
188CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs)
189{
190	IndexReader *reader = NULL;
191	reader = _OpenIndexReader();
192	if (!reader)
193		return false;
194	bool status = false;
195
196	for (unsigned int i = 0; i < docs.size(); i++) {
197		BPath path(&docs.at(i));
198		wchar_t* wPath = to_wchar(path.Path());
199		if (wPath == NULL)
200			continue;
201
202		for (int i = 0; i < kCluceneTries; i++) {
203			status = _RemoveDocument(wPath, reader);
204			if (status)
205				break;
206			reader->close();
207			delete reader;
208			reader = _OpenIndexReader();
209			if (!reader) {
210				status = false;
211				break;
212			}
213		}
214		delete[] wPath;
215
216		if (!status)
217			break;
218	}
219
220	reader->close();
221	delete reader;
222
223	return status;
224}
225
226
227bool
228CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader)
229{
230	try {
231		Term term(_T("path"), wPath);
232		reader->deleteDocuments(&term);
233	} catch (CLuceneError &error) {
234		STRACE("CLuceneError: deleteDocuments %s\n", error.what());
235		return false;
236	}
237	return true;
238}
239
240
241bool
242CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref)
243{
244	BPath path(&ref);
245
246	BFile inFile, outFile;
247	inFile.SetTo(path.Path(), B_READ_ONLY);
248	if (inFile.InitCheck() != B_OK) {
249		STRACE("Can't open inFile %s\n", path.Path());
250		return false;
251	}
252	outFile.SetTo(fTempPath.Path(),
253		B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
254	if (outFile.InitCheck() != B_OK) {
255		STRACE("Can't open outFile %s\n", fTempPath.Path());
256		return false;
257	}
258
259	BTranslatorRoster* translatorRoster = BTranslatorRoster::Default();
260	if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT')
261		!= B_OK)
262		return false;
263
264	inFile.Unset();
265	outFile.Unset();
266
267	FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8");
268	wchar_t* wPath = to_wchar(path.Path());
269	if (wPath == NULL)
270		return false;
271
272	Document *document = new Document;
273	Field contentField(_T("contents"), fileReader,
274		Field::STORE_NO | Field::INDEX_TOKENIZED);
275	document->add(contentField);
276	Field pathField(_T("path"), wPath,
277		Field::STORE_YES | Field::INDEX_UNTOKENIZED);
278	document->add(pathField);
279
280	bool status = true;
281	for (int i = 0; i < kCluceneTries; i++) {
282		try {
283			fIndexWriter->addDocument(document);
284			STRACE("document added, retries: %i\n", i);
285			break;
286		} catch (CLuceneError &error) {
287			STRACE("CLuceneError addDocument %s\n", error.what());
288			fIndexWriter->close();
289			delete fIndexWriter;
290			fIndexWriter = _OpenIndexWriter();
291			if (fIndexWriter == NULL) {
292				status = false;
293				break;
294			}
295		}
296	}
297
298	if (!status)
299		delete document;
300	delete[] wPath;
301	return status;
302}
303