1/*
2 * Copyright (c) 1998-2007 Matthijs Hollemans
3 * Copyright (c) 2008-2017, Haiku Inc.
4 * Distributed under the terms of the MIT license.
5 *
6 * Authors:
 *      Matthijs Hollemans
 *      Stephan Aßmus <superstippi@gmx.de>
9 *      Philippe Houdoin
10 */
11
12#include "Grepper.h"
13
14#include <errno.h>
15#include <new>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/select.h>
20#include <sys/time.h>
21
22#include <Catalog.h>
23#include <Directory.h>
24#include <image.h>
25#include <List.h>
26#include <Locale.h>
27#include <NodeInfo.h>
28#include <OS.h>
29#include <Path.h>
30#include <UTF8.h>
31
32#include "FileIterator.h"
33#include "Model.h"
34
35#undef B_TRANSLATION_CONTEXT
36#define B_TRANSLATION_CONTEXT "Grepper"
37
38
// End-of-input marker: handed to xargs through its "-E" option and written
// to its input once all file names have been sent.
const char* kEOFTag = "//EOF";
40
41
42using std::nothrow;
43
44char*
45strdup_to_utf8(uint32 encode, const char* src, int32 length)
46{
47	int32 srcLen = length;
48	int32 dstLen = 2 * srcLen;
49	// TODO: stippi: Why the duplicate copy? Why not just return
50	// dst (and allocate with malloc() instead of new)? Is 2 * srcLen
51	// enough space? Check return value of convert_to_utf8 and keep
52	// converting if it didn't fit?
53	char* dst = new (nothrow) char[dstLen + 1];
54	if (dst == NULL)
55		return NULL;
56	int32 cookie = 0;
57	convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
58	dst[dstLen] = '\0';
59	char* dup = strdup(dst);
60	delete[] dst;
61	if (srcLen != length) {
62		fprintf(stderr, "strdup_to_utf8(%" B_PRId32 ", %" B_PRId32
63			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
64	}
65	return dup;
66}
67
68
69char*
70strdup_from_utf8(uint32 encode, const char* src, int32 length)
71{
72	int32 srcLen = length;
73	int32 dstLen = srcLen;
74	char* dst = new (nothrow) char[dstLen + 1];
75	if (dst == NULL)
76		return NULL;
77	int32 cookie = 0;
78	convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
79	// TODO: See above.
80	dst[dstLen] = '\0';
81	char* dup = strdup(dst);
82	delete[] dst;
83	if (srcLen != length) {
84		fprintf(stderr, "strdup_from_utf8(%" B_PRId32 ", %" B_PRId32
85			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
86	}
87	return dup;
88}
89
90
91Grepper::Grepper(const char* pattern, const Model* model,
92		const BHandler* target, FileIterator* iterator)
93	: fPattern(NULL),
94	  fTarget(target),
95	  fRegularExpression(model->fRegularExpression),
96	  fCaseSensitive(model->fCaseSensitive),
97	  fEncoding(model->fEncoding),
98
99	  fIterator(iterator),
100	  fRunnerThreadId(-1),
101	  fXargsInput(-1),
102	  fMustQuit(false)
103{
104	if (fEncoding > 0) {
105		char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
106		_SetPattern(src);
107		free(src);
108	} else
109		_SetPattern(pattern);
110}
111
112
113Grepper::~Grepper()
114{
115	Cancel();
116	free(fPattern);
117	delete fIterator;
118}
119
120
121bool
122Grepper::IsValid() const
123{
124	if (fIterator == NULL || !fIterator->IsValid())
125		return false;
126	return fPattern != NULL;
127}
128
129
130void
131Grepper::Start()
132{
133	Cancel();
134
135	fMustQuit = false;
136	fRunnerThreadId = spawn_thread(
137		_SpawnRunnerThread, "Grep runner", B_NORMAL_PRIORITY, this);
138
139	resume_thread(fRunnerThreadId);
140}
141
142
143void
144Grepper::Cancel()
145{
146	if (fRunnerThreadId < 0)
147		return;
148
149	fMustQuit = true;
150	int32 exitValue;
151	wait_for_thread(fRunnerThreadId, &exitValue);
152	fRunnerThreadId = -1;
153}
154
155
156// #pragma mark - private
157
158
159int32
160Grepper::_SpawnWriterThread(void* cookie)
161{
162	Grepper* self = static_cast<Grepper*>(cookie);
163	return self->_WriterThread();
164}
165
166
167int32
168Grepper::_WriterThread()
169{
170	BMessage message;
171	char fileName[B_PATH_NAME_LENGTH*2];
172	int count = 0;
173	bigtime_t lastProgressReportTime = 0, now;
174
175	printf("paths_writer started.\n");
176
177	while (!fMustQuit && fIterator->GetNextName(fileName)) {
178		BEntry entry(fileName);
179		entry_ref ref;
180		entry.GetRef(&ref);
181		if (!entry.Exists()) {
182			if (fIterator->NotifyNegatives()) {
183				message.MakeEmpty();
184				message.what = MSG_REPORT_RESULT;
185				message.AddString("filename", fileName);
186				message.AddRef("ref", &ref);
187				fTarget.SendMessage(&message);
188			}
189			continue;
190		}
191
192		if (!_EscapeSpecialChars(fileName, sizeof(fileName))) {
193			char tempString[B_PATH_NAME_LENGTH + 32];
194			sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape "
195				"the filename."), fileName);
196			message.MakeEmpty();
197			message.what = MSG_REPORT_ERROR;
198			message.AddString("error", tempString);
199			fTarget.SendMessage(&message);
200			continue;
201		}
202
203		count++;
204
205		// file exists, send it to xargs
206		write(fXargsInput, fileName, strlen(fileName));
207		write(fXargsInput, "\n", 1);
208
209		now = system_time();
210		// to avoid message flood,
211		// report progress no more than 20 times per second
212		if (now - lastProgressReportTime > 50000) {
213			message.MakeEmpty();
214			message.what = MSG_REPORT_FILE_NAME;
215			message.AddString("filename", fileName);
216			fTarget.SendMessage(&message);
217			lastProgressReportTime = now;
218		}
219	}
220
221	write(fXargsInput, kEOFTag, strlen(kEOFTag));
222	write(fXargsInput, "\n", 1);
223	close(fXargsInput);
224
225	printf("paths_writer stopped (%d paths).\n", count);
226
227	return 0;
228}
229
230
231int32
232Grepper::_SpawnRunnerThread(void* cookie)
233{
234	Grepper* self = static_cast<Grepper*>(cookie);
235	return self->_RunnerThread();
236}
237
238
239int32
240Grepper::_RunnerThread()
241{
242	BMessage message;
243	char fileName[B_PATH_NAME_LENGTH];
244
245	const char* argv[32];
246	int argc = 0;
247	argv[argc++] = "xargs";
248
249	// can't use yet the --null mode due to pipe issue
250	// the xargs stdin input pipe closure is not detected
251	// by xargs. Instead, we use eof-string mode
252
253	// argv[argc++] = "--null";
254	argv[argc++] = "-E";
255	argv[argc++] = kEOFTag;
256
257	// Enable parallel mode
258	// Retrieve cpu count for to parallel xargs via -P argument
259	char cpuCount[8];
260	system_info sys_info;
261	get_system_info(&sys_info);
262	snprintf(cpuCount, sizeof(cpuCount), "%" B_PRIu32, sys_info.cpu_count);
263	argv[argc++] = "-P";
264	argv[argc++] = cpuCount;
265
266	// grep command driven by xargs dispatcher
267	argv[argc++] = "grep";
268	argv[argc++] = "-n"; // need matching line(s) number(s)
269	argv[argc++] = "-H"; // need filename prefix
270	if (! fCaseSensitive)
271		argv[argc++] = "-i";
272	if (! fRegularExpression)
273		argv[argc++] = "-F";	 // no a regexp: force fixed string,
274	// Add double dash argument to tell grep
275	// it's the end of commands options
276	argv[argc++] = "--";
277	argv[argc++] = fPattern;
278	argv[argc] = NULL;
279
280	// prepare xargs to run with stdin, stdout and stderr pipes
281
282	int oldStdIn, oldStdOut, oldStdErr;
283	oldStdIn  = dup(STDIN_FILENO);
284	oldStdOut = dup(STDOUT_FILENO);
285	oldStdErr = dup(STDERR_FILENO);
286
287	int fds[2];
288	if (pipe(fds) != 0) {
289		message.MakeEmpty();
290		message.what = MSG_REPORT_ERROR;
291		message.AddString("error",
292			B_TRANSLATE("Failed to open input pipe!"));
293		fTarget.SendMessage(&message);
294		return 0;
295	}
296	dup2(fds[0], STDIN_FILENO);
297	close(fds[0]);
298	fXargsInput = fds[1];	// write to in, appears on command's stdin
299
300	if (pipe(fds) != 0) {
301		close(fXargsInput);
302		message.MakeEmpty();
303		message.what = MSG_REPORT_ERROR;
304		message.AddString("error",
305			B_TRANSLATE("Failed to open output pipe!"));
306		fTarget.SendMessage(&message);
307		return 0;
308	}
309	dup2(fds[1], STDOUT_FILENO);
310	close(fds[1]);
311	int out = fds[0]; // read from out, taken from command's stdout
312
313	if (pipe(fds) != 0) {
314		close(fXargsInput);
315		close(out);
316		message.MakeEmpty();
317		message.what = MSG_REPORT_ERROR;
318		message.AddString("error",
319			B_TRANSLATE("Failed to open errors pipe!"));
320		fTarget.SendMessage(&message);
321		return 0;
322	}
323	dup2(fds[1], STDERR_FILENO);
324	close(fds[1]);
325	int err = fds[0]; // read from err, taken from command's stderr
326
327	// "load" xargs tool
328	thread_id xargsThread = load_image(argc, argv,
329		const_cast<const char**>(environ));
330	// xargsThread is suspended after loading
331
332	// restore our previous stdin, stdout and stderr
333	close(STDIN_FILENO);
334	dup(oldStdIn);
335	close(oldStdIn);
336	close(STDOUT_FILENO);
337	dup(oldStdOut);
338	close(oldStdOut);
339	close(STDERR_FILENO);
340	dup(oldStdErr);
341	close(oldStdErr);
342
343	if (xargsThread < B_OK) {
344		close(fXargsInput);
345		close(out);
346		close(err);
347		message.MakeEmpty();
348		message.what = MSG_REPORT_ERROR;
349		message.AddString("error",
350			B_TRANSLATE("Failed to start xargs program!"));
351		fTarget.SendMessage(&message);
352		return 0;
353	}
354
355	// Listen on xargs's stdout and stderr via select()
356	printf("Running: ");
357	for (int i = 0; i < argc; i++) {
358		printf("%s ", argv[i]);
359	}
360	printf("\n");
361
362	int fdl[2] = { out, err };
363	int maxfd = 0;
364	for (int i = 0; i < 2; i++) {
365		if (maxfd < fdl[i])
366			maxfd = fdl[i];
367	}
368
369	fd_set readSet;
370	struct timeval timeout = { 0, 100000 };
371	char line[B_PATH_NAME_LENGTH * 2];
372
373	FILE* output = fdopen(out, "r");
374	FILE* errors = fdopen(err, "r");
375
376	char currentFileName[B_PATH_NAME_LENGTH];
377	currentFileName[0] = '\0';
378	bool canReadOutput, canReadErrors;
379	canReadOutput = canReadErrors = true;
380
381	thread_id writerThread = spawn_thread(_SpawnWriterThread,
382		"Grep writer", B_LOW_PRIORITY, this);
383	set_thread_priority(xargsThread, B_LOW_PRIORITY);
384
385	// we're ready, let's go!
386	resume_thread(xargsThread);
387	resume_thread(writerThread);
388
389	while (!fMustQuit && (canReadOutput || canReadErrors)) {
390		FD_ZERO(&readSet);
391		if (canReadOutput) {
392			FD_SET(out, &readSet);
393		}
394		if (canReadErrors) {
395			FD_SET(err, &readSet);
396		}
397
398		int result = select(maxfd + 1, &readSet, NULL, NULL, &timeout);
399		if (result == -1 && errno == EINTR)
400			continue;
401		if (result == 0) {
402			// timeout, but meanwhile fMustQuit was changed maybe...
403			continue;
404		}
405		if (result < 0) {
406			perror("select():");
407			message.MakeEmpty();
408			message.what = MSG_REPORT_ERROR;
409			message.AddString("error", strerror(errno));
410			fTarget.SendMessage(&message);
411			break;
412		}
413
414		if (canReadOutput && FD_ISSET(out, &readSet)) {
415			if (fgets(line, sizeof(line), output) != NULL) {
416				// parse grep output
417				int lineNumber = -1;
418				int textPos = -1;
419				sscanf(line, "%[^\n:]:%d:%n", fileName, &lineNumber, &textPos);
420				// printf("sscanf(\"%s\") -> %s %d %d\n", line, fileName,
421				//		lineNumber, textPos);
422				if (textPos > 0) {
423					if (strcmp(fileName, currentFileName) != 0) {
424						fTarget.SendMessage(&message);
425
426						strncpy(currentFileName, fileName,
427							sizeof(currentFileName));
428
429						message.MakeEmpty();
430						message.what = MSG_REPORT_RESULT;
431						message.AddString("filename", fileName);
432
433						BEntry entry(fileName);
434						entry_ref ref;
435						entry.GetRef(&ref);
436						message.AddRef("ref", &ref);
437					}
438
439					char* text = &line[strlen(fileName)+1];
440					// printf("[%s] %s", fileName, text);
441					if (fEncoding > 0) {
442						char* tempdup = strdup_to_utf8(fEncoding, text,
443							strlen(text));
444						message.AddString("text", tempdup);
445						free(tempdup);
446					} else {
447						message.AddString("text", text);
448					}
449					message.AddInt32("line", lineNumber);
450				}
451			} else {
452				canReadOutput = false;
453			}
454		}
455		if (canReadErrors && FD_ISSET(err, &readSet)) {
456			if (fgets(line, sizeof(line), errors) != NULL) {
457				// printf("ERROR: %s", line);
458				if (message.HasString("text"))
459					fTarget.SendMessage(&message);
460				currentFileName[0] = '\0';
461
462				message.MakeEmpty();
463				message.what = MSG_REPORT_ERROR;
464				message.AddString("error", line);
465				fTarget.SendMessage(&message);
466			} else {
467				canReadErrors = false;
468			}
469		}
470	}
471
472	// send last pending message, if any
473	if (message.HasString("text"))
474		fTarget.SendMessage(&message);
475
476	printf("Done.\n");
477	fclose(output);
478	fclose(errors);
479
480	close(out);
481	close(err);
482
483	fMustQuit = true;
484	int32 exitValue;
485	wait_for_thread(xargsThread, &exitValue);
486	wait_for_thread(writerThread, &exitValue);
487
488	message.MakeEmpty();
489	message.what = MSG_SEARCH_FINISHED;
490	fTarget.SendMessage(&message);
491
492	return 0;
493}
494
495
496void
497Grepper::_SetPattern(const char* src)
498{
499	if (src == NULL)
500		return;
501
502	fPattern = strdup(src);
503}
504
505
506bool
507Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
508{
509	char* copy = strdup(buffer);
510	char* start = buffer;
511	uint32 len = strlen(copy);
512	bool result = true;
513	for (uint32 count = 0; count < len; ++count) {
514		if (copy[count] == '\'' || copy[count] == '\\'
515			|| copy[count] == ' ' || copy[count] == '\n'
516			|| copy[count] == '"')
517			*buffer++ = '\\';
518		if (buffer - start == bufferSize - 1) {
519			result = false;
520			break;
521		}
522		*buffer++ = copy[count];
523	}
524	*buffer = '\0';
525	free(copy);
526	return result;
527}
528