1/* vi: set sw=4 ts=4: */
2/*
3 * Mini grep implementation for busybox using libc regex.
4 *
5 * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
6 * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
7 *
8 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
9 */
10/* BB_AUDIT SUSv3 defects - unsupported option -x.  */
11/* BB_AUDIT GNU defects - always acts as -a.  */
12/* http://www.opengroup.org/onlinepubs/007904975/utilities/grep.html */
13/*
14 * 2004,2006 (C) Vladimir Oleynik <dzo@simtreas.ru> -
15 * correction "-e pattern1 -e pattern2" logic and more optimizations.
16 * precompiled regex
17 */
18/*
19 * (C) 2006 Jac Goudsmit added -o option
20 */
21
22#include "libbb.h"
23#include "xregex.h"
24
/* options */
/*
 * Option string passed to getopt32() in grep_main().
 * The letters are listed in the same order as the OPTBIT_* enumerators
 * that follow (l, n, q, v, s, c, F, i, H, h, e, f, L, o, r, m, then the
 * optional A, B, C, E, w).
 * NOTE(review): presumably getopt32() assigns bit N of option_mask32 to
 * the Nth letter, which is why the two orders must agree - confirm
 * against libbb's getopt32 documentation.
 * The trailing "aI" have no OPTBIT_* counterpart: they are accepted but
 * never tested anywhere in this file.
 */
#define OPTSTR_GREP \
	"lnqvscFiHhe:f:Lorm:" \
	USE_FEATURE_GREP_CONTEXT("A:B:C:") \
	USE_FEATURE_GREP_EGREP_ALIAS("E") \
	USE_DESKTOP("w") \
	"aI"
/* ignored: -a "assume all files to be text" */
/* ignored: -I "assume binary files have no matches" */
34
/*
 * Option bookkeeping.
 * OPTBIT_x is the bit position of option -x and OPT_x is the matching
 * mask to test against option_mask32.
 * The optional entries (A, B, C, E, w) are wrapped in USE_*() macros;
 * NOTE(review): these presumably expand to nothing when the feature is
 * disabled, so that the "+ 0" makes the corresponding OPT_x mask zero -
 * confirm against the USE_* macro definitions.
 */
enum {
	OPTBIT_l, /* list matched file names only */
	OPTBIT_n, /* print line# */
	OPTBIT_q, /* quiet - exit(0) of first match */
	OPTBIT_v, /* invert the match, to select non-matching lines */
	OPTBIT_s, /* suppress errors about file open errors */
	OPTBIT_c, /* count matches per file (suppresses normal output) */
	OPTBIT_F, /* literal match */
	OPTBIT_i, /* case-insensitive */
	OPTBIT_H, /* force filename display */
	OPTBIT_h, /* inhibit filename display */
	OPTBIT_e, /* -e PATTERN */
	OPTBIT_f, /* -f FILE_WITH_PATTERNS */
	OPTBIT_L, /* list unmatched file names only */
	OPTBIT_o, /* show only matching parts of lines */
	OPTBIT_r, /* recurse dirs */
	OPTBIT_m, /* -m MAX_MATCHES */
	USE_FEATURE_GREP_CONTEXT(    OPTBIT_A ,) /* -A NUM: after-match context */
	USE_FEATURE_GREP_CONTEXT(    OPTBIT_B ,) /* -B NUM: before-match context */
	USE_FEATURE_GREP_CONTEXT(    OPTBIT_C ,) /* -C NUM: -A and -B combined */
	USE_FEATURE_GREP_EGREP_ALIAS(OPTBIT_E ,) /* extended regexp */
	USE_DESKTOP(                 OPTBIT_w ,) /* whole word match */
	/* masks derived from the bit positions above */
	OPT_l = 1 << OPTBIT_l,
	OPT_n = 1 << OPTBIT_n,
	OPT_q = 1 << OPTBIT_q,
	OPT_v = 1 << OPTBIT_v,
	OPT_s = 1 << OPTBIT_s,
	OPT_c = 1 << OPTBIT_c,
	OPT_F = 1 << OPTBIT_F,
	OPT_i = 1 << OPTBIT_i,
	OPT_H = 1 << OPTBIT_H,
	OPT_h = 1 << OPTBIT_h,
	OPT_e = 1 << OPTBIT_e,
	OPT_f = 1 << OPTBIT_f,
	OPT_L = 1 << OPTBIT_L,
	OPT_o = 1 << OPTBIT_o,
	OPT_r = 1 << OPTBIT_r,
	OPT_m = 1 << OPTBIT_m,
	OPT_A = USE_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_A)) + 0,
	OPT_B = USE_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_B)) + 0,
	OPT_C = USE_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_C)) + 0,
	OPT_E = USE_FEATURE_GREP_EGREP_ALIAS((1 << OPTBIT_E)) + 0,
	OPT_w = USE_DESKTOP(                 (1 << OPTBIT_w)) + 0,
};
79
/*
 * Readable shorthands for testing individual option bits.
 * Each one yields the raw masked value (zero or the option's mask),
 * not a normalized 0/1.
 */
#define PRINT_FILES_WITH_MATCHES    (option_mask32 & OPT_l)
#define PRINT_LINE_NUM              (option_mask32 & OPT_n)
#define BE_QUIET                    (option_mask32 & OPT_q)
#define SUPPRESS_ERR_MSGS           (option_mask32 & OPT_s)
#define PRINT_MATCH_COUNTS          (option_mask32 & OPT_c)
#define FGREP_FLAG                  (option_mask32 & OPT_F)
#define PRINT_FILES_WITHOUT_MATCHES (option_mask32 & OPT_L)
87
/* small integer type used for the boolean-like flags below */
typedef unsigned char byte_t;

static int max_matches;         /* value of -m; parsed in grep_main() */
static int reflags;             /* flags handed to xregcomp() for every pattern */
static byte_t invert_search;    /* 1 if -v was given, else 0 */
static byte_t print_filename;   /* nonzero: prefix output lines with the file name */
static byte_t open_errors;      /* set when a file could not be opened; makes grep_main() return 2 */

#if ENABLE_FEATURE_GREP_CONTEXT
static byte_t did_print_line;   /* set once print_line() has emitted a line */
static int lines_before;        /* -B NUM: size of the 'before' context buffer */
static int lines_after;         /* -A NUM: lines to print after each match */
static char **before_buf;       /* circular buffer of lines_before saved lines */
static int last_line_printed;   /* line number most recently passed through print_line() */
#endif /* ENABLE_FEATURE_GREP_CONTEXT */
/* globals used internally */
static llist_t *pattern_head;   /* growable list of patterns to match */
static const char *cur_file;    /* the current file we are reading */
106
/*
 * One entry of the pattern list (stored as the data pointer of an
 * llist_t node hanging off pattern_head).
 */
typedef struct grep_list_data_t {
	char *pattern;  /* pattern text as given by the user or read from a -f file */
	regex_t preg;   /* compiled form; valid only once COMPILED is set */
/* pattern was heap-allocated and is freed during cleanup in grep_main() */
#define PATTERN_MEM_A 1
/* preg has been filled in by xregcomp() (done lazily in grep_file()) */
#define COMPILED 2
	int flg_mem_alocated_compiled;  /* bitmask of the two flags above */
} grep_list_data_t;
114
115static void print_line(const char *line, int linenum, char decoration)
116{
117#if ENABLE_FEATURE_GREP_CONTEXT
118	/* Happens when we go to next file, immediately hit match
119	 * and try to print prev context... from prev file! Don't do it */
120	if (linenum < 1)
121		return;
122	/* possibly print the little '--' separator */
123	if ((lines_before || lines_after) && did_print_line &&
124			last_line_printed != linenum - 1) {
125		puts("--");
126	}
127	/* guard against printing "--" before first line of first file */
128	did_print_line = 1;
129	last_line_printed = linenum;
130#endif
131	if (print_filename)
132		printf("%s%c", cur_file, decoration);
133	if (PRINT_LINE_NUM)
134		printf("%i%c", linenum, decoration);
135	/* Emulate weird GNU grep behavior with -ov */
136	if ((option_mask32 & (OPT_v|OPT_o)) != (OPT_v|OPT_o))
137		puts(line);
138}
139
140static int grep_file(FILE *file)
141{
142	char *line;
143	byte_t ret;
144	int linenum = 0;
145	int nmatches = 0;
146	regmatch_t regmatch;
147#if ENABLE_FEATURE_GREP_CONTEXT
148	int print_n_lines_after = 0;
149	int curpos = 0; /* track where we are in the circular 'before' buffer */
150	int idx = 0; /* used for iteration through the circular buffer */
151#else
152	enum { print_n_lines_after = 0 };
153#endif /* ENABLE_FEATURE_GREP_CONTEXT */
154
155	while ((line = xmalloc_getline(file)) != NULL) {
156		llist_t *pattern_ptr = pattern_head;
157		grep_list_data_t * gl;
158
159		linenum++;
160		ret = 0;
161		while (pattern_ptr) {
162			gl = (grep_list_data_t *)pattern_ptr->data;
163			if (FGREP_FLAG) {
164				ret = strstr(line, gl->pattern) != NULL;
165			} else {
166				/*
167				 * test for a postitive-assertion match (regexec returns success (0)
168				 * and the user did not specify invert search), or a negative-assertion
169				 * match (regexec returns failure (REG_NOMATCH) and the user specified
170				 * invert search)
171				 */
172				if (!(gl->flg_mem_alocated_compiled & COMPILED)) {
173					gl->flg_mem_alocated_compiled |= COMPILED;
174					xregcomp(&(gl->preg), gl->pattern, reflags);
175				}
176				regmatch.rm_so = 0;
177				regmatch.rm_eo = 0;
178				if (regexec(&(gl->preg), line, 1, &regmatch, 0) == 0) {
179					if (!(option_mask32 & OPT_w))
180						ret = 1;
181					else {
182						char c = ' ';
183						if (regmatch.rm_so)
184							c = line[regmatch.rm_so - 1];
185						if (!isalnum(c) && c != '_') {
186							c = line[regmatch.rm_eo];
187							if (!c || (!isalnum(c) && c != '_'))
188								ret = 1;
189						}
190					}
191				}
192			}
193			pattern_ptr = pattern_ptr->link;
194		} /* while (pattern_ptr) */
195
196		if (ret ^ invert_search) {
197			/* keep track of matches */
198			nmatches++;
199
200			/* quiet/print (non)matching file names only? */
201			if (option_mask32 & (OPT_q|OPT_l|OPT_L)) {
202				free(line); /* we don't need line anymore */
203				if (BE_QUIET) {
204					/* manpage says about -q:
205					 * "exit immediately with zero status
206					 * if any match is found,
207					 * even if errors were detected" */
208					exit(0);
209				}
210				/* if we're just printing filenames, we stop after the first match */
211				if (PRINT_FILES_WITH_MATCHES) {
212					puts(cur_file);
213					/* fall thru to "return 1" */
214				}
215				/* OPT_L aka PRINT_FILES_WITHOUT_MATCHES: return early */
216				return 1; /* one match */
217			}
218
219#if ENABLE_FEATURE_GREP_CONTEXT
220			/* Were we printing context and saw next (unwanted) match? */
221			if ((option_mask32 & OPT_m) && nmatches > max_matches)
222				break;
223#endif
224
225			/* print the matched line */
226			if (PRINT_MATCH_COUNTS == 0) {
227#if ENABLE_FEATURE_GREP_CONTEXT
228				int prevpos = (curpos == 0) ? lines_before - 1 : curpos - 1;
229
230				/* if we were told to print 'before' lines and there is at least
231				 * one line in the circular buffer, print them */
232				if (lines_before && before_buf[prevpos] != NULL) {
233					int first_buf_entry_line_num = linenum - lines_before;
234
235					/* advance to the first entry in the circular buffer, and
236					 * figure out the line number is of the first line in the
237					 * buffer */
238					idx = curpos;
239					while (before_buf[idx] == NULL) {
240						idx = (idx + 1) % lines_before;
241						first_buf_entry_line_num++;
242					}
243
244					/* now print each line in the buffer, clearing them as we go */
245					while (before_buf[idx] != NULL) {
246						print_line(before_buf[idx], first_buf_entry_line_num, '-');
247						free(before_buf[idx]);
248						before_buf[idx] = NULL;
249						idx = (idx + 1) % lines_before;
250						first_buf_entry_line_num++;
251					}
252				}
253
254				/* make a note that we need to print 'after' lines */
255				print_n_lines_after = lines_after;
256#endif
257				if (option_mask32 & OPT_o) {
258					line[regmatch.rm_eo] = '\0';
259					print_line(line + regmatch.rm_so, linenum, ':');
260				} else {
261					print_line(line, linenum, ':');
262				}
263			}
264		}
265#if ENABLE_FEATURE_GREP_CONTEXT
266		else { /* no match */
267			/* if we need to print some context lines after the last match, do so */
268			if (print_n_lines_after) {
269				print_line(line, linenum, '-');
270				print_n_lines_after--;
271			} else if (lines_before) {
272				/* Add the line to the circular 'before' buffer */
273				free(before_buf[curpos]);
274				before_buf[curpos] = line;
275				curpos = (curpos + 1) % lines_before;
276				/* avoid free(line) - we took line */
277				line = NULL;
278			}
279		}
280
281#endif /* ENABLE_FEATURE_GREP_CONTEXT */
282		free(line);
283
284		/* Did we print all context after last requested match? */
285		if ((option_mask32 & OPT_m)
286		 && !print_n_lines_after && nmatches == max_matches)
287			break;
288	}
289
290	/* special-case file post-processing for options where we don't print line
291	 * matches, just filenames and possibly match counts */
292
293	/* grep -c: print [filename:]count, even if count is zero */
294	if (PRINT_MATCH_COUNTS) {
295		if (print_filename)
296			printf("%s:", cur_file);
297		printf("%d\n", nmatches);
298	}
299
300	/* grep -L: print just the filename */
301	if (PRINT_FILES_WITHOUT_MATCHES) {
302		/* nmatches is zero, no need to check it:
303		 * we return 1 early if we detected a match
304		 * and PRINT_FILES_WITHOUT_MATCHES is set */
305		puts(cur_file);
306	}
307
308	return nmatches;
309}
310
311#if ENABLE_FEATURE_CLEAN_UP
312#define new_grep_list_data(p, m) add_grep_list_data(p, m)
313static char * add_grep_list_data(char *pattern, int flg_used_mem)
314#else
315#define new_grep_list_data(p, m) add_grep_list_data(p)
316static char * add_grep_list_data(char *pattern)
317#endif
318{
319	grep_list_data_t *gl = xmalloc(sizeof(grep_list_data_t));
320	gl->pattern = pattern;
321#if ENABLE_FEATURE_CLEAN_UP
322	gl->flg_mem_alocated_compiled = flg_used_mem;
323#else
324	gl->flg_mem_alocated_compiled = 0;
325#endif
326	return (char *)gl;
327}
328
329static void load_regexes_from_file(llist_t *fopt)
330{
331	char *line;
332	FILE *f;
333
334	while (fopt) {
335		llist_t *cur = fopt;
336		char *ffile = cur->data;
337
338		fopt = cur->link;
339		free(cur);
340		f = xfopen(ffile, "r");
341		while ((line = xmalloc_getline(f)) != NULL) {
342			llist_add_to(&pattern_head,
343				new_grep_list_data(line, PATTERN_MEM_A));
344		}
345	}
346}
347
348static int file_action_grep(const char *filename, struct stat *statbuf, void* matched, int depth)
349{
350	FILE *file = fopen(filename, "r");
351	if (file == NULL) {
352		if (!SUPPRESS_ERR_MSGS)
353			bb_perror_msg("%s", cur_file);
354		open_errors = 1;
355		return 0;
356	}
357	cur_file = filename;
358	*(int*)matched += grep_file(file);
359	fclose(file);
360	return 1;
361}
362
363static int grep_dir(const char *dir)
364{
365	int matched = 0;
366	recursive_action(dir,
367		/* recurse=yes */ ACTION_RECURSE |
368		/* followLinks=no */
369		/* depthFirst=yes */ ACTION_DEPTHFIRST,
370		/* fileAction= */ file_action_grep,
371		/* dirAction= */ NULL,
372		/* userData= */ &matched,
373		/* depth= */ 0);
374	return matched;
375}
376
377int grep_main(int argc, char **argv);
378int grep_main(int argc, char **argv)
379{
380	FILE *file;
381	int matched;
382	char *mopt;
383	llist_t *fopt = NULL;
384
385	/* do normal option parsing */
386#if ENABLE_FEATURE_GREP_CONTEXT
387	char *slines_after;
388	char *slines_before;
389	char *Copt;
390
391	opt_complementary = "H-h:e::f::C-AB";
392	getopt32(argv,
393		OPTSTR_GREP,
394		&pattern_head, &fopt, &mopt,
395		&slines_after, &slines_before, &Copt);
396
397	if (option_mask32 & OPT_C) {
398		/* -C unsets prev -A and -B, but following -A or -B
399		   may override it */
400		if (!(option_mask32 & OPT_A)) /* not overridden */
401			slines_after = Copt;
402		if (!(option_mask32 & OPT_B)) /* not overridden */
403			slines_before = Copt;
404		option_mask32 |= OPT_A|OPT_B; /* for parser */
405	}
406	if (option_mask32 & OPT_A) {
407		lines_after = xatoi_u(slines_after);
408	}
409	if (option_mask32 & OPT_B) {
410		lines_before = xatoi_u(slines_before);
411	}
412	/* sanity checks */
413	if (option_mask32 & (OPT_c|OPT_q|OPT_l|OPT_L)) {
414		option_mask32 &= ~OPT_n;
415		lines_before = 0;
416		lines_after = 0;
417	} else if (lines_before > 0)
418		before_buf = xzalloc(lines_before * sizeof(char *));
419#else
420	/* with auto sanity checks */
421	opt_complementary = "H-h:e::f::c-n:q-n:l-n";
422	getopt32(argv, OPTSTR_GREP,
423		&pattern_head, &fopt, &mopt);
424#endif
425	if (option_mask32 & OPT_m) {
426		max_matches = xatoi_u(mopt);
427	}
428	invert_search = ((option_mask32 & OPT_v) != 0); /* 0 | 1 */
429
430	if (pattern_head != NULL) {
431		/* convert char **argv to grep_list_data_t */
432		llist_t *cur;
433
434		for (cur = pattern_head; cur; cur = cur->link)
435			cur->data = new_grep_list_data(cur->data, 0);
436	}
437	if (option_mask32 & OPT_f)
438		load_regexes_from_file(fopt);
439
440	if (ENABLE_FEATURE_GREP_FGREP_ALIAS && applet_name[0] == 'f')
441		option_mask32 |= OPT_F;
442
443	if (!(option_mask32 & (OPT_o | OPT_w)))
444		reflags = REG_NOSUB;
445
446	if (ENABLE_FEATURE_GREP_EGREP_ALIAS &&
447			(applet_name[0] == 'e' || (option_mask32 & OPT_E)))
448		reflags |= REG_EXTENDED;
449
450	if (option_mask32 & OPT_i)
451		reflags |= REG_ICASE;
452
453	argv += optind;
454	argc -= optind;
455
456	/* if we didn't get a pattern from a -e and no command file was specified,
457	 * argv[optind] should be the pattern. no pattern, no worky */
458	if (pattern_head == NULL) {
459		char *pattern;
460		if (*argv == NULL)
461			bb_show_usage();
462		pattern = new_grep_list_data(*argv++, 0);
463		llist_add_to(&pattern_head, pattern);
464		argc--;
465	}
466
467	/* argv[(optind)..(argc-1)] should be names of file to grep through. If
468	 * there is more than one file to grep, we will print the filenames. */
469	if (argc > 1)
470		print_filename = 1;
471	/* -H / -h of course override */
472	if (option_mask32 & OPT_H)
473		print_filename = 1;
474	if (option_mask32 & OPT_h)
475		print_filename = 0;
476
477	/* If no files were specified, or '-' was specified, take input from
478	 * stdin. Otherwise, we grep through all the files specified. */
479	if (argc == 0)
480		argc++;
481	matched = 0;
482	while (argc--) {
483		cur_file = *argv++;
484		file = stdin;
485		if (!cur_file || (*cur_file == '-' && !cur_file[1])) {
486			cur_file = "(standard input)";
487		} else {
488			if (option_mask32 & OPT_r) {
489				struct stat st;
490				if (stat(cur_file, &st) == 0 && S_ISDIR(st.st_mode)) {
491					if (!(option_mask32 & OPT_h))
492						print_filename = 1;
493					matched += grep_dir(cur_file);
494					goto grep_done;
495				}
496			}
497			/* else: fopen(dir) will succeed, but reading won't */
498			file = fopen(cur_file, "r");
499			if (file == NULL) {
500				if (!SUPPRESS_ERR_MSGS)
501					bb_perror_msg("%s", cur_file);
502				open_errors = 1;
503				continue;
504			}
505		}
506		matched += grep_file(file);
507		fclose_if_not_stdin(file);
508 grep_done: ;
509	}
510
511	/* destroy all the elments in the pattern list */
512	if (ENABLE_FEATURE_CLEAN_UP) {
513		while (pattern_head) {
514			llist_t *pattern_head_ptr = pattern_head;
515			grep_list_data_t *gl =
516				(grep_list_data_t *)pattern_head_ptr->data;
517
518			pattern_head = pattern_head->link;
519			if ((gl->flg_mem_alocated_compiled & PATTERN_MEM_A))
520				free(gl->pattern);
521			if ((gl->flg_mem_alocated_compiled & COMPILED))
522				regfree(&(gl->preg));
523			free(gl);
524			free(pattern_head_ptr);
525		}
526	}
527	/* 0 = success, 1 = failed, 2 = error */
528	if (open_errors)
529		return 2;
530	return !matched; /* invert return value 0 = success, 1 = failed */
531}
532