grep.c revision 211496
1/*	$OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $	*/
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/usr.bin/grep/grep.c 211496 2010-08-19 09:28:59Z des $");
32
33#include <sys/stat.h>
34#include <sys/types.h>
35
36#include <ctype.h>
37#include <err.h>
38#include <errno.h>
39#include <getopt.h>
40#include <limits.h>
41#include <libgen.h>
42#include <locale.h>
43#include <stdbool.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48
49#include "grep.h"
50
51#ifndef WITHOUT_NLS
52#include <nl_types.h>
53nl_catd	 catalog;
54#endif
55
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.  Entries are listed by array index; slot 0 is an empty
 * string.
 */
const char	*errstr[] = {
	[0] = "",
	[1] = "(standard input)",
	[2] = "cannot read bzip2 compressed file",
	[3] = "unknown %s option",
	[4] = "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
	[5] = "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
	[6] = "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
	[7] = "\t[--null] [pattern] [file ...]\n",
	[8] = "Binary file %s matches\n",
	[9] = "%s (BSD grep) %s\n",
};
72
73/* Flags passed to regcomp() and regexec() */
74int		 cflags = 0;
75int		 eflags = REG_STARTEND;
76
77/* Shortcut for matching all cases like empty regex */
78bool		 matchall;
79
80/* Searching patterns */
81unsigned int	 patterns, pattern_sz;
82char		**pattern;
83regex_t		*r_pattern;
84fastgrep_t	*fg_pattern;
85
86/* Filename exclusion/inclusion patterns */
87unsigned int	 fpatterns, fpattern_sz;
88unsigned int	 dpatterns, dpattern_sz;
89struct epat	*dpattern, *fpattern;
90
91/* For regex errors  */
92char	 re_error[RE_ERROR_BUF + 1];
93
94/* Command-line flags */
95unsigned long long Aflag;	/* -A x: print x lines trailing each match */
96unsigned long long Bflag;	/* -B x: print x lines leading each match */
97bool	 Hflag;		/* -H: always print file name */
98bool	 Lflag;		/* -L: only show names of files with no matches */
99bool	 bflag;		/* -b: show block numbers for each match */
100bool	 cflag;		/* -c: only show a count of matching lines */
101bool	 hflag;		/* -h: don't print filename headers */
102bool	 iflag;		/* -i: ignore case */
103bool	 lflag;		/* -l: only show names of files with matches */
104bool	 mflag;		/* -m x: stop reading the files after x matches */
105unsigned long long mcount;	/* count for -m */
106bool	 nflag;		/* -n: show line numbers in front of matching lines */
107bool	 oflag;		/* -o: print only matching part */
108bool	 qflag;		/* -q: quiet mode (don't output anything) */
109bool	 sflag;		/* -s: silent mode (ignore errors) */
110bool	 vflag;		/* -v: only show non-matching lines */
111bool	 wflag;		/* -w: pattern must start and end on word boundaries */
112bool	 xflag;		/* -x: pattern must match entire line */
113bool	 lbflag;	/* --line-buffered */
114bool	 nullflag;	/* --null */
115char	*label;		/* --label */
116const char *color;	/* --color */
117int	 grepbehave = GREP_BASIC;	/* -EFGP: type of the regex */
118int	 binbehave = BINFILE_BIN;	/* -aIU: handling of binary files */
119int	 filebehave = FILE_STDIO;	/* -JZ: normal, gzip or bzip2 file */
120int	 devbehave = DEV_READ;		/* -D: handling of devices */
121int	 dirbehave = DIR_READ;		/* -dRr: handling of directories */
122int	 linkbehave = LINK_READ;	/* -OpS: handling of symlinks */
123
124bool	 dexclude, dinclude;	/* --exclude-dir and --include-dir */
125bool	 fexclude, finclude;	/* --exclude and --include */
126
/*
 * Codes returned by getopt_long() for options that have no short
 * letter.  Numbering starts right above CHAR_MAX, so none of these
 * values can be mistaken for a short option character.
 */
enum {
	BIN_OPT = CHAR_MAX + 1,	/* --binary-files */
	COLOR_OPT,		/* --color, --colour */
	HELP_OPT,		/* --help */
	MMAP_OPT,		/* --mmap */
	LINEBUF_OPT,		/* --line-buffered */
	LABEL_OPT,		/* --label */
	NULL_OPT,		/* --null */
	R_EXCLUDE_OPT,		/* --exclude */
	R_INCLUDE_OPT,		/* --include */
	R_DEXCLUDE_OPT,		/* --exclude-dir */
	R_DINCLUDE_OPT		/* --include-dir */
};
140
/* Forward declaration; the definition appears further down in this file. */
static inline const char	*init_color(const char *);

/* Housekeeping state */
bool	 first = true;	/* are we still processing the first match? */
bool	 prev;		/* did the previous line match? */
int	 tail;		/* number of lines left to print */
bool	 notfound;	/* a file was not found */

/* Program name; defined outside this file. */
extern char	*__progname;
150
151/*
152 * Prints usage information and returns 2.
153 */
154static void
155usage(void)
156{
157	fprintf(stderr, getstr(4), __progname);
158	fprintf(stderr, "%s", getstr(5));
159	fprintf(stderr, "%s", getstr(5));
160	fprintf(stderr, "%s", getstr(6));
161	fprintf(stderr, "%s", getstr(7));
162	exit(2);
163}
164
165static const char	*optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
166
167struct option long_options[] =
168{
169	{"binary-files",	required_argument,	NULL, BIN_OPT},
170	{"help",		no_argument,		NULL, HELP_OPT},
171	{"mmap",		no_argument,		NULL, MMAP_OPT},
172	{"line-buffered",	no_argument,		NULL, LINEBUF_OPT},
173	{"label",		required_argument,	NULL, LABEL_OPT},
174	{"null",		no_argument,		NULL, NULL_OPT},
175	{"color",		optional_argument,	NULL, COLOR_OPT},
176	{"colour",		optional_argument,	NULL, COLOR_OPT},
177	{"exclude",		required_argument,	NULL, R_EXCLUDE_OPT},
178	{"include",		required_argument,	NULL, R_INCLUDE_OPT},
179	{"exclude-dir",		required_argument,	NULL, R_DEXCLUDE_OPT},
180	{"include-dir",		required_argument,	NULL, R_DINCLUDE_OPT},
181	{"after-context",	required_argument,	NULL, 'A'},
182	{"text",		no_argument,		NULL, 'a'},
183	{"before-context",	required_argument,	NULL, 'B'},
184	{"byte-offset",		no_argument,		NULL, 'b'},
185	{"context",		optional_argument,	NULL, 'C'},
186	{"count",		no_argument,		NULL, 'c'},
187	{"devices",		required_argument,	NULL, 'D'},
188        {"directories",		required_argument,	NULL, 'd'},
189	{"extended-regexp",	no_argument,		NULL, 'E'},
190	{"regexp",		required_argument,	NULL, 'e'},
191	{"fixed-strings",	no_argument,		NULL, 'F'},
192	{"file",		required_argument,	NULL, 'f'},
193	{"basic-regexp",	no_argument,		NULL, 'G'},
194	{"no-filename",		no_argument,		NULL, 'h'},
195	{"with-filename",	no_argument,		NULL, 'H'},
196	{"ignore-case",		no_argument,		NULL, 'i'},
197	{"bz2decompress",	no_argument,		NULL, 'J'},
198	{"files-with-matches",	no_argument,		NULL, 'l'},
199	{"files-without-match", no_argument,            NULL, 'L'},
200	{"max-count",		required_argument,	NULL, 'm'},
201	{"line-number",		no_argument,		NULL, 'n'},
202	{"only-matching",	no_argument,		NULL, 'o'},
203	{"quiet",		no_argument,		NULL, 'q'},
204	{"silent",		no_argument,		NULL, 'q'},
205	{"recursive",		no_argument,		NULL, 'r'},
206	{"no-messages",		no_argument,		NULL, 's'},
207	{"binary",		no_argument,		NULL, 'U'},
208	{"unix-byte-offsets",	no_argument,		NULL, 'u'},
209	{"invert-match",	no_argument,		NULL, 'v'},
210	{"version",		no_argument,		NULL, 'V'},
211	{"word-regexp",		no_argument,		NULL, 'w'},
212	{"line-regexp",		no_argument,		NULL, 'x'},
213	{"decompress",          no_argument,            NULL, 'Z'},
214	{NULL,			no_argument,		NULL, 0}
215};
216
217/*
218 * Adds a searching pattern to the internal array.
219 */
220static void
221add_pattern(char *pat, size_t len)
222{
223
224	/* Check if we can do a shortcut */
225	if (len == 0 || matchall) {
226		matchall = true;
227		return;
228	}
229	/* Increase size if necessary */
230	if (patterns == pattern_sz) {
231		pattern_sz *= 2;
232		pattern = grep_realloc(pattern, ++pattern_sz *
233		    sizeof(*pattern));
234	}
235	if (len > 0 && pat[len - 1] == '\n')
236		--len;
237	/* pat may not be NUL-terminated */
238	pattern[patterns] = grep_malloc(len + 1);
239	memcpy(pattern[patterns], pat, len);
240	pattern[patterns][len] = '\0';
241	++patterns;
242}
243
244/*
245 * Adds a file include/exclude pattern to the internal array.
246 */
247static void
248add_fpattern(const char *pat, int mode)
249{
250
251	/* Increase size if necessary */
252	if (fpatterns == fpattern_sz) {
253		fpattern_sz *= 2;
254		fpattern = grep_realloc(fpattern, ++fpattern_sz *
255		    sizeof(struct epat));
256	}
257	fpattern[fpatterns].pat = grep_strdup(pat);
258	fpattern[fpatterns].mode = mode;
259	++fpatterns;
260}
261
262/*
263 * Adds a directory include/exclude pattern to the internal array.
264 */
265static void
266add_dpattern(const char *pat, int mode)
267{
268
269	/* Increase size if necessary */
270	if (dpatterns == dpattern_sz) {
271		dpattern_sz *= 2;
272		dpattern = grep_realloc(dpattern, ++dpattern_sz *
273		    sizeof(struct epat));
274	}
275	dpattern[dpatterns].pat = grep_strdup(pat);
276	dpattern[dpatterns].mode = mode;
277	++dpatterns;
278}
279
/*
 * Reads search patterns from the file fn, one per line, and passes each
 * of them to add_pattern().  Exits with status 2 through err() if the
 * file cannot be opened or a read error is flagged on the stream.
 */
static void
read_patterns(const char *fn)
{
	FILE *fp;
	char *ln;
	size_t n;

	fp = fopen(fn, "r");
	if (fp == NULL)
		err(2, "%s", fn);

	for (;;) {
		ln = fgetln(fp, &n);
		if (ln == NULL)
			break;
		/* A line starting with a newline is passed on with length 0. */
		if (ln[0] == '\n')
			n = 0;
		add_pattern(ln, n);
	}

	if (ferror(fp))
		err(2, "%s", fn);
	fclose(fp);
}
298
/*
 * Selects the colour string: the value of the GREP_COLOR environment
 * variable when it is set, otherwise the caller's default d.
 */
static inline const char *
init_color(const char *d)
{
	const char *env_color = getenv("GREP_COLOR");

	if (env_color == NULL)
		return (d);
	return (env_color);
}
307
308int
309main(int argc, char *argv[])
310{
311	char **aargv, **eargv, *eopts;
312	char *ep;
313	unsigned long long l;
314	unsigned int aargc, eargc, i;
315	int c, lastc, needpattern, newarg, prevoptind;
316
317	setlocale(LC_ALL, "");
318
319#ifndef WITHOUT_NLS
320	catalog = catopen("grep", NL_CAT_LOCALE);
321#endif
322
323	/* Check what is the program name of the binary.  In this
324	   way we can have all the funcionalities in one binary
325	   without the need of scripting and using ugly hacks. */
326	switch (__progname[0]) {
327	case 'e':
328		grepbehave = GREP_EXTENDED;
329		break;
330	case 'f':
331		grepbehave = GREP_FIXED;
332		break;
333	case 'g':
334		grepbehave = GREP_BASIC;
335		break;
336	case 'z':
337		filebehave = FILE_GZIP;
338		switch(__progname[1]) {
339		case 'e':
340			grepbehave = GREP_EXTENDED;
341			break;
342		case 'f':
343			grepbehave = GREP_FIXED;
344			break;
345		case 'g':
346			grepbehave = GREP_BASIC;
347			break;
348		}
349		break;
350	}
351
352	lastc = '\0';
353	newarg = 1;
354	prevoptind = 1;
355	needpattern = 1;
356
357	eopts = getenv("GREP_OPTIONS");
358
359	/* support for extra arguments in GREP_OPTIONS */
360	eargc = 0;
361	if (eopts != NULL) {
362		char *str;
363
364		/* make an estimation of how many extra arguments we have */
365		for (unsigned int j = 0; j < strlen(eopts); j++)
366			if (eopts[j] == ' ')
367				eargc++;
368
369		eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
370
371		eargc = 0;
372		/* parse extra arguments */
373		while ((str = strsep(&eopts, " ")) != NULL)
374			eargv[eargc++] = grep_strdup(str);
375
376		aargv = (char **)grep_calloc(eargc + argc + 1,
377		    sizeof(char *));
378
379		aargv[0] = argv[0];
380		for (i = 0; i < eargc; i++)
381			aargv[i + 1] = eargv[i];
382		for (int j = 1; j < argc; j++, i++)
383			aargv[i + 1] = argv[j];
384
385		aargc = eargc + argc;
386	} else {
387		aargv = argv;
388		aargc = argc;
389	}
390
391	while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
392	    -1)) {
393		switch (c) {
394		case '0': case '1': case '2': case '3': case '4':
395		case '5': case '6': case '7': case '8': case '9':
396			if (newarg || !isdigit(lastc))
397				Aflag = 0;
398			else if (Aflag > LLONG_MAX / 10) {
399				errno = ERANGE;
400				err(2, NULL);
401			}
402			Aflag = Bflag = (Aflag * 10) + (c - '0');
403			break;
404		case 'C':
405			if (optarg == NULL) {
406				Aflag = Bflag = 2;
407				break;
408			}
409			/* FALLTHROUGH */
410		case 'A':
411			/* FALLTHROUGH */
412		case 'B':
413			errno = 0;
414			l = strtoull(optarg, &ep, 10);
415			if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
416			    ((errno == EINVAL) && (l == 0)))
417				err(2, NULL);
418			else if (ep[0] != '\0') {
419				errno = EINVAL;
420				err(2, NULL);
421			}
422			if (c == 'A')
423				Aflag = l;
424			else if (c == 'B')
425				Bflag = l;
426			else
427				Aflag = Bflag = l;
428			break;
429		case 'a':
430			binbehave = BINFILE_TEXT;
431			break;
432		case 'b':
433			bflag = true;
434			break;
435		case 'c':
436			cflag = true;
437			break;
438		case 'D':
439			if (strcasecmp(optarg, "skip") == 0)
440				devbehave = DEV_SKIP;
441			else if (strcasecmp(optarg, "read") == 0)
442				devbehave = DEV_READ;
443			else
444				errx(2, getstr(3), "--devices");
445			break;
446		case 'd':
447			if (strcasecmp("recurse", optarg) == 0) {
448				Hflag = true;
449				dirbehave = DIR_RECURSE;
450			} else if (strcasecmp("skip", optarg) == 0)
451				dirbehave = DIR_SKIP;
452			else if (strcasecmp("read", optarg) == 0)
453				dirbehave = DIR_READ;
454			else
455				errx(2, getstr(3), "--directories");
456			break;
457		case 'E':
458			grepbehave = GREP_EXTENDED;
459			break;
460		case 'e':
461			add_pattern(optarg, strlen(optarg));
462			needpattern = 0;
463			break;
464		case 'F':
465			grepbehave = GREP_FIXED;
466			break;
467		case 'f':
468			read_patterns(optarg);
469			needpattern = 0;
470			break;
471		case 'G':
472			grepbehave = GREP_BASIC;
473			break;
474		case 'H':
475			Hflag = true;
476			break;
477		case 'h':
478			Hflag = false;
479			hflag = true;
480			break;
481		case 'I':
482			binbehave = BINFILE_SKIP;
483			break;
484		case 'i':
485		case 'y':
486			iflag =  true;
487			cflags |= REG_ICASE;
488			break;
489		case 'J':
490			filebehave = FILE_BZIP;
491			break;
492		case 'L':
493			lflag = false;
494			Lflag = true;
495			break;
496		case 'l':
497			Lflag = false;
498			lflag = true;
499			break;
500		case 'm':
501			mflag = true;
502			errno = 0;
503			mcount = strtoull(optarg, &ep, 10);
504			if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
505			    ((errno == EINVAL) && (mcount == 0)))
506				err(2, NULL);
507			else if (ep[0] != '\0') {
508				errno = EINVAL;
509				err(2, NULL);
510			}
511			break;
512		case 'n':
513			nflag = true;
514			break;
515		case 'O':
516			linkbehave = LINK_EXPLICIT;
517			break;
518		case 'o':
519			oflag = true;
520			break;
521		case 'p':
522			linkbehave = LINK_SKIP;
523			break;
524		case 'q':
525			qflag = true;
526			break;
527		case 'S':
528			linkbehave = LINK_READ;
529			break;
530		case 'R':
531		case 'r':
532			dirbehave = DIR_RECURSE;
533			Hflag = true;
534			break;
535		case 's':
536			sflag = true;
537			break;
538		case 'U':
539			binbehave = BINFILE_BIN;
540			break;
541		case 'u':
542		case MMAP_OPT:
543			/* noop, compatibility */
544			break;
545		case 'V':
546			printf(getstr(9), __progname, VERSION);
547			exit(0);
548		case 'v':
549			vflag = true;
550			break;
551		case 'w':
552			wflag = true;
553			break;
554		case 'x':
555			xflag = true;
556			break;
557		case 'Z':
558			filebehave = FILE_GZIP;
559			break;
560		case BIN_OPT:
561			if (strcasecmp("binary", optarg) == 0)
562				binbehave = BINFILE_BIN;
563			else if (strcasecmp("without-match", optarg) == 0)
564				binbehave = BINFILE_SKIP;
565			else if (strcasecmp("text", optarg) == 0)
566				binbehave = BINFILE_TEXT;
567			else
568				errx(2, getstr(3), "--binary-files");
569			break;
570		case COLOR_OPT:
571			color = NULL;
572			if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
573			    strcasecmp("tty", optarg) == 0 ||
574			    strcasecmp("if-tty", optarg) == 0) {
575				char *term;
576
577				term = getenv("TERM");
578				if (isatty(STDOUT_FILENO) && term != NULL &&
579				    strcasecmp(term, "dumb") != 0)
580					color = init_color("01;31");
581			} else if (strcasecmp("always", optarg) == 0 ||
582			    strcasecmp("yes", optarg) == 0 ||
583			    strcasecmp("force", optarg) == 0) {
584				color = init_color("01;31");
585			} else if (strcasecmp("never", optarg) != 0 &&
586			    strcasecmp("none", optarg) != 0 &&
587			    strcasecmp("no", optarg) != 0)
588				errx(2, getstr(3), "--color");
589			break;
590		case LABEL_OPT:
591			label = optarg;
592			break;
593		case LINEBUF_OPT:
594			lbflag = true;
595			break;
596		case NULL_OPT:
597			nullflag = true;
598			break;
599		case R_INCLUDE_OPT:
600			finclude = true;
601			add_fpattern(optarg, INCL_PAT);
602			break;
603		case R_EXCLUDE_OPT:
604			fexclude = true;
605			add_fpattern(optarg, EXCL_PAT);
606			break;
607		case R_DINCLUDE_OPT:
608			dinclude = true;
609			add_dpattern(optarg, INCL_PAT);
610			break;
611		case R_DEXCLUDE_OPT:
612			dexclude = true;
613			add_dpattern(optarg, EXCL_PAT);
614			break;
615		case HELP_OPT:
616		default:
617			usage();
618		}
619		lastc = c;
620		newarg = optind != prevoptind;
621		prevoptind = optind;
622	}
623	aargc -= optind;
624	aargv += optind;
625
626	/* Fail if we don't have any pattern */
627	if (aargc == 0 && needpattern)
628		usage();
629
630	/* Process patterns from command line */
631	if (aargc != 0 && needpattern) {
632		add_pattern(*aargv, strlen(*aargv));
633		--aargc;
634		++aargv;
635	}
636
637	switch (grepbehave) {
638	case GREP_FIXED:
639	case GREP_BASIC:
640		break;
641	case GREP_EXTENDED:
642		cflags |= REG_EXTENDED;
643		break;
644	default:
645		/* NOTREACHED */
646		usage();
647	}
648
649	fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
650	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
651/*
652 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
653 * Optimizations should be done there.
654 */
655		/* Check if cheating is allowed (always is for fgrep). */
656	if (grepbehave == GREP_FIXED) {
657		for (i = 0; i < patterns; ++i)
658			fgrepcomp(&fg_pattern[i], pattern[i]);
659	} else {
660		for (i = 0; i < patterns; ++i) {
661			if (fastcomp(&fg_pattern[i], pattern[i])) {
662				/* Fall back to full regex library */
663				c = regcomp(&r_pattern[i], pattern[i], cflags);
664				if (c != 0) {
665					regerror(c, &r_pattern[i], re_error,
666					    RE_ERROR_BUF);
667					errx(2, "%s", re_error);
668				}
669			}
670		}
671	}
672
673	if (lbflag)
674		setlinebuf(stdout);
675
676	if ((aargc == 0 || aargc == 1) && !Hflag)
677		hflag = true;
678
679	if (aargc == 0)
680		exit(!procfile("-"));
681
682	if (dirbehave == DIR_RECURSE)
683		c = grep_tree(aargv);
684	else {
685		if (aargc == 1)
686			hflag = true;
687		for (c = 0; aargc--; ++aargv) {
688			if ((finclude || fexclude) && !file_matching(*aargv))
689				continue;
690			c+= procfile(*aargv);
691		}
692	}
693
694#ifndef WITHOUT_NLS
695	catclose(catalog);
696#endif
697
698	/* Find out the correct return value according to the
699	   results and the command line option. */
700	exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
701}
702