grep.c revision 210622
1/*	$OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $	*/
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/usr.bin/grep/grep.c 210622 2010-07-29 18:02:57Z gabor $");
32
33#include <sys/stat.h>
34#include <sys/types.h>
35
36#include <ctype.h>
37#include <err.h>
38#include <errno.h>
39#include <getopt.h>
40#include <limits.h>
41#include <libgen.h>
42#include <locale.h>
43#include <stdbool.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48
49#include "grep.h"
50
51#ifndef WITHOUT_NLS
52#include <nl_types.h>
53nl_catd	 catalog;
54#endif
55
/*
 * Built-in message table, indexed by message number.  These default
 * messages are used when NLS is disabled or no catalogue is found.
 * Slot 0 is an empty placeholder; slots 4-7 together form the usage
 * text.
 */
const char	*errstr[] = {
	[0] = "",
	[1] = "(standard input)",
	[2] = "cannot read bzip2 compressed file",
	[3] = "unknown %s option",
	[4] = "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
	[5] = "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
	[6] = "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
	[7] = "\t[--null] [pattern] [file ...]\n",
	[8] = "Binary file %s matches\n",
	[9] = "%s (BSD grep) %s\n",
};
72
73/* Flags passed to regcomp() and regexec() */
74int		 cflags = 0;
75int		 eflags = REG_STARTEND;
76
77/* Shortcut for matching all cases like empty regex */
78bool		 matchall;
79
80/* Searching patterns */
81unsigned int	 patterns, pattern_sz;
82char		**pattern;
83regex_t		*r_pattern;
84fastgrep_t	*fg_pattern;
85
86/* Filename exclusion/inclusion patterns */
87unsigned int	 fpatterns, fpattern_sz;
88unsigned int	 dpatterns, dpattern_sz;
89struct epat	*dpattern, *fpattern;
90
91/* For regex errors  */
92char	 re_error[RE_ERROR_BUF + 1];
93
94/* Command-line flags */
95unsigned long long Aflag;	/* -A x: print x lines trailing each match */
96unsigned long long Bflag;	/* -B x: print x lines leading each match */
97bool	 Hflag;		/* -H: always print file name */
98bool	 Lflag;		/* -L: only show names of files with no matches */
99bool	 bflag;		/* -b: show block numbers for each match */
100bool	 cflag;		/* -c: only show a count of matching lines */
101bool	 hflag;		/* -h: don't print filename headers */
102bool	 iflag;		/* -i: ignore case */
103bool	 lflag;		/* -l: only show names of files with matches */
104bool	 mflag;		/* -m x: stop reading the files after x matches */
105unsigned long long mcount;	/* count for -m */
106bool	 nflag;		/* -n: show line numbers in front of matching lines */
107bool	 oflag;		/* -o: print only matching part */
108bool	 qflag;		/* -q: quiet mode (don't output anything) */
109bool	 sflag;		/* -s: silent mode (ignore errors) */
110bool	 vflag;		/* -v: only show non-matching lines */
111bool	 wflag;		/* -w: pattern must start and end on word boundaries */
112bool	 xflag;		/* -x: pattern must match entire line */
113bool	 lbflag;	/* --line-buffered */
114bool	 nullflag;	/* --null */
115char	*label;		/* --label */
116const char *color;	/* --color */
117int	 grepbehave = GREP_BASIC;	/* -EFGP: type of the regex */
118int	 binbehave = BINFILE_BIN;	/* -aIU: handling of binary files */
119int	 filebehave = FILE_STDIO;	/* -JZ: normal, gzip or bzip2 file */
120int	 devbehave = DEV_READ;		/* -D: handling of devices */
121int	 dirbehave = DIR_READ;		/* -dRr: handling of directories */
122int	 linkbehave = LINK_READ;	/* -OpS: handling of symlinks */
123
124bool	 dexclude, dinclude;	/* --exclude amd --include */
125bool	 fexclude, finclude;	/* --exclude-dir and --include-dir */
126
/*
 * getopt_long() return codes for options that exist only in long form.
 * Numbering starts just above CHAR_MAX so that no code can coincide
 * with a single-character option.
 */
enum {
	BIN_OPT = CHAR_MAX + 1,		/* --binary-files */
	COLOR_OPT,			/* --color, --colour */
	HELP_OPT,			/* --help */
	MMAP_OPT,			/* --mmap */
	LINEBUF_OPT,			/* --line-buffered */
	LABEL_OPT,			/* --label */
	NULL_OPT,			/* --null */
	R_EXCLUDE_OPT,			/* --exclude */
	R_INCLUDE_OPT,			/* --include */
	R_DEXCLUDE_OPT,			/* --exclude-dir */
	R_DINCLUDE_OPT			/* --include-dir */
};
140
/* Forward declaration; the definition appears further down this file */
static inline const char	*init_color(const char *);

/*
 * Housekeeping
 */

/* flag whether we are processing the first match */
bool	 first = true;

/* flag whether or not the previous line matched */
bool	 prev;

/* lines left to print */
int	 tail;

/* file not found */
bool	 notfound;

/* Program name, consulted for the usage/version text and for mode selection */
extern char	*__progname;
150
151/*
152 * Prints usage information and returns 2.
153 */
154static void
155usage(void)
156{
157	fprintf(stderr, getstr(4), __progname);
158	fprintf(stderr, "%s", getstr(5));
159	fprintf(stderr, "%s", getstr(5));
160	fprintf(stderr, "%s", getstr(6));
161	fprintf(stderr, "%s", getstr(7));
162	exit(2);
163}
164
/* Short option string for getopt_long(); a trailing ':' marks an argument */
static const char	*optstr =
    "0123456789"			/* -NUM context shorthand */
    "A:B:C:D:EFGHIJLOPSRUVZ"		/* upper-case options */
    "abcd:e:f:hilm:nopqrsuvwxy";	/* lower-case options */
166
167struct option long_options[] =
168{
169	{"binary-files",	required_argument,	NULL, BIN_OPT},
170	{"help",		no_argument,		NULL, HELP_OPT},
171	{"mmap",		no_argument,		NULL, MMAP_OPT},
172	{"line-buffered",	no_argument,		NULL, LINEBUF_OPT},
173	{"label",		required_argument,	NULL, LABEL_OPT},
174	{"null",		no_argument,		NULL, NULL_OPT},
175	{"color",		optional_argument,	NULL, COLOR_OPT},
176	{"colour",		optional_argument,	NULL, COLOR_OPT},
177	{"exclude",		required_argument,	NULL, R_EXCLUDE_OPT},
178	{"include",		required_argument,	NULL, R_INCLUDE_OPT},
179	{"exclude-dir",		required_argument,	NULL, R_DEXCLUDE_OPT},
180	{"include-dir",		required_argument,	NULL, R_DINCLUDE_OPT},
181	{"after-context",	required_argument,	NULL, 'A'},
182	{"text",		no_argument,		NULL, 'a'},
183	{"before-context",	required_argument,	NULL, 'B'},
184	{"byte-offset",		no_argument,		NULL, 'b'},
185	{"context",		optional_argument,	NULL, 'C'},
186	{"count",		no_argument,		NULL, 'c'},
187	{"devices",		required_argument,	NULL, 'D'},
188        {"directories",		required_argument,	NULL, 'd'},
189	{"extended-regexp",	no_argument,		NULL, 'E'},
190	{"regexp",		required_argument,	NULL, 'e'},
191	{"fixed-strings",	no_argument,		NULL, 'F'},
192	{"file",		required_argument,	NULL, 'f'},
193	{"basic-regexp",	no_argument,		NULL, 'G'},
194	{"no-filename",		no_argument,		NULL, 'h'},
195	{"with-filename",	no_argument,		NULL, 'H'},
196	{"ignore-case",		no_argument,		NULL, 'i'},
197	{"bz2decompress",	no_argument,		NULL, 'J'},
198	{"files-with-matches",	no_argument,		NULL, 'l'},
199	{"files-without-match", no_argument,            NULL, 'L'},
200	{"max-count",		required_argument,	NULL, 'm'},
201	{"line-number",		no_argument,		NULL, 'n'},
202	{"only-matching",	no_argument,		NULL, 'o'},
203	{"quiet",		no_argument,		NULL, 'q'},
204	{"silent",		no_argument,		NULL, 'q'},
205	{"recursive",		no_argument,		NULL, 'r'},
206	{"no-messages",		no_argument,		NULL, 's'},
207	{"binary",		no_argument,		NULL, 'U'},
208	{"unix-byte-offsets",	no_argument,		NULL, 'u'},
209	{"invert-match",	no_argument,		NULL, 'v'},
210	{"version",		no_argument,		NULL, 'V'},
211	{"word-regexp",		no_argument,		NULL, 'w'},
212	{"line-regexp",		no_argument,		NULL, 'x'},
213	{"decompress",          no_argument,            NULL, 'Z'},
214	{NULL,			no_argument,		NULL, 0}
215};
216
217/*
218 * Adds a searching pattern to the internal array.
219 */
220static void
221add_pattern(char *pat, size_t len)
222{
223
224	/* Check if we can do a shortcut */
225	if (len == 0 || matchall) {
226		matchall = true;
227		return;
228	}
229	/* Increase size if necessary */
230	if (patterns == pattern_sz) {
231		pattern_sz *= 2;
232		pattern = grep_realloc(pattern, ++pattern_sz *
233		    sizeof(*pattern));
234	}
235	if (len > 0 && pat[len - 1] == '\n')
236		--len;
237	/* pat may not be NUL-terminated */
238	pattern[patterns] = grep_malloc(len + 1);
239	strlcpy(pattern[patterns], pat, len + 1);
240	++patterns;
241}
242
243/*
244 * Adds a file include/exclude pattern to the internal array.
245 */
246static void
247add_fpattern(const char *pat, int mode)
248{
249
250	/* Increase size if necessary */
251	if (fpatterns == fpattern_sz) {
252		fpattern_sz *= 2;
253		fpattern = grep_realloc(fpattern, ++fpattern_sz *
254		    sizeof(struct epat));
255	}
256	fpattern[fpatterns].pat = grep_strdup(pat);
257	fpattern[fpatterns].mode = mode;
258	++fpatterns;
259}
260
261/*
262 * Adds a directory include/exclude pattern to the internal array.
263 */
264static void
265add_dpattern(const char *pat, int mode)
266{
267
268	/* Increase size if necessary */
269	if (dpatterns == dpattern_sz) {
270		dpattern_sz *= 2;
271		dpattern = grep_realloc(dpattern, ++dpattern_sz *
272		    sizeof(struct epat));
273	}
274	dpattern[dpatterns].pat = grep_strdup(pat);
275	dpattern[dpatterns].mode = mode;
276	++dpatterns;
277}
278
/*
 * Loads search patterns from the file named fn, one per line, and
 * registers each of them through add_pattern().  A line consisting of
 * only a newline is registered as an empty pattern.  Failure to open
 * or read the file terminates the program with exit status 2.
 */
static void
read_patterns(const char *fn)
{
	FILE *fp;
	char *ln;
	size_t lnlen;

	fp = fopen(fn, "r");
	if (fp == NULL)
		err(2, "%s", fn);

	for (;;) {
		ln = fgetln(fp, &lnlen);
		if (ln == NULL)
			break;
		if (*ln == '\n')
			add_pattern(ln, 0);
		else
			add_pattern(ln, lnlen);
	}

	/* fgetln() returns NULL on both EOF and error; tell them apart */
	if (ferror(fp))
		err(2, "%s", fn);
	fclose(fp);
}
297
/*
 * Picks the colour specification to use: the value of the GREP_COLOR
 * environment variable when it is set, otherwise the default d.
 */
static inline const char *
init_color(const char *d)
{
	const char *from_env = getenv("GREP_COLOR");

	if (from_env == NULL)
		return (d);
	return (from_env);
}
306
307int
308main(int argc, char *argv[])
309{
310	char **aargv, **eargv, *eopts;
311	char *ep;
312	unsigned long long l;
313	unsigned int aargc, eargc, i;
314	int c, lastc, needpattern, newarg, prevoptind;
315
316	setlocale(LC_ALL, "");
317
318#ifndef WITHOUT_NLS
319	catalog = catopen("grep", NL_CAT_LOCALE);
320#endif
321
322	/* Check what is the program name of the binary.  In this
323	   way we can have all the funcionalities in one binary
324	   without the need of scripting and using ugly hacks. */
325	switch (__progname[0]) {
326	case 'e':
327		grepbehave = GREP_EXTENDED;
328		break;
329	case 'f':
330		grepbehave = GREP_FIXED;
331		break;
332	case 'g':
333		grepbehave = GREP_BASIC;
334		break;
335	case 'z':
336		filebehave = FILE_GZIP;
337		switch(__progname[1]) {
338		case 'e':
339			grepbehave = GREP_EXTENDED;
340			break;
341		case 'f':
342			grepbehave = GREP_FIXED;
343			break;
344		case 'g':
345			grepbehave = GREP_BASIC;
346			break;
347		}
348		break;
349	}
350
351	lastc = '\0';
352	newarg = 1;
353	prevoptind = 1;
354	needpattern = 1;
355
356	eopts = getenv("GREP_OPTIONS");
357
358	eargc = 1;
359	if (eopts != NULL) {
360		char *str;
361
362		for(i = 0; i < strlen(eopts); i++)
363			if (eopts[i] == ' ')
364				eargc++;
365
366		eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
367
368		str = strtok(eopts, " ");
369		eargc = 0;
370
371		while(str != NULL) {
372			eargv[++eargc] = (char *)grep_malloc(sizeof(char) *
373			    (strlen(str) + 1));
374			strlcpy(eargv[eargc], str, strlen(str) + 1);
375			str = strtok(NULL, " ");
376		}
377		eargv[++eargc] = NULL;
378
379		aargv = (char **)grep_calloc(eargc + argc + 1,
380		    sizeof(char *));
381		aargv[0] = argv[0];
382
383		for(i = 1; i < eargc; i++)
384			aargv[i] = eargv[i];
385		for(int j = 1; j < argc; j++)
386			aargv[i++] = argv[j];
387
388		aargc = eargc + argc - 1;
389
390	} else {
391		aargv = argv;
392		aargc = argc;
393	}
394
395	while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
396	    -1)) {
397		switch (c) {
398		case '0': case '1': case '2': case '3': case '4':
399		case '5': case '6': case '7': case '8': case '9':
400			if (newarg || !isdigit(lastc))
401				Aflag = 0;
402			else if (Aflag > LLONG_MAX / 10) {
403				errno = ERANGE;
404				err(2, NULL);
405			}
406			Aflag = Bflag = (Aflag * 10) + (c - '0');
407			break;
408		case 'C':
409			if (optarg == NULL) {
410				Aflag = Bflag = 2;
411				break;
412			}
413			/* FALLTHROUGH */
414		case 'A':
415			/* FALLTHROUGH */
416		case 'B':
417			errno = 0;
418			l = strtoull(optarg, &ep, 10);
419			if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
420			    ((errno == EINVAL) && (l == 0)))
421				err(2, NULL);
422			else if (ep[0] != '\0') {
423				errno = EINVAL;
424				err(2, NULL);
425			}
426			if (c == 'A')
427				Aflag = l;
428			else if (c == 'B')
429				Bflag = l;
430			else
431				Aflag = Bflag = l;
432			break;
433		case 'a':
434			binbehave = BINFILE_TEXT;
435			break;
436		case 'b':
437			bflag = true;
438			break;
439		case 'c':
440			cflag = true;
441			break;
442		case 'D':
443			if (strcasecmp(optarg, "skip") == 0)
444				devbehave = DEV_SKIP;
445			else if (strcasecmp(optarg, "read") == 0)
446				devbehave = DEV_READ;
447			else
448				errx(2, getstr(3), "--devices");
449			break;
450		case 'd':
451			if (strcasecmp("recurse", optarg) == 0) {
452				Hflag = true;
453				dirbehave = DIR_RECURSE;
454			} else if (strcasecmp("skip", optarg) == 0)
455				dirbehave = DIR_SKIP;
456			else if (strcasecmp("read", optarg) == 0)
457				dirbehave = DIR_READ;
458			else
459				errx(2, getstr(3), "--directories");
460			break;
461		case 'E':
462			grepbehave = GREP_EXTENDED;
463			break;
464		case 'e':
465			add_pattern(optarg, strlen(optarg));
466			needpattern = 0;
467			break;
468		case 'F':
469			grepbehave = GREP_FIXED;
470			break;
471		case 'f':
472			read_patterns(optarg);
473			needpattern = 0;
474			break;
475		case 'G':
476			grepbehave = GREP_BASIC;
477			break;
478		case 'H':
479			Hflag = true;
480			break;
481		case 'h':
482			Hflag = false;
483			hflag = true;
484			break;
485		case 'I':
486			binbehave = BINFILE_SKIP;
487			break;
488		case 'i':
489		case 'y':
490			iflag =  true;
491			cflags |= REG_ICASE;
492			break;
493		case 'J':
494			filebehave = FILE_BZIP;
495			break;
496		case 'L':
497			lflag = false;
498			Lflag = true;
499			break;
500		case 'l':
501			Lflag = false;
502			lflag = true;
503			break;
504		case 'm':
505			mflag = true;
506			errno = 0;
507			mcount = strtoull(optarg, &ep, 10);
508			if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
509			    ((errno == EINVAL) && (mcount == 0)))
510				err(2, NULL);
511			else if (ep[0] != '\0') {
512				errno = EINVAL;
513				err(2, NULL);
514			}
515			break;
516		case 'n':
517			nflag = true;
518			break;
519		case 'O':
520			linkbehave = LINK_EXPLICIT;
521			break;
522		case 'o':
523			oflag = true;
524			break;
525		case 'p':
526			linkbehave = LINK_SKIP;
527			break;
528		case 'q':
529			qflag = true;
530			break;
531		case 'S':
532			linkbehave = LINK_READ;
533			break;
534		case 'R':
535		case 'r':
536			dirbehave = DIR_RECURSE;
537			Hflag = true;
538			break;
539		case 's':
540			sflag = true;
541			break;
542		case 'U':
543			binbehave = BINFILE_BIN;
544			break;
545		case 'u':
546		case MMAP_OPT:
547			/* noop, compatibility */
548			break;
549		case 'V':
550			printf(getstr(9), __progname, VERSION);
551			exit(0);
552		case 'v':
553			vflag = true;
554			break;
555		case 'w':
556			wflag = true;
557			break;
558		case 'x':
559			xflag = true;
560			break;
561		case 'Z':
562			filebehave = FILE_GZIP;
563			break;
564		case BIN_OPT:
565			if (strcasecmp("binary", optarg) == 0)
566				binbehave = BINFILE_BIN;
567			else if (strcasecmp("without-match", optarg) == 0)
568				binbehave = BINFILE_SKIP;
569			else if (strcasecmp("text", optarg) == 0)
570				binbehave = BINFILE_TEXT;
571			else
572				errx(2, getstr(3), "--binary-files");
573			break;
574		case COLOR_OPT:
575			color = NULL;
576			if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
577			    strcasecmp("tty", optarg) == 0 ||
578			    strcasecmp("if-tty", optarg) == 0) {
579				char *term;
580
581				term = getenv("TERM");
582				if (isatty(STDOUT_FILENO) && term != NULL &&
583				    strcasecmp(term, "dumb") != 0)
584					color = init_color("01;31");
585			} else if (strcasecmp("always", optarg) == 0 ||
586			    strcasecmp("yes", optarg) == 0 ||
587			    strcasecmp("force", optarg) == 0) {
588				color = init_color("01;31");
589			} else if (strcasecmp("never", optarg) != 0 &&
590			    strcasecmp("none", optarg) != 0 &&
591			    strcasecmp("no", optarg) != 0)
592				errx(2, getstr(3), "--color");
593			break;
594		case LABEL_OPT:
595			label = optarg;
596			break;
597		case LINEBUF_OPT:
598			lbflag = true;
599			break;
600		case NULL_OPT:
601			nullflag = true;
602			break;
603		case R_INCLUDE_OPT:
604			finclude = true;
605			add_fpattern(optarg, INCL_PAT);
606			break;
607		case R_EXCLUDE_OPT:
608			fexclude = true;
609			add_fpattern(optarg, EXCL_PAT);
610			break;
611		case R_DINCLUDE_OPT:
612			dexclude = true;
613			add_dpattern(optarg, INCL_PAT);
614			break;
615		case R_DEXCLUDE_OPT:
616			dinclude = true;
617			add_dpattern(optarg, EXCL_PAT);
618			break;
619		case HELP_OPT:
620		default:
621			usage();
622		}
623		lastc = c;
624		newarg = optind != prevoptind;
625		prevoptind = optind;
626	}
627	aargc -= optind;
628	aargv += optind;
629
630	/* Fail if we don't have any pattern */
631	if (aargc == 0 && needpattern)
632		usage();
633
634	/* Process patterns from command line */
635	if (aargc != 0 && needpattern) {
636		add_pattern(*aargv, strlen(*aargv));
637		--aargc;
638		++aargv;
639	}
640
641	switch (grepbehave) {
642	case GREP_FIXED:
643	case GREP_BASIC:
644		break;
645	case GREP_EXTENDED:
646		cflags |= REG_EXTENDED;
647		break;
648	default:
649		/* NOTREACHED */
650		usage();
651	}
652
653	fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
654	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
655/*
656 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
657 * Optimizations should be done there.
658 */
659		/* Check if cheating is allowed (always is for fgrep). */
660	if (grepbehave == GREP_FIXED) {
661		for (i = 0; i < patterns; ++i)
662			fgrepcomp(&fg_pattern[i], pattern[i]);
663	} else {
664		for (i = 0; i < patterns; ++i) {
665			if (fastcomp(&fg_pattern[i], pattern[i])) {
666				/* Fall back to full regex library */
667				c = regcomp(&r_pattern[i], pattern[i], cflags);
668				if (c != 0) {
669					regerror(c, &r_pattern[i], re_error,
670					    RE_ERROR_BUF);
671					errx(2, "%s", re_error);
672				}
673			}
674		}
675	}
676
677	if (lbflag)
678		setlinebuf(stdout);
679
680	if ((aargc == 0 || aargc == 1) && !Hflag)
681		hflag = true;
682
683	if (aargc == 0)
684		exit(!procfile("-"));
685
686	if (dirbehave == DIR_RECURSE)
687		c = grep_tree(aargv);
688	else
689		for (c = 0; aargc--; ++aargv) {
690			if ((finclude || fexclude) && !file_matching(*aargv))
691				continue;
692			c+= procfile(*aargv);
693		}
694
695#ifndef WITHOUT_NLS
696	catclose(catalog);
697#endif
698
699	/* Find out the correct return value according to the
700	   results and the command line option. */
701	exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
702}
703