grep.c revision 210430
1/*	$OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $	*/
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/usr.bin/grep/grep.c 210430 2010-07-23 19:36:11Z delphij $");
32
33#include <sys/stat.h>
34#include <sys/types.h>
35
36#include <ctype.h>
37#include <err.h>
38#include <errno.h>
39#include <getopt.h>
40#include <limits.h>
41#include <libgen.h>
42#include <locale.h>
43#include <stdbool.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48
49#include "grep.h"
50
51#ifndef WITHOUT_NLS
52#include <nl_types.h>
53nl_catd	 catalog;
54#endif
55
56/*
57 * Default messages to use when NLS is disabled or no catalogue
58 * is found.
 *
 * The numbered comments beside the entries give each string's index.
 * Code in this file asks for messages by those numbers through getstr()
 * (for example, the usage text is messages 4 through 7).  Index 0 is an
 * empty string, so the numbered messages start at 1.
 * NOTE(review): getstr() is defined outside this file; presumably it
 * falls back to errstr[n] -- confirm in grep.h.
59 */
60const char	*errstr[] = {
61	"",
62/* 1*/	"(standard input)",
63/* 2*/	"cannot read bzip2 compressed file",
64/* 3*/	"unknown --color option",
65/* 4*/	"usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
66/* 5*/	"\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
67/* 6*/	"\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
68/* 7*/	"\t[--null] [pattern] [file ...]\n",
69/* 8*/	"unknown --binary-files option",
70/* 9*/	"Binary file %s matches\n",
71/*10*/	"%s (BSD grep) %s\n",
72};
73
74/* Flags passed to regcomp() and regexec() */
/* main() ORs REG_ICASE (-i/-y) and REG_EXTENDED (extended mode) into cflags. */
75int		 cflags = 0;
76int		 eflags = REG_STARTEND;
77
78/* Shortcut for matching all cases like empty regex */
/* Set by add_pattern() on a zero-length pattern; later patterns are then ignored. */
79bool		 matchall;
80
81/* Searching patterns */
/*
 * patterns is the number of entries stored in pattern[]; pattern_sz is the
 * number of entries currently allocated for it (grown by add_pattern()).
 * r_pattern[] and fg_pattern[] are allocated in main() with one entry per
 * stored pattern: fg_pattern[i] is filled by fgrepcomp()/fastcomp(), and
 * r_pattern[i] is compiled with regcomp() only when fastcomp() returns
 * non-zero for that pattern.
 */
82unsigned int	 patterns, pattern_sz;
83char		**pattern;
84regex_t		*r_pattern;
85fastgrep_t	*fg_pattern;
86
87/* Filename exclusion/inclusion patterns */
/* epatterns/epattern_sz: used and allocated entry counts of epattern[]. */
88unsigned int	 epatterns, epattern_sz;
89struct epat	*epattern;
90
91/* For regex errors  */
/* Filled by regerror() in main() when regcomp() fails. */
92char	 re_error[RE_ERROR_BUF + 1];
93
94/* Command-line flags */
/*
 * All of these are written by the option switch in main().  Beyond the
 * option named in each trailing comment:
 *  - Aflag and Bflag are also set together by -C and by the -NUM digit
 *    options;
 *  - Hflag is also set by -R/-r and by "-d recurse", and cleared by -h;
 *  - qflag is also set by -l and -L;
 *  - exclflag is set by any of --include, --exclude, --include-dir and
 *    --exclude-dir;
 *  - grepbehave and filebehave get their defaults from the program name
 *    before option parsing.
 * -P appears in the option string but has no case in main()'s switch, so
 * it ends up in usage().
 */
95unsigned long long Aflag;	/* -A x: print x lines trailing each match */
96unsigned long long Bflag;	/* -B x: print x lines leading each match */
97bool	 Hflag;		/* -H: always print file name */
98bool	 Lflag;		/* -L: only show names of files with no matches */
99bool	 bflag;		/* -b: show block numbers for each match */
100bool	 cflag;		/* -c: only show a count of matching lines */
101bool	 hflag;		/* -h: don't print filename headers */
102bool	 iflag;		/* -i: ignore case */
103bool	 lflag;		/* -l: only show names of files with matches */
104bool	 mflag;		/* -m x: stop reading the files after x matches */
105unsigned long long mcount;	/* count for -m */
106bool	 nflag;		/* -n: show line numbers in front of matching lines */
107bool	 oflag;		/* -o: print only matching part */
108bool	 qflag;		/* -q: quiet mode (don't output anything) */
109bool	 sflag;		/* -s: silent mode (ignore errors) */
110bool	 vflag;		/* -v: only show non-matching lines */
111bool	 wflag;		/* -w: pattern must start and end on word boundaries */
112bool	 xflag;		/* -x: pattern must match entire line */
113bool	 lbflag;	/* --line-buffered */
114bool	 nullflag;	/* --null */
115bool	 exclflag;	/* --exclude */
116char	*label;		/* --label */
117char	*color;		/* --color */
118int	 grepbehave = GREP_BASIC;	/* -EFGP: type of the regex */
119int	 binbehave = BINFILE_BIN;	/* -aIU: handling of binary files */
120int	 filebehave = FILE_STDIO;	/* -JZ: normal, gzip or bzip2 file */
121int	 devbehave = DEV_GREP;		/* -D: handling of devices */
122int	 dirbehave = DIR_GREP;		/* -dRr: handling of directories */
123int	 linkbehave = LINK_GREP;	/* -OpS: handling of symlinks */
124
/*
 * Codes returned by getopt_long() for options that only have a long form.
 * They are used as the "val" field in long_options[] and as case labels in
 * main().  Numbering starts at CHAR_MAX + 1 so that none of them can be
 * equal to a short option character.
 */
125enum {
126	BIN_OPT = CHAR_MAX + 1,
127	COLOR_OPT,
128	HELP_OPT,
129	MMAP_OPT,
130	LINEBUF_OPT,
131	LABEL_OPT,
132	NULL_OPT,
133	R_EXCLUDE_OPT,
134	R_INCLUDE_OPT,
135	R_DEXCLUDE_OPT,
136	R_DINCLUDE_OPT
137};
138
139/* Housekeeping */
/*
 * Of these, only notfound is used in this file: main() reads it when
 * choosing the exit status.  first, prev and tail are not referenced in
 * this file.
 */
140bool	 first = true;	/* flag whether we are processing the first match */
141bool	 prev;		/* flag whether or not the previous line matched */
142int	 tail;		/* lines left to print */
143bool	 notfound;	/* file not found */
144
/*
 * Program name as invoked.  main() inspects its first one or two
 * characters to choose the default regex type and file handling, and
 * usage() and -V print it.
 */
145extern char	*__progname;
146
147/*
148 * Prints usage information and returns 2.
149 */
150static void
151usage(void)
152{
153	fprintf(stderr, getstr(4), __progname);
154	fprintf(stderr, "%s", getstr(5));
155	fprintf(stderr, "%s", getstr(5));
156	fprintf(stderr, "%s", getstr(6));
157	fprintf(stderr, "%s", getstr(7));
158	exit(2);
159}
160
/*
 * Short option string for getopt_long().  A trailing ':' marks an option
 * that takes an argument.  The digits 0-9 are listed as options so that
 * main() can assemble a -NUM context count one digit at a time.
 */
161static const char	*optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
162
/*
 * Long option table for getopt_long().  The flag field is always NULL, so
 * getopt_long() returns the last field: either one of the *_OPT codes for
 * long-only options or the character of the equivalent short option.
 * "color"/"colour" and "quiet"/"silent" are aliases for the same code.
 * The table ends with an entry whose name is NULL.
 */
163struct option long_options[] =
164{
165	{"binary-files",	required_argument,	NULL, BIN_OPT},
166	{"help",		no_argument,		NULL, HELP_OPT},
167	{"mmap",		no_argument,		NULL, MMAP_OPT},
168	{"line-buffered",	no_argument,		NULL, LINEBUF_OPT},
169	{"label",		required_argument,	NULL, LABEL_OPT},
170	{"null",		no_argument,		NULL, NULL_OPT},
171	{"color",		optional_argument,	NULL, COLOR_OPT},
172	{"colour",		optional_argument,	NULL, COLOR_OPT},
173	{"exclude",		required_argument,	NULL, R_EXCLUDE_OPT},
174	{"include",		required_argument,	NULL, R_INCLUDE_OPT},
175	{"exclude-dir",		required_argument,	NULL, R_DEXCLUDE_OPT},
176	{"include-dir",		required_argument,	NULL, R_DINCLUDE_OPT},
177	{"after-context",	required_argument,	NULL, 'A'},
178	{"text",		no_argument,		NULL, 'a'},
179	{"before-context",	required_argument,	NULL, 'B'},
180	{"byte-offset",		no_argument,		NULL, 'b'},
181	{"context",		optional_argument,	NULL, 'C'},
182	{"count",		no_argument,		NULL, 'c'},
183	{"devices",		required_argument,	NULL, 'D'},
184        {"directories",		required_argument,	NULL, 'd'},
185	{"extended-regexp",	no_argument,		NULL, 'E'},
186	{"regexp",		required_argument,	NULL, 'e'},
187	{"fixed-strings",	no_argument,		NULL, 'F'},
188	{"file",		required_argument,	NULL, 'f'},
189	{"basic-regexp",	no_argument,		NULL, 'G'},
190	{"no-filename",		no_argument,		NULL, 'h'},
191	{"with-filename",	no_argument,		NULL, 'H'},
192	{"ignore-case",		no_argument,		NULL, 'i'},
193	{"bz2decompress",	no_argument,		NULL, 'J'},
194	{"files-with-matches",	no_argument,		NULL, 'l'},
195	{"files-without-match", no_argument,            NULL, 'L'},
196	{"max-count",		required_argument,	NULL, 'm'},
197	{"line-number",		no_argument,		NULL, 'n'},
198	{"only-matching",	no_argument,		NULL, 'o'},
199	{"quiet",		no_argument,		NULL, 'q'},
200	{"silent",		no_argument,		NULL, 'q'},
201	{"recursive",		no_argument,		NULL, 'r'},
202	{"no-messages",		no_argument,		NULL, 's'},
203	{"binary",		no_argument,		NULL, 'U'},
204	{"unix-byte-offsets",	no_argument,		NULL, 'u'},
205	{"invert-match",	no_argument,		NULL, 'v'},
206	{"version",		no_argument,		NULL, 'V'},
207	{"word-regexp",		no_argument,		NULL, 'w'},
208	{"line-regexp",		no_argument,		NULL, 'x'},
209	{"decompress",          no_argument,            NULL, 'Z'},
210	{NULL,			no_argument,		NULL, 0}
211};
212
213/*
214 * Adds a searching pattern to the internal array.
215 */
216static void
217add_pattern(char *pat, size_t len)
218{
219
220	/* Check if we can do a shortcut */
221	if (len == 0 || matchall) {
222		matchall = true;
223		return;
224	}
225	/* Increase size if necessary */
226	if (patterns == pattern_sz) {
227		pattern_sz *= 2;
228		pattern = grep_realloc(pattern, ++pattern_sz *
229		    sizeof(*pattern));
230	}
231	if (len > 0 && pat[len - 1] == '\n')
232		--len;
233	/* pat may not be NUL-terminated */
234	pattern[patterns] = grep_malloc(len + 1);
235	memcpy(pattern[patterns], pat, len);
236	pattern[patterns][len] = '\0';
237	++patterns;
238}
239
240/*
241 * Adds an include/exclude pattern to the internal array.
242 */
243static void
244add_epattern(char *pat, size_t len, int type, int mode)
245{
246
247	/* Increase size if necessary */
248	if (epatterns == epattern_sz) {
249		epattern_sz *= 2;
250		epattern = grep_realloc(epattern, ++epattern_sz *
251		    sizeof(struct epat));
252	}
253	if (len > 0 && pat[len - 1] == '\n')
254		 --len;
255	epattern[epatterns].pat = grep_malloc(len + 1);
256	memcpy(epattern[epatterns].pat, pat, len);
257	epattern[epatterns].pat[len] = '\0';
258	epattern[epatterns].type = type;
259	epattern[epatterns].mode = mode;
260	++epatterns;
261}
262
/*
 * Reads search patterns from the file fn, one per line, and hands each
 * line to add_pattern().
 *
 * A line that starts with a newline is passed with length 0, i.e. as an
 * empty pattern.  If the file cannot be opened, or a read error is pending
 * after the last line, the program terminates through err() with status 2.
 */
static void
read_patterns(const char *fn)
{
	FILE *fp;
	char *buf;
	size_t buflen;

	fp = fopen(fn, "r");
	if (fp == NULL)
		err(2, "%s", fn);

	for (;;) {
		buf = fgetln(fp, &buflen);
		if (buf == NULL)
			break;
		if (*buf == '\n')
			add_pattern(buf, 0);
		else
			add_pattern(buf, buflen);
	}

	/* fgetln() returns NULL for both EOF and error; tell them apart. */
	if (ferror(fp))
		err(2, "%s", fn);
	fclose(fp);
}
281
282int
283main(int argc, char *argv[])
284{
285	char **aargv, **eargv, *eopts;
286	char *ep;
287	unsigned long long l;
288	unsigned int aargc, eargc, i;
289	int c, lastc, needpattern, newarg, prevoptind;
290
291	setlocale(LC_ALL, "");
292
293#ifndef WITHOUT_NLS
294	catalog = catopen("grep", NL_CAT_LOCALE);
295#endif
296
297	/* Check what is the program name of the binary.  In this
298	   way we can have all the funcionalities in one binary
299	   without the need of scripting and using ugly hacks. */
300	switch (__progname[0]) {
301	case 'e':
302		grepbehave = GREP_EXTENDED;
303		break;
304	case 'f':
305		grepbehave = GREP_FIXED;
306		break;
307	case 'g':
308		grepbehave = GREP_BASIC;
309		break;
310	case 'z':
311		filebehave = FILE_GZIP;
312		switch(__progname[1]) {
313		case 'e':
314			grepbehave = GREP_EXTENDED;
315			break;
316		case 'f':
317			grepbehave = GREP_FIXED;
318			break;
319		case 'g':
320			grepbehave = GREP_BASIC;
321			break;
322		}
323		break;
324	}
325
326	lastc = '\0';
327	newarg = 1;
328	prevoptind = 1;
329	needpattern = 1;
330
331	eopts = getenv("GREP_OPTIONS");
332
333	eargc = 1;
334	if (eopts != NULL) {
335		char *str;
336
337		for(i = 0; i < strlen(eopts); i++)
338			if (eopts[i] == ' ')
339				eargc++;
340
341		eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
342
343		str = strtok(eopts, " ");
344		eargc = 0;
345
346		while(str != NULL) {
347			eargv[++eargc] = (char *)grep_malloc(sizeof(char) *
348			    (strlen(str) + 1));
349			strlcpy(eargv[eargc], str, strlen(str) + 1);
350			str = strtok(NULL, " ");
351		}
352		eargv[++eargc] = NULL;
353
354		aargv = (char **)grep_calloc(eargc + argc + 1,
355		    sizeof(char *));
356		aargv[0] = argv[0];
357
358		for(i = 1; i < eargc; i++)
359			aargv[i] = eargv[i];
360		for(int j = 1; j < argc; j++)
361			aargv[i++] = argv[j];
362
363		aargc = eargc + argc - 1;
364
365	} else {
366		aargv = argv;
367		aargc = argc;
368	}
369
370	while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
371	    -1)) {
372		switch (c) {
373		case '0': case '1': case '2': case '3': case '4':
374		case '5': case '6': case '7': case '8': case '9':
375			if (newarg || !isdigit(lastc))
376				Aflag = 0;
377			else if (Aflag > LLONG_MAX / 10) {
378				errno = ERANGE;
379				err(2, NULL);
380			}
381			Aflag = Bflag = (Aflag * 10) + (c - '0');
382			break;
383		case 'C':
384			if (optarg == NULL) {
385				Aflag = Bflag = 2;
386				break;
387			}
388			/* FALLTHROUGH */
389		case 'A':
390			/* FALLTHROUGH */
391		case 'B':
392			errno = 0;
393			l = strtoull(optarg, &ep, 10);
394			if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
395			    ((errno == EINVAL) && (l == 0)))
396				err(2, NULL);
397			else if (ep[0] != '\0') {
398				errno = EINVAL;
399				err(2, NULL);
400			}
401			if (c == 'A')
402				Aflag = l;
403			else if (c == 'B')
404				Bflag = l;
405			else
406				Aflag = Bflag = l;
407			break;
408		case 'a':
409			binbehave = BINFILE_TEXT;
410			break;
411		case 'b':
412			bflag = true;
413			break;
414		case 'c':
415			cflag = true;
416			break;
417		case 'D':
418			if (strcmp(optarg, "skip") == 0)
419				devbehave = DEV_SKIP;
420			break;
421		case 'd':
422			if (strcmp("recurse", optarg) == 0) {
423				Hflag = true;
424				dirbehave = DIR_RECURSE;
425			} else if (strcmp("skip", optarg) == 0)
426				dirbehave = DIR_SKIP;
427			else if (strcmp("read", optarg) != 0) {
428				errno = EINVAL;
429				err(2, NULL);
430			}
431			break;
432		case 'E':
433			grepbehave = GREP_EXTENDED;
434			break;
435		case 'e':
436			add_pattern(optarg, strlen(optarg));
437			needpattern = 0;
438			break;
439		case 'F':
440			grepbehave = GREP_FIXED;
441			break;
442		case 'f':
443			read_patterns(optarg);
444			needpattern = 0;
445			break;
446		case 'G':
447			grepbehave = GREP_BASIC;
448			break;
449		case 'H':
450			Hflag = true;
451			break;
452		case 'h':
453			Hflag = false;
454			hflag = true;
455			break;
456		case 'I':
457			binbehave = BINFILE_SKIP;
458			break;
459		case 'i':
460		case 'y':
461			iflag =  true;
462			cflags |= REG_ICASE;
463			break;
464		case 'J':
465			filebehave = FILE_BZIP;
466			break;
467		case 'L':
468			lflag = false;
469			Lflag = qflag = true;
470			break;
471		case 'l':
472			Lflag = false;
473			lflag = qflag = true;
474			break;
475		case 'm':
476			mflag = true;
477			errno = 0;
478			mcount = strtoull(optarg, &ep, 10);
479			if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
480			    ((errno == EINVAL) && (mcount == 0)))
481				err(2, NULL);
482			else if (ep[0] != '\0') {
483				errno = EINVAL;
484				err(2, NULL);
485			}
486			break;
487		case 'n':
488			nflag = true;
489			break;
490		case 'O':
491			linkbehave = LINK_EXPLICIT;
492			break;
493		case 'o':
494			oflag = true;
495			break;
496		case 'p':
497			linkbehave = LINK_SKIP;
498			break;
499		case 'q':
500			qflag = true;
501			break;
502		case 'S':
503			linkbehave = LINK_GREP;
504			break;
505		case 'R':
506		case 'r':
507			dirbehave = DIR_RECURSE;
508			Hflag = true;
509			break;
510		case 's':
511			sflag = true;
512			break;
513		case 'U':
514			binbehave = BINFILE_BIN;
515			break;
516		case 'u':
517		case MMAP_OPT:
518			/* noop, compatibility */
519			break;
520		case 'V':
521			printf(getstr(10), __progname, VERSION);
522			exit(0);
523		case 'v':
524			vflag = true;
525			break;
526		case 'w':
527			wflag = true;
528			break;
529		case 'x':
530			xflag = true;
531			break;
532		case 'Z':
533			filebehave = FILE_GZIP;
534			break;
535		case BIN_OPT:
536			if (strcmp("binary", optarg) == 0)
537				binbehave = BINFILE_BIN;
538			else if (strcmp("without-match", optarg) == 0)
539				binbehave = BINFILE_SKIP;
540			else if (strcmp("text", optarg) == 0)
541				binbehave = BINFILE_TEXT;
542			else
543				errx(2, "%s", getstr(8));
544			break;
545		case COLOR_OPT:
546			if (optarg == NULL || strcmp("auto", optarg) == 0 ||
547			    strcmp("always", optarg) == 0 ) {
548				color = getenv("GREP_COLOR");
549				if (color == NULL) {
550					color = grep_malloc(sizeof(char) * 6);
551					strcpy(color, "01;31");
552				}
553			} else if (strcmp("never", optarg) == 0)
554				color = NULL;
555			else
556				errx(2, "%s", getstr(3));
557			break;
558		case LABEL_OPT:
559			label = optarg;
560			break;
561		case LINEBUF_OPT:
562			lbflag = true;
563			break;
564		case NULL_OPT:
565			nullflag = true;
566			break;
567		case R_INCLUDE_OPT:
568			exclflag = true;
569			add_epattern(basename(optarg), strlen(basename(optarg)),
570			    FILE_PAT, INCL_PAT);
571			break;
572		case R_EXCLUDE_OPT:
573			exclflag = true;
574			add_epattern(basename(optarg), strlen(basename(optarg)),
575			    FILE_PAT, EXCL_PAT);
576			break;
577		case R_DINCLUDE_OPT:
578			exclflag = true;
579			add_epattern(basename(optarg), strlen(basename(optarg)),
580			    DIR_PAT, INCL_PAT);
581			break;
582		case R_DEXCLUDE_OPT:
583			exclflag = true;
584			add_epattern(basename(optarg), strlen(basename(optarg)),
585			    DIR_PAT, EXCL_PAT);
586			break;
587		case HELP_OPT:
588		default:
589			usage();
590		}
591		lastc = c;
592		newarg = optind != prevoptind;
593		prevoptind = optind;
594	}
595	aargc -= optind;
596	aargv += optind;
597
598	/* Fail if we don't have any pattern */
599	if (aargc == 0 && needpattern)
600		usage();
601
602	/* Process patterns from command line */
603	if (aargc != 0 && needpattern) {
604		add_pattern(*aargv, strlen(*aargv));
605		--aargc;
606		++aargv;
607	}
608
609	switch (grepbehave) {
610	case GREP_FIXED:
611	case GREP_BASIC:
612		break;
613	case GREP_EXTENDED:
614		cflags |= REG_EXTENDED;
615		break;
616	default:
617		/* NOTREACHED */
618		usage();
619	}
620
621	fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
622	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
623/*
624 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
625 * Optimizations should be done there.
626 */
627		/* Check if cheating is allowed (always is for fgrep). */
628	if (grepbehave == GREP_FIXED) {
629		for (i = 0; i < patterns; ++i)
630			fgrepcomp(&fg_pattern[i], pattern[i]);
631	} else {
632		for (i = 0; i < patterns; ++i) {
633			if (fastcomp(&fg_pattern[i], pattern[i])) {
634				/* Fall back to full regex library */
635				c = regcomp(&r_pattern[i], pattern[i], cflags);
636				if (c != 0) {
637					regerror(c, &r_pattern[i], re_error,
638					    RE_ERROR_BUF);
639					errx(2, "%s", re_error);
640				}
641			}
642		}
643	}
644
645	if (lbflag)
646		setlinebuf(stdout);
647
648	if ((aargc == 0 || aargc == 1) && !Hflag)
649		hflag = true;
650
651	if (aargc == 0)
652		exit(!procfile("-"));
653
654	if (dirbehave == DIR_RECURSE)
655		c = grep_tree(aargv);
656	else
657		for (c = 0; aargc--; ++aargv)
658			c+= procfile(*aargv);
659
660#ifndef WITHOUT_NLS
661	catclose(catalog);
662#endif
663
664	/* Find out the correct return value according to the
665	   results and the command line option. */
666	exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
667}
668