grep.c revision 210461
1/*	$OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $	*/
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/usr.bin/grep/grep.c 210461 2010-07-25 08:42:18Z gabor $");
32
33#include <sys/stat.h>
34#include <sys/types.h>
35
36#include <ctype.h>
37#include <err.h>
38#include <errno.h>
39#include <getopt.h>
40#include <limits.h>
41#include <libgen.h>
42#include <locale.h>
43#include <stdbool.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48
49#include "grep.h"
50
51#ifndef WITHOUT_NLS
52#include <nl_types.h>
53nl_catd	 catalog;
54#endif
55
/*
 * Default messages, used when NLS is disabled or no message catalogue
 * is found.  The array index is the message number; slot 0 is an
 * empty placeholder.
 */
const char	*errstr[] = {
	[0]  = "",
	[1]  = "(standard input)",
	[2]  = "cannot read bzip2 compressed file",
	[3]  = "unknown --color option",
	[4]  = "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
	[5]  = "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
	[6]  = "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
	[7]  = "\t[--null] [pattern] [file ...]\n",
	[8]  = "unknown --binary-files option",
	[9]  = "Binary file %s matches\n",
	[10] = "%s (BSD grep) %s\n",
};
73
74/* Flags passed to regcomp() and regexec() */
75int		 cflags = 0;
76int		 eflags = REG_STARTEND;
77
78/* Shortcut for matching all cases like empty regex */
79bool		 matchall;
80
81/* Searching patterns */
82unsigned int	 patterns, pattern_sz;
83char		**pattern;
84regex_t		*r_pattern;
85fastgrep_t	*fg_pattern;
86
87/* Filename exclusion/inclusion patterns */
88unsigned int	 epatterns, epattern_sz;
89struct epat	*epattern;
90
91/* For regex errors  */
92char	 re_error[RE_ERROR_BUF + 1];
93
94/* Command-line flags */
95unsigned long long Aflag;	/* -A x: print x lines trailing each match */
96unsigned long long Bflag;	/* -B x: print x lines leading each match */
97bool	 Hflag;		/* -H: always print file name */
98bool	 Lflag;		/* -L: only show names of files with no matches */
99bool	 bflag;		/* -b: show block numbers for each match */
100bool	 cflag;		/* -c: only show a count of matching lines */
101bool	 hflag;		/* -h: don't print filename headers */
102bool	 iflag;		/* -i: ignore case */
103bool	 lflag;		/* -l: only show names of files with matches */
104bool	 mflag;		/* -m x: stop reading the files after x matches */
105unsigned long long mcount;	/* count for -m */
106bool	 nflag;		/* -n: show line numbers in front of matching lines */
107bool	 oflag;		/* -o: print only matching part */
108bool	 qflag;		/* -q: quiet mode (don't output anything) */
109bool	 sflag;		/* -s: silent mode (ignore errors) */
110bool	 vflag;		/* -v: only show non-matching lines */
111bool	 wflag;		/* -w: pattern must start and end on word boundaries */
112bool	 xflag;		/* -x: pattern must match entire line */
113bool	 lbflag;	/* --line-buffered */
114bool	 nullflag;	/* --null */
115bool	 exclflag;	/* --exclude */
116char	*label;		/* --label */
117const char *color;	/* --color */
118int	 grepbehave = GREP_BASIC;	/* -EFGP: type of the regex */
119int	 binbehave = BINFILE_BIN;	/* -aIU: handling of binary files */
120int	 filebehave = FILE_STDIO;	/* -JZ: normal, gzip or bzip2 file */
121int	 devbehave = DEV_READ;		/* -D: handling of devices */
122int	 dirbehave = DIR_READ;		/* -dRr: handling of directories */
123int	 linkbehave = LINK_READ;	/* -OpS: handling of symlinks */
124
/*
 * Return codes for options that exist only in long form.  They start
 * above CHAR_MAX so they cannot collide with any short option letter.
 */
enum {
	BIN_OPT = CHAR_MAX + 1,		/* --binary-files */
	COLOR_OPT,			/* --color, --colour */
	HELP_OPT,			/* --help */
	MMAP_OPT,			/* --mmap */
	LINEBUF_OPT,			/* --line-buffered */
	LABEL_OPT,			/* --label */
	NULL_OPT,			/* --null */
	R_EXCLUDE_OPT,			/* --exclude */
	R_INCLUDE_OPT,			/* --include */
	R_DEXCLUDE_OPT,			/* --exclude-dir */
	R_DINCLUDE_OPT			/* --include-dir */
};
138
/* Defined further down; declared here so main() can call it. */
static inline const char	*init_color(const char *);

/*
 * Housekeeping state.
 */
bool	 first = true;	/* true while we are processing the first match */
bool	 prev;		/* whether or not the previous line matched */
int	 tail;		/* lines left to print */
bool	 notfound;	/* file not found */

/* Program name, supplied by the C runtime */
extern char	*__progname;
148
149/*
150 * Prints usage information and returns 2.
151 */
152static void
153usage(void)
154{
155	fprintf(stderr, getstr(4), __progname);
156	fprintf(stderr, "%s", getstr(5));
157	fprintf(stderr, "%s", getstr(5));
158	fprintf(stderr, "%s", getstr(6));
159	fprintf(stderr, "%s", getstr(7));
160	exit(2);
161}
162
163static const char	*optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
164
165struct option long_options[] =
166{
167	{"binary-files",	required_argument,	NULL, BIN_OPT},
168	{"help",		no_argument,		NULL, HELP_OPT},
169	{"mmap",		no_argument,		NULL, MMAP_OPT},
170	{"line-buffered",	no_argument,		NULL, LINEBUF_OPT},
171	{"label",		required_argument,	NULL, LABEL_OPT},
172	{"null",		no_argument,		NULL, NULL_OPT},
173	{"color",		optional_argument,	NULL, COLOR_OPT},
174	{"colour",		optional_argument,	NULL, COLOR_OPT},
175	{"exclude",		required_argument,	NULL, R_EXCLUDE_OPT},
176	{"include",		required_argument,	NULL, R_INCLUDE_OPT},
177	{"exclude-dir",		required_argument,	NULL, R_DEXCLUDE_OPT},
178	{"include-dir",		required_argument,	NULL, R_DINCLUDE_OPT},
179	{"after-context",	required_argument,	NULL, 'A'},
180	{"text",		no_argument,		NULL, 'a'},
181	{"before-context",	required_argument,	NULL, 'B'},
182	{"byte-offset",		no_argument,		NULL, 'b'},
183	{"context",		optional_argument,	NULL, 'C'},
184	{"count",		no_argument,		NULL, 'c'},
185	{"devices",		required_argument,	NULL, 'D'},
186        {"directories",		required_argument,	NULL, 'd'},
187	{"extended-regexp",	no_argument,		NULL, 'E'},
188	{"regexp",		required_argument,	NULL, 'e'},
189	{"fixed-strings",	no_argument,		NULL, 'F'},
190	{"file",		required_argument,	NULL, 'f'},
191	{"basic-regexp",	no_argument,		NULL, 'G'},
192	{"no-filename",		no_argument,		NULL, 'h'},
193	{"with-filename",	no_argument,		NULL, 'H'},
194	{"ignore-case",		no_argument,		NULL, 'i'},
195	{"bz2decompress",	no_argument,		NULL, 'J'},
196	{"files-with-matches",	no_argument,		NULL, 'l'},
197	{"files-without-match", no_argument,            NULL, 'L'},
198	{"max-count",		required_argument,	NULL, 'm'},
199	{"line-number",		no_argument,		NULL, 'n'},
200	{"only-matching",	no_argument,		NULL, 'o'},
201	{"quiet",		no_argument,		NULL, 'q'},
202	{"silent",		no_argument,		NULL, 'q'},
203	{"recursive",		no_argument,		NULL, 'r'},
204	{"no-messages",		no_argument,		NULL, 's'},
205	{"binary",		no_argument,		NULL, 'U'},
206	{"unix-byte-offsets",	no_argument,		NULL, 'u'},
207	{"invert-match",	no_argument,		NULL, 'v'},
208	{"version",		no_argument,		NULL, 'V'},
209	{"word-regexp",		no_argument,		NULL, 'w'},
210	{"line-regexp",		no_argument,		NULL, 'x'},
211	{"decompress",          no_argument,            NULL, 'Z'},
212	{NULL,			no_argument,		NULL, 0}
213};
214
215/*
216 * Adds a searching pattern to the internal array.
217 */
218static void
219add_pattern(char *pat, size_t len)
220{
221
222	/* Check if we can do a shortcut */
223	if (len == 0 || matchall) {
224		matchall = true;
225		return;
226	}
227	/* Increase size if necessary */
228	if (patterns == pattern_sz) {
229		pattern_sz *= 2;
230		pattern = grep_realloc(pattern, ++pattern_sz *
231		    sizeof(*pattern));
232	}
233	if (len > 0 && pat[len - 1] == '\n')
234		--len;
235	/* pat may not be NUL-terminated */
236	pattern[patterns] = grep_malloc(len + 1);
237	memcpy(pattern[patterns], pat, len);
238	pattern[patterns][len] = '\0';
239	++patterns;
240}
241
242/*
243 * Adds an include/exclude pattern to the internal array.
244 */
245static void
246add_epattern(char *pat, size_t len, int type, int mode)
247{
248
249	/* Increase size if necessary */
250	if (epatterns == epattern_sz) {
251		epattern_sz *= 2;
252		epattern = grep_realloc(epattern, ++epattern_sz *
253		    sizeof(struct epat));
254	}
255	if (len > 0 && pat[len - 1] == '\n')
256		 --len;
257	epattern[epatterns].pat = grep_malloc(len + 1);
258	memcpy(epattern[epatterns].pat, pat, len);
259	epattern[epatterns].pat[len] = '\0';
260	epattern[epatterns].type = type;
261	epattern[epatterns].mode = mode;
262	++epatterns;
263}
264
/*
 * Reads the file fn line by line and hands each line to add_pattern().
 * A line holding only a newline is passed with length 0.  Exits with
 * status 2 if the file cannot be opened or a read error occurs.
 */
static void
read_patterns(const char *fn)
{
	FILE *fp;
	char *ln;
	size_t n;

	fp = fopen(fn, "r");
	if (fp == NULL)
		err(2, "%s", fn);
	for (;;) {
		ln = fgetln(fp, &n);
		if (ln == NULL)
			break;
		if (*ln == '\n')
			add_pattern(ln, 0);
		else
			add_pattern(ln, n);
	}
	/* fgetln() returns NULL on both EOF and error; tell them apart. */
	if (ferror(fp))
		err(2, "%s", fn);
	fclose(fp);
}
283
/*
 * Returns the value of the GREP_COLOR environment variable, or the
 * default d when the variable is not set.
 */
static inline const char *
init_color(const char *d)
{
	const char *env;

	env = getenv("GREP_COLOR");
	if (env == NULL)
		return (d);
	return (env);
}
292
293int
294main(int argc, char *argv[])
295{
296	char **aargv, **eargv, *eopts;
297	char *ep;
298	unsigned long long l;
299	unsigned int aargc, eargc, i;
300	int c, lastc, needpattern, newarg, prevoptind;
301
302	setlocale(LC_ALL, "");
303
304#ifndef WITHOUT_NLS
305	catalog = catopen("grep", NL_CAT_LOCALE);
306#endif
307
308	/* Check what is the program name of the binary.  In this
309	   way we can have all the funcionalities in one binary
310	   without the need of scripting and using ugly hacks. */
311	switch (__progname[0]) {
312	case 'e':
313		grepbehave = GREP_EXTENDED;
314		break;
315	case 'f':
316		grepbehave = GREP_FIXED;
317		break;
318	case 'g':
319		grepbehave = GREP_BASIC;
320		break;
321	case 'z':
322		filebehave = FILE_GZIP;
323		switch(__progname[1]) {
324		case 'e':
325			grepbehave = GREP_EXTENDED;
326			break;
327		case 'f':
328			grepbehave = GREP_FIXED;
329			break;
330		case 'g':
331			grepbehave = GREP_BASIC;
332			break;
333		}
334		break;
335	}
336
337	lastc = '\0';
338	newarg = 1;
339	prevoptind = 1;
340	needpattern = 1;
341
342	eopts = getenv("GREP_OPTIONS");
343
344	eargc = 1;
345	if (eopts != NULL) {
346		char *str;
347
348		for(i = 0; i < strlen(eopts); i++)
349			if (eopts[i] == ' ')
350				eargc++;
351
352		eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
353
354		str = strtok(eopts, " ");
355		eargc = 0;
356
357		while(str != NULL) {
358			eargv[++eargc] = (char *)grep_malloc(sizeof(char) *
359			    (strlen(str) + 1));
360			strlcpy(eargv[eargc], str, strlen(str) + 1);
361			str = strtok(NULL, " ");
362		}
363		eargv[++eargc] = NULL;
364
365		aargv = (char **)grep_calloc(eargc + argc + 1,
366		    sizeof(char *));
367		aargv[0] = argv[0];
368
369		for(i = 1; i < eargc; i++)
370			aargv[i] = eargv[i];
371		for(int j = 1; j < argc; j++)
372			aargv[i++] = argv[j];
373
374		aargc = eargc + argc - 1;
375
376	} else {
377		aargv = argv;
378		aargc = argc;
379	}
380
381	while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
382	    -1)) {
383		switch (c) {
384		case '0': case '1': case '2': case '3': case '4':
385		case '5': case '6': case '7': case '8': case '9':
386			if (newarg || !isdigit(lastc))
387				Aflag = 0;
388			else if (Aflag > LLONG_MAX / 10) {
389				errno = ERANGE;
390				err(2, NULL);
391			}
392			Aflag = Bflag = (Aflag * 10) + (c - '0');
393			break;
394		case 'C':
395			if (optarg == NULL) {
396				Aflag = Bflag = 2;
397				break;
398			}
399			/* FALLTHROUGH */
400		case 'A':
401			/* FALLTHROUGH */
402		case 'B':
403			errno = 0;
404			l = strtoull(optarg, &ep, 10);
405			if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
406			    ((errno == EINVAL) && (l == 0)))
407				err(2, NULL);
408			else if (ep[0] != '\0') {
409				errno = EINVAL;
410				err(2, NULL);
411			}
412			if (c == 'A')
413				Aflag = l;
414			else if (c == 'B')
415				Bflag = l;
416			else
417				Aflag = Bflag = l;
418			break;
419		case 'a':
420			binbehave = BINFILE_TEXT;
421			break;
422		case 'b':
423			bflag = true;
424			break;
425		case 'c':
426			cflag = true;
427			break;
428		case 'D':
429			if (strcasecmp(optarg, "skip") == 0)
430				devbehave = DEV_SKIP;
431			else if (strcasecmp(optarg, "read") == 0)
432				devbehave = DEV_READ;
433			else {
434				errno = EINVAL;
435				err(2, NULL);
436			}
437			break;
438		case 'd':
439			if (strcasecmp("recurse", optarg) == 0) {
440				Hflag = true;
441				dirbehave = DIR_RECURSE;
442			} else if (strcasecmp("skip", optarg) == 0)
443				dirbehave = DIR_SKIP;
444			else if (strcasecmp("read", optarg) == 0)
445				dirbehave = DIR_READ;
446			else {
447				errno = EINVAL;
448				err(2, NULL);
449			}
450			break;
451		case 'E':
452			grepbehave = GREP_EXTENDED;
453			break;
454		case 'e':
455			add_pattern(optarg, strlen(optarg));
456			needpattern = 0;
457			break;
458		case 'F':
459			grepbehave = GREP_FIXED;
460			break;
461		case 'f':
462			read_patterns(optarg);
463			needpattern = 0;
464			break;
465		case 'G':
466			grepbehave = GREP_BASIC;
467			break;
468		case 'H':
469			Hflag = true;
470			break;
471		case 'h':
472			Hflag = false;
473			hflag = true;
474			break;
475		case 'I':
476			binbehave = BINFILE_SKIP;
477			break;
478		case 'i':
479		case 'y':
480			iflag =  true;
481			cflags |= REG_ICASE;
482			break;
483		case 'J':
484			filebehave = FILE_BZIP;
485			break;
486		case 'L':
487			lflag = false;
488			Lflag = true;
489			break;
490		case 'l':
491			Lflag = false;
492			lflag = true;
493			break;
494		case 'm':
495			mflag = true;
496			errno = 0;
497			mcount = strtoull(optarg, &ep, 10);
498			if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
499			    ((errno == EINVAL) && (mcount == 0)))
500				err(2, NULL);
501			else if (ep[0] != '\0') {
502				errno = EINVAL;
503				err(2, NULL);
504			}
505			break;
506		case 'n':
507			nflag = true;
508			break;
509		case 'O':
510			linkbehave = LINK_EXPLICIT;
511			break;
512		case 'o':
513			oflag = true;
514			break;
515		case 'p':
516			linkbehave = LINK_SKIP;
517			break;
518		case 'q':
519			qflag = true;
520			break;
521		case 'S':
522			linkbehave = LINK_READ;
523			break;
524		case 'R':
525		case 'r':
526			dirbehave = DIR_RECURSE;
527			Hflag = true;
528			break;
529		case 's':
530			sflag = true;
531			break;
532		case 'U':
533			binbehave = BINFILE_BIN;
534			break;
535		case 'u':
536		case MMAP_OPT:
537			/* noop, compatibility */
538			break;
539		case 'V':
540			printf(getstr(10), __progname, VERSION);
541			exit(0);
542		case 'v':
543			vflag = true;
544			break;
545		case 'w':
546			wflag = true;
547			break;
548		case 'x':
549			xflag = true;
550			break;
551		case 'Z':
552			filebehave = FILE_GZIP;
553			break;
554		case BIN_OPT:
555			if (strcasecmp("binary", optarg) == 0)
556				binbehave = BINFILE_BIN;
557			else if (strcasecmp("without-match", optarg) == 0)
558				binbehave = BINFILE_SKIP;
559			else if (strcasecmp("text", optarg) == 0)
560				binbehave = BINFILE_TEXT;
561			else
562				errx(2, "%s", getstr(8));
563			break;
564		case COLOR_OPT:
565			color = NULL;
566			if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
567			    strcasecmp("tty", optarg) == 0 ||
568			    strcasecmp("if-tty", optarg) == 0) {
569				char *term;
570
571				term = getenv("TERM");
572				if (isatty(STDOUT_FILENO) && term != NULL &&
573				    strcasecmp(term, "dumb") != 0)
574					color = init_color("01;31");
575			} else if (strcasecmp("always", optarg) == 0 ||
576			    strcasecmp("yes", optarg) == 0 ||
577			    strcasecmp("force", optarg) == 0) {
578				color = init_color("01;31");
579			} else if (strcasecmp("never", optarg) != 0 &&
580			    strcasecmp("none", optarg) != 0 &&
581			    strcasecmp("no", optarg) != 0)
582				errx(2, "%s", getstr(3));
583			break;
584		case LABEL_OPT:
585			label = optarg;
586			break;
587		case LINEBUF_OPT:
588			lbflag = true;
589			break;
590		case NULL_OPT:
591			nullflag = true;
592			break;
593		case R_INCLUDE_OPT:
594			exclflag = true;
595			add_epattern(basename(optarg), strlen(basename(optarg)),
596			    FILE_PAT, INCL_PAT);
597			break;
598		case R_EXCLUDE_OPT:
599			exclflag = true;
600			add_epattern(basename(optarg), strlen(basename(optarg)),
601			    FILE_PAT, EXCL_PAT);
602			break;
603		case R_DINCLUDE_OPT:
604			exclflag = true;
605			add_epattern(basename(optarg), strlen(basename(optarg)),
606			    DIR_PAT, INCL_PAT);
607			break;
608		case R_DEXCLUDE_OPT:
609			exclflag = true;
610			add_epattern(basename(optarg), strlen(basename(optarg)),
611			    DIR_PAT, EXCL_PAT);
612			break;
613		case HELP_OPT:
614		default:
615			usage();
616		}
617		lastc = c;
618		newarg = optind != prevoptind;
619		prevoptind = optind;
620	}
621	aargc -= optind;
622	aargv += optind;
623
624	/* Fail if we don't have any pattern */
625	if (aargc == 0 && needpattern)
626		usage();
627
628	/* Process patterns from command line */
629	if (aargc != 0 && needpattern) {
630		add_pattern(*aargv, strlen(*aargv));
631		--aargc;
632		++aargv;
633	}
634
635	switch (grepbehave) {
636	case GREP_FIXED:
637	case GREP_BASIC:
638		break;
639	case GREP_EXTENDED:
640		cflags |= REG_EXTENDED;
641		break;
642	default:
643		/* NOTREACHED */
644		usage();
645	}
646
647	fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
648	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
649/*
650 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
651 * Optimizations should be done there.
652 */
653		/* Check if cheating is allowed (always is for fgrep). */
654	if (grepbehave == GREP_FIXED) {
655		for (i = 0; i < patterns; ++i)
656			fgrepcomp(&fg_pattern[i], pattern[i]);
657	} else {
658		for (i = 0; i < patterns; ++i) {
659			if (fastcomp(&fg_pattern[i], pattern[i])) {
660				/* Fall back to full regex library */
661				c = regcomp(&r_pattern[i], pattern[i], cflags);
662				if (c != 0) {
663					regerror(c, &r_pattern[i], re_error,
664					    RE_ERROR_BUF);
665					errx(2, "%s", re_error);
666				}
667			}
668		}
669	}
670
671	if (lbflag)
672		setlinebuf(stdout);
673
674	if ((aargc == 0 || aargc == 1) && !Hflag)
675		hflag = true;
676
677	if (aargc == 0)
678		exit(!procfile("-"));
679
680	if (dirbehave == DIR_RECURSE)
681		c = grep_tree(aargv);
682	else
683		for (c = 0; aargc--; ++aargv)
684			c+= procfile(*aargv);
685
686#ifndef WITHOUT_NLS
687	catclose(catalog);
688#endif
689
690	/* Find out the correct return value according to the
691	   results and the command line option. */
692	exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
693}
694