Deleted Added
full compact
grep.c (211519) grep.c (220422)
1/* $NetBSD: grep.c,v 1.4 2011/02/16 01:31:33 joerg Exp $ */
2/* $FreeBSD: head/usr.bin/grep/grep.c 220422 2011-04-07 13:03:35Z gabor $ */
1/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
3/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
4
5/*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/usr.bin/grep/grep.c 211519 2010-08-19 22:55:17Z delphij $");
33__FBSDID("$FreeBSD: head/usr.bin/grep/grep.c 220422 2011-04-07 13:03:35Z gabor $");
32
33#include <sys/stat.h>
34#include <sys/types.h>
35
36#include <ctype.h>
37#include <err.h>
38#include <errno.h>
39#include <getopt.h>
40#include <limits.h>
41#include <libgen.h>
42#include <locale.h>
43#include <stdbool.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48
49#include "grep.h"
50
51#ifndef WITHOUT_NLS
52#include <nl_types.h>
53nl_catd catalog;
54#endif
55
56/*
57 * Default messages to use when NLS is disabled or no catalogue
58 * is found.
59 */
60const char *errstr[] = {
61 "",
62/* 1*/ "(standard input)",
63/* 2*/ "cannot read bzip2 compressed file",
64/* 3*/ "unknown %s option",
65/* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
66/* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
67/* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
68/* 7*/ "\t[--null] [pattern] [file ...]\n",
69/* 8*/ "Binary file %s matches\n",
70/* 9*/ "%s (BSD grep) %s\n",
71};
72
73/* Flags passed to regcomp() and regexec() */
74int cflags = 0;
75int eflags = REG_STARTEND;
76
77/* Shortcut for matching all cases like empty regex */
78bool matchall;
79
80/* Searching patterns */
81unsigned int patterns, pattern_sz;
82char **pattern;
83regex_t *r_pattern;
84fastgrep_t *fg_pattern;
85
86/* Filename exclusion/inclusion patterns */
87unsigned int fpatterns, fpattern_sz;
88unsigned int dpatterns, dpattern_sz;
89struct epat *dpattern, *fpattern;
90
91/* For regex errors */
92char re_error[RE_ERROR_BUF + 1];
93
94/* Command-line flags */
95unsigned long long Aflag; /* -A x: print x lines trailing each match */
96unsigned long long Bflag; /* -B x: print x lines leading each match */
97bool Hflag; /* -H: always print file name */
98bool Lflag; /* -L: only show names of files with no matches */
99bool bflag; /* -b: show block numbers for each match */
100bool cflag; /* -c: only show a count of matching lines */
101bool hflag; /* -h: don't print filename headers */
102bool iflag; /* -i: ignore case */
103bool lflag; /* -l: only show names of files with matches */
104bool mflag; /* -m x: stop reading the files after x matches */
105unsigned long long mcount; /* count for -m */
106bool nflag; /* -n: show line numbers in front of matching lines */
107bool oflag; /* -o: print only matching part */
108bool qflag; /* -q: quiet mode (don't output anything) */
109bool sflag; /* -s: silent mode (ignore errors) */
110bool vflag; /* -v: only show non-matching lines */
111bool wflag; /* -w: pattern must start and end on word boundaries */
112bool xflag; /* -x: pattern must match entire line */
113bool lbflag; /* --line-buffered */
114bool nullflag; /* --null */
115char *label; /* --label */
116const char *color; /* --color */
117int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
118int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
119int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
120int devbehave = DEV_READ; /* -D: handling of devices */
121int dirbehave = DIR_READ; /* -dRr: handling of directories */
122int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
123
124bool dexclude, dinclude; /* --exclude-dir and --include-dir */
125bool fexclude, finclude; /* --exclude and --include */
126
127enum {
128 BIN_OPT = CHAR_MAX + 1,
129 COLOR_OPT,
130 HELP_OPT,
131 MMAP_OPT,
132 LINEBUF_OPT,
133 LABEL_OPT,
134 NULL_OPT,
135 R_EXCLUDE_OPT,
136 R_INCLUDE_OPT,
137 R_DEXCLUDE_OPT,
138 R_DINCLUDE_OPT
139};
140
141static inline const char *init_color(const char *);
142
143/* Housekeeping */
144bool first = true; /* flag whether we are processing the first match */
145bool prev; /* flag whether or not the previous line matched */
146int tail; /* lines left to print */
147bool notfound; /* file not found */
148
149extern char *__progname;
150
151/*
152 * Prints usage information and returns 2.
153 */
154static void
155usage(void)
156{
157 fprintf(stderr, getstr(4), __progname);
158 fprintf(stderr, "%s", getstr(5));
159 fprintf(stderr, "%s", getstr(5));
160 fprintf(stderr, "%s", getstr(6));
161 fprintf(stderr, "%s", getstr(7));
162 exit(2);
163}
164
165static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
166
167struct option long_options[] =
168{
169 {"binary-files", required_argument, NULL, BIN_OPT},
170 {"help", no_argument, NULL, HELP_OPT},
171 {"mmap", no_argument, NULL, MMAP_OPT},
172 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
173 {"label", required_argument, NULL, LABEL_OPT},
174 {"null", no_argument, NULL, NULL_OPT},
175 {"color", optional_argument, NULL, COLOR_OPT},
176 {"colour", optional_argument, NULL, COLOR_OPT},
177 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
178 {"include", required_argument, NULL, R_INCLUDE_OPT},
179 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
180 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
181 {"after-context", required_argument, NULL, 'A'},
182 {"text", no_argument, NULL, 'a'},
183 {"before-context", required_argument, NULL, 'B'},
184 {"byte-offset", no_argument, NULL, 'b'},
185 {"context", optional_argument, NULL, 'C'},
186 {"count", no_argument, NULL, 'c'},
187 {"devices", required_argument, NULL, 'D'},
188 {"directories", required_argument, NULL, 'd'},
189 {"extended-regexp", no_argument, NULL, 'E'},
190 {"regexp", required_argument, NULL, 'e'},
191 {"fixed-strings", no_argument, NULL, 'F'},
192 {"file", required_argument, NULL, 'f'},
193 {"basic-regexp", no_argument, NULL, 'G'},
194 {"no-filename", no_argument, NULL, 'h'},
195 {"with-filename", no_argument, NULL, 'H'},
196 {"ignore-case", no_argument, NULL, 'i'},
197 {"bz2decompress", no_argument, NULL, 'J'},
198 {"files-with-matches", no_argument, NULL, 'l'},
199 {"files-without-match", no_argument, NULL, 'L'},
200 {"max-count", required_argument, NULL, 'm'},
201 {"line-number", no_argument, NULL, 'n'},
202 {"only-matching", no_argument, NULL, 'o'},
203 {"quiet", no_argument, NULL, 'q'},
204 {"silent", no_argument, NULL, 'q'},
205 {"recursive", no_argument, NULL, 'r'},
206 {"no-messages", no_argument, NULL, 's'},
207 {"binary", no_argument, NULL, 'U'},
208 {"unix-byte-offsets", no_argument, NULL, 'u'},
209 {"invert-match", no_argument, NULL, 'v'},
210 {"version", no_argument, NULL, 'V'},
211 {"word-regexp", no_argument, NULL, 'w'},
212 {"line-regexp", no_argument, NULL, 'x'},
213 {"decompress", no_argument, NULL, 'Z'},
214 {NULL, no_argument, NULL, 0}
215};
216
217/*
218 * Adds a searching pattern to the internal array.
219 */
220static void
221add_pattern(char *pat, size_t len)
222{
223
224 /* Check if we can do a shortcut */
225 if (len == 0 || matchall) {
226 matchall = true;
227 return;
228 }
229 /* Increase size if necessary */
230 if (patterns == pattern_sz) {
231 pattern_sz *= 2;
232 pattern = grep_realloc(pattern, ++pattern_sz *
233 sizeof(*pattern));
234 }
235 if (len > 0 && pat[len - 1] == '\n')
236 --len;
237 /* pat may not be NUL-terminated */
238 pattern[patterns] = grep_malloc(len + 1);
239 memcpy(pattern[patterns], pat, len);
240 pattern[patterns][len] = '\0';
241 ++patterns;
242}
243
244/*
245 * Adds a file include/exclude pattern to the internal array.
246 */
247static void
248add_fpattern(const char *pat, int mode)
249{
250
251 /* Increase size if necessary */
252 if (fpatterns == fpattern_sz) {
253 fpattern_sz *= 2;
254 fpattern = grep_realloc(fpattern, ++fpattern_sz *
255 sizeof(struct epat));
256 }
257 fpattern[fpatterns].pat = grep_strdup(pat);
258 fpattern[fpatterns].mode = mode;
259 ++fpatterns;
260}
261
262/*
263 * Adds a directory include/exclude pattern to the internal array.
264 */
265static void
266add_dpattern(const char *pat, int mode)
267{
268
269 /* Increase size if necessary */
270 if (dpatterns == dpattern_sz) {
271 dpattern_sz *= 2;
272 dpattern = grep_realloc(dpattern, ++dpattern_sz *
273 sizeof(struct epat));
274 }
275 dpattern[dpatterns].pat = grep_strdup(pat);
276 dpattern[dpatterns].mode = mode;
277 ++dpatterns;
278}
279
/*
 * Reads search patterns from the file fn, one per line, and hands each
 * line to add_pattern().
 *
 * A line that starts with a newline (i.e. an empty line) is passed with
 * length 0.  Failure to open the file, or a read error, terminates the
 * program with exit status 2 via err().
 */
static void
read_patterns(const char *fn)
{
	FILE *fp;
	char *ln;
	size_t n;

	fp = fopen(fn, "r");
	if (fp == NULL)
		err(2, "%s", fn);

	for (;;) {
		ln = fgetln(fp, &n);
		if (ln == NULL)
			break;
		/* Empty line: report it as a zero-length pattern */
		if (*ln == '\n')
			n = 0;
		add_pattern(ln, n);
	}

	/* fgetln() returns NULL on both EOF and error; tell them apart */
	if (ferror(fp))
		err(2, "%s", fn);
	fclose(fp);
}
298
/*
 * Returns the value of the GREP_COLOR environment variable if it is set,
 * otherwise the caller-supplied default d.
 */
static inline const char *
init_color(const char *d)
{
	const char *env = getenv("GREP_COLOR");

	if (env == NULL)
		return (d);
	return (env);
}
307
308int
309main(int argc, char *argv[])
310{
311 char **aargv, **eargv, *eopts;
312 char *ep;
313 unsigned long long l;
314 unsigned int aargc, eargc, i;
315 int c, lastc, needpattern, newarg, prevoptind;
316
317 setlocale(LC_ALL, "");
318
319#ifndef WITHOUT_NLS
320 catalog = catopen("grep", NL_CAT_LOCALE);
321#endif
322
323 /* Check what is the program name of the binary. In this
324 way we can have all the funcionalities in one binary
325 without the need of scripting and using ugly hacks. */
326 switch (__progname[0]) {
327 case 'e':
328 grepbehave = GREP_EXTENDED;
329 break;
330 case 'f':
331 grepbehave = GREP_FIXED;
332 break;
333 case 'g':
334 grepbehave = GREP_BASIC;
335 break;
336 case 'z':
337 filebehave = FILE_GZIP;
338 switch(__progname[1]) {
339 case 'e':
340 grepbehave = GREP_EXTENDED;
341 break;
342 case 'f':
343 grepbehave = GREP_FIXED;
344 break;
345 case 'g':
346 grepbehave = GREP_BASIC;
347 break;
348 }
349 break;
350 }
351
352 lastc = '\0';
353 newarg = 1;
354 prevoptind = 1;
355 needpattern = 1;
356
357 eopts = getenv("GREP_OPTIONS");
358
359 /* support for extra arguments in GREP_OPTIONS */
360 eargc = 0;
361 if (eopts != NULL) {
362 char *str;
363
364 /* make an estimation of how many extra arguments we have */
365 for (unsigned int j = 0; j < strlen(eopts); j++)
366 if (eopts[j] == ' ')
367 eargc++;
368
369 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
370
371 eargc = 0;
372 /* parse extra arguments */
373 while ((str = strsep(&eopts, " ")) != NULL)
374 eargv[eargc++] = grep_strdup(str);
375
376 aargv = (char **)grep_calloc(eargc + argc + 1,
377 sizeof(char *));
378
379 aargv[0] = argv[0];
380 for (i = 0; i < eargc; i++)
381 aargv[i + 1] = eargv[i];
382 for (int j = 1; j < argc; j++, i++)
383 aargv[i + 1] = argv[j];
384
385 aargc = eargc + argc;
386 } else {
387 aargv = argv;
388 aargc = argc;
389 }
390
391 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
392 -1)) {
393 switch (c) {
394 case '0': case '1': case '2': case '3': case '4':
395 case '5': case '6': case '7': case '8': case '9':
396 if (newarg || !isdigit(lastc))
397 Aflag = 0;
398 else if (Aflag > LLONG_MAX / 10) {
399 errno = ERANGE;
400 err(2, NULL);
401 }
402 Aflag = Bflag = (Aflag * 10) + (c - '0');
403 break;
404 case 'C':
405 if (optarg == NULL) {
406 Aflag = Bflag = 2;
407 break;
408 }
409 /* FALLTHROUGH */
410 case 'A':
411 /* FALLTHROUGH */
412 case 'B':
413 errno = 0;
414 l = strtoull(optarg, &ep, 10);
415 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
416 ((errno == EINVAL) && (l == 0)))
417 err(2, NULL);
418 else if (ep[0] != '\0') {
419 errno = EINVAL;
420 err(2, NULL);
421 }
422 if (c == 'A')
423 Aflag = l;
424 else if (c == 'B')
425 Bflag = l;
426 else
427 Aflag = Bflag = l;
428 break;
429 case 'a':
430 binbehave = BINFILE_TEXT;
431 break;
432 case 'b':
433 bflag = true;
434 break;
435 case 'c':
436 cflag = true;
437 break;
438 case 'D':
439 if (strcasecmp(optarg, "skip") == 0)
440 devbehave = DEV_SKIP;
441 else if (strcasecmp(optarg, "read") == 0)
442 devbehave = DEV_READ;
443 else
444 errx(2, getstr(3), "--devices");
445 break;
446 case 'd':
447 if (strcasecmp("recurse", optarg) == 0) {
448 Hflag = true;
449 dirbehave = DIR_RECURSE;
450 } else if (strcasecmp("skip", optarg) == 0)
451 dirbehave = DIR_SKIP;
452 else if (strcasecmp("read", optarg) == 0)
453 dirbehave = DIR_READ;
454 else
455 errx(2, getstr(3), "--directories");
456 break;
457 case 'E':
458 grepbehave = GREP_EXTENDED;
459 break;
460 case 'e':
461 add_pattern(optarg, strlen(optarg));
462 needpattern = 0;
463 break;
464 case 'F':
465 grepbehave = GREP_FIXED;
466 break;
467 case 'f':
468 read_patterns(optarg);
469 needpattern = 0;
470 break;
471 case 'G':
472 grepbehave = GREP_BASIC;
473 break;
474 case 'H':
475 Hflag = true;
476 break;
477 case 'h':
478 Hflag = false;
479 hflag = true;
480 break;
481 case 'I':
482 binbehave = BINFILE_SKIP;
483 break;
484 case 'i':
485 case 'y':
486 iflag = true;
487 cflags |= REG_ICASE;
488 break;
489 case 'J':
490 filebehave = FILE_BZIP;
491 break;
492 case 'L':
493 lflag = false;
494 Lflag = true;
495 break;
496 case 'l':
497 Lflag = false;
498 lflag = true;
499 break;
500 case 'm':
501 mflag = true;
502 errno = 0;
503 mcount = strtoull(optarg, &ep, 10);
504 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
505 ((errno == EINVAL) && (mcount == 0)))
506 err(2, NULL);
507 else if (ep[0] != '\0') {
508 errno = EINVAL;
509 err(2, NULL);
510 }
511 break;
512 case 'n':
513 nflag = true;
514 break;
515 case 'O':
516 linkbehave = LINK_EXPLICIT;
517 break;
518 case 'o':
519 oflag = true;
520 break;
521 case 'p':
522 linkbehave = LINK_SKIP;
523 break;
524 case 'q':
525 qflag = true;
526 break;
527 case 'S':
528 linkbehave = LINK_READ;
529 break;
530 case 'R':
531 case 'r':
532 dirbehave = DIR_RECURSE;
533 Hflag = true;
534 break;
535 case 's':
536 sflag = true;
537 break;
538 case 'U':
539 binbehave = BINFILE_BIN;
540 break;
541 case 'u':
542 case MMAP_OPT:
543 /* noop, compatibility */
544 break;
545 case 'V':
546 printf(getstr(9), __progname, VERSION);
547 exit(0);
548 case 'v':
549 vflag = true;
550 break;
551 case 'w':
552 wflag = true;
553 break;
554 case 'x':
555 xflag = true;
556 break;
557 case 'Z':
558 filebehave = FILE_GZIP;
559 break;
560 case BIN_OPT:
561 if (strcasecmp("binary", optarg) == 0)
562 binbehave = BINFILE_BIN;
563 else if (strcasecmp("without-match", optarg) == 0)
564 binbehave = BINFILE_SKIP;
565 else if (strcasecmp("text", optarg) == 0)
566 binbehave = BINFILE_TEXT;
567 else
568 errx(2, getstr(3), "--binary-files");
569 break;
570 case COLOR_OPT:
571 color = NULL;
572 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
573 strcasecmp("tty", optarg) == 0 ||
574 strcasecmp("if-tty", optarg) == 0) {
575 char *term;
576
577 term = getenv("TERM");
578 if (isatty(STDOUT_FILENO) && term != NULL &&
579 strcasecmp(term, "dumb") != 0)
580 color = init_color("01;31");
581 } else if (strcasecmp("always", optarg) == 0 ||
582 strcasecmp("yes", optarg) == 0 ||
583 strcasecmp("force", optarg) == 0) {
584 color = init_color("01;31");
585 } else if (strcasecmp("never", optarg) != 0 &&
586 strcasecmp("none", optarg) != 0 &&
587 strcasecmp("no", optarg) != 0)
588 errx(2, getstr(3), "--color");
589 break;
590 case LABEL_OPT:
591 label = optarg;
592 break;
593 case LINEBUF_OPT:
594 lbflag = true;
595 break;
596 case NULL_OPT:
597 nullflag = true;
598 break;
599 case R_INCLUDE_OPT:
600 finclude = true;
601 add_fpattern(optarg, INCL_PAT);
602 break;
603 case R_EXCLUDE_OPT:
604 fexclude = true;
605 add_fpattern(optarg, EXCL_PAT);
606 break;
607 case R_DINCLUDE_OPT:
608 dinclude = true;
609 add_dpattern(optarg, INCL_PAT);
610 break;
611 case R_DEXCLUDE_OPT:
612 dexclude = true;
613 add_dpattern(optarg, EXCL_PAT);
614 break;
615 case HELP_OPT:
616 default:
617 usage();
618 }
619 lastc = c;
620 newarg = optind != prevoptind;
621 prevoptind = optind;
622 }
623 aargc -= optind;
624 aargv += optind;
625
626 /* Fail if we don't have any pattern */
627 if (aargc == 0 && needpattern)
628 usage();
629
630 /* Process patterns from command line */
631 if (aargc != 0 && needpattern) {
632 add_pattern(*aargv, strlen(*aargv));
633 --aargc;
634 ++aargv;
635 }
636
637 switch (grepbehave) {
638 case GREP_FIXED:
639 case GREP_BASIC:
640 break;
641 case GREP_EXTENDED:
642 cflags |= REG_EXTENDED;
643 break;
644 default:
645 /* NOTREACHED */
646 usage();
647 }
648
649 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
650 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
651/*
652 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
653 * Optimizations should be done there.
654 */
655 /* Check if cheating is allowed (always is for fgrep). */
656 if (grepbehave == GREP_FIXED) {
657 for (i = 0; i < patterns; ++i)
658 fgrepcomp(&fg_pattern[i], pattern[i]);
659 } else {
660 for (i = 0; i < patterns; ++i) {
661 if (fastcomp(&fg_pattern[i], pattern[i])) {
662 /* Fall back to full regex library */
663 c = regcomp(&r_pattern[i], pattern[i], cflags);
664 if (c != 0) {
665 regerror(c, &r_pattern[i], re_error,
666 RE_ERROR_BUF);
667 errx(2, "%s", re_error);
668 }
669 }
670 }
671 }
672
673 if (lbflag)
674 setlinebuf(stdout);
675
676 if ((aargc == 0 || aargc == 1) && !Hflag)
677 hflag = true;
678
679 if (aargc == 0)
680 exit(!procfile("-"));
681
682 if (dirbehave == DIR_RECURSE)
683 c = grep_tree(aargv);
684 else
685 for (c = 0; aargc--; ++aargv) {
686 if ((finclude || fexclude) && !file_matching(*aargv))
687 continue;
688 c+= procfile(*aargv);
689 }
690
691#ifndef WITHOUT_NLS
692 catclose(catalog);
693#endif
694
695 /* Find out the correct return value according to the
696 results and the command line option. */
697 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
698}
34
35#include <sys/stat.h>
36#include <sys/types.h>
37
38#include <ctype.h>
39#include <err.h>
40#include <errno.h>
41#include <getopt.h>
42#include <limits.h>
43#include <libgen.h>
44#include <locale.h>
45#include <stdbool.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49#include <unistd.h>
50
51#include "grep.h"
52
53#ifndef WITHOUT_NLS
54#include <nl_types.h>
55nl_catd catalog;
56#endif
57
58/*
59 * Default messages to use when NLS is disabled or no catalogue
60 * is found.
61 */
62const char *errstr[] = {
63 "",
64/* 1*/ "(standard input)",
65/* 2*/ "cannot read bzip2 compressed file",
66/* 3*/ "unknown %s option",
67/* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
68/* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
69/* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
70/* 7*/ "\t[--null] [pattern] [file ...]\n",
71/* 8*/ "Binary file %s matches\n",
72/* 9*/ "%s (BSD grep) %s\n",
73};
74
75/* Flags passed to regcomp() and regexec() */
76int cflags = 0;
77int eflags = REG_STARTEND;
78
79/* Shortcut for matching all cases like empty regex */
80bool matchall;
81
82/* Searching patterns */
83unsigned int patterns, pattern_sz;
84char **pattern;
85regex_t *r_pattern;
86fastgrep_t *fg_pattern;
87
88/* Filename exclusion/inclusion patterns */
89unsigned int fpatterns, fpattern_sz;
90unsigned int dpatterns, dpattern_sz;
91struct epat *dpattern, *fpattern;
92
93/* For regex errors */
94char re_error[RE_ERROR_BUF + 1];
95
96/* Command-line flags */
97unsigned long long Aflag; /* -A x: print x lines trailing each match */
98unsigned long long Bflag; /* -B x: print x lines leading each match */
99bool Hflag; /* -H: always print file name */
100bool Lflag; /* -L: only show names of files with no matches */
101bool bflag; /* -b: show block numbers for each match */
102bool cflag; /* -c: only show a count of matching lines */
103bool hflag; /* -h: don't print filename headers */
104bool iflag; /* -i: ignore case */
105bool lflag; /* -l: only show names of files with matches */
106bool mflag; /* -m x: stop reading the files after x matches */
107unsigned long long mcount; /* count for -m */
108bool nflag; /* -n: show line numbers in front of matching lines */
109bool oflag; /* -o: print only matching part */
110bool qflag; /* -q: quiet mode (don't output anything) */
111bool sflag; /* -s: silent mode (ignore errors) */
112bool vflag; /* -v: only show non-matching lines */
113bool wflag; /* -w: pattern must start and end on word boundaries */
114bool xflag; /* -x: pattern must match entire line */
115bool lbflag; /* --line-buffered */
116bool nullflag; /* --null */
117char *label; /* --label */
118const char *color; /* --color */
119int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
120int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
121int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
122int devbehave = DEV_READ; /* -D: handling of devices */
123int dirbehave = DIR_READ; /* -dRr: handling of directories */
124int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
125
126bool dexclude, dinclude; /* --exclude-dir and --include-dir */
127bool fexclude, finclude; /* --exclude and --include */
128
129enum {
130 BIN_OPT = CHAR_MAX + 1,
131 COLOR_OPT,
132 HELP_OPT,
133 MMAP_OPT,
134 LINEBUF_OPT,
135 LABEL_OPT,
136 NULL_OPT,
137 R_EXCLUDE_OPT,
138 R_INCLUDE_OPT,
139 R_DEXCLUDE_OPT,
140 R_DINCLUDE_OPT
141};
142
143static inline const char *init_color(const char *);
144
145/* Housekeeping */
146bool first = true; /* flag whether we are processing the first match */
147bool prev; /* flag whether or not the previous line matched */
148int tail; /* lines left to print */
149bool notfound; /* file not found */
150
151extern char *__progname;
152
153/*
154 * Prints usage information and returns 2.
155 */
156static void
157usage(void)
158{
159 fprintf(stderr, getstr(4), __progname);
160 fprintf(stderr, "%s", getstr(5));
161 fprintf(stderr, "%s", getstr(5));
162 fprintf(stderr, "%s", getstr(6));
163 fprintf(stderr, "%s", getstr(7));
164 exit(2);
165}
166
167static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
168
169struct option long_options[] =
170{
171 {"binary-files", required_argument, NULL, BIN_OPT},
172 {"help", no_argument, NULL, HELP_OPT},
173 {"mmap", no_argument, NULL, MMAP_OPT},
174 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
175 {"label", required_argument, NULL, LABEL_OPT},
176 {"null", no_argument, NULL, NULL_OPT},
177 {"color", optional_argument, NULL, COLOR_OPT},
178 {"colour", optional_argument, NULL, COLOR_OPT},
179 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
180 {"include", required_argument, NULL, R_INCLUDE_OPT},
181 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
182 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
183 {"after-context", required_argument, NULL, 'A'},
184 {"text", no_argument, NULL, 'a'},
185 {"before-context", required_argument, NULL, 'B'},
186 {"byte-offset", no_argument, NULL, 'b'},
187 {"context", optional_argument, NULL, 'C'},
188 {"count", no_argument, NULL, 'c'},
189 {"devices", required_argument, NULL, 'D'},
190 {"directories", required_argument, NULL, 'd'},
191 {"extended-regexp", no_argument, NULL, 'E'},
192 {"regexp", required_argument, NULL, 'e'},
193 {"fixed-strings", no_argument, NULL, 'F'},
194 {"file", required_argument, NULL, 'f'},
195 {"basic-regexp", no_argument, NULL, 'G'},
196 {"no-filename", no_argument, NULL, 'h'},
197 {"with-filename", no_argument, NULL, 'H'},
198 {"ignore-case", no_argument, NULL, 'i'},
199 {"bz2decompress", no_argument, NULL, 'J'},
200 {"files-with-matches", no_argument, NULL, 'l'},
201 {"files-without-match", no_argument, NULL, 'L'},
202 {"max-count", required_argument, NULL, 'm'},
203 {"line-number", no_argument, NULL, 'n'},
204 {"only-matching", no_argument, NULL, 'o'},
205 {"quiet", no_argument, NULL, 'q'},
206 {"silent", no_argument, NULL, 'q'},
207 {"recursive", no_argument, NULL, 'r'},
208 {"no-messages", no_argument, NULL, 's'},
209 {"binary", no_argument, NULL, 'U'},
210 {"unix-byte-offsets", no_argument, NULL, 'u'},
211 {"invert-match", no_argument, NULL, 'v'},
212 {"version", no_argument, NULL, 'V'},
213 {"word-regexp", no_argument, NULL, 'w'},
214 {"line-regexp", no_argument, NULL, 'x'},
215 {"decompress", no_argument, NULL, 'Z'},
216 {NULL, no_argument, NULL, 0}
217};
218
219/*
220 * Adds a searching pattern to the internal array.
221 */
222static void
223add_pattern(char *pat, size_t len)
224{
225
226 /* Check if we can do a shortcut */
227 if (len == 0 || matchall) {
228 matchall = true;
229 return;
230 }
231 /* Increase size if necessary */
232 if (patterns == pattern_sz) {
233 pattern_sz *= 2;
234 pattern = grep_realloc(pattern, ++pattern_sz *
235 sizeof(*pattern));
236 }
237 if (len > 0 && pat[len - 1] == '\n')
238 --len;
239 /* pat may not be NUL-terminated */
240 pattern[patterns] = grep_malloc(len + 1);
241 memcpy(pattern[patterns], pat, len);
242 pattern[patterns][len] = '\0';
243 ++patterns;
244}
245
246/*
247 * Adds a file include/exclude pattern to the internal array.
248 */
249static void
250add_fpattern(const char *pat, int mode)
251{
252
253 /* Increase size if necessary */
254 if (fpatterns == fpattern_sz) {
255 fpattern_sz *= 2;
256 fpattern = grep_realloc(fpattern, ++fpattern_sz *
257 sizeof(struct epat));
258 }
259 fpattern[fpatterns].pat = grep_strdup(pat);
260 fpattern[fpatterns].mode = mode;
261 ++fpatterns;
262}
263
264/*
265 * Adds a directory include/exclude pattern to the internal array.
266 */
267static void
268add_dpattern(const char *pat, int mode)
269{
270
271 /* Increase size if necessary */
272 if (dpatterns == dpattern_sz) {
273 dpattern_sz *= 2;
274 dpattern = grep_realloc(dpattern, ++dpattern_sz *
275 sizeof(struct epat));
276 }
277 dpattern[dpatterns].pat = grep_strdup(pat);
278 dpattern[dpatterns].mode = mode;
279 ++dpatterns;
280}
281
/*
 * Reads search patterns from the file fn, one per line, and hands each
 * line to add_pattern().
 *
 * A line that starts with a newline (i.e. an empty line) is passed with
 * length 0.  Failure to open the file, or a read error, terminates the
 * program with exit status 2 via err().
 */
static void
read_patterns(const char *fn)
{
	FILE *fp;
	char *ln;
	size_t n;

	fp = fopen(fn, "r");
	if (fp == NULL)
		err(2, "%s", fn);

	for (;;) {
		ln = fgetln(fp, &n);
		if (ln == NULL)
			break;
		/* Empty line: report it as a zero-length pattern */
		if (*ln == '\n')
			n = 0;
		add_pattern(ln, n);
	}

	/* fgetln() returns NULL on both EOF and error; tell them apart */
	if (ferror(fp))
		err(2, "%s", fn);
	fclose(fp);
}
300
/*
 * Returns the value of the GREP_COLOR environment variable if it is set,
 * otherwise the caller-supplied default d.
 */
static inline const char *
init_color(const char *d)
{
	const char *env = getenv("GREP_COLOR");

	if (env == NULL)
		return (d);
	return (env);
}
309
310int
311main(int argc, char *argv[])
312{
313 char **aargv, **eargv, *eopts;
314 char *ep;
315 unsigned long long l;
316 unsigned int aargc, eargc, i;
317 int c, lastc, needpattern, newarg, prevoptind;
318
319 setlocale(LC_ALL, "");
320
321#ifndef WITHOUT_NLS
322 catalog = catopen("grep", NL_CAT_LOCALE);
323#endif
324
325 /* Check what is the program name of the binary. In this
326 way we can have all the funcionalities in one binary
327 without the need of scripting and using ugly hacks. */
328 switch (__progname[0]) {
329 case 'e':
330 grepbehave = GREP_EXTENDED;
331 break;
332 case 'f':
333 grepbehave = GREP_FIXED;
334 break;
335 case 'g':
336 grepbehave = GREP_BASIC;
337 break;
338 case 'z':
339 filebehave = FILE_GZIP;
340 switch(__progname[1]) {
341 case 'e':
342 grepbehave = GREP_EXTENDED;
343 break;
344 case 'f':
345 grepbehave = GREP_FIXED;
346 break;
347 case 'g':
348 grepbehave = GREP_BASIC;
349 break;
350 }
351 break;
352 }
353
354 lastc = '\0';
355 newarg = 1;
356 prevoptind = 1;
357 needpattern = 1;
358
359 eopts = getenv("GREP_OPTIONS");
360
361 /* support for extra arguments in GREP_OPTIONS */
362 eargc = 0;
363 if (eopts != NULL) {
364 char *str;
365
366 /* make an estimation of how many extra arguments we have */
367 for (unsigned int j = 0; j < strlen(eopts); j++)
368 if (eopts[j] == ' ')
369 eargc++;
370
371 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
372
373 eargc = 0;
374 /* parse extra arguments */
375 while ((str = strsep(&eopts, " ")) != NULL)
376 eargv[eargc++] = grep_strdup(str);
377
378 aargv = (char **)grep_calloc(eargc + argc + 1,
379 sizeof(char *));
380
381 aargv[0] = argv[0];
382 for (i = 0; i < eargc; i++)
383 aargv[i + 1] = eargv[i];
384 for (int j = 1; j < argc; j++, i++)
385 aargv[i + 1] = argv[j];
386
387 aargc = eargc + argc;
388 } else {
389 aargv = argv;
390 aargc = argc;
391 }
392
393 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
394 -1)) {
395 switch (c) {
396 case '0': case '1': case '2': case '3': case '4':
397 case '5': case '6': case '7': case '8': case '9':
398 if (newarg || !isdigit(lastc))
399 Aflag = 0;
400 else if (Aflag > LLONG_MAX / 10) {
401 errno = ERANGE;
402 err(2, NULL);
403 }
404 Aflag = Bflag = (Aflag * 10) + (c - '0');
405 break;
406 case 'C':
407 if (optarg == NULL) {
408 Aflag = Bflag = 2;
409 break;
410 }
411 /* FALLTHROUGH */
412 case 'A':
413 /* FALLTHROUGH */
414 case 'B':
415 errno = 0;
416 l = strtoull(optarg, &ep, 10);
417 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
418 ((errno == EINVAL) && (l == 0)))
419 err(2, NULL);
420 else if (ep[0] != '\0') {
421 errno = EINVAL;
422 err(2, NULL);
423 }
424 if (c == 'A')
425 Aflag = l;
426 else if (c == 'B')
427 Bflag = l;
428 else
429 Aflag = Bflag = l;
430 break;
431 case 'a':
432 binbehave = BINFILE_TEXT;
433 break;
434 case 'b':
435 bflag = true;
436 break;
437 case 'c':
438 cflag = true;
439 break;
440 case 'D':
441 if (strcasecmp(optarg, "skip") == 0)
442 devbehave = DEV_SKIP;
443 else if (strcasecmp(optarg, "read") == 0)
444 devbehave = DEV_READ;
445 else
446 errx(2, getstr(3), "--devices");
447 break;
448 case 'd':
449 if (strcasecmp("recurse", optarg) == 0) {
450 Hflag = true;
451 dirbehave = DIR_RECURSE;
452 } else if (strcasecmp("skip", optarg) == 0)
453 dirbehave = DIR_SKIP;
454 else if (strcasecmp("read", optarg) == 0)
455 dirbehave = DIR_READ;
456 else
457 errx(2, getstr(3), "--directories");
458 break;
459 case 'E':
460 grepbehave = GREP_EXTENDED;
461 break;
462 case 'e':
463 add_pattern(optarg, strlen(optarg));
464 needpattern = 0;
465 break;
466 case 'F':
467 grepbehave = GREP_FIXED;
468 break;
469 case 'f':
470 read_patterns(optarg);
471 needpattern = 0;
472 break;
473 case 'G':
474 grepbehave = GREP_BASIC;
475 break;
476 case 'H':
477 Hflag = true;
478 break;
479 case 'h':
480 Hflag = false;
481 hflag = true;
482 break;
483 case 'I':
484 binbehave = BINFILE_SKIP;
485 break;
486 case 'i':
487 case 'y':
488 iflag = true;
489 cflags |= REG_ICASE;
490 break;
491 case 'J':
492 filebehave = FILE_BZIP;
493 break;
494 case 'L':
495 lflag = false;
496 Lflag = true;
497 break;
498 case 'l':
499 Lflag = false;
500 lflag = true;
501 break;
502 case 'm':
503 mflag = true;
504 errno = 0;
505 mcount = strtoull(optarg, &ep, 10);
506 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
507 ((errno == EINVAL) && (mcount == 0)))
508 err(2, NULL);
509 else if (ep[0] != '\0') {
510 errno = EINVAL;
511 err(2, NULL);
512 }
513 break;
514 case 'n':
515 nflag = true;
516 break;
517 case 'O':
518 linkbehave = LINK_EXPLICIT;
519 break;
520 case 'o':
521 oflag = true;
522 break;
523 case 'p':
524 linkbehave = LINK_SKIP;
525 break;
526 case 'q':
527 qflag = true;
528 break;
529 case 'S':
530 linkbehave = LINK_READ;
531 break;
532 case 'R':
533 case 'r':
534 dirbehave = DIR_RECURSE;
535 Hflag = true;
536 break;
537 case 's':
538 sflag = true;
539 break;
540 case 'U':
541 binbehave = BINFILE_BIN;
542 break;
543 case 'u':
544 case MMAP_OPT:
545 /* noop, compatibility */
546 break;
547 case 'V':
548 printf(getstr(9), __progname, VERSION);
549 exit(0);
550 case 'v':
551 vflag = true;
552 break;
553 case 'w':
554 wflag = true;
555 break;
556 case 'x':
557 xflag = true;
558 break;
559 case 'Z':
560 filebehave = FILE_GZIP;
561 break;
562 case BIN_OPT:
563 if (strcasecmp("binary", optarg) == 0)
564 binbehave = BINFILE_BIN;
565 else if (strcasecmp("without-match", optarg) == 0)
566 binbehave = BINFILE_SKIP;
567 else if (strcasecmp("text", optarg) == 0)
568 binbehave = BINFILE_TEXT;
569 else
570 errx(2, getstr(3), "--binary-files");
571 break;
572 case COLOR_OPT:
573 color = NULL;
574 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
575 strcasecmp("tty", optarg) == 0 ||
576 strcasecmp("if-tty", optarg) == 0) {
577 char *term;
578
579 term = getenv("TERM");
580 if (isatty(STDOUT_FILENO) && term != NULL &&
581 strcasecmp(term, "dumb") != 0)
582 color = init_color("01;31");
583 } else if (strcasecmp("always", optarg) == 0 ||
584 strcasecmp("yes", optarg) == 0 ||
585 strcasecmp("force", optarg) == 0) {
586 color = init_color("01;31");
587 } else if (strcasecmp("never", optarg) != 0 &&
588 strcasecmp("none", optarg) != 0 &&
589 strcasecmp("no", optarg) != 0)
590 errx(2, getstr(3), "--color");
591 break;
592 case LABEL_OPT:
593 label = optarg;
594 break;
595 case LINEBUF_OPT:
596 lbflag = true;
597 break;
598 case NULL_OPT:
599 nullflag = true;
600 break;
601 case R_INCLUDE_OPT:
602 finclude = true;
603 add_fpattern(optarg, INCL_PAT);
604 break;
605 case R_EXCLUDE_OPT:
606 fexclude = true;
607 add_fpattern(optarg, EXCL_PAT);
608 break;
609 case R_DINCLUDE_OPT:
610 dinclude = true;
611 add_dpattern(optarg, INCL_PAT);
612 break;
613 case R_DEXCLUDE_OPT:
614 dexclude = true;
615 add_dpattern(optarg, EXCL_PAT);
616 break;
617 case HELP_OPT:
618 default:
619 usage();
620 }
621 lastc = c;
622 newarg = optind != prevoptind;
623 prevoptind = optind;
624 }
625 aargc -= optind;
626 aargv += optind;
627
628 /* Fail if we don't have any pattern */
629 if (aargc == 0 && needpattern)
630 usage();
631
632 /* Process patterns from command line */
633 if (aargc != 0 && needpattern) {
634 add_pattern(*aargv, strlen(*aargv));
635 --aargc;
636 ++aargv;
637 }
638
639 switch (grepbehave) {
640 case GREP_FIXED:
641 case GREP_BASIC:
642 break;
643 case GREP_EXTENDED:
644 cflags |= REG_EXTENDED;
645 break;
646 default:
647 /* NOTREACHED */
648 usage();
649 }
650
651 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
652 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
653/*
654 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
655 * Optimizations should be done there.
656 */
657 /* Check if cheating is allowed (always is for fgrep). */
658 if (grepbehave == GREP_FIXED) {
659 for (i = 0; i < patterns; ++i)
660 fgrepcomp(&fg_pattern[i], pattern[i]);
661 } else {
662 for (i = 0; i < patterns; ++i) {
663 if (fastcomp(&fg_pattern[i], pattern[i])) {
664 /* Fall back to full regex library */
665 c = regcomp(&r_pattern[i], pattern[i], cflags);
666 if (c != 0) {
667 regerror(c, &r_pattern[i], re_error,
668 RE_ERROR_BUF);
669 errx(2, "%s", re_error);
670 }
671 }
672 }
673 }
674
675 if (lbflag)
676 setlinebuf(stdout);
677
678 if ((aargc == 0 || aargc == 1) && !Hflag)
679 hflag = true;
680
681 if (aargc == 0)
682 exit(!procfile("-"));
683
684 if (dirbehave == DIR_RECURSE)
685 c = grep_tree(aargv);
686 else
687 for (c = 0; aargc--; ++aargv) {
688 if ((finclude || fexclude) && !file_matching(*aargv))
689 continue;
690 c+= procfile(*aargv);
691 }
692
693#ifndef WITHOUT_NLS
694 catclose(catalog);
695#endif
696
697 /* Find out the correct return value according to the
698 results and the command line option. */
699 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
700}