/* grep.c revision 210578 */
1/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */ 2 3/*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/usr.bin/grep/grep.c 210578 2010-07-29 00:11:14Z gabor $"); 32 33#include <sys/stat.h> 34#include <sys/types.h> 35 36#include <ctype.h> 37#include <err.h> 38#include <errno.h> 39#include <getopt.h> 40#include <limits.h> 41#include <libgen.h> 42#include <locale.h> 43#include <stdbool.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48 49#include "grep.h" 50 51#ifndef WITHOUT_NLS 52#include <nl_types.h> 53nl_catd catalog; 54#endif 55 56/* 57 * Default messags to use when NLS is disabled or no catalogue 58 * is found. 59 */ 60const char *errstr[] = { 61 "", 62/* 1*/ "(standard input)", 63/* 2*/ "cannot read bzip2 compressed file", 64/* 3*/ "unknown --color option", 65/* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n", 66/* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n", 67/* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n", 68/* 7*/ "\t[--null] [pattern] [file ...]\n", 69/* 8*/ "unknown --binary-files option", 70/* 9*/ "Binary file %s matches\n", 71/*10*/ "%s (BSD grep) %s\n", 72}; 73 74/* Flags passed to regcomp() and regexec() */ 75int cflags = 0; 76int eflags = REG_STARTEND; 77 78/* Shortcut for matching all cases like empty regex */ 79bool matchall; 80 81/* Searching patterns */ 82unsigned int patterns, pattern_sz; 83char **pattern; 84regex_t *r_pattern; 85fastgrep_t *fg_pattern; 86 87/* Filename exclusion/inclusion patterns */ 88unsigned int fpatterns, fpattern_sz; 89unsigned int dpatterns, dpattern_sz; 90struct epat *dpattern, *fpattern; 91 92/* For regex errors */ 93char re_error[RE_ERROR_BUF + 1]; 94 95/* Command-line flags */ 96unsigned long long Aflag; /* -A x: print x lines trailing each match */ 97unsigned long long Bflag; /* -B x: print x lines leading each match */ 98bool Hflag; /* -H: always print file name */ 99bool Lflag; /* -L: only show names of files with no matches 
*/ 100bool bflag; /* -b: show block numbers for each match */ 101bool cflag; /* -c: only show a count of matching lines */ 102bool hflag; /* -h: don't print filename headers */ 103bool iflag; /* -i: ignore case */ 104bool lflag; /* -l: only show names of files with matches */ 105bool mflag; /* -m x: stop reading the files after x matches */ 106unsigned long long mcount; /* count for -m */ 107bool nflag; /* -n: show line numbers in front of matching lines */ 108bool oflag; /* -o: print only matching part */ 109bool qflag; /* -q: quiet mode (don't output anything) */ 110bool sflag; /* -s: silent mode (ignore errors) */ 111bool vflag; /* -v: only show non-matching lines */ 112bool wflag; /* -w: pattern must start and end on word boundaries */ 113bool xflag; /* -x: pattern must match entire line */ 114bool lbflag; /* --line-buffered */ 115bool nullflag; /* --null */ 116char *label; /* --label */ 117const char *color; /* --color */ 118int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */ 119int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */ 120int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */ 121int devbehave = DEV_READ; /* -D: handling of devices */ 122int dirbehave = DIR_READ; /* -dRr: handling of directories */ 123int linkbehave = LINK_READ; /* -OpS: handling of symlinks */ 124 125bool dexclude, dinclude; /* --exclude amd --include */ 126bool fexclude, finclude; /* --exclude-dir and --include-dir */ 127 128enum { 129 BIN_OPT = CHAR_MAX + 1, 130 COLOR_OPT, 131 HELP_OPT, 132 MMAP_OPT, 133 LINEBUF_OPT, 134 LABEL_OPT, 135 NULL_OPT, 136 R_EXCLUDE_OPT, 137 R_INCLUDE_OPT, 138 R_DEXCLUDE_OPT, 139 R_DINCLUDE_OPT 140}; 141 142static inline const char *init_color(const char *); 143 144/* Housekeeping */ 145bool first = true; /* flag whether we are processing the first match */ 146bool prev; /* flag whether or not the previous line matched */ 147int tail; /* lines left to print */ 148bool notfound; /* file not found */ 149 150extern char 
*__progname; 151 152/* 153 * Prints usage information and returns 2. 154 */ 155static void 156usage(void) 157{ 158 fprintf(stderr, getstr(4), __progname); 159 fprintf(stderr, "%s", getstr(5)); 160 fprintf(stderr, "%s", getstr(5)); 161 fprintf(stderr, "%s", getstr(6)); 162 fprintf(stderr, "%s", getstr(7)); 163 exit(2); 164} 165 166static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy"; 167 168struct option long_options[] = 169{ 170 {"binary-files", required_argument, NULL, BIN_OPT}, 171 {"help", no_argument, NULL, HELP_OPT}, 172 {"mmap", no_argument, NULL, MMAP_OPT}, 173 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 174 {"label", required_argument, NULL, LABEL_OPT}, 175 {"null", no_argument, NULL, NULL_OPT}, 176 {"color", optional_argument, NULL, COLOR_OPT}, 177 {"colour", optional_argument, NULL, COLOR_OPT}, 178 {"exclude", required_argument, NULL, R_EXCLUDE_OPT}, 179 {"include", required_argument, NULL, R_INCLUDE_OPT}, 180 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT}, 181 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT}, 182 {"after-context", required_argument, NULL, 'A'}, 183 {"text", no_argument, NULL, 'a'}, 184 {"before-context", required_argument, NULL, 'B'}, 185 {"byte-offset", no_argument, NULL, 'b'}, 186 {"context", optional_argument, NULL, 'C'}, 187 {"count", no_argument, NULL, 'c'}, 188 {"devices", required_argument, NULL, 'D'}, 189 {"directories", required_argument, NULL, 'd'}, 190 {"extended-regexp", no_argument, NULL, 'E'}, 191 {"regexp", required_argument, NULL, 'e'}, 192 {"fixed-strings", no_argument, NULL, 'F'}, 193 {"file", required_argument, NULL, 'f'}, 194 {"basic-regexp", no_argument, NULL, 'G'}, 195 {"no-filename", no_argument, NULL, 'h'}, 196 {"with-filename", no_argument, NULL, 'H'}, 197 {"ignore-case", no_argument, NULL, 'i'}, 198 {"bz2decompress", no_argument, NULL, 'J'}, 199 {"files-with-matches", no_argument, NULL, 'l'}, 200 {"files-without-match", no_argument, NULL, 'L'}, 201 
{"max-count", required_argument, NULL, 'm'}, 202 {"line-number", no_argument, NULL, 'n'}, 203 {"only-matching", no_argument, NULL, 'o'}, 204 {"quiet", no_argument, NULL, 'q'}, 205 {"silent", no_argument, NULL, 'q'}, 206 {"recursive", no_argument, NULL, 'r'}, 207 {"no-messages", no_argument, NULL, 's'}, 208 {"binary", no_argument, NULL, 'U'}, 209 {"unix-byte-offsets", no_argument, NULL, 'u'}, 210 {"invert-match", no_argument, NULL, 'v'}, 211 {"version", no_argument, NULL, 'V'}, 212 {"word-regexp", no_argument, NULL, 'w'}, 213 {"line-regexp", no_argument, NULL, 'x'}, 214 {"decompress", no_argument, NULL, 'Z'}, 215 {NULL, no_argument, NULL, 0} 216}; 217 218/* 219 * Adds a searching pattern to the internal array. 220 */ 221static void 222add_pattern(char *pat, size_t len) 223{ 224 225 /* Check if we can do a shortcut */ 226 if (len == 0 || matchall) { 227 matchall = true; 228 return; 229 } 230 /* Increase size if necessary */ 231 if (patterns == pattern_sz) { 232 pattern_sz *= 2; 233 pattern = grep_realloc(pattern, ++pattern_sz * 234 sizeof(*pattern)); 235 } 236 if (len > 0 && pat[len - 1] == '\n') 237 --len; 238 /* pat may not be NUL-terminated */ 239 pattern[patterns] = grep_malloc(len + 1); 240 strlcpy(pattern[patterns], pat, len + 1); 241 ++patterns; 242} 243 244/* 245 * Adds a file include/exclude pattern to the internal array. 246 */ 247static void 248add_fpattern(const char *pat, int mode) 249{ 250 251 /* Increase size if necessary */ 252 if (fpatterns == fpattern_sz) { 253 fpattern_sz *= 2; 254 fpattern = grep_realloc(fpattern, ++fpattern_sz * 255 sizeof(struct epat)); 256 } 257 fpattern[fpatterns].pat = grep_strdup(pat); 258 fpattern[fpatterns].mode = mode; 259 ++fpatterns; 260} 261 262/* 263 * Adds a directory include/exclude pattern to the internal array. 
264 */ 265static void 266add_dpattern(const char *pat, int mode) 267{ 268 269 /* Increase size if necessary */ 270 if (dpatterns == dpattern_sz) { 271 dpattern_sz *= 2; 272 dpattern = grep_realloc(dpattern, ++dpattern_sz * 273 sizeof(struct epat)); 274 } 275 dpattern[dpatterns].pat = grep_strdup(pat); 276 dpattern[dpatterns].mode = mode; 277 ++dpatterns; 278} 279 280/* 281 * Reads searching patterns from a file and adds them with add_pattern(). 282 */ 283static void 284read_patterns(const char *fn) 285{ 286 FILE *f; 287 char *line; 288 size_t len; 289 290 if ((f = fopen(fn, "r")) == NULL) 291 err(2, "%s", fn); 292 while ((line = fgetln(f, &len)) != NULL) 293 add_pattern(line, *line == '\n' ? 0 : len); 294 if (ferror(f)) 295 err(2, "%s", fn); 296 fclose(f); 297} 298 299static inline const char * 300init_color(const char *d) 301{ 302 char *c; 303 304 c = getenv("GREP_COLOR"); 305 return (c != NULL ? c : d); 306} 307 308int 309main(int argc, char *argv[]) 310{ 311 char **aargv, **eargv, *eopts; 312 char *ep; 313 unsigned long long l; 314 unsigned int aargc, eargc, i; 315 int c, lastc, needpattern, newarg, prevoptind; 316 317 setlocale(LC_ALL, ""); 318 319#ifndef WITHOUT_NLS 320 catalog = catopen("grep", NL_CAT_LOCALE); 321#endif 322 323 /* Check what is the program name of the binary. In this 324 way we can have all the funcionalities in one binary 325 without the need of scripting and using ugly hacks. 
*/ 326 switch (__progname[0]) { 327 case 'e': 328 grepbehave = GREP_EXTENDED; 329 break; 330 case 'f': 331 grepbehave = GREP_FIXED; 332 break; 333 case 'g': 334 grepbehave = GREP_BASIC; 335 break; 336 case 'z': 337 filebehave = FILE_GZIP; 338 switch(__progname[1]) { 339 case 'e': 340 grepbehave = GREP_EXTENDED; 341 break; 342 case 'f': 343 grepbehave = GREP_FIXED; 344 break; 345 case 'g': 346 grepbehave = GREP_BASIC; 347 break; 348 } 349 break; 350 } 351 352 lastc = '\0'; 353 newarg = 1; 354 prevoptind = 1; 355 needpattern = 1; 356 357 eopts = getenv("GREP_OPTIONS"); 358 359 eargc = 1; 360 if (eopts != NULL) { 361 char *str; 362 363 for(i = 0; i < strlen(eopts); i++) 364 if (eopts[i] == ' ') 365 eargc++; 366 367 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1)); 368 369 str = strtok(eopts, " "); 370 eargc = 0; 371 372 while(str != NULL) { 373 eargv[++eargc] = (char *)grep_malloc(sizeof(char) * 374 (strlen(str) + 1)); 375 strlcpy(eargv[eargc], str, strlen(str) + 1); 376 str = strtok(NULL, " "); 377 } 378 eargv[++eargc] = NULL; 379 380 aargv = (char **)grep_calloc(eargc + argc + 1, 381 sizeof(char *)); 382 aargv[0] = argv[0]; 383 384 for(i = 1; i < eargc; i++) 385 aargv[i] = eargv[i]; 386 for(int j = 1; j < argc; j++) 387 aargv[i++] = argv[j]; 388 389 aargc = eargc + argc - 1; 390 391 } else { 392 aargv = argv; 393 aargc = argc; 394 } 395 396 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) != 397 -1)) { 398 switch (c) { 399 case '0': case '1': case '2': case '3': case '4': 400 case '5': case '6': case '7': case '8': case '9': 401 if (newarg || !isdigit(lastc)) 402 Aflag = 0; 403 else if (Aflag > LLONG_MAX / 10) { 404 errno = ERANGE; 405 err(2, NULL); 406 } 407 Aflag = Bflag = (Aflag * 10) + (c - '0'); 408 break; 409 case 'C': 410 if (optarg == NULL) { 411 Aflag = Bflag = 2; 412 break; 413 } 414 /* FALLTHROUGH */ 415 case 'A': 416 /* FALLTHROUGH */ 417 case 'B': 418 errno = 0; 419 l = strtoull(optarg, &ep, 10); 420 if (((errno == ERANGE) 
&& (l == ULLONG_MAX)) || 421 ((errno == EINVAL) && (l == 0))) 422 err(2, NULL); 423 else if (ep[0] != '\0') { 424 errno = EINVAL; 425 err(2, NULL); 426 } 427 if (c == 'A') 428 Aflag = l; 429 else if (c == 'B') 430 Bflag = l; 431 else 432 Aflag = Bflag = l; 433 break; 434 case 'a': 435 binbehave = BINFILE_TEXT; 436 break; 437 case 'b': 438 bflag = true; 439 break; 440 case 'c': 441 cflag = true; 442 break; 443 case 'D': 444 if (strcasecmp(optarg, "skip") == 0) 445 devbehave = DEV_SKIP; 446 else if (strcasecmp(optarg, "read") == 0) 447 devbehave = DEV_READ; 448 else { 449 errno = EINVAL; 450 err(2, NULL); 451 } 452 break; 453 case 'd': 454 if (strcasecmp("recurse", optarg) == 0) { 455 Hflag = true; 456 dirbehave = DIR_RECURSE; 457 } else if (strcasecmp("skip", optarg) == 0) 458 dirbehave = DIR_SKIP; 459 else if (strcasecmp("read", optarg) == 0) 460 dirbehave = DIR_READ; 461 else { 462 errno = EINVAL; 463 err(2, NULL); 464 } 465 break; 466 case 'E': 467 grepbehave = GREP_EXTENDED; 468 break; 469 case 'e': 470 add_pattern(optarg, strlen(optarg)); 471 needpattern = 0; 472 break; 473 case 'F': 474 grepbehave = GREP_FIXED; 475 break; 476 case 'f': 477 read_patterns(optarg); 478 needpattern = 0; 479 break; 480 case 'G': 481 grepbehave = GREP_BASIC; 482 break; 483 case 'H': 484 Hflag = true; 485 break; 486 case 'h': 487 Hflag = false; 488 hflag = true; 489 break; 490 case 'I': 491 binbehave = BINFILE_SKIP; 492 break; 493 case 'i': 494 case 'y': 495 iflag = true; 496 cflags |= REG_ICASE; 497 break; 498 case 'J': 499 filebehave = FILE_BZIP; 500 break; 501 case 'L': 502 lflag = false; 503 Lflag = true; 504 break; 505 case 'l': 506 Lflag = false; 507 lflag = true; 508 break; 509 case 'm': 510 mflag = true; 511 errno = 0; 512 mcount = strtoull(optarg, &ep, 10); 513 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) || 514 ((errno == EINVAL) && (mcount == 0))) 515 err(2, NULL); 516 else if (ep[0] != '\0') { 517 errno = EINVAL; 518 err(2, NULL); 519 } 520 break; 521 case 'n': 522 
nflag = true; 523 break; 524 case 'O': 525 linkbehave = LINK_EXPLICIT; 526 break; 527 case 'o': 528 oflag = true; 529 break; 530 case 'p': 531 linkbehave = LINK_SKIP; 532 break; 533 case 'q': 534 qflag = true; 535 break; 536 case 'S': 537 linkbehave = LINK_READ; 538 break; 539 case 'R': 540 case 'r': 541 dirbehave = DIR_RECURSE; 542 Hflag = true; 543 break; 544 case 's': 545 sflag = true; 546 break; 547 case 'U': 548 binbehave = BINFILE_BIN; 549 break; 550 case 'u': 551 case MMAP_OPT: 552 /* noop, compatibility */ 553 break; 554 case 'V': 555 printf(getstr(10), __progname, VERSION); 556 exit(0); 557 case 'v': 558 vflag = true; 559 break; 560 case 'w': 561 wflag = true; 562 break; 563 case 'x': 564 xflag = true; 565 break; 566 case 'Z': 567 filebehave = FILE_GZIP; 568 break; 569 case BIN_OPT: 570 if (strcasecmp("binary", optarg) == 0) 571 binbehave = BINFILE_BIN; 572 else if (strcasecmp("without-match", optarg) == 0) 573 binbehave = BINFILE_SKIP; 574 else if (strcasecmp("text", optarg) == 0) 575 binbehave = BINFILE_TEXT; 576 else 577 errx(2, "%s", getstr(8)); 578 break; 579 case COLOR_OPT: 580 color = NULL; 581 if (optarg == NULL || strcasecmp("auto", optarg) == 0 || 582 strcasecmp("tty", optarg) == 0 || 583 strcasecmp("if-tty", optarg) == 0) { 584 char *term; 585 586 term = getenv("TERM"); 587 if (isatty(STDOUT_FILENO) && term != NULL && 588 strcasecmp(term, "dumb") != 0) 589 color = init_color("01;31"); 590 } else if (strcasecmp("always", optarg) == 0 || 591 strcasecmp("yes", optarg) == 0 || 592 strcasecmp("force", optarg) == 0) { 593 color = init_color("01;31"); 594 } else if (strcasecmp("never", optarg) != 0 && 595 strcasecmp("none", optarg) != 0 && 596 strcasecmp("no", optarg) != 0) 597 errx(2, "%s", getstr(3)); 598 break; 599 case LABEL_OPT: 600 label = optarg; 601 break; 602 case LINEBUF_OPT: 603 lbflag = true; 604 break; 605 case NULL_OPT: 606 nullflag = true; 607 break; 608 case R_INCLUDE_OPT: 609 finclude = true; 610 add_fpattern(optarg, INCL_PAT); 611 
break; 612 case R_EXCLUDE_OPT: 613 fexclude = true; 614 add_fpattern(optarg, EXCL_PAT); 615 break; 616 case R_DINCLUDE_OPT: 617 dexclude = true; 618 add_dpattern(optarg, INCL_PAT); 619 break; 620 case R_DEXCLUDE_OPT: 621 dinclude = true; 622 add_dpattern(optarg, EXCL_PAT); 623 break; 624 case HELP_OPT: 625 default: 626 usage(); 627 } 628 lastc = c; 629 newarg = optind != prevoptind; 630 prevoptind = optind; 631 } 632 aargc -= optind; 633 aargv += optind; 634 635 /* Fail if we don't have any pattern */ 636 if (aargc == 0 && needpattern) 637 usage(); 638 639 /* Process patterns from command line */ 640 if (aargc != 0 && needpattern) { 641 add_pattern(*aargv, strlen(*aargv)); 642 --aargc; 643 ++aargv; 644 } 645 646 switch (grepbehave) { 647 case GREP_FIXED: 648 case GREP_BASIC: 649 break; 650 case GREP_EXTENDED: 651 cflags |= REG_EXTENDED; 652 break; 653 default: 654 /* NOTREACHED */ 655 usage(); 656 } 657 658 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); 659 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); 660/* 661 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance. 662 * Optimizations should be done there. 663 */ 664 /* Check if cheating is allowed (always is for fgrep). 
*/ 665 if (grepbehave == GREP_FIXED) { 666 for (i = 0; i < patterns; ++i) 667 fgrepcomp(&fg_pattern[i], pattern[i]); 668 } else { 669 for (i = 0; i < patterns; ++i) { 670 if (fastcomp(&fg_pattern[i], pattern[i])) { 671 /* Fall back to full regex library */ 672 c = regcomp(&r_pattern[i], pattern[i], cflags); 673 if (c != 0) { 674 regerror(c, &r_pattern[i], re_error, 675 RE_ERROR_BUF); 676 errx(2, "%s", re_error); 677 } 678 } 679 } 680 } 681 682 if (lbflag) 683 setlinebuf(stdout); 684 685 if ((aargc == 0 || aargc == 1) && !Hflag) 686 hflag = true; 687 688 if (aargc == 0) 689 exit(!procfile("-")); 690 691 if (dirbehave == DIR_RECURSE) 692 c = grep_tree(aargv); 693 else 694 for (c = 0; aargc--; ++aargv) { 695 if ((finclude || fexclude) && !file_matching(*aargv)) 696 continue; 697 c+= procfile(*aargv); 698 } 699 700#ifndef WITHOUT_NLS 701 catclose(catalog); 702#endif 703 704 /* Find out the correct return value according to the 705 results and the command line option. */ 706 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1)); 707} 708