grep.c revision 210622
1/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */ 2 3/*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav 5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/usr.bin/grep/grep.c 210622 2010-07-29 18:02:57Z gabor $"); 32 33#include <sys/stat.h> 34#include <sys/types.h> 35 36#include <ctype.h> 37#include <err.h> 38#include <errno.h> 39#include <getopt.h> 40#include <limits.h> 41#include <libgen.h> 42#include <locale.h> 43#include <stdbool.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48 49#include "grep.h" 50 51#ifndef WITHOUT_NLS 52#include <nl_types.h> 53nl_catd catalog; 54#endif 55 56/* 57 * Default messages to use when NLS is disabled or no catalogue 58 * is found. The index of each entry is the number passed to getstr(). 59 */ 60const char *errstr[] = { 61 "", 62/* 1*/ "(standard input)", 63/* 2*/ "cannot read bzip2 compressed file", 64/* 3*/ "unknown %s option", 65/* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n", 66/* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n", 67/* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n", 68/* 7*/ "\t[--null] [pattern] [file ...]\n", 69/* 8*/ "Binary file %s matches\n", 70/* 9*/ "%s (BSD grep) %s\n", 71}; 72 73/* Flags passed to regcomp() and regexec() */ 74int cflags = 0; 75int eflags = REG_STARTEND; 76 77/* Shortcut for matching all cases like empty regex */ 78bool matchall; 79 80/* Searching patterns */ 81unsigned int patterns, pattern_sz; 82char **pattern; 83regex_t *r_pattern; 84fastgrep_t *fg_pattern; 85 86/* Filename and directory exclusion/inclusion patterns */ 87unsigned int fpatterns, fpattern_sz; 88unsigned int dpatterns, dpattern_sz; 89struct epat *dpattern, *fpattern; 90 91/* For regex errors */ 92char re_error[RE_ERROR_BUF + 1]; 93 94/* Command-line flags */ 95unsigned long long Aflag; /* -A x: print x lines trailing each match */ 96unsigned long long Bflag; /* -B x: print x lines leading each match */ 97bool Hflag; /* -H: always print file name */ 98bool Lflag; /* -L: only show names of files with no matches */ 99bool bflag; /* -b: show block numbers for 
each match */ 100bool cflag; /* -c: only show a count of matching lines */ 101bool hflag; /* -h: don't print filename headers */ 102bool iflag; /* -i: ignore case */ 103bool lflag; /* -l: only show names of files with matches */ 104bool mflag; /* -m x: stop reading the files after x matches */ 105unsigned long long mcount; /* count for -m */ 106bool nflag; /* -n: show line numbers in front of matching lines */ 107bool oflag; /* -o: print only matching part */ 108bool qflag; /* -q: quiet mode (don't output anything) */ 109bool sflag; /* -s: silent mode (ignore errors) */ 110bool vflag; /* -v: only show non-matching lines */ 111bool wflag; /* -w: pattern must start and end on word boundaries */ 112bool xflag; /* -x: pattern must match entire line */ 113bool lbflag; /* --line-buffered */ 114bool nullflag; /* --null */ 115char *label; /* --label */ 116const char *color; /* --color */ 117int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */ 118int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */ 119int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */ 120int devbehave = DEV_READ; /* -D: handling of devices */ 121int dirbehave = DIR_READ; /* -dRr: handling of directories */ 122int linkbehave = LINK_READ; /* -OpS: handling of symlinks */ 123 124bool dexclude, dinclude; /* --exclude-dir and --include-dir */ 125bool fexclude, finclude; /* --exclude and --include */ 126 127/* Return values of getopt_long() for options that have no short form */ enum { 128 BIN_OPT = CHAR_MAX + 1, 129 COLOR_OPT, 130 HELP_OPT, 131 MMAP_OPT, 132 LINEBUF_OPT, 133 LABEL_OPT, 134 NULL_OPT, 135 R_EXCLUDE_OPT, 136 R_INCLUDE_OPT, 137 R_DEXCLUDE_OPT, 138 R_DINCLUDE_OPT 139}; 140 141static inline const char *init_color(const char *); 142 143/* Housekeeping */ 144bool first = true; /* flag whether we are processing the first match */ 145bool prev; /* flag whether or not the previous line matched */ 146int tail; /* lines left to print */ 147bool notfound; /* file not found */ 148 149extern char *__progname; 150 151/* 152 * Prints usage 
information and returns 2. 153 */ 154static void 155usage(void) 156{ 157 fprintf(stderr, getstr(4), __progname); 158 fprintf(stderr, "%s", getstr(5)); 159 fprintf(stderr, "%s", getstr(5)); 160 fprintf(stderr, "%s", getstr(6)); 161 fprintf(stderr, "%s", getstr(7)); 162 exit(2); 163} 164 165static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy"; 166 167struct option long_options[] = 168{ 169 {"binary-files", required_argument, NULL, BIN_OPT}, 170 {"help", no_argument, NULL, HELP_OPT}, 171 {"mmap", no_argument, NULL, MMAP_OPT}, 172 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 173 {"label", required_argument, NULL, LABEL_OPT}, 174 {"null", no_argument, NULL, NULL_OPT}, 175 {"color", optional_argument, NULL, COLOR_OPT}, 176 {"colour", optional_argument, NULL, COLOR_OPT}, 177 {"exclude", required_argument, NULL, R_EXCLUDE_OPT}, 178 {"include", required_argument, NULL, R_INCLUDE_OPT}, 179 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT}, 180 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT}, 181 {"after-context", required_argument, NULL, 'A'}, 182 {"text", no_argument, NULL, 'a'}, 183 {"before-context", required_argument, NULL, 'B'}, 184 {"byte-offset", no_argument, NULL, 'b'}, 185 {"context", optional_argument, NULL, 'C'}, 186 {"count", no_argument, NULL, 'c'}, 187 {"devices", required_argument, NULL, 'D'}, 188 {"directories", required_argument, NULL, 'd'}, 189 {"extended-regexp", no_argument, NULL, 'E'}, 190 {"regexp", required_argument, NULL, 'e'}, 191 {"fixed-strings", no_argument, NULL, 'F'}, 192 {"file", required_argument, NULL, 'f'}, 193 {"basic-regexp", no_argument, NULL, 'G'}, 194 {"no-filename", no_argument, NULL, 'h'}, 195 {"with-filename", no_argument, NULL, 'H'}, 196 {"ignore-case", no_argument, NULL, 'i'}, 197 {"bz2decompress", no_argument, NULL, 'J'}, 198 {"files-with-matches", no_argument, NULL, 'l'}, 199 {"files-without-match", no_argument, NULL, 'L'}, 200 {"max-count", required_argument, NULL, 'm'}, 
201 {"line-number", no_argument, NULL, 'n'}, 202 {"only-matching", no_argument, NULL, 'o'}, 203 {"quiet", no_argument, NULL, 'q'}, 204 {"silent", no_argument, NULL, 'q'}, 205 {"recursive", no_argument, NULL, 'r'}, 206 {"no-messages", no_argument, NULL, 's'}, 207 {"binary", no_argument, NULL, 'U'}, 208 {"unix-byte-offsets", no_argument, NULL, 'u'}, 209 {"invert-match", no_argument, NULL, 'v'}, 210 {"version", no_argument, NULL, 'V'}, 211 {"word-regexp", no_argument, NULL, 'w'}, 212 {"line-regexp", no_argument, NULL, 'x'}, 213 {"decompress", no_argument, NULL, 'Z'}, 214 {NULL, no_argument, NULL, 0} 215}; 216 217/* 218 * Adds a searching pattern to the internal array. 219 */ 220static void 221add_pattern(char *pat, size_t len) 222{ 223 224 /* Check if we can do a shortcut */ 225 if (len == 0 || matchall) { 226 matchall = true; 227 return; 228 } 229 /* Increase size if necessary */ 230 if (patterns == pattern_sz) { 231 pattern_sz *= 2; 232 pattern = grep_realloc(pattern, ++pattern_sz * 233 sizeof(*pattern)); 234 } 235 if (len > 0 && pat[len - 1] == '\n') 236 --len; 237 /* pat may not be NUL-terminated */ 238 pattern[patterns] = grep_malloc(len + 1); 239 strlcpy(pattern[patterns], pat, len + 1); 240 ++patterns; 241} 242 243/* 244 * Adds a file include/exclude pattern to the internal array. 245 */ 246static void 247add_fpattern(const char *pat, int mode) 248{ 249 250 /* Increase size if necessary */ 251 if (fpatterns == fpattern_sz) { 252 fpattern_sz *= 2; 253 fpattern = grep_realloc(fpattern, ++fpattern_sz * 254 sizeof(struct epat)); 255 } 256 fpattern[fpatterns].pat = grep_strdup(pat); 257 fpattern[fpatterns].mode = mode; 258 ++fpatterns; 259} 260 261/* 262 * Adds a directory include/exclude pattern to the internal array. 
263 */ 264static void 265add_dpattern(const char *pat, int mode) 266{ 267 268 /* Increase size if necessary */ 269 if (dpatterns == dpattern_sz) { 270 dpattern_sz *= 2; 271 dpattern = grep_realloc(dpattern, ++dpattern_sz * 272 sizeof(struct epat)); 273 } 274 dpattern[dpatterns].pat = grep_strdup(pat); 275 dpattern[dpatterns].mode = mode; 276 ++dpatterns; 277} 278 279/* 280 * Reads searching patterns from a file and adds them with add_pattern(). 281 */ 282static void 283read_patterns(const char *fn) 284{ 285 FILE *f; 286 char *line; 287 size_t len; 288 289 if ((f = fopen(fn, "r")) == NULL) 290 err(2, "%s", fn); 291 while ((line = fgetln(f, &len)) != NULL) 292 add_pattern(line, *line == '\n' ? 0 : len); 293 if (ferror(f)) 294 err(2, "%s", fn); 295 fclose(f); 296} 297 298static inline const char * 299init_color(const char *d) 300{ 301 char *c; 302 303 c = getenv("GREP_COLOR"); 304 return (c != NULL ? c : d); 305} 306 307int 308main(int argc, char *argv[]) 309{ 310 char **aargv, **eargv, *eopts; 311 char *ep; 312 unsigned long long l; 313 unsigned int aargc, eargc, i; 314 int c, lastc, needpattern, newarg, prevoptind; 315 316 setlocale(LC_ALL, ""); 317 318#ifndef WITHOUT_NLS 319 catalog = catopen("grep", NL_CAT_LOCALE); 320#endif 321 322 /* Check what is the program name of the binary. In this 323 way we can have all the funcionalities in one binary 324 without the need of scripting and using ugly hacks. 
*/ 325 switch (__progname[0]) { 326 case 'e': 327 grepbehave = GREP_EXTENDED; 328 break; 329 case 'f': 330 grepbehave = GREP_FIXED; 331 break; 332 case 'g': 333 grepbehave = GREP_BASIC; 334 break; 335 case 'z': 336 filebehave = FILE_GZIP; 337 switch(__progname[1]) { 338 case 'e': 339 grepbehave = GREP_EXTENDED; 340 break; 341 case 'f': 342 grepbehave = GREP_FIXED; 343 break; 344 case 'g': 345 grepbehave = GREP_BASIC; 346 break; 347 } 348 break; 349 } 350 351 lastc = '\0'; 352 newarg = 1; 353 prevoptind = 1; 354 needpattern = 1; 355 356 eopts = getenv("GREP_OPTIONS"); 357 358 eargc = 1; 359 if (eopts != NULL) { 360 char *str; 361 362 for(i = 0; i < strlen(eopts); i++) 363 if (eopts[i] == ' ') 364 eargc++; 365 366 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1)); 367 368 str = strtok(eopts, " "); 369 eargc = 0; 370 371 while(str != NULL) { 372 eargv[++eargc] = (char *)grep_malloc(sizeof(char) * 373 (strlen(str) + 1)); 374 strlcpy(eargv[eargc], str, strlen(str) + 1); 375 str = strtok(NULL, " "); 376 } 377 eargv[++eargc] = NULL; 378 379 aargv = (char **)grep_calloc(eargc + argc + 1, 380 sizeof(char *)); 381 aargv[0] = argv[0]; 382 383 for(i = 1; i < eargc; i++) 384 aargv[i] = eargv[i]; 385 for(int j = 1; j < argc; j++) 386 aargv[i++] = argv[j]; 387 388 aargc = eargc + argc - 1; 389 390 } else { 391 aargv = argv; 392 aargc = argc; 393 } 394 395 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) != 396 -1)) { 397 switch (c) { 398 case '0': case '1': case '2': case '3': case '4': 399 case '5': case '6': case '7': case '8': case '9': 400 if (newarg || !isdigit(lastc)) 401 Aflag = 0; 402 else if (Aflag > LLONG_MAX / 10) { 403 errno = ERANGE; 404 err(2, NULL); 405 } 406 Aflag = Bflag = (Aflag * 10) + (c - '0'); 407 break; 408 case 'C': 409 if (optarg == NULL) { 410 Aflag = Bflag = 2; 411 break; 412 } 413 /* FALLTHROUGH */ 414 case 'A': 415 /* FALLTHROUGH */ 416 case 'B': 417 errno = 0; 418 l = strtoull(optarg, &ep, 10); 419 if (((errno == ERANGE) 
&& (l == ULLONG_MAX)) || 420 ((errno == EINVAL) && (l == 0))) 421 err(2, NULL); 422 else if (ep[0] != '\0') { 423 errno = EINVAL; 424 err(2, NULL); 425 } 426 if (c == 'A') 427 Aflag = l; 428 else if (c == 'B') 429 Bflag = l; 430 else 431 Aflag = Bflag = l; 432 break; 433 case 'a': 434 binbehave = BINFILE_TEXT; 435 break; 436 case 'b': 437 bflag = true; 438 break; 439 case 'c': 440 cflag = true; 441 break; 442 case 'D': 443 if (strcasecmp(optarg, "skip") == 0) 444 devbehave = DEV_SKIP; 445 else if (strcasecmp(optarg, "read") == 0) 446 devbehave = DEV_READ; 447 else 448 errx(2, getstr(3), "--devices"); 449 break; 450 case 'd': 451 if (strcasecmp("recurse", optarg) == 0) { 452 Hflag = true; 453 dirbehave = DIR_RECURSE; 454 } else if (strcasecmp("skip", optarg) == 0) 455 dirbehave = DIR_SKIP; 456 else if (strcasecmp("read", optarg) == 0) 457 dirbehave = DIR_READ; 458 else 459 errx(2, getstr(3), "--directories"); 460 break; 461 case 'E': 462 grepbehave = GREP_EXTENDED; 463 break; 464 case 'e': 465 add_pattern(optarg, strlen(optarg)); 466 needpattern = 0; 467 break; 468 case 'F': 469 grepbehave = GREP_FIXED; 470 break; 471 case 'f': 472 read_patterns(optarg); 473 needpattern = 0; 474 break; 475 case 'G': 476 grepbehave = GREP_BASIC; 477 break; 478 case 'H': 479 Hflag = true; 480 break; 481 case 'h': 482 Hflag = false; 483 hflag = true; 484 break; 485 case 'I': 486 binbehave = BINFILE_SKIP; 487 break; 488 case 'i': 489 case 'y': 490 iflag = true; 491 cflags |= REG_ICASE; 492 break; 493 case 'J': 494 filebehave = FILE_BZIP; 495 break; 496 case 'L': 497 lflag = false; 498 Lflag = true; 499 break; 500 case 'l': 501 Lflag = false; 502 lflag = true; 503 break; 504 case 'm': 505 mflag = true; 506 errno = 0; 507 mcount = strtoull(optarg, &ep, 10); 508 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) || 509 ((errno == EINVAL) && (mcount == 0))) 510 err(2, NULL); 511 else if (ep[0] != '\0') { 512 errno = EINVAL; 513 err(2, NULL); 514 } 515 break; 516 case 'n': 517 nflag = true; 
518 break; 519 case 'O': 520 linkbehave = LINK_EXPLICIT; 521 break; 522 case 'o': 523 oflag = true; 524 break; 525 case 'p': 526 linkbehave = LINK_SKIP; 527 break; 528 case 'q': 529 qflag = true; 530 break; 531 case 'S': 532 linkbehave = LINK_READ; 533 break; 534 case 'R': 535 case 'r': 536 dirbehave = DIR_RECURSE; 537 Hflag = true; 538 break; 539 case 's': 540 sflag = true; 541 break; 542 case 'U': 543 binbehave = BINFILE_BIN; 544 break; 545 case 'u': 546 case MMAP_OPT: 547 /* noop, compatibility */ 548 break; 549 case 'V': 550 printf(getstr(9), __progname, VERSION); 551 exit(0); 552 case 'v': 553 vflag = true; 554 break; 555 case 'w': 556 wflag = true; 557 break; 558 case 'x': 559 xflag = true; 560 break; 561 case 'Z': 562 filebehave = FILE_GZIP; 563 break; 564 case BIN_OPT: 565 if (strcasecmp("binary", optarg) == 0) 566 binbehave = BINFILE_BIN; 567 else if (strcasecmp("without-match", optarg) == 0) 568 binbehave = BINFILE_SKIP; 569 else if (strcasecmp("text", optarg) == 0) 570 binbehave = BINFILE_TEXT; 571 else 572 errx(2, getstr(3), "--binary-files"); 573 break; 574 case COLOR_OPT: 575 color = NULL; 576 if (optarg == NULL || strcasecmp("auto", optarg) == 0 || 577 strcasecmp("tty", optarg) == 0 || 578 strcasecmp("if-tty", optarg) == 0) { 579 char *term; 580 581 term = getenv("TERM"); 582 if (isatty(STDOUT_FILENO) && term != NULL && 583 strcasecmp(term, "dumb") != 0) 584 color = init_color("01;31"); 585 } else if (strcasecmp("always", optarg) == 0 || 586 strcasecmp("yes", optarg) == 0 || 587 strcasecmp("force", optarg) == 0) { 588 color = init_color("01;31"); 589 } else if (strcasecmp("never", optarg) != 0 && 590 strcasecmp("none", optarg) != 0 && 591 strcasecmp("no", optarg) != 0) 592 errx(2, getstr(3), "--color"); 593 break; 594 case LABEL_OPT: 595 label = optarg; 596 break; 597 case LINEBUF_OPT: 598 lbflag = true; 599 break; 600 case NULL_OPT: 601 nullflag = true; 602 break; 603 case R_INCLUDE_OPT: 604 finclude = true; 605 add_fpattern(optarg, INCL_PAT); 606 
break; 607 case R_EXCLUDE_OPT: 608 fexclude = true; 609 add_fpattern(optarg, EXCL_PAT); 610 break; 611 case R_DINCLUDE_OPT: 612 dexclude = true; 613 add_dpattern(optarg, INCL_PAT); 614 break; 615 case R_DEXCLUDE_OPT: 616 dinclude = true; 617 add_dpattern(optarg, EXCL_PAT); 618 break; 619 case HELP_OPT: 620 default: 621 usage(); 622 } 623 lastc = c; 624 newarg = optind != prevoptind; 625 prevoptind = optind; 626 } 627 aargc -= optind; 628 aargv += optind; 629 630 /* Fail if we don't have any pattern */ 631 if (aargc == 0 && needpattern) 632 usage(); 633 634 /* Process patterns from command line */ 635 if (aargc != 0 && needpattern) { 636 add_pattern(*aargv, strlen(*aargv)); 637 --aargc; 638 ++aargv; 639 } 640 641 switch (grepbehave) { 642 case GREP_FIXED: 643 case GREP_BASIC: 644 break; 645 case GREP_EXTENDED: 646 cflags |= REG_EXTENDED; 647 break; 648 default: 649 /* NOTREACHED */ 650 usage(); 651 } 652 653 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); 654 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); 655/* 656 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance. 657 * Optimizations should be done there. 658 */ 659 /* Check if cheating is allowed (always is for fgrep). 
*/ 660 if (grepbehave == GREP_FIXED) { 661 for (i = 0; i < patterns; ++i) 662 fgrepcomp(&fg_pattern[i], pattern[i]); 663 } else { 664 for (i = 0; i < patterns; ++i) { 665 if (fastcomp(&fg_pattern[i], pattern[i])) { 666 /* Fall back to full regex library */ 667 c = regcomp(&r_pattern[i], pattern[i], cflags); 668 if (c != 0) { 669 regerror(c, &r_pattern[i], re_error, 670 RE_ERROR_BUF); 671 errx(2, "%s", re_error); 672 } 673 } 674 } 675 } 676 677 if (lbflag) 678 setlinebuf(stdout); 679 680 if ((aargc == 0 || aargc == 1) && !Hflag) 681 hflag = true; 682 683 if (aargc == 0) 684 exit(!procfile("-")); 685 686 if (dirbehave == DIR_RECURSE) 687 c = grep_tree(aargv); 688 else 689 for (c = 0; aargc--; ++aargv) { 690 if ((finclude || fexclude) && !file_matching(*aargv)) 691 continue; 692 c+= procfile(*aargv); 693 } 694 695#ifndef WITHOUT_NLS 696 catclose(catalog); 697#endif 698 699 /* Find out the correct return value according to the 700 results and the command line option. */ 701 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1)); 702} 703