/* grep.c revision 210461 */
1/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */ 2 3/*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/usr.bin/grep/grep.c 210461 2010-07-25 08:42:18Z gabor $"); 32 33#include <sys/stat.h> 34#include <sys/types.h> 35 36#include <ctype.h> 37#include <err.h> 38#include <errno.h> 39#include <getopt.h> 40#include <limits.h> 41#include <libgen.h> 42#include <locale.h> 43#include <stdbool.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48 49#include "grep.h" 50 51#ifndef WITHOUT_NLS 52#include <nl_types.h> 53nl_catd catalog; 54#endif 55 56/* 57 * Default messags to use when NLS is disabled or no catalogue 58 * is found. 59 */ 60const char *errstr[] = { 61 "", 62/* 1*/ "(standard input)", 63/* 2*/ "cannot read bzip2 compressed file", 64/* 3*/ "unknown --color option", 65/* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n", 66/* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n", 67/* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n", 68/* 7*/ "\t[--null] [pattern] [file ...]\n", 69/* 8*/ "unknown --binary-files option", 70/* 9*/ "Binary file %s matches\n", 71/*10*/ "%s (BSD grep) %s\n", 72}; 73 74/* Flags passed to regcomp() and regexec() */ 75int cflags = 0; 76int eflags = REG_STARTEND; 77 78/* Shortcut for matching all cases like empty regex */ 79bool matchall; 80 81/* Searching patterns */ 82unsigned int patterns, pattern_sz; 83char **pattern; 84regex_t *r_pattern; 85fastgrep_t *fg_pattern; 86 87/* Filename exclusion/inclusion patterns */ 88unsigned int epatterns, epattern_sz; 89struct epat *epattern; 90 91/* For regex errors */ 92char re_error[RE_ERROR_BUF + 1]; 93 94/* Command-line flags */ 95unsigned long long Aflag; /* -A x: print x lines trailing each match */ 96unsigned long long Bflag; /* -B x: print x lines leading each match */ 97bool Hflag; /* -H: always print file name */ 98bool Lflag; /* -L: only show names of files with no matches */ 99bool bflag; /* -b: show block numbers for 
each match */ 100bool cflag; /* -c: only show a count of matching lines */ 101bool hflag; /* -h: don't print filename headers */ 102bool iflag; /* -i: ignore case */ 103bool lflag; /* -l: only show names of files with matches */ 104bool mflag; /* -m x: stop reading the files after x matches */ 105unsigned long long mcount; /* count for -m */ 106bool nflag; /* -n: show line numbers in front of matching lines */ 107bool oflag; /* -o: print only matching part */ 108bool qflag; /* -q: quiet mode (don't output anything) */ 109bool sflag; /* -s: silent mode (ignore errors) */ 110bool vflag; /* -v: only show non-matching lines */ 111bool wflag; /* -w: pattern must start and end on word boundaries */ 112bool xflag; /* -x: pattern must match entire line */ 113bool lbflag; /* --line-buffered */ 114bool nullflag; /* --null */ 115bool exclflag; /* --exclude */ 116char *label; /* --label */ 117const char *color; /* --color */ 118int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */ 119int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */ 120int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */ 121int devbehave = DEV_READ; /* -D: handling of devices */ 122int dirbehave = DIR_READ; /* -dRr: handling of directories */ 123int linkbehave = LINK_READ; /* -OpS: handling of symlinks */ 124 125enum { 126 BIN_OPT = CHAR_MAX + 1, 127 COLOR_OPT, 128 HELP_OPT, 129 MMAP_OPT, 130 LINEBUF_OPT, 131 LABEL_OPT, 132 NULL_OPT, 133 R_EXCLUDE_OPT, 134 R_INCLUDE_OPT, 135 R_DEXCLUDE_OPT, 136 R_DINCLUDE_OPT 137}; 138 139static inline const char *init_color(const char *); 140 141/* Housekeeping */ 142bool first = true; /* flag whether we are processing the first match */ 143bool prev; /* flag whether or not the previous line matched */ 144int tail; /* lines left to print */ 145bool notfound; /* file not found */ 146 147extern char *__progname; 148 149/* 150 * Prints usage information and returns 2. 
151 */ 152static void 153usage(void) 154{ 155 fprintf(stderr, getstr(4), __progname); 156 fprintf(stderr, "%s", getstr(5)); 157 fprintf(stderr, "%s", getstr(5)); 158 fprintf(stderr, "%s", getstr(6)); 159 fprintf(stderr, "%s", getstr(7)); 160 exit(2); 161} 162 163static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy"; 164 165struct option long_options[] = 166{ 167 {"binary-files", required_argument, NULL, BIN_OPT}, 168 {"help", no_argument, NULL, HELP_OPT}, 169 {"mmap", no_argument, NULL, MMAP_OPT}, 170 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 171 {"label", required_argument, NULL, LABEL_OPT}, 172 {"null", no_argument, NULL, NULL_OPT}, 173 {"color", optional_argument, NULL, COLOR_OPT}, 174 {"colour", optional_argument, NULL, COLOR_OPT}, 175 {"exclude", required_argument, NULL, R_EXCLUDE_OPT}, 176 {"include", required_argument, NULL, R_INCLUDE_OPT}, 177 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT}, 178 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT}, 179 {"after-context", required_argument, NULL, 'A'}, 180 {"text", no_argument, NULL, 'a'}, 181 {"before-context", required_argument, NULL, 'B'}, 182 {"byte-offset", no_argument, NULL, 'b'}, 183 {"context", optional_argument, NULL, 'C'}, 184 {"count", no_argument, NULL, 'c'}, 185 {"devices", required_argument, NULL, 'D'}, 186 {"directories", required_argument, NULL, 'd'}, 187 {"extended-regexp", no_argument, NULL, 'E'}, 188 {"regexp", required_argument, NULL, 'e'}, 189 {"fixed-strings", no_argument, NULL, 'F'}, 190 {"file", required_argument, NULL, 'f'}, 191 {"basic-regexp", no_argument, NULL, 'G'}, 192 {"no-filename", no_argument, NULL, 'h'}, 193 {"with-filename", no_argument, NULL, 'H'}, 194 {"ignore-case", no_argument, NULL, 'i'}, 195 {"bz2decompress", no_argument, NULL, 'J'}, 196 {"files-with-matches", no_argument, NULL, 'l'}, 197 {"files-without-match", no_argument, NULL, 'L'}, 198 {"max-count", required_argument, NULL, 'm'}, 199 {"line-number", 
no_argument, NULL, 'n'}, 200 {"only-matching", no_argument, NULL, 'o'}, 201 {"quiet", no_argument, NULL, 'q'}, 202 {"silent", no_argument, NULL, 'q'}, 203 {"recursive", no_argument, NULL, 'r'}, 204 {"no-messages", no_argument, NULL, 's'}, 205 {"binary", no_argument, NULL, 'U'}, 206 {"unix-byte-offsets", no_argument, NULL, 'u'}, 207 {"invert-match", no_argument, NULL, 'v'}, 208 {"version", no_argument, NULL, 'V'}, 209 {"word-regexp", no_argument, NULL, 'w'}, 210 {"line-regexp", no_argument, NULL, 'x'}, 211 {"decompress", no_argument, NULL, 'Z'}, 212 {NULL, no_argument, NULL, 0} 213}; 214 215/* 216 * Adds a searching pattern to the internal array. 217 */ 218static void 219add_pattern(char *pat, size_t len) 220{ 221 222 /* Check if we can do a shortcut */ 223 if (len == 0 || matchall) { 224 matchall = true; 225 return; 226 } 227 /* Increase size if necessary */ 228 if (patterns == pattern_sz) { 229 pattern_sz *= 2; 230 pattern = grep_realloc(pattern, ++pattern_sz * 231 sizeof(*pattern)); 232 } 233 if (len > 0 && pat[len - 1] == '\n') 234 --len; 235 /* pat may not be NUL-terminated */ 236 pattern[patterns] = grep_malloc(len + 1); 237 memcpy(pattern[patterns], pat, len); 238 pattern[patterns][len] = '\0'; 239 ++patterns; 240} 241 242/* 243 * Adds an include/exclude pattern to the internal array. 244 */ 245static void 246add_epattern(char *pat, size_t len, int type, int mode) 247{ 248 249 /* Increase size if necessary */ 250 if (epatterns == epattern_sz) { 251 epattern_sz *= 2; 252 epattern = grep_realloc(epattern, ++epattern_sz * 253 sizeof(struct epat)); 254 } 255 if (len > 0 && pat[len - 1] == '\n') 256 --len; 257 epattern[epatterns].pat = grep_malloc(len + 1); 258 memcpy(epattern[epatterns].pat, pat, len); 259 epattern[epatterns].pat[len] = '\0'; 260 epattern[epatterns].type = type; 261 epattern[epatterns].mode = mode; 262 ++epatterns; 263} 264 265/* 266 * Reads searching patterns from a file and adds them with add_pattern(). 
267 */ 268static void 269read_patterns(const char *fn) 270{ 271 FILE *f; 272 char *line; 273 size_t len; 274 275 if ((f = fopen(fn, "r")) == NULL) 276 err(2, "%s", fn); 277 while ((line = fgetln(f, &len)) != NULL) 278 add_pattern(line, *line == '\n' ? 0 : len); 279 if (ferror(f)) 280 err(2, "%s", fn); 281 fclose(f); 282} 283 284static inline const char * 285init_color(const char *d) 286{ 287 char *c; 288 289 c = getenv("GREP_COLOR"); 290 return (c != NULL ? c : d); 291} 292 293int 294main(int argc, char *argv[]) 295{ 296 char **aargv, **eargv, *eopts; 297 char *ep; 298 unsigned long long l; 299 unsigned int aargc, eargc, i; 300 int c, lastc, needpattern, newarg, prevoptind; 301 302 setlocale(LC_ALL, ""); 303 304#ifndef WITHOUT_NLS 305 catalog = catopen("grep", NL_CAT_LOCALE); 306#endif 307 308 /* Check what is the program name of the binary. In this 309 way we can have all the funcionalities in one binary 310 without the need of scripting and using ugly hacks. */ 311 switch (__progname[0]) { 312 case 'e': 313 grepbehave = GREP_EXTENDED; 314 break; 315 case 'f': 316 grepbehave = GREP_FIXED; 317 break; 318 case 'g': 319 grepbehave = GREP_BASIC; 320 break; 321 case 'z': 322 filebehave = FILE_GZIP; 323 switch(__progname[1]) { 324 case 'e': 325 grepbehave = GREP_EXTENDED; 326 break; 327 case 'f': 328 grepbehave = GREP_FIXED; 329 break; 330 case 'g': 331 grepbehave = GREP_BASIC; 332 break; 333 } 334 break; 335 } 336 337 lastc = '\0'; 338 newarg = 1; 339 prevoptind = 1; 340 needpattern = 1; 341 342 eopts = getenv("GREP_OPTIONS"); 343 344 eargc = 1; 345 if (eopts != NULL) { 346 char *str; 347 348 for(i = 0; i < strlen(eopts); i++) 349 if (eopts[i] == ' ') 350 eargc++; 351 352 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1)); 353 354 str = strtok(eopts, " "); 355 eargc = 0; 356 357 while(str != NULL) { 358 eargv[++eargc] = (char *)grep_malloc(sizeof(char) * 359 (strlen(str) + 1)); 360 strlcpy(eargv[eargc], str, strlen(str) + 1); 361 str = strtok(NULL, " "); 362 } 
363 eargv[++eargc] = NULL; 364 365 aargv = (char **)grep_calloc(eargc + argc + 1, 366 sizeof(char *)); 367 aargv[0] = argv[0]; 368 369 for(i = 1; i < eargc; i++) 370 aargv[i] = eargv[i]; 371 for(int j = 1; j < argc; j++) 372 aargv[i++] = argv[j]; 373 374 aargc = eargc + argc - 1; 375 376 } else { 377 aargv = argv; 378 aargc = argc; 379 } 380 381 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) != 382 -1)) { 383 switch (c) { 384 case '0': case '1': case '2': case '3': case '4': 385 case '5': case '6': case '7': case '8': case '9': 386 if (newarg || !isdigit(lastc)) 387 Aflag = 0; 388 else if (Aflag > LLONG_MAX / 10) { 389 errno = ERANGE; 390 err(2, NULL); 391 } 392 Aflag = Bflag = (Aflag * 10) + (c - '0'); 393 break; 394 case 'C': 395 if (optarg == NULL) { 396 Aflag = Bflag = 2; 397 break; 398 } 399 /* FALLTHROUGH */ 400 case 'A': 401 /* FALLTHROUGH */ 402 case 'B': 403 errno = 0; 404 l = strtoull(optarg, &ep, 10); 405 if (((errno == ERANGE) && (l == ULLONG_MAX)) || 406 ((errno == EINVAL) && (l == 0))) 407 err(2, NULL); 408 else if (ep[0] != '\0') { 409 errno = EINVAL; 410 err(2, NULL); 411 } 412 if (c == 'A') 413 Aflag = l; 414 else if (c == 'B') 415 Bflag = l; 416 else 417 Aflag = Bflag = l; 418 break; 419 case 'a': 420 binbehave = BINFILE_TEXT; 421 break; 422 case 'b': 423 bflag = true; 424 break; 425 case 'c': 426 cflag = true; 427 break; 428 case 'D': 429 if (strcasecmp(optarg, "skip") == 0) 430 devbehave = DEV_SKIP; 431 else if (strcasecmp(optarg, "read") == 0) 432 devbehave = DEV_READ; 433 else { 434 errno = EINVAL; 435 err(2, NULL); 436 } 437 break; 438 case 'd': 439 if (strcasecmp("recurse", optarg) == 0) { 440 Hflag = true; 441 dirbehave = DIR_RECURSE; 442 } else if (strcasecmp("skip", optarg) == 0) 443 dirbehave = DIR_SKIP; 444 else if (strcasecmp("read", optarg) == 0) 445 dirbehave = DIR_READ; 446 else { 447 errno = EINVAL; 448 err(2, NULL); 449 } 450 break; 451 case 'E': 452 grepbehave = GREP_EXTENDED; 453 break; 454 case 'e': 455 
add_pattern(optarg, strlen(optarg)); 456 needpattern = 0; 457 break; 458 case 'F': 459 grepbehave = GREP_FIXED; 460 break; 461 case 'f': 462 read_patterns(optarg); 463 needpattern = 0; 464 break; 465 case 'G': 466 grepbehave = GREP_BASIC; 467 break; 468 case 'H': 469 Hflag = true; 470 break; 471 case 'h': 472 Hflag = false; 473 hflag = true; 474 break; 475 case 'I': 476 binbehave = BINFILE_SKIP; 477 break; 478 case 'i': 479 case 'y': 480 iflag = true; 481 cflags |= REG_ICASE; 482 break; 483 case 'J': 484 filebehave = FILE_BZIP; 485 break; 486 case 'L': 487 lflag = false; 488 Lflag = true; 489 break; 490 case 'l': 491 Lflag = false; 492 lflag = true; 493 break; 494 case 'm': 495 mflag = true; 496 errno = 0; 497 mcount = strtoull(optarg, &ep, 10); 498 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) || 499 ((errno == EINVAL) && (mcount == 0))) 500 err(2, NULL); 501 else if (ep[0] != '\0') { 502 errno = EINVAL; 503 err(2, NULL); 504 } 505 break; 506 case 'n': 507 nflag = true; 508 break; 509 case 'O': 510 linkbehave = LINK_EXPLICIT; 511 break; 512 case 'o': 513 oflag = true; 514 break; 515 case 'p': 516 linkbehave = LINK_SKIP; 517 break; 518 case 'q': 519 qflag = true; 520 break; 521 case 'S': 522 linkbehave = LINK_READ; 523 break; 524 case 'R': 525 case 'r': 526 dirbehave = DIR_RECURSE; 527 Hflag = true; 528 break; 529 case 's': 530 sflag = true; 531 break; 532 case 'U': 533 binbehave = BINFILE_BIN; 534 break; 535 case 'u': 536 case MMAP_OPT: 537 /* noop, compatibility */ 538 break; 539 case 'V': 540 printf(getstr(10), __progname, VERSION); 541 exit(0); 542 case 'v': 543 vflag = true; 544 break; 545 case 'w': 546 wflag = true; 547 break; 548 case 'x': 549 xflag = true; 550 break; 551 case 'Z': 552 filebehave = FILE_GZIP; 553 break; 554 case BIN_OPT: 555 if (strcasecmp("binary", optarg) == 0) 556 binbehave = BINFILE_BIN; 557 else if (strcasecmp("without-match", optarg) == 0) 558 binbehave = BINFILE_SKIP; 559 else if (strcasecmp("text", optarg) == 0) 560 binbehave = 
BINFILE_TEXT; 561 else 562 errx(2, "%s", getstr(8)); 563 break; 564 case COLOR_OPT: 565 color = NULL; 566 if (optarg == NULL || strcasecmp("auto", optarg) == 0 || 567 strcasecmp("tty", optarg) == 0 || 568 strcasecmp("if-tty", optarg) == 0) { 569 char *term; 570 571 term = getenv("TERM"); 572 if (isatty(STDOUT_FILENO) && term != NULL && 573 strcasecmp(term, "dumb") != 0) 574 color = init_color("01;31"); 575 } else if (strcasecmp("always", optarg) == 0 || 576 strcasecmp("yes", optarg) == 0 || 577 strcasecmp("force", optarg) == 0) { 578 color = init_color("01;31"); 579 } else if (strcasecmp("never", optarg) != 0 && 580 strcasecmp("none", optarg) != 0 && 581 strcasecmp("no", optarg) != 0) 582 errx(2, "%s", getstr(3)); 583 break; 584 case LABEL_OPT: 585 label = optarg; 586 break; 587 case LINEBUF_OPT: 588 lbflag = true; 589 break; 590 case NULL_OPT: 591 nullflag = true; 592 break; 593 case R_INCLUDE_OPT: 594 exclflag = true; 595 add_epattern(basename(optarg), strlen(basename(optarg)), 596 FILE_PAT, INCL_PAT); 597 break; 598 case R_EXCLUDE_OPT: 599 exclflag = true; 600 add_epattern(basename(optarg), strlen(basename(optarg)), 601 FILE_PAT, EXCL_PAT); 602 break; 603 case R_DINCLUDE_OPT: 604 exclflag = true; 605 add_epattern(basename(optarg), strlen(basename(optarg)), 606 DIR_PAT, INCL_PAT); 607 break; 608 case R_DEXCLUDE_OPT: 609 exclflag = true; 610 add_epattern(basename(optarg), strlen(basename(optarg)), 611 DIR_PAT, EXCL_PAT); 612 break; 613 case HELP_OPT: 614 default: 615 usage(); 616 } 617 lastc = c; 618 newarg = optind != prevoptind; 619 prevoptind = optind; 620 } 621 aargc -= optind; 622 aargv += optind; 623 624 /* Fail if we don't have any pattern */ 625 if (aargc == 0 && needpattern) 626 usage(); 627 628 /* Process patterns from command line */ 629 if (aargc != 0 && needpattern) { 630 add_pattern(*aargv, strlen(*aargv)); 631 --aargc; 632 ++aargv; 633 } 634 635 switch (grepbehave) { 636 case GREP_FIXED: 637 case GREP_BASIC: 638 break; 639 case GREP_EXTENDED: 640 
cflags |= REG_EXTENDED; 641 break; 642 default: 643 /* NOTREACHED */ 644 usage(); 645 } 646 647 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); 648 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); 649/* 650 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance. 651 * Optimizations should be done there. 652 */ 653 /* Check if cheating is allowed (always is for fgrep). */ 654 if (grepbehave == GREP_FIXED) { 655 for (i = 0; i < patterns; ++i) 656 fgrepcomp(&fg_pattern[i], pattern[i]); 657 } else { 658 for (i = 0; i < patterns; ++i) { 659 if (fastcomp(&fg_pattern[i], pattern[i])) { 660 /* Fall back to full regex library */ 661 c = regcomp(&r_pattern[i], pattern[i], cflags); 662 if (c != 0) { 663 regerror(c, &r_pattern[i], re_error, 664 RE_ERROR_BUF); 665 errx(2, "%s", re_error); 666 } 667 } 668 } 669 } 670 671 if (lbflag) 672 setlinebuf(stdout); 673 674 if ((aargc == 0 || aargc == 1) && !Hflag) 675 hflag = true; 676 677 if (aargc == 0) 678 exit(!procfile("-")); 679 680 if (dirbehave == DIR_RECURSE) 681 c = grep_tree(aargv); 682 else 683 for (c = 0; aargc--; ++aargv) 684 c+= procfile(*aargv); 685 686#ifndef WITHOUT_NLS 687 catclose(catalog); 688#endif 689 690 /* Find out the correct return value according to the 691 results and the command line option. */ 692 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1)); 693} 694