/* grep.c revision 210430 */
1/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */ 2 3/*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/usr.bin/grep/grep.c 210430 2010-07-23 19:36:11Z delphij $"); 32 33#include <sys/stat.h> 34#include <sys/types.h> 35 36#include <ctype.h> 37#include <err.h> 38#include <errno.h> 39#include <getopt.h> 40#include <limits.h> 41#include <libgen.h> 42#include <locale.h> 43#include <stdbool.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48 49#include "grep.h" 50 51#ifndef WITHOUT_NLS 52#include <nl_types.h> 53nl_catd catalog; 54#endif 55 56/* 57 * Default messags to use when NLS is disabled or no catalogue 58 * is found. 59 */ 60const char *errstr[] = { 61 "", 62/* 1*/ "(standard input)", 63/* 2*/ "cannot read bzip2 compressed file", 64/* 3*/ "unknown --color option", 65/* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n", 66/* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n", 67/* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n", 68/* 7*/ "\t[--null] [pattern] [file ...]\n", 69/* 8*/ "unknown --binary-files option", 70/* 9*/ "Binary file %s matches\n", 71/*10*/ "%s (BSD grep) %s\n", 72}; 73 74/* Flags passed to regcomp() and regexec() */ 75int cflags = 0; 76int eflags = REG_STARTEND; 77 78/* Shortcut for matching all cases like empty regex */ 79bool matchall; 80 81/* Searching patterns */ 82unsigned int patterns, pattern_sz; 83char **pattern; 84regex_t *r_pattern; 85fastgrep_t *fg_pattern; 86 87/* Filename exclusion/inclusion patterns */ 88unsigned int epatterns, epattern_sz; 89struct epat *epattern; 90 91/* For regex errors */ 92char re_error[RE_ERROR_BUF + 1]; 93 94/* Command-line flags */ 95unsigned long long Aflag; /* -A x: print x lines trailing each match */ 96unsigned long long Bflag; /* -B x: print x lines leading each match */ 97bool Hflag; /* -H: always print file name */ 98bool Lflag; /* -L: only show names of files with no matches */ 99bool bflag; /* -b: show block numbers for 
each match */ 100bool cflag; /* -c: only show a count of matching lines */ 101bool hflag; /* -h: don't print filename headers */ 102bool iflag; /* -i: ignore case */ 103bool lflag; /* -l: only show names of files with matches */ 104bool mflag; /* -m x: stop reading the files after x matches */ 105unsigned long long mcount; /* count for -m */ 106bool nflag; /* -n: show line numbers in front of matching lines */ 107bool oflag; /* -o: print only matching part */ 108bool qflag; /* -q: quiet mode (don't output anything) */ 109bool sflag; /* -s: silent mode (ignore errors) */ 110bool vflag; /* -v: only show non-matching lines */ 111bool wflag; /* -w: pattern must start and end on word boundaries */ 112bool xflag; /* -x: pattern must match entire line */ 113bool lbflag; /* --line-buffered */ 114bool nullflag; /* --null */ 115bool exclflag; /* --exclude */ 116char *label; /* --label */ 117char *color; /* --color */ 118int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */ 119int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */ 120int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */ 121int devbehave = DEV_GREP; /* -D: handling of devices */ 122int dirbehave = DIR_GREP; /* -dRr: handling of directories */ 123int linkbehave = LINK_GREP; /* -OpS: handling of symlinks */ 124 125enum { 126 BIN_OPT = CHAR_MAX + 1, 127 COLOR_OPT, 128 HELP_OPT, 129 MMAP_OPT, 130 LINEBUF_OPT, 131 LABEL_OPT, 132 NULL_OPT, 133 R_EXCLUDE_OPT, 134 R_INCLUDE_OPT, 135 R_DEXCLUDE_OPT, 136 R_DINCLUDE_OPT 137}; 138 139/* Housekeeping */ 140bool first = true; /* flag whether we are processing the first match */ 141bool prev; /* flag whether or not the previous line matched */ 142int tail; /* lines left to print */ 143bool notfound; /* file not found */ 144 145extern char *__progname; 146 147/* 148 * Prints usage information and returns 2. 
149 */ 150static void 151usage(void) 152{ 153 fprintf(stderr, getstr(4), __progname); 154 fprintf(stderr, "%s", getstr(5)); 155 fprintf(stderr, "%s", getstr(5)); 156 fprintf(stderr, "%s", getstr(6)); 157 fprintf(stderr, "%s", getstr(7)); 158 exit(2); 159} 160 161static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy"; 162 163struct option long_options[] = 164{ 165 {"binary-files", required_argument, NULL, BIN_OPT}, 166 {"help", no_argument, NULL, HELP_OPT}, 167 {"mmap", no_argument, NULL, MMAP_OPT}, 168 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 169 {"label", required_argument, NULL, LABEL_OPT}, 170 {"null", no_argument, NULL, NULL_OPT}, 171 {"color", optional_argument, NULL, COLOR_OPT}, 172 {"colour", optional_argument, NULL, COLOR_OPT}, 173 {"exclude", required_argument, NULL, R_EXCLUDE_OPT}, 174 {"include", required_argument, NULL, R_INCLUDE_OPT}, 175 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT}, 176 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT}, 177 {"after-context", required_argument, NULL, 'A'}, 178 {"text", no_argument, NULL, 'a'}, 179 {"before-context", required_argument, NULL, 'B'}, 180 {"byte-offset", no_argument, NULL, 'b'}, 181 {"context", optional_argument, NULL, 'C'}, 182 {"count", no_argument, NULL, 'c'}, 183 {"devices", required_argument, NULL, 'D'}, 184 {"directories", required_argument, NULL, 'd'}, 185 {"extended-regexp", no_argument, NULL, 'E'}, 186 {"regexp", required_argument, NULL, 'e'}, 187 {"fixed-strings", no_argument, NULL, 'F'}, 188 {"file", required_argument, NULL, 'f'}, 189 {"basic-regexp", no_argument, NULL, 'G'}, 190 {"no-filename", no_argument, NULL, 'h'}, 191 {"with-filename", no_argument, NULL, 'H'}, 192 {"ignore-case", no_argument, NULL, 'i'}, 193 {"bz2decompress", no_argument, NULL, 'J'}, 194 {"files-with-matches", no_argument, NULL, 'l'}, 195 {"files-without-match", no_argument, NULL, 'L'}, 196 {"max-count", required_argument, NULL, 'm'}, 197 {"line-number", 
no_argument, NULL, 'n'}, 198 {"only-matching", no_argument, NULL, 'o'}, 199 {"quiet", no_argument, NULL, 'q'}, 200 {"silent", no_argument, NULL, 'q'}, 201 {"recursive", no_argument, NULL, 'r'}, 202 {"no-messages", no_argument, NULL, 's'}, 203 {"binary", no_argument, NULL, 'U'}, 204 {"unix-byte-offsets", no_argument, NULL, 'u'}, 205 {"invert-match", no_argument, NULL, 'v'}, 206 {"version", no_argument, NULL, 'V'}, 207 {"word-regexp", no_argument, NULL, 'w'}, 208 {"line-regexp", no_argument, NULL, 'x'}, 209 {"decompress", no_argument, NULL, 'Z'}, 210 {NULL, no_argument, NULL, 0} 211}; 212 213/* 214 * Adds a searching pattern to the internal array. 215 */ 216static void 217add_pattern(char *pat, size_t len) 218{ 219 220 /* Check if we can do a shortcut */ 221 if (len == 0 || matchall) { 222 matchall = true; 223 return; 224 } 225 /* Increase size if necessary */ 226 if (patterns == pattern_sz) { 227 pattern_sz *= 2; 228 pattern = grep_realloc(pattern, ++pattern_sz * 229 sizeof(*pattern)); 230 } 231 if (len > 0 && pat[len - 1] == '\n') 232 --len; 233 /* pat may not be NUL-terminated */ 234 pattern[patterns] = grep_malloc(len + 1); 235 memcpy(pattern[patterns], pat, len); 236 pattern[patterns][len] = '\0'; 237 ++patterns; 238} 239 240/* 241 * Adds an include/exclude pattern to the internal array. 242 */ 243static void 244add_epattern(char *pat, size_t len, int type, int mode) 245{ 246 247 /* Increase size if necessary */ 248 if (epatterns == epattern_sz) { 249 epattern_sz *= 2; 250 epattern = grep_realloc(epattern, ++epattern_sz * 251 sizeof(struct epat)); 252 } 253 if (len > 0 && pat[len - 1] == '\n') 254 --len; 255 epattern[epatterns].pat = grep_malloc(len + 1); 256 memcpy(epattern[epatterns].pat, pat, len); 257 epattern[epatterns].pat[len] = '\0'; 258 epattern[epatterns].type = type; 259 epattern[epatterns].mode = mode; 260 ++epatterns; 261} 262 263/* 264 * Reads searching patterns from a file and adds them with add_pattern(). 
265 */ 266static void 267read_patterns(const char *fn) 268{ 269 FILE *f; 270 char *line; 271 size_t len; 272 273 if ((f = fopen(fn, "r")) == NULL) 274 err(2, "%s", fn); 275 while ((line = fgetln(f, &len)) != NULL) 276 add_pattern(line, *line == '\n' ? 0 : len); 277 if (ferror(f)) 278 err(2, "%s", fn); 279 fclose(f); 280} 281 282int 283main(int argc, char *argv[]) 284{ 285 char **aargv, **eargv, *eopts; 286 char *ep; 287 unsigned long long l; 288 unsigned int aargc, eargc, i; 289 int c, lastc, needpattern, newarg, prevoptind; 290 291 setlocale(LC_ALL, ""); 292 293#ifndef WITHOUT_NLS 294 catalog = catopen("grep", NL_CAT_LOCALE); 295#endif 296 297 /* Check what is the program name of the binary. In this 298 way we can have all the funcionalities in one binary 299 without the need of scripting and using ugly hacks. */ 300 switch (__progname[0]) { 301 case 'e': 302 grepbehave = GREP_EXTENDED; 303 break; 304 case 'f': 305 grepbehave = GREP_FIXED; 306 break; 307 case 'g': 308 grepbehave = GREP_BASIC; 309 break; 310 case 'z': 311 filebehave = FILE_GZIP; 312 switch(__progname[1]) { 313 case 'e': 314 grepbehave = GREP_EXTENDED; 315 break; 316 case 'f': 317 grepbehave = GREP_FIXED; 318 break; 319 case 'g': 320 grepbehave = GREP_BASIC; 321 break; 322 } 323 break; 324 } 325 326 lastc = '\0'; 327 newarg = 1; 328 prevoptind = 1; 329 needpattern = 1; 330 331 eopts = getenv("GREP_OPTIONS"); 332 333 eargc = 1; 334 if (eopts != NULL) { 335 char *str; 336 337 for(i = 0; i < strlen(eopts); i++) 338 if (eopts[i] == ' ') 339 eargc++; 340 341 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1)); 342 343 str = strtok(eopts, " "); 344 eargc = 0; 345 346 while(str != NULL) { 347 eargv[++eargc] = (char *)grep_malloc(sizeof(char) * 348 (strlen(str) + 1)); 349 strlcpy(eargv[eargc], str, strlen(str) + 1); 350 str = strtok(NULL, " "); 351 } 352 eargv[++eargc] = NULL; 353 354 aargv = (char **)grep_calloc(eargc + argc + 1, 355 sizeof(char *)); 356 aargv[0] = argv[0]; 357 358 for(i = 1; i < 
eargc; i++) 359 aargv[i] = eargv[i]; 360 for(int j = 1; j < argc; j++) 361 aargv[i++] = argv[j]; 362 363 aargc = eargc + argc - 1; 364 365 } else { 366 aargv = argv; 367 aargc = argc; 368 } 369 370 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) != 371 -1)) { 372 switch (c) { 373 case '0': case '1': case '2': case '3': case '4': 374 case '5': case '6': case '7': case '8': case '9': 375 if (newarg || !isdigit(lastc)) 376 Aflag = 0; 377 else if (Aflag > LLONG_MAX / 10) { 378 errno = ERANGE; 379 err(2, NULL); 380 } 381 Aflag = Bflag = (Aflag * 10) + (c - '0'); 382 break; 383 case 'C': 384 if (optarg == NULL) { 385 Aflag = Bflag = 2; 386 break; 387 } 388 /* FALLTHROUGH */ 389 case 'A': 390 /* FALLTHROUGH */ 391 case 'B': 392 errno = 0; 393 l = strtoull(optarg, &ep, 10); 394 if (((errno == ERANGE) && (l == ULLONG_MAX)) || 395 ((errno == EINVAL) && (l == 0))) 396 err(2, NULL); 397 else if (ep[0] != '\0') { 398 errno = EINVAL; 399 err(2, NULL); 400 } 401 if (c == 'A') 402 Aflag = l; 403 else if (c == 'B') 404 Bflag = l; 405 else 406 Aflag = Bflag = l; 407 break; 408 case 'a': 409 binbehave = BINFILE_TEXT; 410 break; 411 case 'b': 412 bflag = true; 413 break; 414 case 'c': 415 cflag = true; 416 break; 417 case 'D': 418 if (strcmp(optarg, "skip") == 0) 419 devbehave = DEV_SKIP; 420 break; 421 case 'd': 422 if (strcmp("recurse", optarg) == 0) { 423 Hflag = true; 424 dirbehave = DIR_RECURSE; 425 } else if (strcmp("skip", optarg) == 0) 426 dirbehave = DIR_SKIP; 427 else if (strcmp("read", optarg) != 0) { 428 errno = EINVAL; 429 err(2, NULL); 430 } 431 break; 432 case 'E': 433 grepbehave = GREP_EXTENDED; 434 break; 435 case 'e': 436 add_pattern(optarg, strlen(optarg)); 437 needpattern = 0; 438 break; 439 case 'F': 440 grepbehave = GREP_FIXED; 441 break; 442 case 'f': 443 read_patterns(optarg); 444 needpattern = 0; 445 break; 446 case 'G': 447 grepbehave = GREP_BASIC; 448 break; 449 case 'H': 450 Hflag = true; 451 break; 452 case 'h': 453 Hflag = false; 454 
hflag = true; 455 break; 456 case 'I': 457 binbehave = BINFILE_SKIP; 458 break; 459 case 'i': 460 case 'y': 461 iflag = true; 462 cflags |= REG_ICASE; 463 break; 464 case 'J': 465 filebehave = FILE_BZIP; 466 break; 467 case 'L': 468 lflag = false; 469 Lflag = qflag = true; 470 break; 471 case 'l': 472 Lflag = false; 473 lflag = qflag = true; 474 break; 475 case 'm': 476 mflag = true; 477 errno = 0; 478 mcount = strtoull(optarg, &ep, 10); 479 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) || 480 ((errno == EINVAL) && (mcount == 0))) 481 err(2, NULL); 482 else if (ep[0] != '\0') { 483 errno = EINVAL; 484 err(2, NULL); 485 } 486 break; 487 case 'n': 488 nflag = true; 489 break; 490 case 'O': 491 linkbehave = LINK_EXPLICIT; 492 break; 493 case 'o': 494 oflag = true; 495 break; 496 case 'p': 497 linkbehave = LINK_SKIP; 498 break; 499 case 'q': 500 qflag = true; 501 break; 502 case 'S': 503 linkbehave = LINK_GREP; 504 break; 505 case 'R': 506 case 'r': 507 dirbehave = DIR_RECURSE; 508 Hflag = true; 509 break; 510 case 's': 511 sflag = true; 512 break; 513 case 'U': 514 binbehave = BINFILE_BIN; 515 break; 516 case 'u': 517 case MMAP_OPT: 518 /* noop, compatibility */ 519 break; 520 case 'V': 521 printf(getstr(10), __progname, VERSION); 522 exit(0); 523 case 'v': 524 vflag = true; 525 break; 526 case 'w': 527 wflag = true; 528 break; 529 case 'x': 530 xflag = true; 531 break; 532 case 'Z': 533 filebehave = FILE_GZIP; 534 break; 535 case BIN_OPT: 536 if (strcmp("binary", optarg) == 0) 537 binbehave = BINFILE_BIN; 538 else if (strcmp("without-match", optarg) == 0) 539 binbehave = BINFILE_SKIP; 540 else if (strcmp("text", optarg) == 0) 541 binbehave = BINFILE_TEXT; 542 else 543 errx(2, "%s", getstr(8)); 544 break; 545 case COLOR_OPT: 546 if (optarg == NULL || strcmp("auto", optarg) == 0 || 547 strcmp("always", optarg) == 0 ) { 548 color = getenv("GREP_COLOR"); 549 if (color == NULL) { 550 color = grep_malloc(sizeof(char) * 6); 551 strcpy(color, "01;31"); 552 } 553 } else 
if (strcmp("never", optarg) == 0) 554 color = NULL; 555 else 556 errx(2, "%s", getstr(3)); 557 break; 558 case LABEL_OPT: 559 label = optarg; 560 break; 561 case LINEBUF_OPT: 562 lbflag = true; 563 break; 564 case NULL_OPT: 565 nullflag = true; 566 break; 567 case R_INCLUDE_OPT: 568 exclflag = true; 569 add_epattern(basename(optarg), strlen(basename(optarg)), 570 FILE_PAT, INCL_PAT); 571 break; 572 case R_EXCLUDE_OPT: 573 exclflag = true; 574 add_epattern(basename(optarg), strlen(basename(optarg)), 575 FILE_PAT, EXCL_PAT); 576 break; 577 case R_DINCLUDE_OPT: 578 exclflag = true; 579 add_epattern(basename(optarg), strlen(basename(optarg)), 580 DIR_PAT, INCL_PAT); 581 break; 582 case R_DEXCLUDE_OPT: 583 exclflag = true; 584 add_epattern(basename(optarg), strlen(basename(optarg)), 585 DIR_PAT, EXCL_PAT); 586 break; 587 case HELP_OPT: 588 default: 589 usage(); 590 } 591 lastc = c; 592 newarg = optind != prevoptind; 593 prevoptind = optind; 594 } 595 aargc -= optind; 596 aargv += optind; 597 598 /* Fail if we don't have any pattern */ 599 if (aargc == 0 && needpattern) 600 usage(); 601 602 /* Process patterns from command line */ 603 if (aargc != 0 && needpattern) { 604 add_pattern(*aargv, strlen(*aargv)); 605 --aargc; 606 ++aargv; 607 } 608 609 switch (grepbehave) { 610 case GREP_FIXED: 611 case GREP_BASIC: 612 break; 613 case GREP_EXTENDED: 614 cflags |= REG_EXTENDED; 615 break; 616 default: 617 /* NOTREACHED */ 618 usage(); 619 } 620 621 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); 622 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); 623/* 624 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance. 625 * Optimizations should be done there. 626 */ 627 /* Check if cheating is allowed (always is for fgrep). 
*/ 628 if (grepbehave == GREP_FIXED) { 629 for (i = 0; i < patterns; ++i) 630 fgrepcomp(&fg_pattern[i], pattern[i]); 631 } else { 632 for (i = 0; i < patterns; ++i) { 633 if (fastcomp(&fg_pattern[i], pattern[i])) { 634 /* Fall back to full regex library */ 635 c = regcomp(&r_pattern[i], pattern[i], cflags); 636 if (c != 0) { 637 regerror(c, &r_pattern[i], re_error, 638 RE_ERROR_BUF); 639 errx(2, "%s", re_error); 640 } 641 } 642 } 643 } 644 645 if (lbflag) 646 setlinebuf(stdout); 647 648 if ((aargc == 0 || aargc == 1) && !Hflag) 649 hflag = true; 650 651 if (aargc == 0) 652 exit(!procfile("-")); 653 654 if (dirbehave == DIR_RECURSE) 655 c = grep_tree(aargv); 656 else 657 for (c = 0; aargc--; ++aargv) 658 c+= procfile(*aargv); 659 660#ifndef WITHOUT_NLS 661 catclose(catalog); 662#endif 663 664 /* Find out the correct return value according to the 665 results and the command line option. */ 666 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1)); 667} 668