grep.c revision 55404
1/* grep.c - main driver file for grep. 2 Copyright (C) 1992, 1997, 1998, 1999 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 2, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 17 02111-1307, USA. */ 18 19/* Written July 1992 by Mike Haertel. */ 20/* Builtin decompression 1997 by Wolfram Schneider <wosch@FreeBSD.org>. */ 21 22/* $FreeBSD: head/gnu/usr.bin/grep/grep.c 55404 2000-01-04 10:32:55Z ru $ */ 23 24#ifdef HAVE_CONFIG_H 25# include <config.h> 26#endif 27#include <sys/types.h> 28#include <sys/stat.h> 29#if defined(HAVE_MMAP) 30# include <sys/mman.h> 31#endif 32#if defined(HAVE_SETRLIMIT) 33# include <sys/time.h> 34# include <sys/resource.h> 35#endif 36#include <stdio.h> 37#include "system.h" 38#include "getopt.h" 39#include "getpagesize.h" 40#include "grep.h" 41#include "savedir.h" 42 43#undef MAX 44#define MAX(A,B) ((A) > (B) ? (A) : (B)) 45 46struct stats 47{ 48 struct stats *parent; 49 struct stat stat; 50}; 51 52/* base of chain of stat buffers, used to detect directory loops */ 53static struct stats stats_base; 54 55/* if non-zero, display usage information and exit */ 56static int show_help; 57 58/* If non-zero, print the version on standard output and exit. */ 59static int show_version; 60 61/* If nonzero, use mmap if possible. */ 62static int mmap_option; 63 64/* If zero, output nulls after filenames. */ 65static int filename_mask; 66 67/* Short options. */ 68static char const short_options[] = 69"0123456789A:B:C::EFGHUVX:abcd:e:f:hiLlnqrsuvwxyZz"; 70 71/* Long options equivalences. */ 72static struct option long_options[] = 73{ 74 {"after-context", required_argument, NULL, 'A'}, 75 {"basic-regexp", no_argument, NULL, 'G'}, 76 {"before-context", required_argument, NULL, 'B'}, 77 {"byte-offset", no_argument, NULL, 'b'}, 78 {"context", optional_argument, NULL, 'C'}, 79 {"count", no_argument, NULL, 'c'}, 80 {"directories", required_argument, NULL, 'd'}, 81 {"extended-regexp", no_argument, NULL, 'E'}, 82 {"file", required_argument, NULL, 'f'}, 83 {"files-with-matches", no_argument, NULL, 'l'}, 84 {"files-without-match", no_argument, NULL, 'L'}, 85 {"fixed-regexp", no_argument, NULL, 'F'}, 86 {"fixed-strings", no_argument, NULL, 'F'}, 87 {"help", no_argument, &show_help, 1}, 88 {"ignore-case", no_argument, NULL, 'i'}, 89 {"line-number", no_argument, NULL, 'n'}, 90 {"line-regexp", no_argument, NULL, 'x'}, 91 {"mmap", no_argument, &mmap_option, 1}, 92 {"no-filename", no_argument, NULL, 'h'}, 93 {"no-messages", no_argument, NULL, 's'}, 94#if HAVE_LIBZ > 0 95 {"decompress", no_argument, NULL, 'Z'}, 96 {"null", no_argument, &filename_mask, 0}, 97#else 98 {"null", no_argument, NULL, 'Z'}, 99#endif 100 {"null-data", no_argument, NULL, 'z'}, 101 {"quiet", no_argument, NULL, 'q'}, 102 {"recursive", no_argument, NULL, 'r'}, 103 {"regexp", required_argument, NULL, 'e'}, 104 {"invert-match", no_argument, NULL, 'v'}, 105 {"silent", no_argument, NULL, 'q'}, 106 {"text", no_argument, NULL, 'a'}, 107 {"binary", no_argument, NULL, 'U'}, 108 {"unix-byte-offsets", no_argument, NULL, 'u'}, 109 {"version", no_argument, NULL, 'V'}, 110 {"with-filename", no_argument, NULL, 'H'}, 111 {"word-regexp", no_argument, NULL, 'w'}, 112 {0, 0, 0, 0} 113}; 114 115/* Define flags declared in grep.h. */ 116char const *matcher; 117int match_icase; 118int match_words; 119int match_lines; 120unsigned char eolbyte; 121 122/* For error messages. */ 123static char *prog; 124static char const *filename; 125static int errseen; 126 127/* How to handle directories. */ 128static enum 129 { 130 READ_DIRECTORIES, 131 RECURSE_DIRECTORIES, 132 SKIP_DIRECTORIES 133 } directories; 134 135static int ck_atoi PARAMS ((char const *, int *)); 136static void usage PARAMS ((int)) __attribute__((noreturn)); 137static void error PARAMS ((const char *, int)); 138static void setmatcher PARAMS ((char const *)); 139static int install_matcher PARAMS ((char const *)); 140static int prepend_args PARAMS ((char const *, char *, char **)); 141static void prepend_default_options PARAMS ((char const *, int *, char ***)); 142static char *page_alloc PARAMS ((size_t, char **)); 143static int reset PARAMS ((int, char const *, struct stats *)); 144static int fillbuf PARAMS ((size_t, struct stats *)); 145static int grepbuf PARAMS ((char *, char *)); 146static void prtext PARAMS ((char *, char *, int *)); 147static void prpending PARAMS ((char *)); 148static void prline PARAMS ((char *, char *, int)); 149static void print_offset_sep PARAMS ((off_t, int)); 150static void nlscan PARAMS ((char *)); 151static int grep PARAMS ((int, char const *, struct stats *)); 152static int grepdir PARAMS ((char const *, struct stats *)); 153static int grepfile PARAMS ((char const *, struct stats *)); 154#if O_BINARY 155static inline int undossify_input PARAMS ((register char *, size_t)); 156#endif 157 158/* Functions we'll use to search. */ 159static void (*compile) PARAMS ((char *, size_t)); 160static char *(*execute) PARAMS ((char *, size_t, char **)); 161 162/* Print a message and possibly an error string. Remember 163 that something awful happened. */ 164static void 165error (mesg, errnum) 166 const char *mesg; 167 int errnum; 168{ 169 if (errnum) 170 fprintf (stderr, "%s: %s: %s\n", prog, mesg, strerror (errnum)); 171 else 172 fprintf (stderr, "%s: %s\n", prog, mesg); 173 errseen = 1; 174} 175 176/* Like error (), but die horribly after printing. */ 177void 178fatal (mesg, errnum) 179 const char *mesg; 180 int errnum; 181{ 182 error (mesg, errnum); 183 exit (2); 184} 185 186/* Interface to handle errors and fix library lossage. */ 187char * 188xmalloc (size) 189 size_t size; 190{ 191 char *result; 192 193 result = malloc (size); 194 if (size && !result) 195 fatal (_("memory exhausted"), 0); 196 return result; 197} 198 199/* Interface to handle errors and fix some library lossage. */ 200char * 201xrealloc (ptr, size) 202 char *ptr; 203 size_t size; 204{ 205 char *result; 206 207 if (ptr) 208 result = realloc (ptr, size); 209 else 210 result = malloc (size); 211 if (size && !result) 212 fatal (_("memory exhausted"), 0); 213 return result; 214} 215 216/* Convert STR to a positive integer, storing the result in *OUT. 217 If STR is not a valid integer, return -1 (otherwise 0). */ 218static int 219ck_atoi (str, out) 220 char const *str; 221 int *out; 222{ 223 char const *p; 224 for (p = str; *p; p++) 225 if (*p < '0' || *p > '9') 226 return -1; 227 228 *out = atoi (optarg); 229 return 0; 230} 231 232 233/* Hairy buffering mechanism for grep. The intent is to keep 234 all reads aligned on a page boundary and multiples of the 235 page size. */ 236 237static char *ubuffer; /* Unaligned base of buffer. */ 238static char *buffer; /* Base of buffer. */ 239static size_t bufsalloc; /* Allocated size of buffer save region. */ 240static size_t bufalloc; /* Total buffer size. */ 241#define PREFERRED_SAVE_FACTOR 5 /* Preferred value of bufalloc / bufsalloc. */ 242static int bufdesc; /* File descriptor. */ 243static char *bufbeg; /* Beginning of user-visible stuff. */ 244static char *buflim; /* Limit of user-visible stuff. */ 245static size_t pagesize; /* alignment of memory pages */ 246static off_t bufoffset; /* Read offset; defined on regular files. */ 247 248#if defined(HAVE_MMAP) 249static int bufmapped; /* True if buffer is memory-mapped. */ 250static off_t initial_bufoffset; /* Initial value of bufoffset. */ 251#endif 252 253#if HAVE_LIBZ > 0 254#include <zlib.h> 255static gzFile gzbufdesc; /* zlib file descriptor. */ 256static int Zflag; /* uncompress before searching. */ 257#endif 258 259/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be 260 an integer or a pointer. Both args must be free of side effects. */ 261#define ALIGN_TO(val, alignment) \ 262 ((size_t) (val) % (alignment) == 0 \ 263 ? (val) \ 264 : (val) + ((alignment) - (size_t) (val) % (alignment))) 265 266/* Return the address of a page-aligned buffer of size SIZE, 267 reallocating it from *UP. Set *UP to the newly allocated (but 268 possibly unaligned) buffer used to build the aligned buffer. To 269 free the buffer, free (*UP). */ 270static char * 271page_alloc (size, up) 272 size_t size; 273 char **up; 274{ 275 size_t asize = size + pagesize - 1; 276 if (size <= asize) 277 { 278 char *p = *up ? realloc (*up, asize) : malloc (asize); 279 if (p) 280 { 281 *up = p; 282 return ALIGN_TO (p, pagesize); 283 } 284 } 285 return NULL; 286} 287 288/* Reset the buffer for a new file, returning zero if we should skip it. 289 Initialize on the first time through. */ 290static int 291reset (fd, file, stats) 292 int fd; 293 char const *file; 294 struct stats *stats; 295{ 296 if (pagesize) 297 bufsalloc = ALIGN_TO (bufalloc / PREFERRED_SAVE_FACTOR, pagesize); 298 else 299 { 300 size_t ubufsalloc; 301 pagesize = getpagesize (); 302 if (pagesize == 0) 303 abort (); 304#ifndef BUFSALLOC 305 ubufsalloc = MAX (8192, pagesize); 306#else 307 ubufsalloc = BUFSALLOC; 308#endif 309 bufsalloc = ALIGN_TO (ubufsalloc, pagesize); 310 bufalloc = PREFERRED_SAVE_FACTOR * bufsalloc; 311 /* The 1 byte of overflow is a kludge for dfaexec(), which 312 inserts a sentinel newline at the end of the buffer 313 being searched. There's gotta be a better way... */ 314 if (bufsalloc < ubufsalloc 315 || bufalloc / PREFERRED_SAVE_FACTOR != bufsalloc 316 || bufalloc + 1 < bufalloc 317 || ! (buffer = page_alloc (bufalloc + 1, &ubuffer))) 318 fatal (_("memory exhausted"), 0); 319 } 320#if HAVE_LIBZ > 0 321 if (Zflag) 322 { 323 gzbufdesc = gzdopen(fd, "r"); 324 if (gzbufdesc == NULL) 325 fatal(_("memory exhausted"), 0); 326 } 327#endif 328 329 buflim = buffer; 330 bufdesc = fd; 331 332 if (fstat (fd, &stats->stat) != 0) 333 { 334 error ("fstat", errno); 335 return 0; 336 } 337 if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) 338 return 0; 339 if ( 340#if HAVE_LIBZ > 0 341 Zflag || 342#endif 343 S_ISREG (stats->stat.st_mode)) 344 { 345 if (file) 346 bufoffset = 0; 347 else 348 { 349 bufoffset = lseek (fd, 0, SEEK_CUR); 350 if (bufoffset < 0) 351 { 352 error ("lseek", errno); 353 return 0; 354 } 355 } 356#ifdef HAVE_MMAP 357 initial_bufoffset = bufoffset; 358 bufmapped = mmap_option && bufoffset % pagesize == 0; 359#endif 360 } 361 else 362 { 363#ifdef HAVE_MMAP 364 bufmapped = 0; 365#endif 366 } 367 return 1; 368} 369 370/* Read new stuff into the buffer, saving the specified 371 amount of old stuff. When we're done, 'bufbeg' points 372 to the beginning of the buffer contents, and 'buflim' 373 points just after the end. Return zero if there's an error. */ 374static int 375fillbuf (save, stats) 376 size_t save; 377 struct stats *stats; 378{ 379 size_t fillsize = 0; 380 int cc = 1; 381 size_t readsize; 382 383 /* Offset from start of unaligned buffer to start of old stuff 384 that we want to save. */ 385 size_t saved_offset = buflim - ubuffer - save; 386 387 if (bufsalloc < save) 388 { 389 size_t aligned_save = ALIGN_TO (save, pagesize); 390 size_t maxalloc = (size_t) -1; 391 size_t newalloc; 392 393 if (S_ISREG (stats->stat.st_mode)) 394 { 395 /* Calculate an upper bound on how much memory we should allocate. 396 We can't use ALIGN_TO here, since off_t might be longer than 397 size_t. Watch out for arithmetic overflow. */ 398 off_t to_be_read = stats->stat.st_size - bufoffset; 399 size_t slop = to_be_read % pagesize; 400 off_t aligned_to_be_read = to_be_read + (slop ? pagesize - slop : 0); 401 off_t maxalloc_off = aligned_save + aligned_to_be_read; 402 if (0 <= maxalloc_off && maxalloc_off == (size_t) maxalloc_off) 403 maxalloc = maxalloc_off; 404 } 405 406 /* Grow bufsalloc until it is at least as great as `save'; but 407 if there is an overflow, just grow it to the next page boundary. */ 408 while (bufsalloc < save) 409 if (bufsalloc < bufsalloc * 2) 410 bufsalloc *= 2; 411 else 412 { 413 bufsalloc = aligned_save; 414 break; 415 } 416 417 /* Grow the buffer size to be PREFERRED_SAVE_FACTOR times 418 bufsalloc.... */ 419 newalloc = PREFERRED_SAVE_FACTOR * bufsalloc; 420 if (maxalloc < newalloc) 421 { 422 /* ... except don't grow it more than a pagesize past the 423 file size, as that might cause unnecessary memory 424 exhaustion if the file is large. */ 425 newalloc = maxalloc; 426 bufsalloc = aligned_save; 427 } 428 429 /* Check that the above calculations made progress, which might 430 not occur if there is arithmetic overflow. If there's no 431 progress, or if the new buffer size is larger than the old 432 and buffer reallocation fails, report memory exhaustion. */ 433 if (bufsalloc < save || newalloc < save 434 || (newalloc == save && newalloc != maxalloc) 435 || (bufalloc < newalloc 436 && ! (buffer 437 = page_alloc ((bufalloc = newalloc) + 1, &ubuffer)))) 438 fatal (_("memory exhausted"), 0); 439 } 440 441 bufbeg = buffer + bufsalloc - save; 442 memmove (bufbeg, ubuffer + saved_offset, save); 443 readsize = bufalloc - bufsalloc; 444 445#if defined(HAVE_MMAP) 446 if (bufmapped) 447 { 448 size_t mmapsize = readsize; 449 450 /* Don't mmap past the end of the file; some hosts don't allow this. 451 Use `read' on the last page. */ 452 if (stats->stat.st_size - bufoffset < mmapsize) 453 { 454 mmapsize = stats->stat.st_size - bufoffset; 455 mmapsize -= mmapsize % pagesize; 456 } 457 458 if (mmapsize 459 && (mmap ((caddr_t) (buffer + bufsalloc), mmapsize, 460 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, 461 bufdesc, bufoffset) 462 != (caddr_t) -1)) 463 { 464 /* Do not bother to use madvise with MADV_SEQUENTIAL or 465 MADV_WILLNEED on the mmapped memory. One might think it 466 would help, but it slows us down about 30% on SunOS 4.1. */ 467 fillsize = mmapsize; 468 } 469 else 470 { 471 /* Stop using mmap on this file. Synchronize the file 472 offset. Do not warn about mmap failures. On some hosts 473 (e.g. Solaris 2.5) mmap can fail merely because some 474 other process has an advisory read lock on the file. 475 There's no point alarming the user about this misfeature. */ 476 bufmapped = 0; 477 if (bufoffset != initial_bufoffset 478 && lseek (bufdesc, bufoffset, SEEK_SET) < 0) 479 { 480 error ("lseek", errno); 481 cc = 0; 482 } 483 } 484 } 485#endif /*HAVE_MMAP*/ 486 487 if (! fillsize) 488 { 489 ssize_t bytesread; 490 do 491#if HAVE_LIBZ > 0 492 if (Zflag) 493 bytesread = gzread (gzbufdesc, buffer + bufsalloc, readsize); 494 else 495#endif 496 bytesread = read (bufdesc, buffer + bufsalloc, readsize); 497 while (bytesread < 0 && errno == EINTR); 498 if (bytesread < 0) 499 cc = 0; 500 else 501 fillsize = bytesread; 502 } 503 504 bufoffset += fillsize; 505#if O_BINARY 506 if (fillsize) 507 fillsize = undossify_input (buffer + bufsalloc, fillsize); 508#endif 509 buflim = buffer + bufsalloc + fillsize; 510 return cc; 511} 512 513/* Flags controlling the style of output. */ 514static int always_text; /* Assume the input is always text. */ 515static int out_quiet; /* Suppress all normal output. */ 516static int out_invert; /* Print nonmatching stuff. */ 517static int out_file; /* Print filenames. */ 518static int out_line; /* Print line numbers. */ 519static int out_byte; /* Print byte offsets. */ 520static int out_before; /* Lines of leading context. */ 521static int out_after; /* Lines of trailing context. */ 522static int count_matches; /* Count matching lines. */ 523static int list_files; /* List matching files. */ 524static int no_filenames; /* Suppress file names. */ 525static int suppress_errors; /* Suppress diagnostics. */ 526 527/* Internal variables to keep track of byte count, context, etc. */ 528static off_t totalcc; /* Total character count before bufbeg. */ 529static char *lastnl; /* Pointer after last newline counted. */ 530static char *lastout; /* Pointer after last character output; 531 NULL if no character has been output 532 or if it's conceptually before bufbeg. */ 533static off_t totalnl; /* Total newline count before lastnl. */ 534static int pending; /* Pending lines of output. */ 535static int done_on_match; /* Stop scanning file on first match */ 536 537#if O_BINARY 538# include "dosbuf.c" 539#endif 540 541static void 542nlscan (lim) 543 char *lim; 544{ 545 char *beg; 546 for (beg = lastnl; (beg = memchr (beg, eolbyte, lim - beg)); beg++) 547 totalnl++; 548 lastnl = lim; 549} 550 551static void 552print_offset_sep (pos, sep) 553 off_t pos; 554 int sep; 555{ 556 /* Do not rely on printf to print pos, since off_t may be longer than long, 557 and long long is not portable. */ 558 559 char buf[sizeof pos * CHAR_BIT]; 560 char *p = buf + sizeof buf - 1; 561 *p = sep; 562 563 do 564 *--p = '0' + pos % 10; 565 while ((pos /= 10) != 0); 566 567 fwrite (p, 1, buf + sizeof buf - p, stdout); 568} 569 570static void 571prline (beg, lim, sep) 572 char *beg; 573 char *lim; 574 int sep; 575{ 576 if (out_file) 577 printf ("%s%c", filename, sep & filename_mask); 578 if (out_line) 579 { 580 nlscan (beg); 581 print_offset_sep (++totalnl, sep); 582 lastnl = lim; 583 } 584 if (out_byte) 585 { 586 off_t pos = totalcc + (beg - bufbeg); 587#if O_BINARY 588 pos = dossified_pos (pos); 589#endif 590 print_offset_sep (pos, sep); 591 } 592 fwrite (beg, 1, lim - beg, stdout); 593 if (ferror (stdout)) 594 error (_("writing output"), errno); 595 lastout = lim; 596} 597 598/* Print pending lines of trailing context prior to LIM. */ 599static void 600prpending (lim) 601 char *lim; 602{ 603 char *nl; 604 605 if (!lastout) 606 lastout = bufbeg; 607 while (pending > 0 && lastout < lim) 608 { 609 --pending; 610 if ((nl = memchr (lastout, eolbyte, lim - lastout)) != 0) 611 ++nl; 612 else 613 nl = lim; 614 prline (lastout, nl, '-'); 615 } 616} 617 618/* Print the lines between BEG and LIM. Deal with context crap. 619 If NLINESP is non-null, store a count of lines between BEG and LIM. */ 620static void 621prtext (beg, lim, nlinesp) 622 char *beg; 623 char *lim; 624 int *nlinesp; 625{ 626 static int used; /* avoid printing "--" before any output */ 627 char *bp, *p, *nl; 628 char eol = eolbyte; 629 int i, n; 630 631 if (!out_quiet && pending > 0) 632 prpending (beg); 633 634 p = beg; 635 636 if (!out_quiet) 637 { 638 /* Deal with leading context crap. */ 639 640 bp = lastout ? lastout : bufbeg; 641 for (i = 0; i < out_before; ++i) 642 if (p > bp) 643 do 644 --p; 645 while (p > bp && p[-1] != eol); 646 647 /* We only print the "--" separator if our output is 648 discontiguous from the last output in the file. */ 649 if ((out_before || out_after) && used && p != lastout) 650 puts ("--"); 651 652 while (p < beg) 653 { 654 nl = memchr (p, eol, beg - p); 655 prline (p, nl + 1, '-'); 656 p = nl + 1; 657 } 658 } 659 660 if (nlinesp) 661 { 662 /* Caller wants a line count. */ 663 for (n = 0; p < lim; ++n) 664 { 665 if ((nl = memchr (p, eol, lim - p)) != 0) 666 ++nl; 667 else 668 nl = lim; 669 if (!out_quiet) 670 prline (p, nl, ':'); 671 p = nl; 672 } 673 *nlinesp = n; 674 } 675 else 676 if (!out_quiet) 677 prline (beg, lim, ':'); 678 679 pending = out_quiet ? 0 : out_after; 680 used = 1; 681} 682 683/* Scan the specified portion of the buffer, matching lines (or 684 between matching lines if OUT_INVERT is true). Return a count of 685 lines printed. */ 686static int 687grepbuf (beg, lim) 688 char *beg; 689 char *lim; 690{ 691 int nlines, n; 692 register char *p, *b; 693 char *endp; 694 char eol = eolbyte; 695 696 nlines = 0; 697 p = beg; 698 while ((b = (*execute)(p, lim - p, &endp)) != 0) 699 { 700 /* Avoid matching the empty line at the end of the buffer. */ 701 if (b == lim && ((b > beg && b[-1] == eol) || b == beg)) 702 break; 703 if (!out_invert) 704 { 705 prtext (b, endp, (int *) 0); 706 nlines += 1; 707 if (done_on_match) 708 return nlines; 709 } 710 else if (p < b) 711 { 712 prtext (p, b, &n); 713 nlines += n; 714 } 715 p = endp; 716 } 717 if (out_invert && p < lim) 718 { 719 prtext (p, lim, &n); 720 nlines += n; 721 } 722 return nlines; 723} 724 725/* Search a given file. Normally, return a count of lines printed; 726 but if the file is a directory and we search it recursively, then 727 return -2 if there was a match, and -1 otherwise. */ 728static int 729grep (fd, file, stats) 730 int fd; 731 char const *file; 732 struct stats *stats; 733{ 734 int nlines, i; 735 int not_text; 736 size_t residue, save; 737 char *beg, *lim; 738 char eol = eolbyte; 739 740 if (!reset (fd, file, stats)) 741 return 0; 742 743 if (file && directories == RECURSE_DIRECTORIES 744 && S_ISDIR (stats->stat.st_mode)) 745 { 746 /* Close fd now, so that we don't open a lot of file descriptors 747 when we recurse deeply. */ 748#if HAVE_LIBZ > 0 749 if (Zflag) 750 gzclose(gzbufdesc); 751 else 752#endif 753 if (close (fd) != 0) 754 error (file, errno); 755 return grepdir (file, stats) - 2; 756 } 757 758 totalcc = 0; 759 lastout = 0; 760 totalnl = 0; 761 pending = 0; 762 763 nlines = 0; 764 residue = 0; 765 save = 0; 766 767 if (! fillbuf (save, stats)) 768 { 769 if (! (is_EISDIR (errno, file) && suppress_errors)) 770 error (filename, errno); 771 return nlines; 772 } 773 774 not_text = (! (always_text | out_quiet) 775 && memchr (bufbeg, eol ? '\0' : '\200', buflim - bufbeg)); 776 done_on_match += not_text; 777 out_quiet += not_text; 778 779 for (;;) 780 { 781 lastnl = bufbeg; 782 if (lastout) 783 lastout = bufbeg; 784 if (buflim - bufbeg == save) 785 break; 786 beg = bufbeg + save - residue; 787 for (lim = buflim; lim > beg && lim[-1] != eol; --lim) 788 ; 789 residue = buflim - lim; 790 if (beg < lim) 791 { 792 nlines += grepbuf (beg, lim); 793 if (pending) 794 prpending (lim); 795 if (nlines && done_on_match && !out_invert) 796 goto finish_grep; 797 } 798 i = 0; 799 beg = lim; 800 while (i < out_before && beg > bufbeg && beg != lastout) 801 { 802 ++i; 803 do 804 --beg; 805 while (beg > bufbeg && beg[-1] != eol); 806 } 807 if (beg != lastout) 808 lastout = 0; 809 save = residue + lim - beg; 810 totalcc += buflim - bufbeg - save; 811 if (out_line) 812 nlscan (beg); 813 if (! fillbuf (save, stats)) 814 { 815 if (! (is_EISDIR (errno, file) && suppress_errors)) 816 error (filename, errno); 817 goto finish_grep; 818 } 819 } 820 if (residue) 821 { 822 nlines += grepbuf (bufbeg + save - residue, buflim); 823 if (pending) 824 prpending (buflim); 825 } 826 827 finish_grep: 828 done_on_match -= not_text; 829 out_quiet -= not_text; 830 if ((not_text & ~out_quiet) && nlines != 0) 831 printf (_("Binary file %s matches\n"), filename); 832 return nlines; 833} 834 835static int 836grepfile (file, stats) 837 char const *file; 838 struct stats *stats; 839{ 840 int desc; 841 int count; 842 int status; 843 844 if (! file) 845 { 846 desc = 0; 847 filename = _("(standard input)"); 848 } 849 else 850 { 851 while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR) 852 continue; 853 854 if (desc < 0) 855 { 856 int e = errno; 857 858 if (is_EISDIR (e, file) && directories == RECURSE_DIRECTORIES) 859 { 860 if (stat (file, &stats->stat) != 0) 861 { 862 error (file, errno); 863 return 1; 864 } 865 866 return grepdir (file, stats); 867 } 868 869 if (!suppress_errors) 870 { 871 if (directories == SKIP_DIRECTORIES) 872 switch (e) 873 { 874#ifdef EISDIR 875 case EISDIR: 876 return 1; 877#endif 878 case EACCES: 879 /* When skipping directories, don't worry about 880 directories that can't be opened. */ 881 if (stat (file, &stats->stat) == 0 882 && S_ISDIR (stats->stat.st_mode)) 883 return 1; 884 break; 885 } 886 887 error (file, e); 888 } 889 890 return 1; 891 } 892 893 filename = file; 894 } 895 896#if O_BINARY 897 /* Set input to binary mode. Pipes are simulated with files 898 on DOS, so this includes the case of "foo | grep bar". */ 899 if (!isatty (desc)) 900 SET_BINARY (desc); 901#endif 902 903 count = grep (desc, file, stats); 904 if (count < 0) 905 status = count + 2; 906 else 907 { 908 if (count_matches) 909 { 910 if (out_file) 911 printf ("%s%c", filename, ':' & filename_mask); 912 printf ("%d\n", count); 913 } 914 915 status = !count; 916 if (list_files == 1 - 2 * status) 917 printf ("%s%c", filename, '\n' & filename_mask); 918 919#if HAVE_LIBZ > 0 920 if (Zflag) 921 gzclose(gzbufdesc); 922 else 923#endif 924 if (file) 925 while (close (desc) != 0) 926 if (errno != EINTR) 927 { 928 error (file, errno); 929 break; 930 } 931 } 932 933 return status; 934} 935 936static int 937grepdir (dir, stats) 938 char const *dir; 939 struct stats *stats; 940{ 941 int status = 1; 942 struct stats *ancestor; 943 char *name_space; 944 945 for (ancestor = stats; (ancestor = ancestor->parent) != 0; ) 946 if (ancestor->stat.st_ino == stats->stat.st_ino 947 && ancestor->stat.st_dev == stats->stat.st_dev) 948 { 949 if (!suppress_errors) 950 fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir, 951 _("recursive directory loop")); 952 return 1; 953 } 954 955 name_space = savedir (dir, (unsigned) stats->stat.st_size); 956 957 if (! name_space) 958 { 959 if (errno) 960 { 961 if (!suppress_errors) 962 error (dir, errno); 963 } 964 else 965 fatal (_("Memory exhausted"), 0); 966 } 967 else 968 { 969 size_t dirlen = strlen (dir); 970 int needs_slash = ! (dirlen == FILESYSTEM_PREFIX_LEN (dir) 971 || IS_SLASH (dir[dirlen - 1])); 972 char *file = NULL; 973 char *namep = name_space; 974 struct stats child; 975 child.parent = stats; 976 out_file += !no_filenames; 977 while (*namep) 978 { 979 size_t namelen = strlen (namep); 980 file = xrealloc (file, dirlen + 1 + namelen + 1); 981 strcpy (file, dir); 982 file[dirlen] = '/'; 983 strcpy (file + dirlen + needs_slash, namep); 984 namep += namelen + 1; 985 status &= grepfile (file, &child); 986 } 987 out_file -= !no_filenames; 988 if (file) 989 free (file); 990 free (name_space); 991 } 992 993 return status; 994} 995 996static void 997usage(status) 998int status; 999{ 1000 if (status != 0) 1001 { 1002 fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), prog); 1003 fprintf (stderr, _("Try `%s --help' for more information.\n"), prog); 1004 } 1005 else 1006 { 1007 printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog); 1008 printf (_("\ 1009Search for PATTERN in each FILE or standard input.\n\ 1010Example: %s -i 'hello.*world' menu.h main.c\n\ 1011\n\ 1012Regexp selection and interpretation:\n"), prog); 1013 printf (_("\ 1014 -E, --extended-regexp PATTERN is an extended regular expression\n\ 1015 -F, --fixed-strings PATTERN is a set of newline-separated strings\n\ 1016 -G, --basic-regexp PATTERN is a basic regular expression\n")); 1017 printf (_("\ 1018 -e, --regexp=PATTERN use PATTERN as a regular expression\n\ 1019 -f, --file=FILE obtain PATTERN from FILE\n\ 1020 -i, --ignore-case ignore case distinctions\n\ 1021 -w, --word-regexp force PATTERN to match only whole words\n\ 1022 -x, --line-regexp force PATTERN to match only whole lines\n\ 1023 -z, --null-data a data line ends in 0 byte, not newline\n")); 1024 printf (_("\ 1025\n\ 1026Miscellaneous:\n\ 1027 -s, --no-messages suppress error messages\n\ 1028 -v, --invert-match select non-matching lines\n\ 1029 -V, --version print version information and exit\n\ 1030 --help display this help and exit\n\ 1031 -Z, --decompress decompress input before searching (HAVE_LIBZ=1)\n\ 1032 --mmap use memory-mapped input if possible\n")); 1033 printf (_("\ 1034\n\ 1035Output control:\n\ 1036 -b, --byte-offset print the byte offset with output lines\n\ 1037 -n, --line-number print line number with output lines\n\ 1038 -H, --with-filename print the filename for each match\n\ 1039 -h, --no-filename suppress the prefixing filename on output\n\ 1040 -q, --quiet, --silent suppress all normal output\n\ 1041 -a, --text do not suppress binary output\n\ 1042 -d, --directories=ACTION how to handle directories\n\ 1043 ACTION is 'read', 'recurse', or 'skip'.\n\ 1044 -r, --recursive equivalent to --directories=recurse.\n\ 1045 -L, --files-without-match only print FILE names containing no match\n\ 1046 -l, --files-with-matches only print FILE names containing matches\n\ 1047 -c, --count only print a count of matching lines per FILE\n\ 1048 --null print 0 byte after FILE name\n")); 1049 printf (_("\ 1050\n\ 1051Context control:\n\ 1052 -B, --before-context=NUM print NUM lines of leading context\n\ 1053 -A, --after-context=NUM print NUM lines of trailing context\n\ 1054 -C, --context[=NUM] print NUM (default 2) lines of output context\n\ 1055 unless overridden by -A or -B\n\ 1056 -NUM same as --context=NUM\n\ 1057 -U, --binary do not strip CR characters at EOL (MSDOS)\n\ 1058 -u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\ 1059\n\ 1060`egrep' means `grep -E'. `fgrep' means `grep -F'.\n\ 1061With no FILE, or when FILE is -, read standard input. If less than\n\ 1062two FILEs given, assume -h. Exit status is 0 if match, 1 if no match,\n\ 1063and 2 if trouble.\n")); 1064 printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n")); 1065 } 1066 exit (status); 1067} 1068 1069/* Set the matcher to M, reporting any conflicts. */ 1070static void 1071setmatcher (m) 1072 char const *m; 1073{ 1074 if (matcher && strcmp (matcher, m) != 0) 1075 fatal (_("conflicting matchers specified"), 0); 1076 matcher = m; 1077} 1078 1079/* Go through the matchers vector and look for the specified matcher. 1080 If we find it, install it in compile and execute, and return 1. */ 1081static int 1082install_matcher (name) 1083 char const *name; 1084{ 1085 int i; 1086#ifdef HAVE_SETRLIMIT 1087 struct rlimit rlim; 1088#endif 1089 1090 for (i = 0; matchers[i].name; ++i) 1091 if (strcmp (name, matchers[i].name) == 0) 1092 { 1093 compile = matchers[i].compile; 1094 execute = matchers[i].execute; 1095#if HAVE_SETRLIMIT && defined(RLIMIT_STACK) 1096 /* I think every platform needs to do this, so that regex.c 1097 doesn't oveflow the stack. The default value of 1098 `re_max_failures' is too large for some platforms: it needs 1099 more than 3MB-large stack. 1100 1101 The test for HAVE_SETRLIMIT should go into `configure'. */ 1102 if (!getrlimit (RLIMIT_STACK, &rlim)) 1103 { 1104 long newlim; 1105 extern long int re_max_failures; /* from regex.c */ 1106 1107 /* Approximate the amount regex.c needs, plus some more. */ 1108 newlim = re_max_failures * 2 * 20 * sizeof (char *); 1109 if (newlim > rlim.rlim_max) 1110 { 1111 newlim = rlim.rlim_max; 1112 re_max_failures = newlim / (2 * 20 * sizeof (char *)); 1113 } 1114 if (rlim.rlim_cur < newlim) 1115 rlim.rlim_cur = newlim; 1116 1117 setrlimit (RLIMIT_STACK, &rlim); 1118 } 1119#endif 1120 return 1; 1121 } 1122 return 0; 1123} 1124 1125/* Find the white-space-separated options specified by OPTIONS, and 1126 using BUF to store copies of these options, set ARGV[0], ARGV[1], 1127 etc. to the option copies. Return the number N of options found. 1128 Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0] 1129 etc. Backslash can be used to escape whitespace (and backslashes). */ 1130static int 1131prepend_args (options, buf, argv) 1132 char const *options; 1133 char *buf; 1134 char **argv; 1135{ 1136 char const *o = options; 1137 char *b = buf; 1138 int n = 0; 1139 1140 for (;;) 1141 { 1142 while (ISSPACE ((unsigned char) *o)) 1143 o++; 1144 if (!*o) 1145 return n; 1146 if (argv) 1147 argv[n] = b; 1148 n++; 1149 1150 do 1151 if ((*b++ = *o++) == '\\' && *o) 1152 b[-1] = *o++; 1153 while (*o && ! ISSPACE ((unsigned char) *o)); 1154 1155 *b++ = '\0'; 1156 } 1157} 1158 1159/* Prepend the whitespace-separated options in OPTIONS to the argument 1160 vector of a main program with argument count *PARGC and argument 1161 vector *PARGV. */ 1162static void 1163prepend_default_options (options, pargc, pargv) 1164 char const *options; 1165 int *pargc; 1166 char ***pargv; 1167{ 1168 if (options) 1169 { 1170 char *buf = xmalloc (strlen (options) + 1); 1171 int prepended = prepend_args (options, buf, (char **) NULL); 1172 int argc = *pargc; 1173 char * const *argv = *pargv; 1174 char **pp = (char **) xmalloc ((prepended + argc + 1) * sizeof *pp); 1175 *pargc = prepended + argc; 1176 *pargv = pp; 1177 *pp++ = *argv++; 1178 pp += prepend_args (options, buf, pp); 1179 while ((*pp++ = *argv++)) 1180 continue; 1181 } 1182} 1183 1184int 1185main (argc, argv) 1186 int argc; 1187 char *argv[]; 1188{ 1189 char *keys; 1190 size_t keycc, oldcc, keyalloc; 1191 int with_filenames; 1192 int opt, cc, status; 1193 unsigned digit_args_val, default_context; 1194 FILE *fp; 1195 extern char *optarg; 1196 extern int optind; 1197 1198 initialize_main (&argc, &argv); 1199 prog = argv[0]; 1200 if (prog && strrchr (prog, '/')) 1201 prog = strrchr (prog, '/') + 1; 1202 1203#if HAVE_LIBZ > 0 1204 if (prog[0] == 'z') { 1205 Zflag = 1; 1206 ++prog; 1207 } 1208#endif 1209 1210#if defined(__MSDOS__) || defined(_WIN32) 1211 /* DOS and MS-Windows use backslashes as directory separators, and usually 1212 have an .exe suffix. They also have case-insensitive filesystems. */ 1213 if (prog) 1214 { 1215 char *p = prog; 1216 char *bslash = strrchr (argv[0], '\\'); 1217 1218 if (bslash && bslash >= prog) /* for mixed forward/backslash case */ 1219 prog = bslash + 1; 1220 else if (prog == argv[0] 1221 && argv[0][0] && argv[0][1] == ':') /* "c:progname" */ 1222 prog = argv[0] + 2; 1223 1224 /* Collapse the letter-case, so `strcmp' could be used hence. */ 1225 for ( ; *p; p++) 1226 if (*p >= 'A' && *p <= 'Z') 1227 *p += 'a' - 'A'; 1228 1229 /* Remove the .exe extension, if any. */ 1230 if ((p = strrchr (prog, '.')) && strcmp (p, ".exe") == 0) 1231 *p = '\0'; 1232 } 1233#endif 1234 1235 keys = NULL; 1236 keycc = 0; 1237 with_filenames = 0; 1238 eolbyte = '\n'; 1239 filename_mask = ~0; 1240 1241 /* The value -1 means to use DEFAULT_CONTEXT. */ 1242 out_after = out_before = -1; 1243 /* Default before/after context: chaged by -C/-NUM options */ 1244 default_context = 0; 1245 /* Accumulated value of individual digits in a -NUM option */ 1246 digit_args_val = 0; 1247 1248 1249/* Internationalization. */ 1250#if HAVE_SETLOCALE 1251 setlocale (LC_ALL, ""); 1252#endif 1253#if ENABLE_NLS 1254 bindtextdomain (PACKAGE, LOCALEDIR); 1255 textdomain (PACKAGE); 1256#endif 1257 1258 prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); 1259 1260 while ((opt = getopt_long (argc, argv, short_options, long_options, NULL)) 1261 != -1) 1262 switch (opt) 1263 { 1264 case '0': 1265 case '1': 1266 case '2': 1267 case '3': 1268 case '4': 1269 case '5': 1270 case '6': 1271 case '7': 1272 case '8': 1273 case '9': 1274 digit_args_val = 10 * digit_args_val + opt - '0'; 1275 default_context = digit_args_val; 1276 break; 1277 case 'A': 1278 if (optarg) 1279 { 1280 if (ck_atoi (optarg, &out_after)) 1281 fatal (_("invalid context length argument"), 0); 1282 } 1283 break; 1284 case 'B': 1285 if (optarg) 1286 { 1287 if (ck_atoi (optarg, &out_before)) 1288 fatal (_("invalid context length argument"), 0); 1289 } 1290 break; 1291 case 'C': 1292 /* Set output match context, but let any explicit leading or 1293 trailing amount specified with -A or -B stand. */ 1294 if (optarg) 1295 { 1296 if (ck_atoi (optarg, &default_context)) 1297 fatal (_("invalid context length argument"), 0); 1298 } 1299 else 1300 default_context = 2; 1301 break; 1302 case 'E': 1303 setmatcher ("egrep"); 1304 break; 1305 case 'F': 1306 setmatcher ("fgrep"); 1307 break; 1308 case 'G': 1309 setmatcher ("grep"); 1310 break; 1311 case 'H': 1312 with_filenames = 1; 1313 break; 1314 case 'U': 1315#if O_BINARY 1316 dos_use_file_type = DOS_BINARY; 1317#endif 1318 break; 1319 case 'u': 1320#if O_BINARY 1321 dos_report_unix_offset = 1; 1322#endif 1323 break; 1324 case 'V': 1325 show_version = 1; 1326 break; 1327 case 'X': 1328 setmatcher (optarg); 1329 break; 1330 case 'a': 1331 always_text = 1; 1332 break; 1333 case 'b': 1334 out_byte = 1; 1335 break; 1336 case 'c': 1337 out_quiet = 1; 1338 count_matches = 1; 1339 break; 1340 case 'd': 1341 if (strcmp (optarg, "read") == 0) 1342 directories = READ_DIRECTORIES; 1343 else if (strcmp (optarg, "skip") == 0) 1344 directories = SKIP_DIRECTORIES; 1345 else if (strcmp (optarg, "recurse") == 0) 1346 directories = RECURSE_DIRECTORIES; 1347 else 1348 fatal (_("unknown directories method"), 0); 1349 break; 1350 case 'e': 1351 cc = strlen (optarg); 1352 keys = xrealloc (keys, keycc + cc + 1); 1353 strcpy (&keys[keycc], optarg); 1354 keycc += cc; 1355 keys[keycc++] = '\n'; 1356 break; 1357 case 'f': 1358 fp = strcmp (optarg, "-") != 0 ? fopen (optarg, "r") : stdin; 1359 if (!fp) 1360 fatal (optarg, errno); 1361 for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2) 1362 ; 1363 keys = xrealloc (keys, keyalloc); 1364 oldcc = keycc; 1365 while (!feof (fp) 1366 && (cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) > 0) 1367 { 1368 keycc += cc; 1369 if (keycc == keyalloc - 1) 1370 keys = xrealloc (keys, keyalloc *= 2); 1371 } 1372 if (fp != stdin) 1373 fclose(fp); 1374 /* Append final newline if file ended in non-newline. */ 1375 if (oldcc != keycc && keys[keycc - 1] != '\n') 1376 keys[keycc++] = '\n'; 1377 break; 1378 case 'h': 1379 no_filenames = 1; 1380 break; 1381 case 'i': 1382 case 'y': /* For old-timers . . . */ 1383 match_icase = 1; 1384 break; 1385 case 'L': 1386 /* Like -l, except list files that don't contain matches. 1387 Inspired by the same option in Hume's gre. */ 1388 out_quiet = 1; 1389 list_files = -1; 1390 done_on_match = 1; 1391 break; 1392 case 'l': 1393 out_quiet = 1; 1394 list_files = 1; 1395 done_on_match = 1; 1396 break; 1397 case 'n': 1398 out_line = 1; 1399 break; 1400 case 'q': 1401 done_on_match = 1; 1402 out_quiet = 1; 1403 break; 1404 case 'R': 1405 case 'r': 1406 directories = RECURSE_DIRECTORIES; 1407 break; 1408 case 's': 1409 suppress_errors = 1; 1410 break; 1411 case 'v': 1412 out_invert = 1; 1413 break; 1414 case 'w': 1415 match_words = 1; 1416 break; 1417 case 'x': 1418 match_lines = 1; 1419 break; 1420 case 'Z': 1421#if HAVE_LIBZ > 0 1422 Zflag = 1; 1423#else 1424 filename_mask = 0; 1425#endif 1426 break; 1427 case 'z': 1428 eolbyte = '\0'; 1429 break; 1430 case 0: 1431 /* long options */ 1432 break; 1433 default: 1434 usage (2); 1435 break; 1436 } 1437 1438 if (out_after < 0) 1439 out_after = default_context; 1440 if (out_before < 0) 1441 out_before = default_context; 1442 1443 if (! matcher) 1444 matcher = prog; 1445 1446 if (show_version) 1447 { 1448 printf (_("%s (GNU grep) %s\n"), matcher, VERSION); 1449 printf ("\n"); 1450 printf (_("\ 1451Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n")); 1452 printf (_("\ 1453This is free software; see the source for copying conditions. There is NO\n\ 1454warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n")); 1455 printf ("\n"); 1456 exit (0); 1457 } 1458 1459 if (show_help) 1460 usage (0); 1461 1462 if (keys) 1463 { 1464 if (keycc == 0) 1465 /* No keys were specified (e.g. -f /dev/null). Match nothing. */ 1466 out_invert ^= 1; 1467 else 1468 /* Strip trailing newline. */ 1469 --keycc; 1470 } 1471 else 1472 if (optind < argc) 1473 { 1474 keys = argv[optind++]; 1475 keycc = strlen (keys); 1476 } 1477 else 1478 usage (2); 1479 1480 if (!install_matcher (matcher) && !install_matcher ("default")) 1481 abort (); 1482 1483 (*compile)(keys, keycc); 1484 1485 if ((argc - optind > 1 && !no_filenames) || with_filenames) 1486 out_file = 1; 1487 1488#if O_BINARY 1489 /* Output is set to binary mode because we shouldn't convert 1490 NL to CR-LF pairs, especially when grepping binary files. */ 1491 if (!isatty (1)) 1492 SET_BINARY (1); 1493#endif 1494 1495 1496 if (optind < argc) 1497 { 1498 status = 1; 1499 do 1500 { 1501 char *file = argv[optind]; 1502 status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file, 1503 &stats_base); 1504 } 1505 while ( ++optind < argc); 1506 } 1507 else 1508 status = grepfile ((char *) NULL, &stats_base); 1509 1510 if (fclose (stdout) == EOF) 1511 error (_("writing output"), errno); 1512 1513 exit (errseen ? 2 : status); 1514} 1515