grep.c revision 55404
1/* grep.c - main driver file for grep.
2   Copyright (C) 1992, 1997, 1998, 1999 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 2, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software
16   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17   02111-1307, USA.  */
18
19/* Written July 1992 by Mike Haertel.  */
20/* Builtin decompression 1997 by Wolfram Schneider <wosch@FreeBSD.org>.  */
21
22/* $FreeBSD: head/gnu/usr.bin/grep/grep.c 55404 2000-01-04 10:32:55Z ru $ */
23
24#ifdef HAVE_CONFIG_H
25# include <config.h>
26#endif
27#include <sys/types.h>
28#include <sys/stat.h>
29#if defined(HAVE_MMAP)
30# include <sys/mman.h>
31#endif
32#if defined(HAVE_SETRLIMIT)
33# include <sys/time.h>
34# include <sys/resource.h>
35#endif
36#include <stdio.h>
37#include "system.h"
38#include "getopt.h"
39#include "getpagesize.h"
40#include "grep.h"
41#include "savedir.h"
42
/* Yield the larger of A and B.  Whichever argument is selected is
   evaluated twice, so both arguments must be free of side effects.  */
#undef MAX
#define MAX(A,B) ((A) > (B) ? (A) : (B))
45
/* One link in a chain of stat buffers.  Each link holds the stat data
   of one file or directory; PARENT points at the link of the directory
   through which it was reached, so the chain can be walked upward when
   checking for directory loops.  */
struct stats
{
  struct stats *parent;		/* Link of the enclosing directory.  */
  struct stat stat;		/* stat data for this entry.  */
};
51
/* base of chain of stat buffers, used to detect directory loops */
static struct stats stats_base;

/* if non-zero, display usage information and exit.
   Set through the "help" entry of long_options.  */
static int show_help;

/* If non-zero, print the version on standard output and exit.  */
static int show_version;

/* If nonzero, use mmap if possible.
   Set through the "mmap" entry of long_options.  */
static int mmap_option;

/* Mask ANDed with the separator byte that is printed after a file
   name.  If zero, output nulls after filenames.  */
static int filename_mask;

/* Short options, in getopt notation: a letter followed by ':' takes a
   required argument, and one followed by '::' an optional argument.  */
static char const short_options[] =
"0123456789A:B:C::EFGHUVX:abcd:e:f:hiLlnqrsuvwxyZz";
70
/* Long options equivalences.  An entry whose flag pointer is NULL maps
   the long name to the short option character in its last field; an
   entry with a non-NULL flag pointer has that variable set to the
   value in its last field instead.  The all-zero entry ends the table.  */
static struct option long_options[] =
{
  {"after-context", required_argument, NULL, 'A'},
  {"basic-regexp", no_argument, NULL, 'G'},
  {"before-context", required_argument, NULL, 'B'},
  {"byte-offset", no_argument, NULL, 'b'},
  {"context", optional_argument, NULL, 'C'},
  {"count", no_argument, NULL, 'c'},
  {"directories", required_argument, NULL, 'd'},
  {"extended-regexp", no_argument, NULL, 'E'},
  {"file", required_argument, NULL, 'f'},
  {"files-with-matches", no_argument, NULL, 'l'},
  {"files-without-match", no_argument, NULL, 'L'},
  {"fixed-regexp", no_argument, NULL, 'F'},
  {"fixed-strings", no_argument, NULL, 'F'},
  {"help", no_argument, &show_help, 1},
  {"ignore-case", no_argument, NULL, 'i'},
  {"line-number", no_argument, NULL, 'n'},
  {"line-regexp", no_argument, NULL, 'x'},
  {"mmap", no_argument, &mmap_option, 1},
  {"no-filename", no_argument, NULL, 'h'},
  {"no-messages", no_argument, NULL, 's'},
#if HAVE_LIBZ > 0
  /* With zlib support 'Z' is taken by --decompress, so --null clears
     filename_mask directly rather than going through a short option.  */
  {"decompress", no_argument, NULL, 'Z'},
  {"null", no_argument, &filename_mask, 0},
#else
  {"null", no_argument, NULL, 'Z'},
#endif
  {"null-data", no_argument, NULL, 'z'},
  {"quiet", no_argument, NULL, 'q'},
  {"recursive", no_argument, NULL, 'r'},
  {"regexp", required_argument, NULL, 'e'},
  {"invert-match", no_argument, NULL, 'v'},
  {"silent", no_argument, NULL, 'q'},
  {"text", no_argument, NULL, 'a'},
  {"binary", no_argument, NULL, 'U'},
  {"unix-byte-offsets", no_argument, NULL, 'u'},
  {"version", no_argument, NULL, 'V'},
  {"with-filename", no_argument, NULL, 'H'},
  {"word-regexp", no_argument, NULL, 'w'},
  {0, 0, 0, 0}
};
114
/* Define flags declared in grep.h. */
char const *matcher;		/* Name of the selected matcher; see setmatcher.  */
int match_icase;		/* Presumably set by -i (--ignore-case); set outside this section.  */
int match_words;		/* Presumably set by -w (--word-regexp); set outside this section.  */
int match_lines;		/* Presumably set by -x (--line-regexp); set outside this section.  */
unsigned char eolbyte;		/* Byte that terminates a line of input.  */

/* For error messages. */
static char *prog;		/* Program name prefixed to diagnostics.  */
static char const *filename;	/* Name of the file being searched.  */
static int errseen;		/* Set to 1 by error () once anything is reported.  */

/* How to handle directories.  */
static enum
  {
    READ_DIRECTORIES,		/* Neither recursed into nor skipped.  */
    RECURSE_DIRECTORIES,	/* Search the files inside a directory.  */
    SKIP_DIRECTORIES		/* Silently pass over directories.  */
  } directories;
134
/* Forward declarations of the file-local functions.  PARAMS wraps each
   parameter list; the definitions below are written in
   old-style (K&R) form.  */
static int  ck_atoi PARAMS ((char const *, int *));
static void usage PARAMS ((int)) __attribute__((noreturn));
static void error PARAMS ((const char *, int));
static void setmatcher PARAMS ((char const *));
static int  install_matcher PARAMS ((char const *));
static int  prepend_args PARAMS ((char const *, char *, char **));
static void prepend_default_options PARAMS ((char const *, int *, char ***));
static char *page_alloc PARAMS ((size_t, char **));
static int  reset PARAMS ((int, char const *, struct stats *));
static int  fillbuf PARAMS ((size_t, struct stats *));
static int  grepbuf PARAMS ((char *, char *));
static void prtext PARAMS ((char *, char *, int *));
static void prpending PARAMS ((char *));
static void prline PARAMS ((char *, char *, int));
static void print_offset_sep PARAMS ((off_t, int));
static void nlscan PARAMS ((char *));
static int  grep PARAMS ((int, char const *, struct stats *));
static int  grepdir PARAMS ((char const *, struct stats *));
static int  grepfile PARAMS ((char const *, struct stats *));
#if O_BINARY
static inline int undossify_input PARAMS ((register char *, size_t));
#endif

/* Functions we'll use to search.  Both pointers are filled in by
   install_matcher from the chosen entry of the matchers vector.  */
static void (*compile) PARAMS ((char *, size_t));
static char *(*execute) PARAMS ((char *, size_t, char **));
161
162/* Print a message and possibly an error string.  Remember
163   that something awful happened. */
164static void
165error (mesg, errnum)
166     const char *mesg;
167     int errnum;
168{
169  if (errnum)
170    fprintf (stderr, "%s: %s: %s\n", prog, mesg, strerror (errnum));
171  else
172    fprintf (stderr, "%s: %s\n", prog, mesg);
173  errseen = 1;
174}
175
/* Report MESG (and ERRNUM, if nonzero) exactly as error () does, then
   terminate the program with exit status 2.  Never returns.  */
void
fatal (mesg, errnum)
     const char *mesg;
     int errnum;
{
  int const trouble_status = 2;

  error (mesg, errnum);
  exit (trouble_status);
}
185
/* Allocate SIZE bytes with malloc and return the block.  A null
   result is a fatal "memory exhausted" error unless SIZE is zero, in
   which case whatever malloc returned is passed back.  */
char *
xmalloc (size)
     size_t size;
{
  char *block = malloc (size);

  if (block == NULL && size != 0)
    fatal (_("memory exhausted"), 0);
  return block;
}
198
/* Resize the block PTR to SIZE bytes and return the new block.  A null
   PTR is handed to malloc rather than realloc.  A null result is a
   fatal "memory exhausted" error unless SIZE is zero.  */
char *
xrealloc (ptr, size)
     char *ptr;
     size_t size;
{
  char *block = ptr ? realloc (ptr, size) : malloc (size);

  if (block == NULL && size != 0)
    fatal (_("memory exhausted"), 0);
  return block;
}
215
/* Convert STR, a string of decimal digits, to a nonnegative integer
   and store the result in *OUT.  Return 0 on success.  Return -1,
   leaving *OUT untouched, if STR contains anything other than the
   digits 0-9 or if its value does not fit in an int.  An empty STR
   converts to 0.

   The conversion reads STR itself rather than the global `optarg', so
   the result is right for any caller, and it is done by hand because
   atoi has undefined behavior when the value overflows.  */
static int
ck_atoi (str, out)
     char const *str;
     int *out;
{
  char const *p;
  int value = 0;

  for (p = str; *p; p++)
    {
      int digit;

      if (*p < '0' || *p > '9')
	return -1;
      digit = *p - '0';

      /* value * 10 + digit must not exceed INT_MAX.  */
      if (value > (INT_MAX - digit) / 10)
	return -1;
      value = value * 10 + digit;
    }

  *out = value;
  return 0;
}
231
232
/* Hairy buffering mechanism for grep.  The intent is to keep
   all reads aligned on a page boundary and multiples of the
   page size.  The buffer is laid out as a save region of bufsalloc
   bytes, holding data carried over from the previous fill, followed
   by the region that new input is read (or mapped) into.  */

static char *ubuffer;		/* Unaligned base of buffer; the pointer
				   handed to and returned by page_alloc. */
static char *buffer;		/* Base of buffer (page-aligned). */
static size_t bufsalloc;	/* Allocated size of buffer save region. */
static size_t bufalloc;		/* Total buffer size. */
#define PREFERRED_SAVE_FACTOR 5	/* Preferred value of bufalloc / bufsalloc.  */
static int bufdesc;		/* File descriptor. */
static char *bufbeg;		/* Beginning of user-visible stuff. */
static char *buflim;		/* Limit of user-visible stuff. */
static size_t pagesize;		/* alignment of memory pages; zero until
				   reset () has run once */
static off_t bufoffset;		/* Read offset; defined on regular files.  */

#if defined(HAVE_MMAP)
static int bufmapped;		/* True if buffer is memory-mapped.  */
static off_t initial_bufoffset;	/* Initial value of bufoffset. */
#endif

#if HAVE_LIBZ > 0
#include <zlib.h>
static gzFile gzbufdesc;	/* zlib file descriptor. */
static int Zflag;		/* uncompress before searching. */
#endif
258
/* Return VAL aligned to the next multiple of ALIGNMENT.  VAL can be
   an integer or a pointer.  Both args must be free of side effects,
   since each is evaluated more than once.  A VAL that is already a
   multiple of ALIGNMENT is returned unchanged; otherwise VAL is
   rounded up.  */
#define ALIGN_TO(val, alignment) \
  ((size_t) (val) % (alignment) == 0 \
   ? (val) \
   : (val) + ((alignment) - (size_t) (val) % (alignment)))
265
266/* Return the address of a page-aligned buffer of size SIZE,
267   reallocating it from *UP.  Set *UP to the newly allocated (but
268   possibly unaligned) buffer used to build the aligned buffer.  To
269   free the buffer, free (*UP).  */
270static char *
271page_alloc (size, up)
272     size_t size;
273     char **up;
274{
275  size_t asize = size + pagesize - 1;
276  if (size <= asize)
277    {
278      char *p = *up ? realloc (*up, asize) : malloc (asize);
279      if (p)
280	{
281	  *up = p;
282	  return ALIGN_TO (p, pagesize);
283	}
284    }
285  return NULL;
286}
287
/* Reset the buffer for a new file, returning zero if we should skip it.
   Initialize on the first time through.

   FD is the descriptor that will be read.  FILE is the name it was
   opened under, or null; with a null FILE the current offset of FD is
   obtained with lseek rather than assumed to be zero.  STATS->stat is
   filled in with the fstat data of FD.  */
static int
reset (fd, file, stats)
     int fd;
     char const *file;
     struct stats *stats;
{
  /* A nonzero pagesize means the buffer already exists; only the save
     region size is recomputed from the current total size.  */
  if (pagesize)
    bufsalloc = ALIGN_TO (bufalloc / PREFERRED_SAVE_FACTOR, pagesize);
  else
    {
      size_t ubufsalloc;
      pagesize = getpagesize ();
      if (pagesize == 0)
	abort ();
#ifndef BUFSALLOC
      ubufsalloc = MAX (8192, pagesize);
#else
      ubufsalloc = BUFSALLOC;
#endif
      bufsalloc = ALIGN_TO (ubufsalloc, pagesize);
      bufalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
      /* The 1 byte of overflow is a kludge for dfaexec(), which
	 inserts a sentinel newline at the end of the buffer
	 being searched.  There's gotta be a better way... */
      /* The first three tests catch arithmetic overflow in the size
	 computations above.  */
      if (bufsalloc < ubufsalloc
	  || bufalloc / PREFERRED_SAVE_FACTOR != bufsalloc
	  || bufalloc + 1 < bufalloc
	  || ! (buffer = page_alloc (bufalloc + 1, &ubuffer)))
	fatal (_("memory exhausted"), 0);
    }
#if HAVE_LIBZ > 0
  if (Zflag)
    {
    /* Wrap FD in a zlib stream; any gzdopen failure is reported as
       memory exhaustion.  */
    gzbufdesc = gzdopen(fd, "r");
    if (gzbufdesc == NULL)
      fatal(_("memory exhausted"), 0);
    }
#endif

  /* Start with an empty buffer.  */
  buflim = buffer;
  bufdesc = fd;

  if (fstat (fd, &stats->stat) != 0)
    {
      error ("fstat", errno);
      return 0;
    }
  if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
    return 0;
  if (
#if HAVE_LIBZ > 0
      Zflag ||
#endif
      S_ISREG (stats->stat.st_mode))
    {
      if (file)
	bufoffset = 0;
      else
	{
	  bufoffset = lseek (fd, 0, SEEK_CUR);
	  if (bufoffset < 0)
	    {
	      error ("lseek", errno);
	      return 0;
	    }
	}
#ifdef HAVE_MMAP
      /* mmap is attempted only when requested and when the starting
	 offset is a multiple of the page size.  */
      initial_bufoffset = bufoffset;
      bufmapped = mmap_option && bufoffset % pagesize == 0;
#endif
    }
  else
    {
#ifdef HAVE_MMAP
      bufmapped = 0;
#endif
    }
  return 1;
}
369
/* Read new stuff into the buffer, saving the specified
   amount of old stuff.  When we're done, 'bufbeg' points
   to the beginning of the buffer contents, and 'buflim'
   points just after the end.  Return zero if there's an error.

   SAVE is the number of bytes immediately before the old 'buflim'
   to keep; they end up just before the newly read data.  STATS
   supplies the file type and size, used to bound buffer growth and
   the extent of any mmap.  */
static int
fillbuf (save, stats)
     size_t save;
     struct stats *stats;
{
  size_t fillsize = 0;
  int cc = 1;
  size_t readsize;

  /* Offset from start of unaligned buffer to start of old stuff
     that we want to save.  It is kept relative to ubuffer because
     the buffer may be reallocated below.  */
  size_t saved_offset = buflim - ubuffer - save;

  if (bufsalloc < save)
    {
      size_t aligned_save = ALIGN_TO (save, pagesize);
      size_t maxalloc = (size_t) -1;
      size_t newalloc;

      if (S_ISREG (stats->stat.st_mode))
	{
	  /* Calculate an upper bound on how much memory we should allocate.
	     We can't use ALIGN_TO here, since off_t might be longer than
	     size_t.  Watch out for arithmetic overflow.  */
	  off_t to_be_read = stats->stat.st_size - bufoffset;
	  size_t slop = to_be_read % pagesize;
	  off_t aligned_to_be_read = to_be_read + (slop ? pagesize - slop : 0);
	  off_t maxalloc_off = aligned_save + aligned_to_be_read;
	  if (0 <= maxalloc_off && maxalloc_off == (size_t) maxalloc_off)
	    maxalloc = maxalloc_off;
	}

      /* Grow bufsalloc until it is at least as great as `save'; but
	 if there is an overflow, just grow it to the next page boundary.  */
      while (bufsalloc < save)
	if (bufsalloc < bufsalloc * 2)
	  bufsalloc *= 2;
	else
	  {
	    bufsalloc = aligned_save;
	    break;
	  }

      /* Grow the buffer size to be PREFERRED_SAVE_FACTOR times
	 bufsalloc....  */
      newalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
      if (maxalloc < newalloc)
	{
	  /* ... except don't grow it more than a pagesize past the
	     file size, as that might cause unnecessary memory
	     exhaustion if the file is large.  */
	  newalloc = maxalloc;
	  bufsalloc = aligned_save;
	}

      /* Check that the above calculations made progress, which might
         not occur if there is arithmetic overflow.  If there's no
	 progress, or if the new buffer size is larger than the old
	 and buffer reallocation fails, report memory exhaustion.  */
      if (bufsalloc < save || newalloc < save
	  || (newalloc == save && newalloc != maxalloc)
	  || (bufalloc < newalloc
	      && ! (buffer
		    = page_alloc ((bufalloc = newalloc) + 1, &ubuffer))))
	fatal (_("memory exhausted"), 0);
    }

  /* Move the saved bytes so that they end exactly where the newly
     read data will begin.  memmove is used since the regions may
     overlap.  */
  bufbeg = buffer + bufsalloc - save;
  memmove (bufbeg, ubuffer + saved_offset, save);
  readsize = bufalloc - bufsalloc;

#if defined(HAVE_MMAP)
  if (bufmapped)
    {
      size_t mmapsize = readsize;

      /* Don't mmap past the end of the file; some hosts don't allow this.
	 Use `read' on the last page.  */
      if (stats->stat.st_size - bufoffset < mmapsize)
	{
	  mmapsize = stats->stat.st_size - bufoffset;
	  mmapsize -= mmapsize % pagesize;
	}

      if (mmapsize
	  && (mmap ((caddr_t) (buffer + bufsalloc), mmapsize,
		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
		    bufdesc, bufoffset)
	      != (caddr_t) -1))
	{
	  /* Do not bother to use madvise with MADV_SEQUENTIAL or
	     MADV_WILLNEED on the mmapped memory.  One might think it
	     would help, but it slows us down about 30% on SunOS 4.1.  */
	  fillsize = mmapsize;
	}
      else
	{
	  /* Stop using mmap on this file.  Synchronize the file
	     offset.  Do not warn about mmap failures.  On some hosts
	     (e.g. Solaris 2.5) mmap can fail merely because some
	     other process has an advisory read lock on the file.
	     There's no point alarming the user about this misfeature.  */
	  bufmapped = 0;
	  if (bufoffset != initial_bufoffset
	      && lseek (bufdesc, bufoffset, SEEK_SET) < 0)
	    {
	      error ("lseek", errno);
	      cc = 0;
	    }
	}
    }
#endif /*HAVE_MMAP*/

  /* Nothing was mapped (or mmap is not in use): fall back to reading,
     retrying whenever the call is interrupted by a signal.  */
  if (! fillsize)
    {
      ssize_t bytesread;
      do
#if HAVE_LIBZ > 0
	if (Zflag)
	  bytesread = gzread (gzbufdesc, buffer + bufsalloc, readsize);
	else
#endif
	  bytesread = read (bufdesc, buffer + bufsalloc, readsize);
      while (bytesread < 0 && errno == EINTR);
      if (bytesread < 0)
	cc = 0;
      else
	fillsize = bytesread;
    }

  /* bufoffset advances by the raw byte count, before undossify_input
     possibly changes fillsize.  */
  bufoffset += fillsize;
#if O_BINARY
  if (fillsize)
    fillsize = undossify_input (buffer + bufsalloc, fillsize);
#endif
  buflim = buffer + bufsalloc + fillsize;
  return cc;
}
512
/* Flags controlling the style of output. */
static int always_text;		/* Assume the input is always text. */
static int out_quiet;		/* Suppress all normal output. */
static int out_invert;		/* Print nonmatching stuff. */
static int out_file;		/* Print filenames. */
static int out_line;		/* Print line numbers. */
static int out_byte;		/* Print byte offsets. */
static int out_before;		/* Lines of leading context. */
static int out_after;		/* Lines of trailing context. */
static int count_matches;	/* Count matching lines.  */
static int list_files;		/* List matching files.  grepfile prints
				   the name when this is 1 and the file
				   matched, or -1 and it did not.  */
static int no_filenames;	/* Suppress file names.  */
static int suppress_errors;	/* Suppress diagnostics.  */

/* Internal variables to keep track of byte count, context, etc. */
static off_t totalcc;		/* Total character count before bufbeg. */
static char *lastnl;		/* Pointer after last newline counted. */
static char *lastout;		/* Pointer after last character output;
				   NULL if no character has been output
				   or if it's conceptually before bufbeg. */
static off_t totalnl;		/* Total newline count before lastnl. */
static int pending;		/* Pending lines of output. */
static int done_on_match;		/* Stop scanning file on first match */

/* On hosts where O_BINARY is nonzero, pull in the input conversion
   helpers (undossify_input, dossified_pos) used above and below.  */
#if O_BINARY
# include "dosbuf.c"
#endif
540
541static void
542nlscan (lim)
543     char *lim;
544{
545  char *beg;
546  for (beg = lastnl;  (beg = memchr (beg, eolbyte, lim - beg));  beg++)
547    totalnl++;
548  lastnl = lim;
549}
550
/* Write POS in decimal, followed by the single byte SEP, to standard
   output.  */
static void
print_offset_sep (pos, sep)
     off_t pos;
     int sep;
{
  /* Do not rely on printf to print pos, since off_t may be longer than long,
     and long long is not portable.  The digits are generated from the
     least significant end, filling the array backwards from SEP.  */

  char digits[sizeof pos * CHAR_BIT];
  size_t start = sizeof digits - 1;

  digits[start] = sep;
  do
    {
      digits[--start] = '0' + pos % 10;
      pos /= 10;
    }
  while (pos != 0);

  fwrite (digits + start, 1, sizeof digits - start, stdout);
}
569
570static void
571prline (beg, lim, sep)
572     char *beg;
573     char *lim;
574     int sep;
575{
576  if (out_file)
577    printf ("%s%c", filename, sep & filename_mask);
578  if (out_line)
579    {
580      nlscan (beg);
581      print_offset_sep (++totalnl, sep);
582      lastnl = lim;
583    }
584  if (out_byte)
585    {
586      off_t pos = totalcc + (beg - bufbeg);
587#if O_BINARY
588      pos = dossified_pos (pos);
589#endif
590      print_offset_sep (pos, sep);
591    }
592  fwrite (beg, 1, lim - beg, stdout);
593  if (ferror (stdout))
594    error (_("writing output"), errno);
595  lastout = lim;
596}
597
598/* Print pending lines of trailing context prior to LIM. */
599static void
600prpending (lim)
601     char *lim;
602{
603  char *nl;
604
605  if (!lastout)
606    lastout = bufbeg;
607  while (pending > 0 && lastout < lim)
608    {
609      --pending;
610      if ((nl = memchr (lastout, eolbyte, lim - lastout)) != 0)
611	++nl;
612      else
613	nl = lim;
614      prline (lastout, nl, '-');
615    }
616}
617
/* Print the lines between BEG and LIM.  Deal with context crap.
   If NLINESP is non-null, store a count of lines between BEG and LIM.

   Unless output is suppressed by `out_quiet', any pending trailing
   context is flushed first, then up to `out_before' lines of leading
   context are printed with '-' as separator, then the lines themselves
   with ':' as separator.  On return `pending' is reset to `out_after'
   (or to zero when quiet).  */
static void
prtext (beg, lim, nlinesp)
     char *beg;
     char *lim;
     int *nlinesp;
{
  static int used;		/* avoid printing "--" before any output */
  char *bp, *p, *nl;
  char eol = eolbyte;
  int i, n;

  if (!out_quiet && pending > 0)
    prpending (beg);

  p = beg;

  if (!out_quiet)
    {
      /* Deal with leading context crap. */

      /* Back P up by at most out_before lines, never past the last
	 text already printed (or the start of the buffer).  */
      bp = lastout ? lastout : bufbeg;
      for (i = 0; i < out_before; ++i)
	if (p > bp)
	  do
	    --p;
	  while (p > bp && p[-1] != eol);

      /* We only print the "--" separator if our output is
	 discontiguous from the last output in the file. */
      if ((out_before || out_after) && used && p != lastout)
	puts ("--");

      /* Print the context lines one at a time.  */
      while (p < beg)
	{
	  nl = memchr (p, eol, beg - p);
	  prline (p, nl + 1, '-');
	  p = nl + 1;
	}
    }

  if (nlinesp)
    {
      /* Caller wants a line count. */
      for (n = 0; p < lim; ++n)
	{
	  if ((nl = memchr (p, eol, lim - p)) != 0)
	    ++nl;
	  else
	    nl = lim;
	  if (!out_quiet)
	    prline (p, nl, ':');
	  p = nl;
	}
      *nlinesp = n;
    }
  else
    if (!out_quiet)
      prline (beg, lim, ':');

  pending = out_quiet ? 0 : out_after;
  used = 1;
}
682
683/* Scan the specified portion of the buffer, matching lines (or
684   between matching lines if OUT_INVERT is true).  Return a count of
685   lines printed. */
686static int
687grepbuf (beg, lim)
688     char *beg;
689     char *lim;
690{
691  int nlines, n;
692  register char *p, *b;
693  char *endp;
694  char eol = eolbyte;
695
696  nlines = 0;
697  p = beg;
698  while ((b = (*execute)(p, lim - p, &endp)) != 0)
699    {
700      /* Avoid matching the empty line at the end of the buffer. */
701      if (b == lim && ((b > beg && b[-1] == eol) || b == beg))
702	break;
703      if (!out_invert)
704	{
705	  prtext (b, endp, (int *) 0);
706	  nlines += 1;
707	  if (done_on_match)
708	    return nlines;
709	}
710      else if (p < b)
711	{
712	  prtext (p, b, &n);
713	  nlines += n;
714	}
715      p = endp;
716    }
717  if (out_invert && p < lim)
718    {
719      prtext (p, lim, &n);
720      nlines += n;
721    }
722  return nlines;
723}
724
/* Search a given file.  Normally, return a count of lines printed;
   but if the file is a directory and we search it recursively, then
   return -2 if there was a match, and -1 otherwise.

   FD is the open descriptor, FILE its name (null for standard input,
   as passed by grepfile), and STATS the stat link to fill in.  Zero is
   also returned when reset () says the file should be skipped.  */
static int
grep (fd, file, stats)
     int fd;
     char const *file;
     struct stats *stats;
{
  int nlines, i;
  int not_text;
  size_t residue, save;
  char *beg, *lim;
  char eol = eolbyte;

  if (!reset (fd, file, stats))
    return 0;

  if (file && directories == RECURSE_DIRECTORIES
      && S_ISDIR (stats->stat.st_mode))
    {
      /* Close fd now, so that we don't open a lot of file descriptors
	 when we recurse deeply.  */
#if HAVE_LIBZ > 0
      if (Zflag)
	gzclose(gzbufdesc);
      else
#endif
      if (close (fd) != 0)
	error (file, errno);
      /* grepdir returns 0 or 1; shift that to -2 or -1 so the caller
	 can tell it apart from a line count.  */
      return grepdir (file, stats) - 2;
    }

  totalcc = 0;
  lastout = 0;
  totalnl = 0;
  pending = 0;

  nlines = 0;
  residue = 0;
  save = 0;

  if (! fillbuf (save, stats))
    {
      if (! (is_EISDIR (errno, file) && suppress_errors))
	error (filename, errno);
      return nlines;
    }

  /* Unless text is assumed or output is already quiet, treat the file
     as non-text when the first buffer contains a '\0' byte ('\200'
     when the line terminator is itself '\0').  For such a file,
     output is silenced and scanning stops at the first match; both
     adjustments are undone at finish_grep.  */
  not_text = (! (always_text | out_quiet)
	      && memchr (bufbeg, eol ? '\0' : '\200', buflim - bufbeg));
  done_on_match += not_text;
  out_quiet += not_text;

  for (;;)
    {
      lastnl = bufbeg;
      if (lastout)
	lastout = bufbeg;
      /* Nothing new was read: end of input.  */
      if (buflim - bufbeg == save)
	break;
      beg = bufbeg + save - residue;
      /* Back LIM up to just after the last complete line.  */
      for (lim = buflim; lim > beg && lim[-1] != eol; --lim)
	;
      /* RESIDUE is the trailing partial line, searched after the
	 next fill.  */
      residue = buflim - lim;
      if (beg < lim)
	{
	  nlines += grepbuf (beg, lim);
	  if (pending)
	    prpending (lim);
	  if (nlines && done_on_match && !out_invert)
	    goto finish_grep;
	}
      /* Also keep up to out_before complete lines before LIM so that
	 leading context is available after the next fill.  */
      i = 0;
      beg = lim;
      while (i < out_before && beg > bufbeg && beg != lastout)
	{
	  ++i;
	  do
	    --beg;
	  while (beg > bufbeg && beg[-1] != eol);
	}
      if (beg != lastout)
	lastout = 0;
      save = residue + lim - beg;
      totalcc += buflim - bufbeg - save;
      if (out_line)
	nlscan (beg);
      if (! fillbuf (save, stats))
	{
	  if (! (is_EISDIR (errno, file) && suppress_errors))
	    error (filename, errno);
	  goto finish_grep;
	}
    }
  /* Search a final line that lacks a terminator.  */
  if (residue)
    {
      nlines += grepbuf (bufbeg + save - residue, buflim);
      if (pending)
	prpending (buflim);
    }

 finish_grep:
  done_on_match -= not_text;
  out_quiet -= not_text;
  if ((not_text & ~out_quiet) && nlines != 0)
    printf (_("Binary file %s matches\n"), filename);
  return nlines;
}
834
/* Open FILE (standard input when FILE is null), search it with grep (),
   and print any per-file summary that was requested.  STATS is the
   stat link to use for the file.  Return 0 if something was selected
   and 1 otherwise; 1 is also returned when the file cannot be opened.
   When grep () searched a directory recursively, its status is
   passed through.  */
static int
grepfile (file, stats)
     char const *file;
     struct stats *stats;
{
  int desc;
  int count;
  int status;

  if (! file)
    {
      desc = 0;
      filename = _("(standard input)");
    }
  else
    {
      /* Retry the open if it is interrupted by a signal.  */
      while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
	continue;

      if (desc < 0)
	{
	  /* Save errno before later calls can overwrite it.  */
	  int e = errno;

	  /* The directory itself could not be opened for reading, but
	     it can still be recursed into.  */
	  if (is_EISDIR (e, file) && directories == RECURSE_DIRECTORIES)
	    {
	      if (stat (file, &stats->stat) != 0)
		{
		  error (file, errno);
		  return 1;
		}

	      return grepdir (file, stats);
	    }

	  if (!suppress_errors)
	    {
	      if (directories == SKIP_DIRECTORIES)
		switch (e)
		  {
#ifdef EISDIR
		  case EISDIR:
		    return 1;
#endif
		  case EACCES:
		    /* When skipping directories, don't worry about
		       directories that can't be opened.  */
		    if (stat (file, &stats->stat) == 0
			&& S_ISDIR (stats->stat.st_mode))
		      return 1;
		    break;
		  }

	      error (file, e);
	    }

	  return 1;
	}

      filename = file;
    }

#if O_BINARY
  /* Set input to binary mode.  Pipes are simulated with files
     on DOS, so this includes the case of "foo | grep bar".  */
  if (!isatty (desc))
    SET_BINARY (desc);
#endif

  count = grep (desc, file, stats);
  /* A negative count is grep ()'s encoding of a recursive directory
     search: -2 becomes status 0 and -1 becomes status 1.  In that
     case grep () has already closed the descriptor.  */
  if (count < 0)
    status = count + 2;
  else
    {
      if (count_matches)
	{
	  if (out_file)
	    printf ("%s%c", filename, ':' & filename_mask);
	  printf ("%d\n", count);
	}

      status = !count;
      /* 1 - 2 * status is 1 when lines were selected and -1 when none
	 were, so the name is printed when list_files holds the
	 matching value.  */
      if (list_files == 1 - 2 * status)
	printf ("%s%c", filename, '\n' & filename_mask);

#if HAVE_LIBZ > 0
      if (Zflag)
	gzclose(gzbufdesc);
      else
#endif
      /* Standard input is left open; a named file is closed, retrying
	 if the close is interrupted.  */
      if (file)
	while (close (desc) != 0)
	  if (errno != EINTR)
	    {
	      error (file, errno);
	      break;
	    }
    }

  return status;
}
935
936static int
937grepdir (dir, stats)
938     char const *dir;
939     struct stats *stats;
940{
941  int status = 1;
942  struct stats *ancestor;
943  char *name_space;
944
945  for (ancestor = stats;  (ancestor = ancestor->parent) != 0;  )
946    if (ancestor->stat.st_ino == stats->stat.st_ino
947	&& ancestor->stat.st_dev == stats->stat.st_dev)
948      {
949	if (!suppress_errors)
950	  fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir,
951		   _("recursive directory loop"));
952	return 1;
953      }
954
955  name_space = savedir (dir, (unsigned) stats->stat.st_size);
956
957  if (! name_space)
958    {
959      if (errno)
960	{
961	  if (!suppress_errors)
962	    error (dir, errno);
963	}
964      else
965	fatal (_("Memory exhausted"), 0);
966    }
967  else
968    {
969      size_t dirlen = strlen (dir);
970      int needs_slash = ! (dirlen == FILESYSTEM_PREFIX_LEN (dir)
971			   || IS_SLASH (dir[dirlen - 1]));
972      char *file = NULL;
973      char *namep = name_space;
974      struct stats child;
975      child.parent = stats;
976      out_file += !no_filenames;
977      while (*namep)
978	{
979	  size_t namelen = strlen (namep);
980	  file = xrealloc (file, dirlen + 1 + namelen + 1);
981	  strcpy (file, dir);
982	  file[dirlen] = '/';
983	  strcpy (file + dirlen + needs_slash, namep);
984	  namep += namelen + 1;
985	  status &= grepfile (file, &child);
986	}
987      out_file -= !no_filenames;
988      if (file)
989        free (file);
990      free (name_space);
991    }
992
993  return status;
994}
995
/* Print usage information and exit with STATUS.  A nonzero STATUS
   produces a short synopsis on standard error that points at --help;
   a zero STATUS prints the complete option summary on standard
   output.  This function does not return.  */
static void
usage(status)
int status;
{
  if (status != 0)
    {
      fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), prog);
      fprintf (stderr, _("Try `%s --help' for more information.\n"), prog);
    }
  else
    {
      /* The help text is emitted in several pieces, each passed
	 through _() on its own.  */
      printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog);
      printf (_("\
Search for PATTERN in each FILE or standard input.\n\
Example: %s -i 'hello.*world' menu.h main.c\n\
\n\
Regexp selection and interpretation:\n"), prog);
      printf (_("\
  -E, --extended-regexp     PATTERN is an extended regular expression\n\
  -F, --fixed-strings       PATTERN is a set of newline-separated strings\n\
  -G, --basic-regexp        PATTERN is a basic regular expression\n"));
      printf (_("\
  -e, --regexp=PATTERN      use PATTERN as a regular expression\n\
  -f, --file=FILE           obtain PATTERN from FILE\n\
  -i, --ignore-case         ignore case distinctions\n\
  -w, --word-regexp         force PATTERN to match only whole words\n\
  -x, --line-regexp         force PATTERN to match only whole lines\n\
  -z, --null-data           a data line ends in 0 byte, not newline\n"));
      printf (_("\
\n\
Miscellaneous:\n\
  -s, --no-messages         suppress error messages\n\
  -v, --invert-match        select non-matching lines\n\
  -V, --version             print version information and exit\n\
      --help                display this help and exit\n\
  -Z, --decompress          decompress input before searching (HAVE_LIBZ=1)\n\
      --mmap                use memory-mapped input if possible\n"));
      printf (_("\
\n\
Output control:\n\
  -b, --byte-offset         print the byte offset with output lines\n\
  -n, --line-number         print line number with output lines\n\
  -H, --with-filename       print the filename for each match\n\
  -h, --no-filename         suppress the prefixing filename on output\n\
  -q, --quiet, --silent     suppress all normal output\n\
  -a, --text                do not suppress binary output\n\
  -d, --directories=ACTION  how to handle directories\n\
                            ACTION is 'read', 'recurse', or 'skip'.\n\
  -r, --recursive           equivalent to --directories=recurse.\n\
  -L, --files-without-match only print FILE names containing no match\n\
  -l, --files-with-matches  only print FILE names containing matches\n\
  -c, --count               only print a count of matching lines per FILE\n\
      --null                print 0 byte after FILE name\n"));
      printf (_("\
\n\
Context control:\n\
  -B, --before-context=NUM  print NUM lines of leading context\n\
  -A, --after-context=NUM   print NUM lines of trailing context\n\
  -C, --context[=NUM]       print NUM (default 2) lines of output context\n\
                            unless overridden by -A or -B\n\
  -NUM                      same as --context=NUM\n\
  -U, --binary              do not strip CR characters at EOL (MSDOS)\n\
  -u, --unix-byte-offsets   report offsets as if CRs were not there (MSDOS)\n\
\n\
`egrep' means `grep -E'.  `fgrep' means `grep -F'.\n\
With no FILE, or when FILE is -, read standard input.  If less than\n\
two FILEs given, assume -h.  Exit status is 0 if match, 1 if no match,\n\
and 2 if trouble.\n"));
      printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n"));
    }
  exit (status);
}
1068
1069/* Set the matcher to M, reporting any conflicts.  */
1070static void
1071setmatcher (m)
1072     char const *m;
1073{
1074  if (matcher && strcmp (matcher, m) != 0)
1075    fatal (_("conflicting matchers specified"), 0);
1076  matcher = m;
1077}
1078
1079/* Go through the matchers vector and look for the specified matcher.
1080   If we find it, install it in compile and execute, and return 1.  */
1081static int
1082install_matcher (name)
1083     char const *name;
1084{
1085  int i;
1086#ifdef HAVE_SETRLIMIT
1087  struct rlimit rlim;
1088#endif
1089
1090  for (i = 0; matchers[i].name; ++i)
1091    if (strcmp (name, matchers[i].name) == 0)
1092      {
1093	compile = matchers[i].compile;
1094	execute = matchers[i].execute;
1095#if HAVE_SETRLIMIT && defined(RLIMIT_STACK)
1096	/* I think every platform needs to do this, so that regex.c
1097	   doesn't oveflow the stack.  The default value of
1098	   `re_max_failures' is too large for some platforms: it needs
1099	   more than 3MB-large stack.
1100
1101	   The test for HAVE_SETRLIMIT should go into `configure'.  */
1102	if (!getrlimit (RLIMIT_STACK, &rlim))
1103	  {
1104	    long newlim;
1105	    extern long int re_max_failures; /* from regex.c */
1106
1107	    /* Approximate the amount regex.c needs, plus some more.  */
1108	    newlim = re_max_failures * 2 * 20 * sizeof (char *);
1109	    if (newlim > rlim.rlim_max)
1110	      {
1111		newlim = rlim.rlim_max;
1112		re_max_failures = newlim / (2 * 20 * sizeof (char *));
1113	      }
1114	    if (rlim.rlim_cur < newlim)
1115	      rlim.rlim_cur = newlim;
1116
1117	    setrlimit (RLIMIT_STACK, &rlim);
1118	  }
1119#endif
1120	return 1;
1121      }
1122  return 0;
1123}
1124
/* Split OPTIONS into its white-space-separated words, copying each
   word (NUL-terminated) into BUF.  When ARGV is not NULL, ARGV[0],
   ARGV[1], ... are set to point at the successive copies in BUF;
   ARGV[N] is not set to NULL.  The copies are written to BUF even
   when ARGV is NULL.  A backslash makes the following character
   literal, so it can escape whitespace or another backslash; a
   backslash at the very end of OPTIONS is copied as is.
   Return the number N of words found.  */
static int
prepend_args (options, buf, argv)
     char const *options;
     char *buf;
     char **argv;
{
  char const *src = options;
  char *dst = buf;
  int count = 0;

  while (1)
    {
      /* Skip the whitespace in front of the next word.  */
      for (; ISSPACE ((unsigned char) *src); src++)
        continue;
      if (*src == '\0')
        break;

      if (argv != NULL)
        argv[count] = dst;
      count++;

      /* Copy one word.  The first character is always taken, since
         it is known to be neither NUL nor whitespace.  */
      do
        {
          char c = *src++;

          if (c == '\\' && *src != '\0')
            c = *src++;
          *dst++ = c;
        }
      while (*src != '\0' && ! ISSPACE ((unsigned char) *src));

      *dst++ = '\0';
    }

  return count;
}
1158
/* Insert the whitespace-separated words of OPTIONS into the argument
   vector *PARGV (of length *PARGC) right after its first element, and
   update *PARGC and *PARGV to describe the new, freshly allocated
   vector.  Do nothing when OPTIONS is NULL.  */
static void
prepend_default_options (options, pargc, pargv)
     char const *options;
     int *pargc;
     char ***pargv;
{
  char *buf;
  int extra;
  int old_argc;
  char * const *src;
  char **newv;
  char **dst;

  if (options == NULL)
    return;

  /* First pass only counts the words; BUF receives the copies.  */
  buf = xmalloc (strlen (options) + 1);
  extra = prepend_args (options, buf, (char **) NULL);

  old_argc = *pargc;
  src = *pargv;

  /* Room for the old arguments, the new ones and the final NULL.  */
  newv = (char **) xmalloc ((extra + old_argc + 1) * sizeof *newv);
  *pargc = extra + old_argc;
  *pargv = newv;

  /* The first element of the old vector stays first.  */
  dst = newv;
  *dst++ = *src++;

  /* Second pass stores the word pointers into the new vector.  */
  dst += prepend_args (options, buf, dst);

  /* Copy the remaining old arguments, terminating NULL included.  */
  do
    *dst = *src++;
  while (*dst++ != NULL);
}
1183
1184int
1185main (argc, argv)
1186     int argc;
1187     char *argv[];
1188{
1189  char *keys;
1190  size_t keycc, oldcc, keyalloc;
1191  int with_filenames;
1192  int opt, cc, status;
1193  unsigned digit_args_val, default_context;
1194  FILE *fp;
1195  extern char *optarg;
1196  extern int optind;
1197
1198  initialize_main (&argc, &argv);
1199  prog = argv[0];
1200  if (prog && strrchr (prog, '/'))
1201    prog = strrchr (prog, '/') + 1;
1202
1203#if HAVE_LIBZ > 0
1204  if (prog[0] == 'z') {
1205    Zflag = 1;
1206    ++prog;
1207  }
1208#endif
1209
1210#if defined(__MSDOS__) || defined(_WIN32)
1211  /* DOS and MS-Windows use backslashes as directory separators, and usually
1212     have an .exe suffix.  They also have case-insensitive filesystems.  */
1213  if (prog)
1214    {
1215      char *p = prog;
1216      char *bslash = strrchr (argv[0], '\\');
1217
1218      if (bslash && bslash >= prog) /* for mixed forward/backslash case */
1219	prog = bslash + 1;
1220      else if (prog == argv[0]
1221	       && argv[0][0] && argv[0][1] == ':') /* "c:progname" */
1222	prog = argv[0] + 2;
1223
1224      /* Collapse the letter-case, so `strcmp' could be used hence.  */
1225      for ( ; *p; p++)
1226	if (*p >= 'A' && *p <= 'Z')
1227	  *p += 'a' - 'A';
1228
1229      /* Remove the .exe extension, if any.  */
1230      if ((p = strrchr (prog, '.')) && strcmp (p, ".exe") == 0)
1231	*p = '\0';
1232    }
1233#endif
1234
1235  keys = NULL;
1236  keycc = 0;
1237  with_filenames = 0;
1238  eolbyte = '\n';
1239  filename_mask = ~0;
1240
1241  /* The value -1 means to use DEFAULT_CONTEXT. */
1242  out_after = out_before = -1;
1243  /* Default before/after context: chaged by -C/-NUM options */
1244  default_context = 0;
1245  /* Accumulated value of individual digits in a -NUM option */
1246  digit_args_val = 0;
1247
1248
1249/* Internationalization. */
1250#if HAVE_SETLOCALE
1251  setlocale (LC_ALL, "");
1252#endif
1253#if ENABLE_NLS
1254  bindtextdomain (PACKAGE, LOCALEDIR);
1255  textdomain (PACKAGE);
1256#endif
1257
1258  prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
1259
1260  while ((opt = getopt_long (argc, argv, short_options, long_options, NULL))
1261	 != -1)
1262    switch (opt)
1263      {
1264      case '0':
1265      case '1':
1266      case '2':
1267      case '3':
1268      case '4':
1269      case '5':
1270      case '6':
1271      case '7':
1272      case '8':
1273      case '9':
1274	digit_args_val = 10 * digit_args_val + opt - '0';
1275	default_context = digit_args_val;
1276	break;
1277      case 'A':
1278	if (optarg)
1279	  {
1280	    if (ck_atoi (optarg, &out_after))
1281	      fatal (_("invalid context length argument"), 0);
1282	  }
1283	break;
1284      case 'B':
1285	if (optarg)
1286	  {
1287	    if (ck_atoi (optarg, &out_before))
1288	      fatal (_("invalid context length argument"), 0);
1289	  }
1290	break;
1291      case 'C':
1292	/* Set output match context, but let any explicit leading or
1293	   trailing amount specified with -A or -B stand. */
1294	if (optarg)
1295	  {
1296	    if (ck_atoi (optarg, &default_context))
1297	      fatal (_("invalid context length argument"), 0);
1298	  }
1299	else
1300	  default_context = 2;
1301	break;
1302      case 'E':
1303	setmatcher ("egrep");
1304	break;
1305      case 'F':
1306	setmatcher ("fgrep");
1307	break;
1308      case 'G':
1309	setmatcher ("grep");
1310	break;
1311      case 'H':
1312	with_filenames = 1;
1313	break;
1314      case 'U':
1315#if O_BINARY
1316	dos_use_file_type = DOS_BINARY;
1317#endif
1318	break;
1319      case 'u':
1320#if O_BINARY
1321	dos_report_unix_offset = 1;
1322#endif
1323	break;
1324      case 'V':
1325	show_version = 1;
1326	break;
1327      case 'X':
1328	setmatcher (optarg);
1329	break;
1330      case 'a':
1331	always_text = 1;
1332	break;
1333      case 'b':
1334	out_byte = 1;
1335	break;
1336      case 'c':
1337	out_quiet = 1;
1338	count_matches = 1;
1339	break;
1340      case 'd':
1341	if (strcmp (optarg, "read") == 0)
1342	  directories = READ_DIRECTORIES;
1343	else if (strcmp (optarg, "skip") == 0)
1344	  directories = SKIP_DIRECTORIES;
1345	else if (strcmp (optarg, "recurse") == 0)
1346	  directories = RECURSE_DIRECTORIES;
1347	else
1348	  fatal (_("unknown directories method"), 0);
1349	break;
1350      case 'e':
1351	cc = strlen (optarg);
1352	keys = xrealloc (keys, keycc + cc + 1);
1353	strcpy (&keys[keycc], optarg);
1354	keycc += cc;
1355	keys[keycc++] = '\n';
1356	break;
1357      case 'f':
1358	fp = strcmp (optarg, "-") != 0 ? fopen (optarg, "r") : stdin;
1359	if (!fp)
1360	  fatal (optarg, errno);
1361	for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
1362	  ;
1363	keys = xrealloc (keys, keyalloc);
1364	oldcc = keycc;
1365	while (!feof (fp)
1366	       && (cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) > 0)
1367	  {
1368	    keycc += cc;
1369	    if (keycc == keyalloc - 1)
1370	      keys = xrealloc (keys, keyalloc *= 2);
1371	  }
1372	if (fp != stdin)
1373	  fclose(fp);
1374	/* Append final newline if file ended in non-newline. */
1375	if (oldcc != keycc && keys[keycc - 1] != '\n')
1376	  keys[keycc++] = '\n';
1377	break;
1378      case 'h':
1379	no_filenames = 1;
1380	break;
1381      case 'i':
1382      case 'y':			/* For old-timers . . . */
1383	match_icase = 1;
1384	break;
1385      case 'L':
1386	/* Like -l, except list files that don't contain matches.
1387	   Inspired by the same option in Hume's gre. */
1388	out_quiet = 1;
1389	list_files = -1;
1390	done_on_match = 1;
1391	break;
1392      case 'l':
1393	out_quiet = 1;
1394	list_files = 1;
1395	done_on_match = 1;
1396	break;
1397      case 'n':
1398	out_line = 1;
1399	break;
1400      case 'q':
1401	done_on_match = 1;
1402	out_quiet = 1;
1403	break;
1404      case 'R':
1405      case 'r':
1406	directories = RECURSE_DIRECTORIES;
1407	break;
1408      case 's':
1409	suppress_errors = 1;
1410	break;
1411      case 'v':
1412	out_invert = 1;
1413	break;
1414      case 'w':
1415	match_words = 1;
1416	break;
1417      case 'x':
1418	match_lines = 1;
1419	break;
1420      case 'Z':
1421#if HAVE_LIBZ > 0
1422	Zflag = 1;
1423#else
1424	filename_mask = 0;
1425#endif
1426	break;
1427      case 'z':
1428	eolbyte = '\0';
1429	break;
1430      case 0:
1431	/* long options */
1432	break;
1433      default:
1434	usage (2);
1435	break;
1436      }
1437
1438  if (out_after < 0)
1439    out_after = default_context;
1440  if (out_before < 0)
1441    out_before = default_context;
1442
1443  if (! matcher)
1444    matcher = prog;
1445
1446  if (show_version)
1447    {
1448      printf (_("%s (GNU grep) %s\n"), matcher, VERSION);
1449      printf ("\n");
1450      printf (_("\
1451Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n"));
1452      printf (_("\
1453This is free software; see the source for copying conditions. There is NO\n\
1454warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"));
1455      printf ("\n");
1456      exit (0);
1457    }
1458
1459  if (show_help)
1460    usage (0);
1461
1462  if (keys)
1463    {
1464      if (keycc == 0)
1465	/* No keys were specified (e.g. -f /dev/null).  Match nothing.  */
1466        out_invert ^= 1;
1467      else
1468	/* Strip trailing newline. */
1469        --keycc;
1470    }
1471  else
1472    if (optind < argc)
1473      {
1474	keys = argv[optind++];
1475	keycc = strlen (keys);
1476      }
1477    else
1478      usage (2);
1479
1480  if (!install_matcher (matcher) && !install_matcher ("default"))
1481    abort ();
1482
1483  (*compile)(keys, keycc);
1484
1485  if ((argc - optind > 1 && !no_filenames) || with_filenames)
1486    out_file = 1;
1487
1488#if O_BINARY
1489  /* Output is set to binary mode because we shouldn't convert
1490     NL to CR-LF pairs, especially when grepping binary files.  */
1491  if (!isatty (1))
1492    SET_BINARY (1);
1493#endif
1494
1495
1496  if (optind < argc)
1497    {
1498	status = 1;
1499	do
1500	{
1501	  char *file = argv[optind];
1502	  status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file,
1503			      &stats_base);
1504	}
1505	while ( ++optind < argc);
1506    }
1507  else
1508    status = grepfile ((char *) NULL, &stats_base);
1509
1510  if (fclose (stdout) == EOF)
1511    error (_("writing output"), errno);
1512
1513  exit (errseen ? 2 : status);
1514}
1515