grep.c revision 53705
1/* grep.c - main driver file for grep.
2   Copyright (C) 1992, 1997, 1998, 1999 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 2, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software
16   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17   02111-1307, USA.  */
18
19/* Written July 1992 by Mike Haertel.  */
20/* Builtin decompression 1997 by Wolfram Schneider <wosch@FreeBSD.org>.  */
21
22/* $FreeBSD: head/gnu/usr.bin/grep/grep.c 53705 1999-11-26 03:26:59Z obrien $ */
23
24#ifdef HAVE_CONFIG_H
25# include <config.h>
26#endif
27#include <sys/types.h>
28#include <sys/stat.h>
29#if defined(HAVE_MMAP)
30# include <sys/mman.h>
31#endif
32#if defined(HAVE_SETRLIMIT)
33# include <sys/time.h>
34# include <sys/resource.h>
35#endif
36#include <stdio.h>
37#include "system.h"
38#include "getopt.h"
39#include "getpagesize.h"
40#include "grep.h"
41#include "savedir.h"
42
#undef MAX
/* Yield the larger of A and B.  Each argument may be evaluated more
   than once, so both must be free of side effects.  */
#define MAX(A,B) ((A) > (B) ? (A) : (B))

/* One link in a chain of stat results.  grepdir links a child entry to
   the entry of the directory being scanned through PARENT and walks the
   chain to spot a directory that is its own ancestor.  */
struct stats
{
  struct stats *parent;
  struct stat stat;
};

/* base of chain of stat buffers, used to detect directory loops */
static struct stats stats_base;

/* if non-zero, display usage information and exit */
static int show_help;

/* If non-zero, print the version on standard output and exit.  */
static int show_version;
60
/* Long options equivalences.  Each entry maps a long option name to the
   short option character it stands for; "help" is the exception and
   stores 1 into show_help instead of yielding a character.  The table
   is terminated by an all-zero entry.  */
static struct option long_options[] =
{
  {"after-context", required_argument, NULL, 'A'},
  {"basic-regexp", no_argument, NULL, 'G'},
  {"before-context", required_argument, NULL, 'B'},
  {"byte-offset", no_argument, NULL, 'b'},
  {"context", optional_argument, NULL, 'C'},
  {"count", no_argument, NULL, 'c'},
  {"directories", required_argument, NULL, 'd'},
  {"extended-regexp", no_argument, NULL, 'E'},
  {"file", required_argument, NULL, 'f'},
  {"files-with-matches", no_argument, NULL, 'l'},
  {"files-without-match", no_argument, NULL, 'L'},
  /* Two spellings of the same option.  */
  {"fixed-regexp", no_argument, NULL, 'F'},
  {"fixed-strings", no_argument, NULL, 'F'},
  {"help", no_argument, &show_help, 1},
  {"ignore-case", no_argument, NULL, 'i'},
  {"line-number", no_argument, NULL, 'n'},
  {"line-regexp", no_argument, NULL, 'x'},
  {"no-filename", no_argument, NULL, 'h'},
  {"no-messages", no_argument, NULL, 's'},
  {"quiet", no_argument, NULL, 'q'},
  {"recursive", no_argument, NULL, 'r'},
  {"regexp", required_argument, NULL, 'e'},
  {"revert-match", no_argument, NULL, 'v'},
  {"silent", no_argument, NULL, 'q'},
  {"text", no_argument, NULL, 'a'},
#if O_BINARY
  /* Only offered where the platform distinguishes binary mode.  */
  {"binary", no_argument, NULL, 'U'},
  {"unix-byte-offsets", no_argument, NULL, 'u'},
#endif
  {"version", no_argument, NULL, 'V'},
  {"with-filename", no_argument, NULL, 'H'},
  {"word-regexp", no_argument, NULL, 'w'},
#if HAVE_LIBZ > 0
  /* Only offered when built with zlib support.  */
  {"decompress", no_argument, NULL, 'Z'},
#endif
  {0, 0, 0, 0}
};
101
/* Define flags declared in grep.h. */
char const *matcher;
int match_icase;
int match_words;
int match_lines;

/* For error messages. */
static char *prog;              /* Program name printed in front of diagnostics. */
static char const *filename;    /* Name of the file currently being searched. */
static int errseen;             /* Set to 1 by error () once anything has been reported. */

/* How to handle directories.  */
static enum
  {
    READ_DIRECTORIES,           /* First enumerator, hence the zero-initialized default. */
    RECURSE_DIRECTORIES,
    SKIP_DIRECTORIES
  } directories;
120
/* Forward declarations of the file-local functions.  PARAMS wraps the
   parameter list so the declarations can be written once for both
   prototype and non-prototype compilers (presumably defined in one of
   the project headers -- it is not defined in this file).  */
static int  ck_atoi PARAMS ((char const *, int *));
static void usage PARAMS ((int)) __attribute__((noreturn));
static void error PARAMS ((const char *, int));
static int  setmatcher PARAMS ((char const *));
static char *page_alloc PARAMS ((size_t, char **));
static int  reset PARAMS ((int, char const *, struct stats *));
static int  fillbuf PARAMS ((size_t, struct stats *));
static int  grepbuf PARAMS ((char *, char *));
static void prtext PARAMS ((char *, char *, int *));
static void prpending PARAMS ((char *));
static void prline PARAMS ((char *, char *, int));
static void print_offset_sep PARAMS ((off_t, int));
static void nlscan PARAMS ((char *));
static int  grep PARAMS ((int, char const *, struct stats *));
static int  grepdir PARAMS ((char const *, struct stats *));
static int  grepfile PARAMS ((char const *, struct stats *));
#if O_BINARY
static inline int undossify_input PARAMS ((register char *, size_t));
#endif

/* Functions we'll use to search.  Both are installed by setmatcher ()
   from the entry of the matchers table whose name was requested.  */
static void (*compile) PARAMS ((char *, size_t));
static char *(*execute) PARAMS ((char *, size_t, char **));
144
145/* Print a message and possibly an error string.  Remember
146   that something awful happened. */
147static void
148error (mesg, errnum)
149     const char *mesg;
150     int errnum;
151{
152  if (errnum)
153    fprintf (stderr, "%s: %s: %s\n", prog, mesg, strerror (errnum));
154  else
155    fprintf (stderr, "%s: %s\n", prog, mesg);
156  errseen = 1;
157}
158
/* Like error (), but die horribly after printing: report MESG (and the
   strerror text for ERRNUM when it is non-zero) and terminate the
   process with exit status 2.  Does not return.  */
void
fatal (mesg, errnum)
     const char *mesg;
     int errnum;
{
  error (mesg, errnum);
  exit (2);
}
168
/* Allocate SIZE bytes with malloc and return the block.  A null result
   is fatal unless SIZE is zero, in which case whatever malloc returned
   (possibly null) is handed back to the caller.  */
char *
xmalloc (size)
     size_t size;
{
  char *block = malloc (size);

  if (!block && size != 0)
    fatal (_("memory exhausted"), 0);

  return block;
}
181
/* Resize the block PTR to SIZE bytes and return the (possibly moved)
   block.  A null PTR is routed to malloc rather than realloc.  A null
   result is fatal unless SIZE is zero.  */
char *
xrealloc (ptr, size)
     char *ptr;
     size_t size;
{
  char *block = ptr ? realloc (ptr, size) : malloc (size);

  if (block == NULL && size != 0)
    fatal (_("memory exhausted"), 0);

  return block;
}
198
/* Convert STR, a string of decimal digits, to a non-negative integer
   and store it in *OUT.  Return 0 on success.  Return -1, leaving *OUT
   untouched, if STR contains anything but the digits 0-9 or if its
   value does not fit in an int.  An empty STR is accepted and yields
   0, as it always has.  */
static int
ck_atoi (str, out)
     char const *str;
     int *out;
{
  char const *p;
  long value;

  for (p = str; *p; p++)
    if (*p < '0' || *p > '9')
      return -1;

  /* Convert the string that was just validated -- not the global
     optarg, which need not be the same string.  strtol, unlike atoi,
     has defined behavior on overflow: it sets errno to ERANGE.  */
  errno = 0;
  value = strtol (str, (char **) NULL, 10);

  /* The round trip through int catches values that fit in a long but
     not in an int.  */
  if (errno == ERANGE || value != (int) value)
    return -1;

  *out = (int) value;
  return 0;
}
214
215
/* Hairy buffering mechanism for grep.  The intent is to keep
   all reads aligned on a page boundary and multiples of the
   page size.

   The buffer holds `bufalloc' bytes (plus one spare byte).  Its first
   `bufsalloc' bytes form the save region, where text carried over from
   the previous fill is placed; new input is always read to
   buffer + bufsalloc.  */

static char *ubuffer;           /* Unaligned base of buffer. */
static char *buffer;            /* Base of buffer. */
static size_t bufsalloc;        /* Allocated size of buffer save region. */
static size_t bufalloc;         /* Total buffer size. */
static int bufdesc;             /* File descriptor. */
static char *bufbeg;            /* Beginning of user-visible stuff. */
static char *buflim;            /* Limit of user-visible stuff. */
static size_t pagesize;         /* alignment of memory pages */

#if defined(HAVE_MMAP)
static int bufmapped;           /* True for ordinary files. */
static off_t bufoffset;         /* What read() normally remembers. */
static off_t initial_bufoffset; /* Initial value of bufoffset. */
#endif

#if HAVE_LIBZ > 0
#include <zlib.h>
static gzFile gzbufdesc;        /* zlib file descriptor. */
static int Zflag;               /* uncompress before searching. */
#endif

/* Return VAL aligned to the next multiple of ALIGNMENT.  VAL can be
   an integer or a pointer.  Both args must be free of side effects.
   A VAL that is already a multiple of ALIGNMENT is returned unchanged.  */
#define ALIGN_TO(val, alignment) \
  ((size_t) (val) % (alignment) == 0 \
   ? (val) \
   : (val) + ((alignment) - (size_t) (val) % (alignment)))
247
248/* Return the address of a new page-aligned buffer of size SIZE.  Set
249   *UP to the newly allocated (but possibly unaligned) buffer used to
250   *build the aligned buffer.  To free the buffer, free (*UP).  */
251static char *
252page_alloc (size, up)
253     size_t size;
254     char **up;
255{
256  /* HAVE_WORKING_VALLOC means that valloc is properly declared, and
257     you can free the result of valloc.  This symbol is not (yet)
258     autoconfigured.  It can be useful to define HAVE_WORKING_VALLOC
259     while debugging, since some debugging memory allocators might
260     catch more bugs if this symbol is enabled.  */
261#if HAVE_WORKING_VALLOC
262  *up = valloc (size);
263  return *up;
264#else
265  size_t asize = size + pagesize - 1;
266  if (size <= asize)
267    {
268      *up = malloc (asize);
269      if (*up)
270	return ALIGN_TO (*up, pagesize);
271    }
272  return NULL;
273#endif
274}
275
/* Reset the buffer for a new file, returning zero if we should skip it.
   Initialize on the first time through.

   FD is the descriptor that will be read, FILE is its name (null when
   there is no name, in which case the current offset of FD is queried
   instead of being assumed to be zero), and STATS receives the fstat
   result.  Returns 0 if fstat fails (after reporting it) or if FD is a
   directory and directories are being skipped; returns 1 otherwise.  */
static int
reset (fd, file, stats)
     int fd;
     char const *file;
     struct stats *stats;
{
  /* pagesize doubles as the "already initialized" flag.  */
  if (pagesize == 0)
    {
      size_t ubufsalloc;
      pagesize = getpagesize ();
      if (pagesize == 0)
        abort ();
#ifndef BUFSALLOC
      ubufsalloc = MAX (8192, pagesize);
#else
      ubufsalloc = BUFSALLOC;
#endif
      /* The save region is a whole number of pages and the buffer is
         five times the save region.  */
      bufsalloc = ALIGN_TO (ubufsalloc, pagesize);
      bufalloc = 5 * bufsalloc;
      /* The 1 byte of overflow is a kludge for dfaexec(), which
         inserts a sentinel newline at the end of the buffer
         being searched.  There's gotta be a better way... */
      /* The first three tests detect wraparound in the size
         arithmetic above before anything is allocated.  */
      if (bufsalloc < ubufsalloc
          || bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
          || ! (buffer = page_alloc (bufalloc + 1, &ubuffer)))
        fatal (_("memory exhausted"), 0);
      bufbeg = buffer;
      buflim = buffer;
    }
#if HAVE_LIBZ > 0
  /* When decompressing, reads go through a zlib stream layered on FD.  */
  if (Zflag) {
    gzbufdesc = gzdopen(fd, "r");
    if (gzbufdesc == NULL)
      fatal(_("memory exhausted"), 0);
  }
#endif
  bufdesc = fd;

  /* With mmap support the stat result is always needed; without it,
     fstat is skipped when directories are simply read like files.  */
  if (
#if defined(HAVE_MMAP)
      1
#else
      directories != READ_DIRECTORIES
#endif
      )
    if (fstat (fd, &stats->stat) != 0)
      {
        error ("fstat", errno);
        return 0;
      }
  if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
    return 0;
#if defined(HAVE_MMAP)
  /* Only regular files that are not being decompressed are candidates
     for mmap.  */
  if (
#if HAVE_LIBZ > 0
      Zflag ||
#endif
      !S_ISREG (stats->stat.st_mode))
    bufmapped = 0;
  else
    {
      bufmapped = 1;
      /* Whence value 1 is SEEK_CUR: ask where FD currently stands.  */
      bufoffset = initial_bufoffset = file ? 0 : lseek (fd, 0, 1);
    }
#endif
  return 1;
}
345
346/* Read new stuff into the buffer, saving the specified
347   amount of old stuff.  When we're done, 'bufbeg' points
348   to the beginning of the buffer contents, and 'buflim'
349   points just after the end.  Return count of new stuff. */
350static int
351fillbuf (save, stats)
352     size_t save;
353     struct stats *stats;
354{
355  int cc;
356#if defined(HAVE_MMAP)
357  caddr_t maddr;
358#endif
359
360  if (save > bufsalloc)
361    {
362      char *nubuffer;
363      char *nbuffer;
364
365      while (save > bufsalloc)
366	bufsalloc *= 2;
367      bufalloc = 5 * bufsalloc;
368      if (bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
369	  || ! (nbuffer = page_alloc (bufalloc + 1, &nubuffer)))
370	fatal (_("memory exhausted"), 0);
371
372      bufbeg = nbuffer + bufsalloc - save;
373      memcpy (bufbeg, buflim - save, save);
374      free (ubuffer);
375      ubuffer = nubuffer;
376      buffer = nbuffer;
377    }
378  else
379    {
380      bufbeg = buffer + bufsalloc - save;
381      memcpy (bufbeg, buflim - save, save);
382    }
383
384#if defined(HAVE_MMAP)
385  if (bufmapped && bufoffset % pagesize == 0
386      && stats->stat.st_size - bufoffset >= bufalloc - bufsalloc)
387    {
388      maddr = buffer + bufsalloc;
389      maddr = mmap (maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE,
390		   MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset);
391      if (maddr == (caddr_t) -1)
392	{
393          /* This used to issue a warning, but on some hosts
394             (e.g. Solaris 2.5) mmap can fail merely because some
395             other process has an advisory read lock on the file.
396             There's no point alarming the user about this misfeature.  */
397#if 0
398	  fprintf (stderr, _("%s: warning: %s: %s\n"), prog, filename,
399		  strerror (errno));
400#endif
401	  goto tryread;
402	}
403#if 0
404      /* You might thing this (or MADV_WILLNEED) would help,
405	 but it doesn't, at least not on a Sun running 4.1.
406	 In fact, it actually slows us down about 30%! */
407      madvise (maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL);
408#endif
409      cc = bufalloc - bufsalloc;
410      bufoffset += cc;
411    }
412  else
413    {
414    tryread:
415      /* We come here when we're not going to use mmap() any more.
416	 Note that we need to synchronize the file offset the
417	 first time through. */
418      if (bufmapped)
419	{
420	  bufmapped = 0;
421	  if (bufoffset != initial_bufoffset)
422	    lseek (bufdesc, bufoffset, 0);
423	}
424#if HAVE_LIBZ > 0
425      if (Zflag)
426        cc = gzread (gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
427      else
428#endif
429      cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
430    }
431#else
432#if HAVE_LIBZ > 0
433  if (Zflag)
434    cc = gzread (gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
435  else
436#endif
437  cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
438#endif /*HAVE_MMAP*/
439#if O_BINARY
440  if (cc > 0)
441    cc = undossify_input (buffer + bufsalloc, cc);
442#endif
443  if (cc > 0)
444    buflim = buffer + bufsalloc + cc;
445  else
446    buflim = buffer + bufsalloc;
447  return cc;
448}
449
/* Flags controlling the style of output. */
static int always_text;         /* Assume the input is always text. */
static int out_quiet;           /* Suppress all normal output. */
static int out_invert;          /* Print nonmatching stuff. */
static int out_file;            /* Print filenames. */
static int out_line;            /* Print line numbers. */
static int out_byte;            /* Print byte offsets. */
static int out_before;          /* Lines of leading context. */
static int out_after;           /* Lines of trailing context. */
static int count_matches;       /* Count matching lines.  */
static int list_files;          /* List matching files.  grepfile prints the
                                   name of a file with matches when this is 1
                                   and of a file without matches when it is -1.  */
static int no_filenames;        /* Suppress file names.  */
static int suppress_errors;     /* Suppress diagnostics.  */

/* Internal variables to keep track of byte count, context, etc. */
static off_t totalcc;           /* Total character count before bufbeg. */
static char *lastnl;            /* Pointer after last newline counted. */
static char *lastout;           /* Pointer after last character output;
                                   NULL if no character has been output
                                   or if it's conceptually before bufbeg. */
static off_t totalnl;           /* Total newline count before lastnl. */
static int pending;             /* Pending lines of output. */
static int done_on_match;               /* Stop scanning file on first match */

#if O_BINARY
/* Presumably supplies undossify_input and dossified_pos, which are used
   in this file but not defined in it -- confirm against dosbuf.c.  */
# include "dosbuf.c"
#endif
477
478static void
479nlscan (lim)
480     char *lim;
481{
482  char *beg;
483
484  for (beg = lastnl; beg < lim; ++beg)
485    if (*beg == '\n')
486      ++totalnl;
487  lastnl = beg;
488}
489
/* Write POS in decimal to standard output, followed by the single
   character SEP.  */
static void
print_offset_sep (pos, sep)
     off_t pos;
     int sep;
{
  /* printf is deliberately avoided: off_t may be wider than long, and
     long long is not portable.  The digits are generated from the least
     significant end, filling the array backwards.  */

  char digits[sizeof pos * CHAR_BIT];
  char *end = digits + sizeof digits;
  char *cur = end - 1;

  *cur = sep;

  for (;;)
    {
      *--cur = '0' + pos % 10;
      pos /= 10;
      if (pos == 0)
        break;
    }

  fwrite (cur, 1, end - cur, stdout);
}
508
509static void
510prline (beg, lim, sep)
511     char *beg;
512     char *lim;
513     int sep;
514{
515  if (out_file)
516    printf ("%s%c", filename, sep);
517  if (out_line)
518    {
519      nlscan (beg);
520      print_offset_sep (++totalnl, sep);
521      lastnl = lim;
522    }
523  if (out_byte)
524    {
525      off_t pos = totalcc + (beg - bufbeg);
526#if O_BINARY
527      pos = dossified_pos (pos);
528#endif
529      print_offset_sep (pos, sep);
530    }
531  fwrite (beg, 1, lim - beg, stdout);
532  if (ferror (stdout))
533    error (_("writing output"), errno);
534  lastout = lim;
535}
536
537/* Print pending lines of trailing context prior to LIM. */
538static void
539prpending (lim)
540     char *lim;
541{
542  char *nl;
543
544  if (!lastout)
545    lastout = bufbeg;
546  while (pending > 0 && lastout < lim)
547    {
548      --pending;
549      if ((nl = memchr (lastout, '\n', lim - lastout)) != 0)
550	++nl;
551      else
552	nl = lim;
553      prline (lastout, nl, '-');
554    }
555}
556
/* Print the lines between BEG and LIM.  Deal with context crap.
   If NLINESP is non-null, store a count of lines between BEG and LIM.

   Unless output is suppressed, trailing context still owed for the
   previous match is flushed first, then up to `out_before' lines of
   leading context, then the text itself.  On return `pending' is reset
   to `out_after'.  */
static void
prtext (beg, lim, nlinesp)
     char *beg;
     char *lim;
     int *nlinesp;
{
  static int used;              /* avoid printing "--" before any output */
  char *bp, *p, *nl;
  int i, n;

  if (!out_quiet && pending > 0)
    prpending (beg);

  p = beg;

  if (!out_quiet)
    {
      /* Deal with leading context crap. */

      /* Never back up past what has already been printed, nor past the
         start of the buffer.  */
      bp = lastout ? lastout : bufbeg;
      for (i = 0; i < out_before; ++i)
        if (p > bp)
          do
            --p;
          while (p > bp && p[-1] != '\n');

      /* We only print the "--" separator if our output is
         discontiguous from the last output in the file. */
      if ((out_before || out_after) && used && p != lastout)
        puts ("--");

      /* Print the leading context one line at a time.  */
      while (p < beg)
        {
          nl = memchr (p, '\n', beg - p);
          prline (p, nl + 1, '-');
          p = nl + 1;
        }
    }

  if (nlinesp)
    {
      /* Caller wants a line count. */
      for (n = 0; p < lim; ++n)
        {
          /* A final piece without a newline still counts as a line.  */
          if ((nl = memchr (p, '\n', lim - p)) != 0)
            ++nl;
          else
            nl = lim;
          if (!out_quiet)
            prline (p, nl, ':');
          p = nl;
        }
      *nlinesp = n;
    }
  else
    if (!out_quiet)
      prline (beg, lim, ':');

  pending = out_after;
  used = 1;
}
620
621/* Scan the specified portion of the buffer, matching lines (or
622   between matching lines if OUT_INVERT is true).  Return a count of
623   lines printed. */
624static int
625grepbuf (beg, lim)
626     char *beg;
627     char *lim;
628{
629  int nlines, n;
630  register char *p, *b;
631  char *endp;
632
633  nlines = 0;
634  p = beg;
635  while ((b = (*execute)(p, lim - p, &endp)) != 0)
636    {
637      /* Avoid matching the empty line at the end of the buffer. */
638      if (b == lim && ((b > beg && b[-1] == '\n') || b == beg))
639	break;
640      if (!out_invert)
641	{
642	  prtext (b, endp, (int *) 0);
643	  nlines += 1;
644	  if (done_on_match)
645	    return nlines;
646	}
647      else if (p < b)
648	{
649	  prtext (p, b, &n);
650	  nlines += n;
651	}
652      p = endp;
653    }
654  if (out_invert && p < lim)
655    {
656      prtext (p, lim, &n);
657      nlines += n;
658    }
659  return nlines;
660}
661
/* Search a given file.  Normally, return a count of lines printed;
   but if the file is a directory and we search it recursively, then
   return -2 if there was a match, and -1 otherwise.

   FD is the open descriptor, FILE its name (null when there is none)
   and STATS the stat buffer for it, filled in by reset ().  Returns 0
   without searching when reset () says the file is to be skipped.  */
static int
grep (fd, file, stats)
     int fd;
     char const *file;
     struct stats *stats;
{
  int nlines, i;
  int not_text;
  size_t residue, save;
  char *beg, *lim;

  if (!reset (fd, file, stats))
    return 0;

  if (file && directories == RECURSE_DIRECTORIES
      && S_ISDIR (stats->stat.st_mode))
    {
      /* Close fd now, so that we don't open a lot of file descriptors
         when we recurse deeply.  */
#if HAVE_LIBZ > 0
      if (Zflag)
        gzclose(gzbufdesc);
      else
#endif
      if (close (fd) != 0)
        error (file, errno);
      /* grepdir yields 0 (match) or 1 (no match); shifting by 2 gives
         the negative codes promised above.  */
      return grepdir (file, stats) - 2;
    }

  totalcc = 0;
  lastout = 0;
  totalnl = 0;
  pending = 0;

  nlines = 0;
  residue = 0;
  save = 0;

  if (fillbuf (save, stats) < 0)
    {
      if (! (is_EISDIR (errno, file) && suppress_errors))
        error (filename, errno);
      return nlines;
    }

  /* The file counts as binary if its first buffer holds a NUL byte,
     unless text was forced or output is suppressed anyway.  For such a
     file line output is silenced and the scan stops at the first
     match; both settings are undone at finish_grep.  */
  not_text = (! (always_text | out_quiet)
              && memchr (bufbeg, '\0', buflim - bufbeg));
  done_on_match += not_text;
  out_quiet += not_text;

  for (;;)
    {
      lastnl = bufbeg;
      if (lastout)
        lastout = bufbeg;
      /* Nothing beyond the saved text means the last fill read no new
         data.  */
      if (buflim - bufbeg == save)
        break;
      /* Start where the incomplete line left over from the previous
         pass begins.  */
      beg = bufbeg + save - residue;
      /* Back LIM up to just after the last newline; RESIDUE is the
         incomplete line that follows it.  */
      for (lim = buflim; lim > beg && lim[-1] != '\n'; --lim)
        ;
      residue = buflim - lim;
      if (beg < lim)
        {
          nlines += grepbuf (beg, lim);
          if (pending)
            prpending (lim);
          if (nlines && done_on_match && !out_invert)
            goto finish_grep;
        }
      /* Decide how many complete lines before LIM must be kept as
         potential leading context, stopping at text already printed.  */
      i = 0;
      beg = lim;
      while (i < out_before && beg > bufbeg && beg != lastout)
        {
          ++i;
          do
            --beg;
          while (beg > bufbeg && beg[-1] != '\n');
        }
      if (beg != lastout)
        lastout = 0;
      save = residue + lim - beg;
      totalcc += buflim - bufbeg - save;
      if (out_line)
        nlscan (beg);
      if (fillbuf (save, stats) < 0)
        {
          if (! (is_EISDIR (errno, file) && suppress_errors))
            error (filename, errno);
          goto finish_grep;
        }
    }
  /* Search the final line if the input did not end with a newline.  */
  if (residue)
    {
      nlines += grepbuf (bufbeg + save - residue, buflim);
      if (pending)
        prpending (buflim);
    }

 finish_grep:
  done_on_match -= not_text;
  out_quiet -= not_text;
  if ((not_text & ~out_quiet) && nlines != 0)
    printf (_("Binary file %s matches\n"), filename);
  return nlines;
}
770
/* Search FILE, or standard input when FILE is null, and print the
   per-file summaries requested by the count and file-listing options.
   STATS is the stat buffer to use for FILE.  Return 0 if something
   matched and 1 otherwise; a file that cannot be opened also yields 1.
   For a directory searched recursively the status of that search is
   returned.  */
static int
grepfile (file, stats)
     char const *file;
     struct stats *stats;
{
  int desc;
  int count;
  int status;

  if (! file)
    {
      desc = 0;
      filename = _("(standard input)");
    }
  else
    {
      desc = open (file, O_RDONLY);

      if (desc < 0)
        {
          /* Save errno: the calls below may overwrite it.  */
          int e = errno;

          /* Where directories cannot be opened for reading, recurse
             into them without a descriptor.  */
          if (is_EISDIR (e, file) && directories == RECURSE_DIRECTORIES)
            {
              if (stat (file, &stats->stat) != 0)
                {
                  error (file, errno);
                  return 1;
                }

              return grepdir (file, stats);
            }

          if (!suppress_errors)
            {
              if (directories == SKIP_DIRECTORIES)
                switch (e)
                  {
#ifdef EISDIR
                  case EISDIR:
                    return 1;
#endif
                  case EACCES:
                    /* When skipping directories, don't worry about
                       directories that can't be opened.  */
                    if (stat (file, &stats->stat) == 0
                        && S_ISDIR (stats->stat.st_mode))
                      return 1;
                    break;
                  }

              error (file, e);
            }

          return 1;
        }

      filename = file;
    }

#if O_BINARY
  /* Set input to binary mode.  Pipes are simulated with files
     on DOS, so this includes the case of "foo | grep bar".  */
  if (!isatty (desc))
    SET_BINARY (desc);
#endif

  count = grep (desc, file, stats);
  /* A negative count is grep ()'s encoding of a recursive directory
     search (-2 match, -1 none); the descriptor was closed there.  */
  if (count < 0)
    status = count + 2;
  else
    {
      if (count_matches)
        {
          if (out_file)
            printf ("%s:", filename);
          printf ("%d\n", count);
        }

      if (count)
        {
          status = 0;
          if (list_files == 1)
            printf ("%s\n", filename);
        }
      else
        {
          status = 1;
          if (list_files == -1)
            printf ("%s\n", filename);
        }

#if HAVE_LIBZ > 0
      if (Zflag)
        gzclose(gzbufdesc);
      else
#endif
      /* Standard input (null FILE) is left open.  */
      if (file && close (desc) != 0)
        error (file, errno);
    }

  return status;
}
874
875static int
876grepdir (dir, stats)
877     char const *dir;
878     struct stats *stats;
879{
880  int status = 1;
881  struct stats *ancestor;
882  char *name_space;
883
884  for (ancestor = stats;  (ancestor = ancestor->parent) != 0;  )
885    if (! ((ancestor->stat.st_ino ^ stats->stat.st_ino)
886	   | (ancestor->stat.st_dev ^ stats->stat.st_dev)))
887      {
888	if (!suppress_errors)
889	  fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir,
890		   _("recursive directory loop"));
891	return 1;
892      }
893
894  name_space = savedir (dir, (unsigned) stats->stat.st_size);
895
896  if (! name_space)
897    {
898      if (errno)
899	{
900	  if (!suppress_errors)
901	    error (dir, errno);
902	}
903      else
904	fatal (_("Memory exhausted"), 0);
905    }
906  else
907    {
908      size_t dirlen = strlen (dir);
909      int needs_slash = ! (dirlen == FILESYSTEM_PREFIX_LEN (dir)
910			   || IS_SLASH (dir[dirlen - 1]));
911      char *file = NULL;
912      char *namep = name_space;
913      struct stats child;
914      child.parent = stats;
915      out_file += !no_filenames;
916      while (*namep)
917	{
918	  size_t namelen = strlen (namep);
919	  file = xrealloc (file, dirlen + 1 + namelen + 1);
920	  strcpy (file, dir);
921	  file[dirlen] = '/';
922	  strcpy (file + dirlen + needs_slash, namep);
923	  namep += namelen + 1;
924	  status &= grepfile (file, &child);
925	}
926      out_file -= !no_filenames;
927      if (file)
928        free (file);
929      free (name_space);
930    }
931
932  return status;
933}
934
/* Print usage information and exit with STATUS.  A non-zero STATUS
   produces a two-line hint on standard error; a zero STATUS produces
   the full option summary on standard output.  Does not return.  */
static void
usage(status)
int status;
{
  if (status != 0)
    {
      fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), prog);
      fprintf (stderr, _("Try `%s --help' for more information.\n"), prog);
    }
  else
    {
      /* The help text is split over several calls, each with its own
         translatable message.  */
      printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog);
      printf (_("\
Search for PATTERN in each FILE or standard input.\n\
\n\
Regexp selection and interpretation:\n\
  -E, --extended-regexp     PATTERN is an extended regular expression\n\
  -F, --fixed-regexp        PATTERN is a fixed string separated by newlines\n\
  -G, --basic-regexp        PATTERN is a basic regular expression\n\
  -e, --regexp=PATTERN      use PATTERN as a regular expression\n\
  -f, --file=FILE           obtain PATTERN from FILE\n\
  -i, --ignore-case         ignore case distinctions\n\
  -w, --word-regexp         force PATTERN to match only whole words\n\
  -x, --line-regexp         force PATTERN to match only whole lines\n"));
      printf (_("\
\n\
Miscellaneous:\n\
  -s, --no-messages         suppress error messages\n\
  -v, --revert-match        select non-matching lines\n\
  -V, --version             print version information and exit\n\
  -Z, --decompress          decompress input before searching (HAVE_LIBZ=1)\n\
      --help                display this help and exit\n"));
      printf (_("\
\n\
Output control:\n\
  -b, --byte-offset         print the byte offset with output lines\n\
  -n, --line-number         print line number with output lines\n\
  -H, --with-filename       print the filename for each match\n\
  -h, --no-filename         suppress the prefixing filename on output\n\
  -q, --quiet, --silent     suppress all normal output\n\
  -a, --text                do not suppress binary output\n\
  -d, --directories=ACTION  how to handle directories\n\
                            ACTION is 'read', 'recurse', or 'skip'.\n\
  -r, --recursive           equivalent to --directories=recurse.\n\
  -L, --files-without-match only print FILE names containing no match\n\
  -l, --files-with-matches  only print FILE names containing matches\n\
  -c, --count               only print a count of matching lines per FILE\n"));
      printf (_("\
\n\
Context control:\n\
  -B, --before-context=NUM  print NUM lines of leading context\n\
  -A, --after-context=NUM   print NUM lines of trailing context\n\
  -C, --context[=NUM]       print NUM (default 2) lines of output context\n\
                            unless overriden by -A or -B\n\
  -NUM                      same as --context=NUM\n\
  -U, --binary              do not strip CR characters at EOL (MSDOS)\n\
  -u, --unix-byte-offsets   report offsets as if CRs were not there (MSDOS)\n\
\n\
If no -[GEF], then `egrep' assumes -E, `fgrep' -F, else -G.\n\
With no FILE, or when FILE is -, read standard input. If less than\n\
two FILEs given, assume -h. Exit with 0 if matches, with 1 if none.\n\
Exit with 2 if syntax errors or system errors.\n"));
      printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n"));
    }
  exit (status);
}
1001
1002/* Go through the matchers vector and look for the specified matcher.
1003   If we find it, install it in compile and execute, and return 1.  */
1004static int
1005setmatcher (name)
1006     char const *name;
1007{
1008  int i;
1009#ifdef HAVE_SETRLIMIT
1010  struct rlimit rlim;
1011#endif
1012
1013  for (i = 0; matchers[i].name; ++i)
1014    if (strcmp (name, matchers[i].name) == 0)
1015      {
1016	compile = matchers[i].compile;
1017	execute = matchers[i].execute;
1018#if HAVE_SETRLIMIT && defined(RLIMIT_STACK)
1019	/* I think every platform needs to do this, so that regex.c
1020	   doesn't oveflow the stack.  The default value of
1021	   `re_max_failures' is too large for some platforms: it needs
1022	   more than 3MB-large stack.
1023
1024	   The test for HAVE_SETRLIMIT should go into `configure'.  */
1025	if (!getrlimit (RLIMIT_STACK, &rlim))
1026	  {
1027	    long newlim;
1028	    extern long int re_max_failures; /* from regex.c */
1029
1030	    /* Approximate the amount regex.c needs, plus some more.  */
1031	    newlim = re_max_failures * 2 * 20 * sizeof (char *);
1032	    if (newlim > rlim.rlim_max)
1033	      {
1034		newlim = rlim.rlim_max;
1035		re_max_failures = newlim / (2 * 20 * sizeof (char *));
1036	      }
1037	    if (rlim.rlim_cur < newlim)
1038	      rlim.rlim_cur = newlim;
1039
1040	    setrlimit (RLIMIT_STACK, &rlim);
1041	  }
1042#endif
1043	return 1;
1044      }
1045  return 0;
1046}
1047
/* Split OPTIONS into its white-space-separated words, copying each
   word, NUL-terminated, into BUF.  If ARGV is non-null, store pointers
   to the copies in ARGV[0], ARGV[1], ...; ARGV[N] is not set.  Return
   the number N of words.  A backslash makes the character after it
   (white space or another backslash included) an ordinary part of the
   word; a backslash that ends OPTIONS is kept as is.  */
static int
prepend_args (options, buf, argv)
     char const *options;
     char *buf;
     char **argv;
{
  char const *src = options;
  char *dst = buf;
  int count = 0;

  while (1)
    {
      /* Skip the separators in front of the next word.  */
      while (ISSPACE ((unsigned char) *src))
        src++;
      if (*src == '\0')
        break;

      if (argv != NULL)
        argv[count] = dst;
      count++;

      /* Copy one word; its first character is known not to be white
         space or NUL.  */
      do
        {
          char c = *src++;

          if (c == '\\' && *src != '\0')
            c = *src++;
          *dst++ = c;
        }
      while (*src != '\0' && ! ISSPACE ((unsigned char) *src));

      *dst++ = '\0';
    }

  return count;
}
1081
/* Insert the white-space-separated words of OPTIONS right after the
   program name in the argument vector *PARGV, updating the argument
   count *PARGC to match.  Nothing happens when OPTIONS is null.  The
   new vector and the word copies are allocated here and handed over to
   the caller through *PARGV.  */
static void
prepend_default_options (options, pargc, pargv)
     char const *options;
     int *pargc;
     char ***pargv;
{
  char *buf;
  int prepended;
  int argc;
  char * const *argv;
  char **pp;

  if (options == NULL)
    return;

  /* First pass only counts the words so the vector can be sized.  */
  buf = xmalloc (strlen (options) + 1);
  prepended = prepend_args (options, buf, (char **) NULL);
  argc = *pargc;
  argv = *pargv;
  pp = (char **) xmalloc ((prepended + argc + 1) * sizeof *pp);

  *pargc = prepended + argc;
  *pargv = pp;

  /* Program name, then the new words, then the original arguments up
     to and including the terminating null pointer.  */
  *pp++ = *argv++;
  pp += prepend_args (options, buf, pp);
  for (;;)
    {
      char *arg = *argv++;

      *pp++ = arg;
      if (arg == NULL)
        break;
    }
}
1106
1107int
1108main (argc, argv)
1109     int argc;
1110     char *argv[];
1111{
1112  char *keys;
1113  size_t keycc, oldcc, keyalloc;
1114  int with_filenames;
1115  int opt, cc, status;
1116  unsigned digit_args_val, default_context;
1117  FILE *fp;
1118  extern char *optarg;
1119  extern int optind;
1120
1121  initialize_main (&argc, &argv);
1122  prog = argv[0];
1123  if (prog && strrchr (prog, '/'))
1124    prog = strrchr (prog, '/') + 1;
1125
1126#if HAVE_LIBZ > 0
1127  if (prog[0] == 'z') {
1128    Zflag = 1;
1129    ++prog;
1130  }
1131#endif
1132
1133#if defined(__MSDOS__) || defined(_WIN32)
1134  /* DOS and MS-Windows use backslashes as directory separators, and usually
1135     have an .exe suffix.  They also have case-insensitive filesystems.  */
1136  if (prog)
1137    {
1138      char *p = prog;
1139      char *bslash = strrchr (argv[0], '\\');
1140
1141      if (bslash && bslash >= prog) /* for mixed forward/backslash case */
1142	prog = bslash + 1;
1143      else if (prog == argv[0]
1144	       && argv[0][0] && argv[0][1] == ':') /* "c:progname" */
1145	prog = argv[0] + 2;
1146
1147      /* Collapse the letter-case, so `strcmp' could be used hence.  */
1148      for ( ; *p; p++)
1149	if (*p >= 'A' && *p <= 'Z')
1150	  *p += 'a' - 'A';
1151
1152      /* Remove the .exe extension, if any.  */
1153      if ((p = strrchr (prog, '.')) && strcmp (p, ".exe") == 0)
1154	*p = '\0';
1155    }
1156#endif
1157
1158  keys = NULL;
1159  keycc = 0;
1160  with_filenames = 0;
1161  matcher = NULL;
1162
1163  /* The value -1 means to use DEFAULT_CONTEXT. */
1164  out_after = out_before = -1;
1165  /* Default before/after context: chaged by -C/-NUM options */
1166  default_context = 0;
1167  /* Accumulated value of individual digits in a -NUM option */
1168  digit_args_val = 0;
1169
1170
1171/* Internationalization. */
1172#if HAVE_SETLOCALE
1173  setlocale (LC_ALL, "");
1174#endif
1175#if ENABLE_NLS
1176  bindtextdomain (PACKAGE, LOCALEDIR);
1177  textdomain (PACKAGE);
1178#endif
1179
1180  prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
1181
1182  while ((opt = getopt_long (argc, argv,
1183#if O_BINARY
1184         "0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnqrsvwxyUu",
1185#elif HAVE_LIBZ > 0
1186         "0123456789A:B:C::EFGHRVX:Zabcd:e:f:hiLlnqrsvwxy",
1187#else
1188         "0123456789A:B:C::EFGHRVX:abcd:e:f:hiLlnqrsvwxy",
1189#endif
1190         long_options, NULL)) != EOF)
1191    switch (opt)
1192      {
1193      case '0':
1194      case '1':
1195      case '2':
1196      case '3':
1197      case '4':
1198      case '5':
1199      case '6':
1200      case '7':
1201      case '8':
1202      case '9':
1203	digit_args_val = 10 * digit_args_val + opt - '0';
1204	default_context = digit_args_val;
1205	break;
1206      case 'A':
1207	if (optarg)
1208	  {
1209	    if (ck_atoi (optarg, &out_after))
1210	      fatal (_("invalid context length argument"), 0);
1211	  }
1212	break;
1213      case 'B':
1214	if (optarg)
1215	  {
1216	    if (ck_atoi (optarg, &out_before))
1217	      fatal (_("invalid context length argument"), 0);
1218	  }
1219	break;
1220      case 'C':
1221	/* Set output match context, but let any explicit leading or
1222	   trailing amount specified with -A or -B stand. */
1223	if (optarg)
1224	  {
1225	    if (ck_atoi (optarg, &default_context))
1226	      fatal (_("invalid context length argument"), 0);
1227	  }
1228	else
1229	  default_context = 2;
1230	break;
1231      case 'E':
1232	if (matcher && strcmp (matcher, "posix-egrep") != 0)
1233	  fatal (_("you may specify only one of -E, -F, or -G"), 0);
1234	matcher = "posix-egrep";
1235	break;
1236      case 'F':
1237	if (matcher && strcmp(matcher, "fgrep") != 0)
1238	  fatal(_("you may specify only one of -E, -F, or -G"), 0);;
1239	matcher = "fgrep";
1240	break;
1241      case 'G':
1242	if (matcher && strcmp (matcher, "grep") != 0)
1243	  fatal (_("you may specify only one of -E, -F, or -G"), 0);
1244	matcher = "grep";
1245	break;
1246      case 'H':
1247	with_filenames = 1;
1248	break;
1249#if O_BINARY
1250      case 'U':
1251	dos_use_file_type = DOS_BINARY;
1252	break;
1253      case 'u':
1254	dos_report_unix_offset = 1;
1255	break;
1256#endif
1257      case 'V':
1258	show_version = 1;
1259	break;
1260      case 'X':
1261	if (matcher)
1262	  fatal (_("matcher already specified"), 0);
1263	matcher = optarg;
1264	break;
1265#if HAVE_LIBZ > 0
1266      case 'Z':
1267	Zflag = 1;
1268	break;
1269#endif
1270      case 'a':
1271	always_text = 1;
1272	break;
1273      case 'b':
1274	out_byte = 1;
1275	break;
1276      case 'c':
1277	out_quiet = 1;
1278	count_matches = 1;
1279	break;
1280      case 'd':
1281	if (strcmp (optarg, "read") == 0)
1282	  directories = READ_DIRECTORIES;
1283	else if (strcmp (optarg, "skip") == 0)
1284	  directories = SKIP_DIRECTORIES;
1285	else if (strcmp (optarg, "recurse") == 0)
1286	  directories = RECURSE_DIRECTORIES;
1287	else
1288	  fatal (_("unknown directories method"), 0);
1289	break;
1290      case 'e':
1291	cc = strlen (optarg);
1292	keys = xrealloc (keys, keycc + cc + 1);
1293	strcpy (&keys[keycc], optarg);
1294	keycc += cc;
1295	keys[keycc++] = '\n';
1296	break;
1297      case 'f':
1298	fp = strcmp (optarg, "-") != 0 ? fopen (optarg, "r") : stdin;
1299	if (!fp)
1300	  fatal (optarg, errno);
1301	for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
1302	  ;
1303	keys = xrealloc (keys, keyalloc);
1304	oldcc = keycc;
1305	while (!feof (fp)
1306	       && (cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) > 0)
1307	  {
1308	    keycc += cc;
1309	    if (keycc == keyalloc - 1)
1310	      keys = xrealloc (keys, keyalloc *= 2);
1311	  }
1312	if (fp != stdin)
1313	  fclose(fp);
1314	/* Append final newline if file ended in non-newline. */
1315	if (oldcc != keycc && keys[keycc - 1] != '\n')
1316	  keys[keycc++] = '\n';
1317	break;
1318      case 'h':
1319	no_filenames = 1;
1320	break;
1321      case 'i':
1322      case 'y':			/* For old-timers . . . */
1323	match_icase = 1;
1324	break;
1325      case 'L':
1326	/* Like -l, except list files that don't contain matches.
1327	   Inspired by the same option in Hume's gre. */
1328	out_quiet = 1;
1329	list_files = -1;
1330	done_on_match = 1;
1331	break;
1332      case 'l':
1333	out_quiet = 1;
1334	list_files = 1;
1335	done_on_match = 1;
1336	break;
1337      case 'n':
1338	out_line = 1;
1339	break;
1340      case 'q':
1341	done_on_match = 1;
1342	out_quiet = 1;
1343	break;
1344      case 'R':
1345      case 'r':
1346	directories = RECURSE_DIRECTORIES;
1347	break;
1348      case 's':
1349	suppress_errors = 1;
1350	break;
1351      case 'v':
1352	out_invert = 1;
1353	break;
1354      case 'w':
1355	match_words = 1;
1356	break;
1357      case 'x':
1358	match_lines = 1;
1359	break;
1360      case 0:
1361	/* long options */
1362	break;
1363      default:
1364	usage (2);
1365	break;
1366      }
1367
1368  if (out_after < 0)
1369    out_after = default_context;
1370  if (out_before < 0)
1371    out_before = default_context;
1372
1373  if (show_version)
1374    {
1375      printf (_("grep (GNU grep) %s\n"), VERSION);
1376      printf ("\n");
1377      printf (_("\
1378Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n"));
1379      printf (_("\
1380This is free software; see the source for copying conditions. There is NO\n\
1381warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"));
1382      printf ("\n");
1383      exit (0);
1384    }
1385
1386  if (show_help)
1387    usage (0);
1388
1389  if (keys)
1390    {
1391      if (keycc == 0)
1392	/* No keys were specified (e.g. -f /dev/null).  Match nothing.  */
1393        out_invert ^= 1;
1394      else
1395	/* Strip trailing newline. */
1396        --keycc;
1397    }
1398  else
1399    if (optind < argc)
1400      {
1401	keys = argv[optind++];
1402	keycc = strlen (keys);
1403      }
1404    else
1405      usage (2);
1406
1407  if (! matcher)
1408    matcher = prog;
1409
1410  if (!setmatcher (matcher) && !setmatcher ("default"))
1411    abort ();
1412
1413  (*compile)(keys, keycc);
1414
1415  if ((argc - optind > 1 && !no_filenames) || with_filenames)
1416    out_file = 1;
1417
1418#if O_BINARY
1419  /* Output is set to binary mode because we shouldn't convert
1420     NL to CR-LF pairs, especially when grepping binary files.  */
1421  if (!isatty (1))
1422    SET_BINARY (1);
1423#endif
1424
1425
1426  if (optind < argc)
1427    {
1428	status = 1;
1429	do
1430	{
1431	  char *file = argv[optind];
1432	  status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file,
1433			      &stats_base);
1434	}
1435	while ( ++optind < argc);
1436    }
1437  else
1438    status = grepfile ((char *) NULL, &stats_base);
1439
1440  if (fclose (stdout) == EOF)
1441    error (_("writing output"), errno);
1442
1443  exit (errseen ? 2 : status);
1444}
1445