1/*************************************************
2*               pcregrep program                 *
3*************************************************/
4
5/* This is a grep program that uses the PCRE regular expression library to do
6its pattern matching. On a Unix or Win32 system it can recurse into
7directories.
8
9           Copyright (c) 1997-2009 University of Cambridge
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15    * Redistributions of source code must retain the above copyright notice,
16      this list of conditions and the following disclaimer.
17
18    * Redistributions in binary form must reproduce the above copyright
19      notice, this list of conditions and the following disclaimer in the
20      documentation and/or other materials provided with the distribution.
21
22    * Neither the name of the University of Cambridge nor the names of its
23      contributors may be used to endorse or promote products derived from
24      this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40#ifdef HAVE_CONFIG_H
41#include "config.h"
42#endif
43
44#include <ctype.h>
45#include <locale.h>
46#include <stdio.h>
47#include <string.h>
48#include <stdlib.h>
49#include <errno.h>
50
51#include <sys/types.h>
52#include <sys/stat.h>
53
54#ifdef HAVE_UNISTD_H
55#include <unistd.h>
56#endif
57
58#ifdef SUPPORT_LIBZ
59#include <zlib.h>
60#endif
61
62#ifdef SUPPORT_LIBBZ2
63#include <bzlib.h>
64#endif
65
66#include "pcre.h"
67
/* Local boolean type and its two truth values. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* OFFSET_SIZE is the number of ints in the offsets vector that is handed to
pcre_exec(). MAX_PATTERN_COUNT is presumably the limit on the number of
patterns; the code that uses it is not in this part of the file - TODO
confirm. */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* MBUFTHIRD is one third of the size of the main file buffer, which is
declared as 3*MBUFTHIRD bytes. It is BUFSIZ when that is bigger than 8192, and
8192 otherwise. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles. FR_LIBZ and FR_LIBBZ2 select reading via gzread() and
BZ2_bzread() respectively; anything else is read with fread(). */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options. The dee_xxx values are held in dee_action
and the DEE_xxx values in DEE_action. */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits). The combined value is
used as an index into the prefix[] and suffix[] tables. */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types. The current one is held in endlinetype and selects the
rules used by end_of_line() and previous_line(). */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106
107
108
109/*************************************************
110*               Global variables                 *
111*************************************************/
112
113/* Jeffrey Friedl has some debugging requirements that are not part of the
114regular code. */
115
#ifdef JFRIEDL_DEBUG
static int S_arg = -1;              /* data for the -S (--jeffS) option */
static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
static unsigned int jfriedl_XT = 0; /* replicate text this many times */
static const char *jfriedl_prefix = "";
static const char *jfriedl_postfix = "";
#endif

/* One of the EL_xxx values; it selects the line-ending rules that are used by
end_of_line() and previous_line(). */

static int  endlinetype;

/* String-valued settings. All of these except colour_string are referenced
from optionlist[] through its dataptr field. colour_string and stdin_name start
out with built-in default values. */

static char *colour_string = (char *)"1;31";
static char *colour_option = NULL;
static char *dee_option = NULL;
static char *DEE_option = NULL;
static char *newline = NULL;
static char *pattern_filename = NULL;
static char *stdin_name = (char *)"(standard input)";
static char *locale = NULL;

/* NOTE(review): presumably the PCRE character tables for the chosen locale;
not used in the visible part of the file - confirm. */

static const unsigned char *pcretables = NULL;

/* The patterns to be matched. match_patterns() passes pattern_list[i] and
hints_list[i] to pcre_exec() for each i less than pattern_count. */

static int  pattern_count = 0;
static pcre **pattern_list = NULL;
static pcre_extra **hints_list = NULL;

/* Pattern strings for the --include, --exclude, --include_dir, and
--exclude_dir options (see optionlist[]). */

static char *include_pattern = NULL;
static char *exclude_pattern = NULL;
static char *include_dir_pattern = NULL;
static char *exclude_dir_pattern = NULL;

/* NOTE(review): presumably the compiled forms of the four patterns above; the
code that sets them is not in this part of the file - confirm. */

static pcre *include_compiled = NULL;
static pcre *exclude_compiled = NULL;
static pcre *include_dir_compiled = NULL;
static pcre *exclude_dir_compiled = NULL;

/* Numeric settings. The three context values are the data for the -A, -B, and
-C options. dee_action and DEE_action hold dee_xxx and DEE_xxx values,
filenames holds an FN_xxx value, and process_options holds PO_xxx flag bits.
error_count is incremented by match_patterns() for each pcre_exec() error that
it reports. */

static int after_context = 0;
static int before_context = 0;
static int both_context = 0;
static int dee_action = dee_READ;
static int DEE_action = DEE_READ;
static int error_count = 0;
static int filenames = FN_DEFAULT;
static int process_options = 0;

/* Boolean settings and state. hyphenpending is set by do_after_lines() once
it has dealt with "after" lines; utf8 makes the line-end scanning code decode
UTF-8 characters. */

static BOOL count_only = FALSE;
static BOOL do_colour = FALSE;
static BOOL file_offsets = FALSE;
static BOOL hyphenpending = FALSE;
static BOOL invert = FALSE;
static BOOL line_offsets = FALSE;
static BOOL multiline = FALSE;
static BOOL number = FALSE;
static BOOL omit_zero_count = FALSE;
static BOOL only_matching = FALSE;
static BOOL quiet = FALSE;
static BOOL silent = FALSE;
static BOOL utf8 = FALSE;
173
174/* Structure for options and list of them */
175
/* The kinds of option. OP_NODATA options have no data and a NULL dataptr in
optionlist[]. NOTE(review): the OP_OP_xxx kinds presumably take optional data;
the decoding code is not in this part of the file - confirm. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
       OP_PATLIST };

/* One entry in the table of options. */

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* option letter, or a negative N_xxx code */
  void *dataptr;           /* variable associated with the option, or NULL */
  const char *long_name;   /* long option name, as shown in optionlist[] */
  const char *help_text;   /* one-line description of the option */
} option_item;
186
187/* Options without a single-letter equivalent get a negative value. This can be
188used to identify them. */
189
#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)

/* The table of options. Each entry gives the option type, the option letter
(or a negative N_xxx code), the variable associated with the option (NULL if
there is none in this table), the long name, and the help text. The table is
terminated by an entry whose one_char field is zero and whose name fields are
NULL. Both spellings of --colo(u)r share N_COLOUR and colour_option. */

static option_item optionlist[] = {
  { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
  { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
  { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
  { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
  { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
  { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
  { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
  { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
  { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
  { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
  { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
  { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
  { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
  { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
  { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
  { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
  { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
  { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
  { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
  { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
  { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
  { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
  { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
  { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
  { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
  { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
  { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
  { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
  { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
  { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
  { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
  { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
#ifdef JFRIEDL_DEBUG
  { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
#endif
  { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
  { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
  { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
  { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
  { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
  { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
  { OP_NODATA,    0,        NULL,               NULL,            NULL }
};
246
247/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
248options. These set the 1, 2, and 4 bits in process_options, respectively. Note
249that the combination of -w and -x has the same effect as -x on its own, so we
250can treat them as the same. */
251
/* Each table has eight entries, one for every combination of the three PO_xxx
flag bits: index 0 is no option, 1 is -w, 2 is -x, 3 is -w with -x, and indexes
4 to 7 are the same four combinations with -F added (which wraps the pattern in
\Q...\E). The entries at the same index in the two tables belong together. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
257
258/* UTF-8 tables - used only when the newline setting is "any". */
259
/* Masks that are applied to the first byte of a UTF-8 character to extract
its data bits. The index is the number of additional bytes in the character. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* The number of additional bytes that follow the first byte of a multi-byte
UTF-8 character. The index is the bottom six bits of the first byte, which must
be 0xc0 or greater. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
267
268
269
270/*************************************************
271*            OS-specific functions               *
272*************************************************/
273
274/* These functions are defined so that they can be made system specific,
275although at present the only ones are for Unix, Win32, and for "no support". */
276
277
278/************* Directory scanning in Unix ***********/
279
280#if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
281#include <sys/types.h>
282#include <sys/stat.h>
283#include <dirent.h>
284
285typedef DIR directory_type;
286
/* Test whether a path names a directory.

Argument:   filename   the path to test

Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */

/* S_ISDIR() is the POSIX way to test the file type. The S_IFMT and S_IFDIR
masks are an XSI extension that is not always visible, so they are used only
when the macro is missing. */

#ifdef S_ISDIR
return S_ISDIR(statbuf.st_mode)? '/' : 0;
#else
return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
#endif
}
295
296static directory_type *
297opendirectory(char *filename)
298{
299return opendir(filename);
300}
301
302static char *
303readdirectory(directory_type *dir)
304{
305for (;;)
306  {
307  struct dirent *dent = readdir(dir);
308  if (dent == NULL) return NULL;
309  if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
310    return dent->d_name;
311  }
312/* Control never reaches here */
313}
314
315static void
316closedirectory(directory_type *dir)
317{
318closedir(dir);
319}
320
321
322/************* Test for regular file in Unix **********/
323
/* Test whether a path names a regular file.

Argument:   filename   the path to test

Returns:    1 if stat() reports a regular file, or if stat() fails
            0 if stat() reports any other type of file
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */

/* S_ISREG() is the POSIX way to test the file type. The S_IFMT and S_IFREG
masks are an XSI extension that is not always visible, so they are used only
when the macro is missing. */

#ifdef S_ISREG
return S_ISREG(statbuf.st_mode) != 0;
#else
return (statbuf.st_mode & S_IFMT) == S_IFREG;
#endif
}
332
333
334/************* Test stdout for being a terminal in Unix **********/
335
336static BOOL
337is_stdout_tty(void)
338{
339return isatty(fileno(stdout));
340}
341
342
343/************* Directory scanning in Win32 ***********/
344
345/* I (Philip Hazel) have no means of testing this code. It was contributed by
346Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
347when it did not exist. David Byron added a patch that moved the #include of
348<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
349*/
350
351#elif HAVE_WINDOWS_H
352
353#ifndef STRICT
354# define STRICT
355#endif
356#ifndef WIN32_LEAN_AND_MEAN
357# define WIN32_LEAN_AND_MEAN
358#endif
359
360#include <windows.h>
361
362#ifndef INVALID_FILE_ATTRIBUTES
363#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
364#endif
365
/* The state of one directory scan. It is allocated by opendirectory() and
freed by closedirectory(). */

typedef struct directory_type
{
HANDLE handle;           /* search handle obtained from FindFirstFile() */
BOOL first;              /* TRUE until readdirectory() has dealt with the
                            entry that FindFirstFile() found */
WIN32_FIND_DATA data;    /* the most recently found entry */
} directory_type;
372
373int
374isdirectory(char *filename)
375{
376DWORD attr = GetFileAttributes(filename);
377if (attr == INVALID_FILE_ATTRIBUTES)
378  return 0;
379return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
380}
381
382directory_type *
383opendirectory(char *filename)
384{
385size_t len;
386char *pattern;
387directory_type *dir;
388DWORD err;
389len = strlen(filename);
390pattern = (char *) malloc(len + 3);
391dir = (directory_type *) malloc(sizeof(*dir));
392if ((pattern == NULL) || (dir == NULL))
393  {
394  fprintf(stderr, "pcregrep: malloc failed\n");
395  exit(2);
396  }
397memcpy(pattern, filename, len);
398memcpy(&(pattern[len]), "\\*", 3);
399dir->handle = FindFirstFile(pattern, &(dir->data));
400if (dir->handle != INVALID_HANDLE_VALUE)
401  {
402  free(pattern);
403  dir->first = TRUE;
404  return dir;
405  }
406err = GetLastError();
407free(pattern);
408free(dir);
409errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
410return NULL;
411}
412
413char *
414readdirectory(directory_type *dir)
415{
416for (;;)
417  {
418  if (!dir->first)
419    {
420    if (!FindNextFile(dir->handle, &(dir->data)))
421      return NULL;
422    }
423  else
424    {
425    dir->first = FALSE;
426    }
427  if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
428    return dir->data.cFileName;
429  }
430#ifndef _MSC_VER
431return NULL;   /* Keep compiler happy; never executed */
432#endif
433}
434
435void
436closedirectory(directory_type *dir)
437{
438FindClose(dir->handle);
439free(dir);
440}
441
442
443/************* Test for regular file in Win32 **********/
444
445/* I don't know how to do this, or if it can be done; assume all paths are
446regular if they are not directories. */
447
int
isregfile(char *filename)
{
/* Anything that isdirectory() does not recognize counts as regular */
if (isdirectory(filename)) return 0;
return 1;
}
452
453
454/************* Test stdout for being a terminal in Win32 **********/
455
456/* I don't know how to do this; assume never */
457
458static BOOL
459is_stdout_tty(void)
460{
461return FALSE;
462}
463
464
465/************* Directory scanning when we can't do it ***********/
466
467/* The type is void, and apart from isdirectory(), the functions do nothing. */
468
469#else
470
471typedef void directory_type;
472
473int isdirectory(char *filename) { return 0; }
474directory_type * opendirectory(char *filename) { return (directory_type*)0;}
475char *readdirectory(directory_type *dir) { return (char*)0;}
476void closedirectory(directory_type *dir) {}
477
478
479/************* Test for regular when we can't do it **********/
480
481/* Assume all files are regular. */
482
483int isregfile(char *filename) { return 1; }
484
485
486/************* Test stdout for being a terminal when we can't do it **********/
487
488static BOOL
489is_stdout_tty(void)
490{
491return FALSE;
492}
493
494
495#endif
496
497
498
499#ifndef HAVE_STRERROR
500/*************************************************
501*     Provide strerror() for non-ANSI libraries  *
502*************************************************/
503
504/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
505in their libraries, but can provide the same facility by this simple
506alternative function. */
507
extern int   sys_nerr;
extern char *sys_errlist[];

/* Look up the text for an error number in the system's table. A fixed text is
returned for any number that is outside the range 0 to sys_nerr - 1. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
517#endif /* HAVE_STRERROR */
518
519
520
521/*************************************************
522*             Find end of line                   *
523*************************************************/
524
525/* The length of the endline sequence that is found is set via lenptr. This may
526be zero at the very end of the file if there is no line-ending sequence there.
527
528Arguments:
529  p         current position in line
530  endptr    end of available data
531  lenptr    where to put the length of the eol sequence
532
533Returns:    pointer to the last byte of the line
534*/
535
536static char *
537end_of_line(char *p, char *endptr, int *lenptr)
538{
539switch(endlinetype)
540  {
541  default:      /* Just in case */
542  case EL_LF:
543  while (p < endptr && *p != '\n') p++;
544  if (p < endptr)
545    {
546    *lenptr = 1;
547    return p + 1;
548    }
549  *lenptr = 0;
550  return endptr;
551
552  case EL_CR:
553  while (p < endptr && *p != '\r') p++;
554  if (p < endptr)
555    {
556    *lenptr = 1;
557    return p + 1;
558    }
559  *lenptr = 0;
560  return endptr;
561
562  case EL_CRLF:
563  for (;;)
564    {
565    while (p < endptr && *p != '\r') p++;
566    if (++p >= endptr)
567      {
568      *lenptr = 0;
569      return endptr;
570      }
571    if (*p == '\n')
572      {
573      *lenptr = 2;
574      return p + 1;
575      }
576    }
577  break;
578
579  case EL_ANYCRLF:
580  while (p < endptr)
581    {
582    int extra = 0;
583    register int c = *((unsigned char *)p);
584
585    if (utf8 && c >= 0xc0)
586      {
587      int gcii, gcss;
588      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
589      gcss = 6*extra;
590      c = (c & utf8_table3[extra]) << gcss;
591      for (gcii = 1; gcii <= extra; gcii++)
592        {
593        gcss -= 6;
594        c |= (p[gcii] & 0x3f) << gcss;
595        }
596      }
597
598    p += 1 + extra;
599
600    switch (c)
601      {
602      case 0x0a:    /* LF */
603      *lenptr = 1;
604      return p;
605
606      case 0x0d:    /* CR */
607      if (p < endptr && *p == 0x0a)
608        {
609        *lenptr = 2;
610        p++;
611        }
612      else *lenptr = 1;
613      return p;
614
615      default:
616      break;
617      }
618    }   /* End of loop for ANYCRLF case */
619
620  *lenptr = 0;  /* Must have hit the end */
621  return endptr;
622
623  case EL_ANY:
624  while (p < endptr)
625    {
626    int extra = 0;
627    register int c = *((unsigned char *)p);
628
629    if (utf8 && c >= 0xc0)
630      {
631      int gcii, gcss;
632      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
633      gcss = 6*extra;
634      c = (c & utf8_table3[extra]) << gcss;
635      for (gcii = 1; gcii <= extra; gcii++)
636        {
637        gcss -= 6;
638        c |= (p[gcii] & 0x3f) << gcss;
639        }
640      }
641
642    p += 1 + extra;
643
644    switch (c)
645      {
646      case 0x0a:    /* LF */
647      case 0x0b:    /* VT */
648      case 0x0c:    /* FF */
649      *lenptr = 1;
650      return p;
651
652      case 0x0d:    /* CR */
653      if (p < endptr && *p == 0x0a)
654        {
655        *lenptr = 2;
656        p++;
657        }
658      else *lenptr = 1;
659      return p;
660
661      case 0x85:    /* NEL */
662      *lenptr = utf8? 2 : 1;
663      return p;
664
665      case 0x2028:  /* LS */
666      case 0x2029:  /* PS */
667      *lenptr = 3;
668      return p;
669
670      default:
671      break;
672      }
673    }   /* End of loop for ANY case */
674
675  *lenptr = 0;  /* Must have hit the end */
676  return endptr;
677  }     /* End of overall switch */
678}
679
680
681
682/*************************************************
683*         Find start of previous line            *
684*************************************************/
685
686/* This is called when looking back for before lines to print.
687
688Arguments:
689  p         start of the subsequent line
690  startptr  start of available data
691
692Returns:    pointer to the start of the previous line
693*/
694
695static char *
696previous_line(char *p, char *startptr)
697{
698switch(endlinetype)
699  {
700  default:      /* Just in case */
701  case EL_LF:
702  p--;
703  while (p > startptr && p[-1] != '\n') p--;
704  return p;
705
706  case EL_CR:
707  p--;
708  while (p > startptr && p[-1] != '\n') p--;
709  return p;
710
711  case EL_CRLF:
712  for (;;)
713    {
714    p -= 2;
715    while (p > startptr && p[-1] != '\n') p--;
716    if (p <= startptr + 1 || p[-2] == '\r') return p;
717    }
718  return p;   /* But control should never get here */
719
720  case EL_ANY:
721  case EL_ANYCRLF:
722  if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
723  if (utf8) while ((*p & 0xc0) == 0x80) p--;
724
725  while (p > startptr)
726    {
727    register int c;
728    char *pp = p - 1;
729
730    if (utf8)
731      {
732      int extra = 0;
733      while ((*pp & 0xc0) == 0x80) pp--;
734      c = *((unsigned char *)pp);
735      if (c >= 0xc0)
736        {
737        int gcii, gcss;
738        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
739        gcss = 6*extra;
740        c = (c & utf8_table3[extra]) << gcss;
741        for (gcii = 1; gcii <= extra; gcii++)
742          {
743          gcss -= 6;
744          c |= (pp[gcii] & 0x3f) << gcss;
745          }
746        }
747      }
748    else c = *((unsigned char *)pp);
749
750    if (endlinetype == EL_ANYCRLF) switch (c)
751      {
752      case 0x0a:    /* LF */
753      case 0x0d:    /* CR */
754      return p;
755
756      default:
757      break;
758      }
759
760    else switch (c)
761      {
762      case 0x0a:    /* LF */
763      case 0x0b:    /* VT */
764      case 0x0c:    /* FF */
765      case 0x0d:    /* CR */
766      case 0x85:    /* NEL */
767      case 0x2028:  /* LS */
768      case 0x2029:  /* PS */
769      return p;
770
771      default:
772      break;
773      }
774
775    p = pp;  /* Back one character */
776    }        /* End of loop for ANY case */
777
778  return startptr;  /* Hit start of data */
779  }     /* End of overall switch */
780}
781
782
783
784
785
786/*************************************************
787*       Print the previous "after" lines         *
788*************************************************/
789
790/* This is called if we are about to lose said lines because of buffer filling,
791and at the end of the file. The data in the line is written using fwrite() so
792that a binary zero does not terminate it.
793
794Arguments:
795  lastmatchnumber   the number of the last matching line, plus one
796  lastmatchrestart  where we restarted after the last match
797  endptr            end of available data
798  printname         filename for printing
799
800Returns:            nothing
801*/
802
803static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
804  char *endptr, char *printname)
805{
806if (after_context > 0 && lastmatchnumber > 0)
807  {
808  int count = 0;
809  while (lastmatchrestart < endptr && count++ < after_context)
810    {
811    int ellength;
812    char *pp = lastmatchrestart;
813    if (printname != NULL) fprintf(stdout, "%s-", printname);
814    if (number) fprintf(stdout, "%d-", lastmatchnumber++);
815    pp = end_of_line(pp, endptr, &ellength);
816    fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
817    lastmatchrestart = pp;
818    }
819  hyphenpending = TRUE;
820  }
821}
822
823
824
825/*************************************************
826*   Apply patterns to subject till one matches   *
827*************************************************/
828
829/* This function is called to run through all patterns, looking for a match. It
830is used multiple times for the same subject when colouring is enabled, in order
831to find all possible matches.
832
833Arguments:
834  matchptr    the start of the subject
835  length      the length of the subject to match
836  offsets     the offets vector to fill in
837  mrc         address of where to put the result of pcre_exec()
838
839Returns:      TRUE if there was a match
840              FALSE if there was no match
841              invert if there was a non-fatal error
842*/
843
844static BOOL
845match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
846{
847int i;
848for (i = 0; i < pattern_count; i++)
849  {
850  *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
851    PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
852  if (*mrc >= 0) return TRUE;
853  if (*mrc == PCRE_ERROR_NOMATCH) continue;
854  fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
855  if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
856  fprintf(stderr, "this text:\n");
857  fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
858  fprintf(stderr, "\n");
859  if (error_count == 0 &&
860      (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
861    {
862    fprintf(stderr, "pcregrep: error %d means that a resource limit "
863      "was exceeded\n", *mrc);
864    fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
865    }
866  if (error_count++ > 20)
867    {
868    fprintf(stderr, "pcregrep: too many errors - abandoned\n");
869    exit(2);
870    }
871  return invert;    /* No more matching; don't show the line again */
872  }
873
874return FALSE;  /* No match, no errors */
875}
876
877
878
879/*************************************************
880*            Grep an individual file             *
881*************************************************/
882
883/* This is called from grep_or_recurse() below. It uses a buffer that is three
884times the value of MBUFTHIRD. The matching point is never allowed to stray into
885the top third of the buffer, thus keeping more of the file available for
886context printing or for multiline scanning. For large files, the pointer will
887be in the middle third most of the time, so the bottom third is available for
888"before" context printing.
889
890Arguments:
891  handle       the fopened FILE stream for a normal file
892               the gzFile pointer when reading is via libz
893               the BZFILE pointer when reading is via libbz2
894  frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
895  printname    the file name if it is to be printed for each match
896               or NULL if the file name is not to be printed
897               it cannot be NULL if filenames[_nomatch]_only is set
898
899Returns:       0 if there was at least one match
900               1 otherwise (no matches)
901               2 if there is a read error on a .bz2 file
902*/
903
904static int
905pcregrep(void *handle, int frtype, char *printname)
906{
907int rc = 1;
908int linenumber = 1;
909int lastmatchnumber = 0;
910int count = 0;
911int filepos = 0;
912int offsets[OFFSET_SIZE];
913char *lastmatchrestart = NULL;
914char buffer[3*MBUFTHIRD];
915char *ptr = buffer;
916char *endptr;
917size_t bufflength;
918BOOL endhyphenpending = FALSE;
919FILE *in = NULL;                    /* Ensure initialized */
920
921#ifdef SUPPORT_LIBZ
922gzFile ingz = NULL;
923#endif
924
925#ifdef SUPPORT_LIBBZ2
926BZFILE *inbz2 = NULL;
927#endif
928
929
930/* Do the first read into the start of the buffer and set up the pointer to end
931of what we have. In the case of libz, a non-zipped .gz file will be read as a
932plain file. However, if a .bz2 file isn't actually bzipped, the first read will
933fail. */
934
935#ifdef SUPPORT_LIBZ
936if (frtype == FR_LIBZ)
937  {
938  ingz = (gzFile)handle;
939  bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
940  }
941else
942#endif
943
944#ifdef SUPPORT_LIBBZ2
945if (frtype == FR_LIBBZ2)
946  {
947  inbz2 = (BZFILE *)handle;
948  bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
949  if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
950  }                                    /* without the cast it is unsigned. */
951else
952#endif
953
954  {
955  in = (FILE *)handle;
956  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
957  }
958
959endptr = buffer + bufflength;
960
961/* Loop while the current pointer is not at the end of the file. For large
962files, endptr will be at the end of the buffer when we are in the middle of the
963file, but ptr will never get there, because as soon as it gets over 2/3 of the
964way, the buffer is shifted left and re-filled. */
965
966while (ptr < endptr)
967  {
968  int endlinelength;
969  int mrc = 0;
970  BOOL match;
971  char *matchptr = ptr;
972  char *t = ptr;
973  size_t length, linelength;
974
975  /* At this point, ptr is at the start of a line. We need to find the length
976  of the subject string to pass to pcre_exec(). In multiline mode, it is the
977  length remainder of the data in the buffer. Otherwise, it is the length of
978  the next line, excluding the terminating newline. After matching, we always
979  advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
980  option is used for compiling, so that any match is constrained to be in the
981  first line. */
982
983  t = end_of_line(t, endptr, &endlinelength);
984  linelength = t - ptr - endlinelength;
985  length = multiline? (size_t)(endptr - ptr) : linelength;
986
987  /* Extra processing for Jeffrey Friedl's debugging. */
988
989#ifdef JFRIEDL_DEBUG
990  if (jfriedl_XT || jfriedl_XR)
991  {
992      #include <sys/time.h>
993      #include <time.h>
994      struct timeval start_time, end_time;
995      struct timezone dummy;
996      int i;
997
998      if (jfriedl_XT)
999      {
1000          unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1001          const char *orig = ptr;
1002          ptr = malloc(newlen + 1);
1003          if (!ptr) {
1004                  printf("out of memory");
1005                  exit(2);
1006          }
1007          endptr = ptr;
1008          strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1009          for (i = 0; i < jfriedl_XT; i++) {
1010                  strncpy(endptr, orig,  length);
1011                  endptr += length;
1012          }
1013          strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1014          length = newlen;
1015      }
1016
1017      if (gettimeofday(&start_time, &dummy) != 0)
1018              perror("bad gettimeofday");
1019
1020
1021      for (i = 0; i < jfriedl_XR; i++)
1022          match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1023              PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1024
1025      if (gettimeofday(&end_time, &dummy) != 0)
1026              perror("bad gettimeofday");
1027
1028      double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1029                      -
1030                      (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1031
1032      printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1033      return 0;
1034  }
1035#endif
1036
1037  /* We come back here after a match when the -o option (only_matching) is set,
1038  in order to find any further matches in the same line. */
1039
1040  ONLY_MATCHING_RESTART:
1041
1042  /* Run through all the patterns until one matches or there is an error other
1043  than NOMATCH. This code is in a subroutine so that it can be re-used for
1044  finding subsequent matches when colouring matched lines. */
1045
1046  match = match_patterns(matchptr, length, offsets, &mrc);
1047
1048  /* If it's a match or a not-match (as required), do what's wanted. */
1049
1050  if (match != invert)
1051    {
1052    BOOL hyphenprinted = FALSE;
1053
1054    /* We've failed if we want a file that doesn't have any matches. */
1055
1056    if (filenames == FN_NOMATCH_ONLY) return 1;
1057
1058    /* Just count if just counting is wanted. */
1059
1060    if (count_only) count++;
1061
1062    /* If all we want is a file name, there is no need to scan any more lines
1063    in the file. */
1064
1065    else if (filenames == FN_MATCH_ONLY)
1066      {
1067      fprintf(stdout, "%s\n", printname);
1068      return 0;
1069      }
1070
1071    /* Likewise, if all we want is a yes/no answer. */
1072
1073    else if (quiet) return 0;
1074
1075    /* The --only-matching option prints just the substring that matched, and
1076    the --file-offsets and --line-offsets options output offsets for the
1077    matching substring (they both force --only-matching). None of these options
1078    prints any context. Afterwards, adjust the start and length, and then jump
1079    back to look for further matches in the same line. If we are in invert
1080    mode, however, nothing is printed - this could be still useful because the
1081    return code is set. */
1082
1083    else if (only_matching)
1084      {
1085      if (!invert)
1086        {
1087        if (printname != NULL) fprintf(stdout, "%s:", printname);
1088        if (number) fprintf(stdout, "%d:", linenumber);
1089        if (line_offsets)
1090          fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1091            offsets[1] - offsets[0]);
1092        else if (file_offsets)
1093          fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1094            offsets[1] - offsets[0]);
1095        else
1096          {
1097          if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1098          fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1099          if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1100          }
1101        fprintf(stdout, "\n");
1102        matchptr += offsets[1];
1103        length -= offsets[1];
1104        match = FALSE;
1105        goto ONLY_MATCHING_RESTART;
1106        }
1107      }
1108
1109    /* This is the default case when none of the above options is set. We print
1110    the matching lines(s), possibly preceded and/or followed by other lines of
1111    context. */
1112
1113    else
1114      {
1115      /* See if there is a requirement to print some "after" lines from a
1116      previous match. We never print any overlaps. */
1117
1118      if (after_context > 0 && lastmatchnumber > 0)
1119        {
1120        int ellength;
1121        int linecount = 0;
1122        char *p = lastmatchrestart;
1123
1124        while (p < ptr && linecount < after_context)
1125          {
1126          p = end_of_line(p, ptr, &ellength);
1127          linecount++;
1128          }
1129
1130        /* It is important to advance lastmatchrestart during this printing so
1131        that it interacts correctly with any "before" printing below. Print
1132        each line's data using fwrite() in case there are binary zeroes. */
1133
1134        while (lastmatchrestart < p)
1135          {
1136          char *pp = lastmatchrestart;
1137          if (printname != NULL) fprintf(stdout, "%s-", printname);
1138          if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1139          pp = end_of_line(pp, endptr, &ellength);
1140          fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1141          lastmatchrestart = pp;
1142          }
1143        if (lastmatchrestart != ptr) hyphenpending = TRUE;
1144        }
1145
1146      /* If there were non-contiguous lines printed above, insert hyphens. */
1147
1148      if (hyphenpending)
1149        {
1150        fprintf(stdout, "--\n");
1151        hyphenpending = FALSE;
1152        hyphenprinted = TRUE;
1153        }
1154
1155      /* See if there is a requirement to print some "before" lines for this
1156      match. Again, don't print overlaps. */
1157
1158      if (before_context > 0)
1159        {
1160        int linecount = 0;
1161        char *p = ptr;
1162
1163        while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1164               linecount < before_context)
1165          {
1166          linecount++;
1167          p = previous_line(p, buffer);
1168          }
1169
1170        if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1171          fprintf(stdout, "--\n");
1172
1173        while (p < ptr)
1174          {
1175          int ellength;
1176          char *pp = p;
1177          if (printname != NULL) fprintf(stdout, "%s-", printname);
1178          if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1179          pp = end_of_line(pp, endptr, &ellength);
1180          fwrite(p, 1, pp - p, stdout);
1181          p = pp;
1182          }
1183        }
1184
1185      /* Now print the matching line(s); ensure we set hyphenpending at the end
1186      of the file if any context lines are being output. */
1187
1188      if (after_context > 0 || before_context > 0)
1189        endhyphenpending = TRUE;
1190
1191      if (printname != NULL) fprintf(stdout, "%s:", printname);
1192      if (number) fprintf(stdout, "%d:", linenumber);
1193
1194      /* In multiline mode, we want to print to the end of the line in which
1195      the end of the matched string is found, so we adjust linelength and the
1196      line number appropriately, but only when there actually was a match
1197      (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1198      the match will always be before the first newline sequence. */
1199
1200      if (multiline)
1201        {
1202        int ellength;
1203        char *endmatch = ptr;
1204        if (!invert)
1205          {
1206          endmatch += offsets[1];
1207          t = ptr;
1208          while (t < endmatch)
1209            {
1210            t = end_of_line(t, endptr, &ellength);
1211            if (t <= endmatch) linenumber++; else break;
1212            }
1213          }
1214        endmatch = end_of_line(endmatch, endptr, &ellength);
1215        linelength = endmatch - ptr - ellength;
1216        }
1217
1218      /*** NOTE: Use only fwrite() to output the data line, so that binary
1219      zeroes are treated as just another data character. */
1220
1221      /* This extra option, for Jeffrey Friedl's debugging requirements,
1222      replaces the matched string, or a specific captured string if it exists,
1223      with X. When this happens, colouring is ignored. */
1224
1225#ifdef JFRIEDL_DEBUG
1226      if (S_arg >= 0 && S_arg < mrc)
1227        {
1228        int first = S_arg * 2;
1229        int last  = first + 1;
1230        fwrite(ptr, 1, offsets[first], stdout);
1231        fprintf(stdout, "X");
1232        fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1233        }
1234      else
1235#endif
1236
1237      /* We have to split the line(s) up if colouring, and search for further
1238      matches. */
1239
1240      if (do_colour)
1241        {
1242        int last_offset = 0;
1243        fwrite(ptr, 1, offsets[0], stdout);
1244        fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1245        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1246        fprintf(stdout, "%c[00m", 0x1b);
1247        for (;;)
1248          {
1249          last_offset += offsets[1];
1250          matchptr += offsets[1];
1251          length -= offsets[1];
1252          if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1253          fwrite(matchptr, 1, offsets[0], stdout);
1254          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1255          fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1256          fprintf(stdout, "%c[00m", 0x1b);
1257          }
1258        fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1259          stdout);
1260        }
1261
1262      /* Not colouring; no need to search for further matches */
1263
1264      else fwrite(ptr, 1, linelength + endlinelength, stdout);
1265      }
1266
1267    /* End of doing what has to be done for a match */
1268
1269    rc = 0;    /* Had some success */
1270
1271    /* Remember where the last match happened for after_context. We remember
1272    where we are about to restart, and that line's number. */
1273
1274    lastmatchrestart = ptr + linelength + endlinelength;
1275    lastmatchnumber = linenumber + 1;
1276    }
1277
1278  /* For a match in multiline inverted mode (which of course did not cause
1279  anything to be printed), we have to move on to the end of the match before
1280  proceeding. */
1281
1282  if (multiline && invert && match)
1283    {
1284    int ellength;
1285    char *endmatch = ptr + offsets[1];
1286    t = ptr;
1287    while (t < endmatch)
1288      {
1289      t = end_of_line(t, endptr, &ellength);
1290      if (t <= endmatch) linenumber++; else break;
1291      }
1292    endmatch = end_of_line(endmatch, endptr, &ellength);
1293    linelength = endmatch - ptr - ellength;
1294    }
1295
1296  /* Advance to after the newline and increment the line number. The file
1297  offset to the current line is maintained in filepos. */
1298
1299  ptr += linelength + endlinelength;
1300  filepos += linelength + endlinelength;
1301  linenumber++;
1302
1303  /* If we haven't yet reached the end of the file (the buffer is full), and
1304  the current point is in the top 1/3 of the buffer, slide the buffer down by
1305  1/3 and refill it. Before we do this, if some unprinted "after" lines are
1306  about to be lost, print them. */
1307
1308  if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1309    {
1310    if (after_context > 0 &&
1311        lastmatchnumber > 0 &&
1312        lastmatchrestart < buffer + MBUFTHIRD)
1313      {
1314      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1315      lastmatchnumber = 0;
1316      }
1317
1318    /* Now do the shuffle */
1319
1320    memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1321    ptr -= MBUFTHIRD;
1322
1323#ifdef SUPPORT_LIBZ
1324    if (frtype == FR_LIBZ)
1325      bufflength = 2*MBUFTHIRD +
1326        gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1327    else
1328#endif
1329
1330#ifdef SUPPORT_LIBBZ2
1331    if (frtype == FR_LIBBZ2)
1332      bufflength = 2*MBUFTHIRD +
1333        BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1334    else
1335#endif
1336
1337    bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1338
1339    endptr = buffer + bufflength;
1340
1341    /* Adjust any last match point */
1342
1343    if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1344    }
1345  }     /* Loop through the whole file */
1346
1347/* End of file; print final "after" lines if wanted; do_after_lines sets
1348hyphenpending if it prints something. */
1349
1350if (!only_matching && !count_only)
1351  {
1352  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1353  hyphenpending |= endhyphenpending;
1354  }
1355
1356/* Print the file name if we are looking for those without matches and there
1357were none. If we found a match, we won't have got this far. */
1358
1359if (filenames == FN_NOMATCH_ONLY)
1360  {
1361  fprintf(stdout, "%s\n", printname);
1362  return 0;
1363  }
1364
1365/* Print the match count if wanted */
1366
1367if (count_only)
1368  {
1369  if (count > 0 || !omit_zero_count)
1370    {
1371    if (printname != NULL && filenames != FN_NONE)
1372      fprintf(stdout, "%s:", printname);
1373    fprintf(stdout, "%d\n", count);
1374    }
1375  }
1376
1377return rc;
1378}
1379
1380
1381
1382/*************************************************
1383*     Grep a file or recurse into a directory    *
1384*************************************************/
1385
1386/* Given a path name, if it's a directory, scan all the files if we are
1387recursing; if it's a file, grep it.
1388
1389Arguments:
1390  pathname          the path to investigate
1391  dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1392  only_one_at_top   TRUE if the path is the only one at toplevel
1393
1394Returns:   0 if there was at least one match
1395           1 if there were no matches
1396           2 there was some kind of error
1397
1398However, file opening failures are suppressed if "silent" is set.
1399*/
1400
1401static int
1402grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1403{
1404int rc = 1;
1405int sep;
1406int frtype;
1407int pathlen;
1408void *handle;
1409FILE *in = NULL;           /* Ensure initialized */
1410
1411#ifdef SUPPORT_LIBZ
1412gzFile ingz = NULL;
1413#endif
1414
1415#ifdef SUPPORT_LIBBZ2
1416BZFILE *inbz2 = NULL;
1417#endif
1418
1419/* If the file name is "-" we scan stdin */
1420
1421if (strcmp(pathname, "-") == 0)
1422  {
1423  return pcregrep(stdin, FR_PLAIN,
1424    (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1425      stdin_name : NULL);
1426  }
1427
1428/* If the file is a directory, skip if skipping or if we are recursing, scan
1429each file and directory within it, subject to any include or exclude patterns
1430that were set. The scanning code is localized so it can be made
1431system-specific. */
1432
1433if ((sep = isdirectory(pathname)) != 0)
1434  {
1435  if (dee_action == dee_SKIP) return 1;
1436  if (dee_action == dee_RECURSE)
1437    {
1438    char buffer[1024];
1439    char *nextfile;
1440    directory_type *dir = opendirectory(pathname);
1441
1442    if (dir == NULL)
1443      {
1444      if (!silent)
1445        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1446          strerror(errno));
1447      return 2;
1448      }
1449
1450    while ((nextfile = readdirectory(dir)) != NULL)
1451      {
1452      int frc, nflen;
1453      sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1454      nflen = strlen(nextfile);
1455
1456      if (isdirectory(buffer))
1457        {
1458        if (exclude_dir_compiled != NULL &&
1459            pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1460          continue;
1461
1462        if (include_dir_compiled != NULL &&
1463            pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1464          continue;
1465        }
1466      else
1467        {
1468        if (exclude_compiled != NULL &&
1469            pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1470          continue;
1471
1472        if (include_compiled != NULL &&
1473            pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1474          continue;
1475        }
1476
1477      frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1478      if (frc > 1) rc = frc;
1479       else if (frc == 0 && rc == 1) rc = 0;
1480      }
1481
1482    closedirectory(dir);
1483    return rc;
1484    }
1485  }
1486
1487/* If the file is not a directory and not a regular file, skip it if that's
1488been requested. */
1489
1490else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1491
1492/* Control reaches here if we have a regular file, or if we have a directory
1493and recursion or skipping was not requested, or if we have anything else and
1494skipping was not requested. The scan proceeds. If this is the first and only
1495argument at top level, we don't show the file name, unless we are only showing
1496the file name, or the filename was forced (-H). */
1497
1498pathlen = strlen(pathname);
1499
1500/* Open using zlib if it is supported and the file name ends with .gz. */
1501
1502#ifdef SUPPORT_LIBZ
1503if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1504  {
1505  ingz = gzopen(pathname, "rb");
1506  if (ingz == NULL)
1507    {
1508    if (!silent)
1509      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1510        strerror(errno));
1511    return 2;
1512    }
1513  handle = (void *)ingz;
1514  frtype = FR_LIBZ;
1515  }
1516else
1517#endif
1518
1519/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1520
1521#ifdef SUPPORT_LIBBZ2
1522if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1523  {
1524  inbz2 = BZ2_bzopen(pathname, "rb");
1525  handle = (void *)inbz2;
1526  frtype = FR_LIBBZ2;
1527  }
1528else
1529#endif
1530
1531/* Otherwise use plain fopen(). The label is so that we can come back here if
1532an attempt to read a .bz2 file indicates that it really is a plain file. */
1533
1534#ifdef SUPPORT_LIBBZ2
1535PLAIN_FILE:
1536#endif
1537  {
1538  in = fopen(pathname, "rb");
1539  handle = (void *)in;
1540  frtype = FR_PLAIN;
1541  }
1542
1543/* All the opening methods return errno when they fail. */
1544
1545if (handle == NULL)
1546  {
1547  if (!silent)
1548    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1549      strerror(errno));
1550  return 2;
1551  }
1552
1553/* Now grep the file */
1554
1555rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1556  (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1557
1558/* Close in an appropriate manner. */
1559
1560#ifdef SUPPORT_LIBZ
1561if (frtype == FR_LIBZ)
1562  gzclose(ingz);
1563else
1564#endif
1565
1566/* If it is a .bz2 file and the result is 2, it means that the first attempt to
1567read failed. If the error indicates that the file isn't in fact bzipped, try
1568again as a normal file. */
1569
1570#ifdef SUPPORT_LIBBZ2
1571if (frtype == FR_LIBBZ2)
1572  {
1573  if (rc == 2)
1574    {
1575    int errnum;
1576    const char *err = BZ2_bzerror(inbz2, &errnum);
1577    if (errnum == BZ_DATA_ERROR_MAGIC)
1578      {
1579      BZ2_bzclose(inbz2);
1580      goto PLAIN_FILE;
1581      }
1582    else if (!silent)
1583      fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1584        pathname, err);
1585    }
1586  BZ2_bzclose(inbz2);
1587  }
1588else
1589#endif
1590
1591/* Normal file close */
1592
1593fclose(in);
1594
1595/* Pass back the yield from pcregrep(). */
1596
1597return rc;
1598}
1599
1600
1601
1602
1603/*************************************************
1604*                Usage function                  *
1605*************************************************/
1606
1607static int
1608usage(int rc)
1609{
1610option_item *op;
1611fprintf(stderr, "Usage: pcregrep [-");
1612for (op = optionlist; op->one_char != 0; op++)
1613  {
1614  if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1615  }
1616fprintf(stderr, "] [long options] [pattern] [files]\n");
1617fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1618  "options.\n");
1619return rc;
1620}
1621
1622
1623
1624
1625/*************************************************
1626*                Help function                   *
1627*************************************************/
1628
1629static void
1630help(void)
1631{
1632option_item *op;
1633
1634printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1635printf("Search for PATTERN in each FILE or standard input.\n");
1636printf("PATTERN must be present if neither -e nor -f is used.\n");
1637printf("\"-\" can be used as a file name to mean STDIN.\n");
1638
1639#ifdef SUPPORT_LIBZ
1640printf("Files whose names end in .gz are read using zlib.\n");
1641#endif
1642
1643#ifdef SUPPORT_LIBBZ2
1644printf("Files whose names end in .bz2 are read using bzlib2.\n");
1645#endif
1646
1647#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1648printf("Other files and the standard input are read as plain files.\n\n");
1649#else
1650printf("All files are read as plain files, without any interpretation.\n\n");
1651#endif
1652
1653printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1654printf("Options:\n");
1655
1656for (op = optionlist; op->one_char != 0; op++)
1657  {
1658  int n;
1659  char s[4];
1660  if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1661  n = 30 - printf("  %s --%s", s, op->long_name);
1662  if (n < 1) n = 1;
1663  printf("%.*s%s\n", n, "                    ", op->help_text);
1664  }
1665
1666printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1667printf("trailing white space is removed and blank lines are ignored.\n");
1668printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1669
1670printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1671printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1672}
1673
1674
1675
1676
1677/*************************************************
1678*    Handle a single-letter, no data option      *
1679*************************************************/
1680
1681static int
1682handle_option(int letter, int options)
1683{
1684switch(letter)
1685  {
1686  case N_FOFFSETS: file_offsets = TRUE; break;
1687  case N_HELP: help(); exit(0);
1688  case N_LOFFSETS: line_offsets = number = TRUE; break;
1689  case 'c': count_only = TRUE; break;
1690  case 'F': process_options |= PO_FIXED_STRINGS; break;
1691  case 'H': filenames = FN_FORCE; break;
1692  case 'h': filenames = FN_NONE; break;
1693  case 'i': options |= PCRE_CASELESS; break;
1694  case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1695  case 'L': filenames = FN_NOMATCH_ONLY; break;
1696  case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1697  case 'n': number = TRUE; break;
1698  case 'o': only_matching = TRUE; break;
1699  case 'q': quiet = TRUE; break;
1700  case 'r': dee_action = dee_RECURSE; break;
1701  case 's': silent = TRUE; break;
1702  case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1703  case 'v': invert = TRUE; break;
1704  case 'w': process_options |= PO_WORD_MATCH; break;
1705  case 'x': process_options |= PO_LINE_MATCH; break;
1706
1707  case 'V':
1708  fprintf(stderr, "pcregrep version %s\n", pcre_version());
1709  exit(0);
1710  break;
1711
1712  default:
1713  fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1714  exit(usage(2));
1715  }
1716
1717return options;
1718}
1719
1720
1721
1722
1723/*************************************************
1724*          Construct printed ordinal             *
1725*************************************************/
1726
/* This turns a number into its English ordinal form: "1st", "2nd", "3rd",
"4th", etc. Numbers whose last two digits are 11, 12, or 13 take "th" (for
example "11th" and "112th"), unlike other numbers that end in 1, 2, or 3.

Argument:   n    the number to convert
Returns:    a pointer to a static buffer that holds the text; the buffer is
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];   /* Enough for a 32-bit int, the suffix, and the zero */
int lasttwo = n % 100;
char *p = buffer;

p += sprintf(p, "%d", n);

/* The "teens" are irregular, so they must be checked before looking at the
final digit on its own. */

if (lasttwo >= 11 && lasttwo <= 13) strcpy(p, "th");
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }

return buffer;
}
1745
1746
1747
1748/*************************************************
1749*          Compile a single pattern              *
1750*************************************************/
1751
1752/* When the -F option has been used, this is called for each substring.
1753Otherwise it's called for each supplied pattern.
1754
1755Arguments:
1756  pattern        the pattern string
1757  options        the PCRE options
1758  filename       the file name, or NULL for a command-line pattern
1759  count          0 if this is the only command line pattern, or
1760                 number of the command line pattern, or
1761                 linenumber for a pattern from a file
1762
1763Returns:         TRUE on success, FALSE after an error
1764*/
1765
1766static BOOL
1767compile_single_pattern(char *pattern, int options, char *filename, int count)
1768{
1769char buffer[MBUFTHIRD + 16];
1770const char *error;
1771int errptr;
1772
1773if (pattern_count >= MAX_PATTERN_COUNT)
1774  {
1775  fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1776    (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1777  return FALSE;
1778  }
1779
1780sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1781  suffix[process_options]);
1782pattern_list[pattern_count] =
1783  pcre_compile(buffer, options, &error, &errptr, pcretables);
1784if (pattern_list[pattern_count] != NULL)
1785  {
1786  pattern_count++;
1787  return TRUE;
1788  }
1789
1790/* Handle compile errors */
1791
1792errptr -= (int)strlen(prefix[process_options]);
1793if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1794
1795if (filename == NULL)
1796  {
1797  if (count == 0)
1798    fprintf(stderr, "pcregrep: Error in command-line regex "
1799      "at offset %d: %s\n", errptr, error);
1800  else
1801    fprintf(stderr, "pcregrep: Error in %s command-line regex "
1802      "at offset %d: %s\n", ordin(count), errptr, error);
1803  }
1804else
1805  {
1806  fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1807    "at offset %d: %s\n", count, filename, errptr, error);
1808  }
1809
1810return FALSE;
1811}
1812
1813
1814
1815/*************************************************
1816*           Compile one supplied pattern         *
1817*************************************************/
1818
1819/* When the -F option has been used, each string may be a list of strings,
1820separated by line breaks. They will be matched literally.
1821
1822Arguments:
1823  pattern        the pattern string
1824  options        the PCRE options
1825  filename       the file name, or NULL for a command-line pattern
1826  count          0 if this is the only command line pattern, or
1827                 number of the command line pattern, or
1828                 linenumber for a pattern from a file
1829
1830Returns:         TRUE on success, FALSE after an error
1831*/
1832
1833static BOOL
1834compile_pattern(char *pattern, int options, char *filename, int count)
1835{
1836if ((process_options & PO_FIXED_STRINGS) != 0)
1837  {
1838  char *eop = pattern + strlen(pattern);
1839  char buffer[MBUFTHIRD];
1840  for(;;)
1841    {
1842    int ellength;
1843    char *p = end_of_line(pattern, eop, &ellength);
1844    if (ellength == 0)
1845      return compile_single_pattern(pattern, options, filename, count);
1846    sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1847    pattern = p;
1848    if (!compile_single_pattern(buffer, options, filename, count))
1849      return FALSE;
1850    }
1851  }
1852else return compile_single_pattern(pattern, options, filename, count);
1853}
1854
1855
1856
1857/*************************************************
1858*                Main program                    *
1859*************************************************/
1860
1861/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1862
1863int
1864main(int argc, char **argv)
1865{
1866int i, j;
1867int rc = 1;
1868int pcre_options = 0;
1869int cmd_pattern_count = 0;
1870int hint_count = 0;
1871int errptr;
1872BOOL only_one_at_top;
1873char *patterns[MAX_PATTERN_COUNT];
1874const char *locale_from = "--locale";
1875const char *error;
1876
1877/* Set the default line ending value from the default in the PCRE library;
1878"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1879Note that the return values from pcre_config(), though derived from the ASCII
1880codes, are the same in EBCDIC environments, so we must use the actual values
rather than escapes such as '\r'. */
1882
1883(void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1884switch(i)
1885  {
1886  default:               newline = (char *)"lf"; break;
1887  case 13:               newline = (char *)"cr"; break;
1888  case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1889  case -1:               newline = (char *)"any"; break;
1890  case -2:               newline = (char *)"anycrlf"; break;
1891  }
1892
1893/* Process the options */
1894
1895for (i = 1; i < argc; i++)
1896  {
1897  option_item *op = NULL;
1898  char *option_data = (char *)"";    /* default to keep compiler happy */
1899  BOOL longop;
1900  BOOL longopwasequals = FALSE;
1901
1902  if (argv[i][0] != '-') break;
1903
1904  /* If we hit an argument that is just "-", it may be a reference to STDIN,
1905  but only if we have previously had -e or -f to define the patterns. */
1906
1907  if (argv[i][1] == 0)
1908    {
1909    if (pattern_filename != NULL || pattern_count > 0) break;
1910      else exit(usage(2));
1911    }
1912
1913  /* Handle a long name option, or -- to terminate the options */
1914
1915  if (argv[i][1] == '-')
1916    {
1917    char *arg = argv[i] + 2;
1918    char *argequals = strchr(arg, '=');
1919
1920    if (*arg == 0)    /* -- terminates options */
1921      {
1922      i++;
1923      break;                /* out of the options-handling loop */
1924      }
1925
1926    longop = TRUE;
1927
1928    /* Some long options have data that follows after =, for example file=name.
1929    Some options have variations in the long name spelling: specifically, we
1930    allow "regexp" because GNU grep allows it, though I personally go along
1931    with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1932    These options are entered in the table as "regex(p)". Options can be in
1933    both these categories. */
1934
1935    for (op = optionlist; op->one_char != 0; op++)
1936      {
1937      char *opbra = strchr(op->long_name, '(');
1938      char *equals = strchr(op->long_name, '=');
1939
1940      /* Handle options with only one spelling of the name */
1941
1942      if (opbra == NULL)     /* Does not contain '(' */
1943        {
1944        if (equals == NULL)  /* Not thing=data case */
1945          {
1946          if (strcmp(arg, op->long_name) == 0) break;
1947          }
1948        else                 /* Special case xxx=data */
1949          {
1950          int oplen = equals - op->long_name;
1951          int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1952          if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1953            {
1954            option_data = arg + arglen;
1955            if (*option_data == '=')
1956              {
1957              option_data++;
1958              longopwasequals = TRUE;
1959              }
1960            break;
1961            }
1962          }
1963        }
1964
1965      /* Handle options with an alternate spelling of the name */
1966
1967      else
1968        {
1969        char buff1[24];
1970        char buff2[24];
1971
1972        int baselen = opbra - op->long_name;
1973        int fulllen = strchr(op->long_name, ')') - op->long_name + 1;
1974        int arglen = (argequals == NULL || equals == NULL)?
1975          (int)strlen(arg) : argequals - arg;
1976
1977        sprintf(buff1, "%.*s", baselen, op->long_name);
1978        sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
1979
1980        if (strncmp(arg, buff1, arglen) == 0 ||
1981           strncmp(arg, buff2, arglen) == 0)
1982          {
1983          if (equals != NULL && argequals != NULL)
1984            {
1985            option_data = argequals;
1986            if (*option_data == '=')
1987              {
1988              option_data++;
1989              longopwasequals = TRUE;
1990              }
1991            }
1992          break;
1993          }
1994        }
1995      }
1996
1997    if (op->one_char == 0)
1998      {
1999      fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2000      exit(usage(2));
2001      }
2002    }
2003
2004  /* Jeffrey Friedl's debugging harness uses these additional options which
2005  are not in the right form for putting in the option table because they use
2006  only one hyphen, yet are more than one character long. By putting them
2007  separately here, they will not get displayed as part of the help() output,
2008  but I don't think Jeffrey will care about that. */
2009
2010#ifdef JFRIEDL_DEBUG
2011  else if (strcmp(argv[i], "-pre") == 0) {
2012          jfriedl_prefix = argv[++i];
2013          continue;
2014  } else if (strcmp(argv[i], "-post") == 0) {
2015          jfriedl_postfix = argv[++i];
2016          continue;
2017  } else if (strcmp(argv[i], "-XT") == 0) {
2018          sscanf(argv[++i], "%d", &jfriedl_XT);
2019          continue;
2020  } else if (strcmp(argv[i], "-XR") == 0) {
2021          sscanf(argv[++i], "%d", &jfriedl_XR);
2022          continue;
2023  }
2024#endif
2025
2026
2027  /* One-char options; many that have no data may be in a single argument; we
2028  continue till we hit the last one or one that needs data. */
2029
2030  else
2031    {
2032    char *s = argv[i] + 1;
2033    longop = FALSE;
2034    while (*s != 0)
2035      {
2036      for (op = optionlist; op->one_char != 0; op++)
2037        { if (*s == op->one_char) break; }
2038      if (op->one_char == 0)
2039        {
2040        fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2041          *s, argv[i]);
2042        exit(usage(2));
2043        }
2044      if (op->type != OP_NODATA || s[1] == 0)
2045        {
2046        option_data = s+1;
2047        break;
2048        }
2049      pcre_options = handle_option(*s++, pcre_options);
2050      }
2051    }
2052
2053  /* At this point we should have op pointing to a matched option. If the type
2054  is OP_NODATA, it means that there is no data, and the option might set
2055  something in the PCRE options. */
2056
2057  if (op->type == OP_NODATA)
2058    {
2059    pcre_options = handle_option(op->one_char, pcre_options);
2060    continue;
2061    }
2062
2063  /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2064  either has a value or defaults to something. It cannot have data in a
2065  separate item. At the moment, the only such options are "colo(u)r" and
2066  Jeffrey Friedl's special -S debugging option. */
2067
2068  if (*option_data == 0 &&
2069      (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2070    {
2071    switch (op->one_char)
2072      {
2073      case N_COLOUR:
2074      colour_option = (char *)"auto";
2075      break;
2076#ifdef JFRIEDL_DEBUG
2077      case 'S':
2078      S_arg = 0;
2079      break;
2080#endif
2081      }
2082    continue;
2083    }
2084
2085  /* Otherwise, find the data string for the option. */
2086
2087  if (*option_data == 0)
2088    {
2089    if (i >= argc - 1 || longopwasequals)
2090      {
2091      fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2092      exit(usage(2));
2093      }
2094    option_data = argv[++i];
2095    }
2096
2097  /* If the option type is OP_PATLIST, it's the -e option, which can be called
2098  multiple times to create a list of patterns. */
2099
2100  if (op->type == OP_PATLIST)
2101    {
2102    if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2103      {
2104      fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2105        MAX_PATTERN_COUNT);
2106      return 2;
2107      }
2108    patterns[cmd_pattern_count++] = option_data;
2109    }
2110
2111  /* Otherwise, deal with single string or numeric data values. */
2112
2113  else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2114    {
2115    *((char **)op->dataptr) = option_data;
2116    }
2117  else
2118    {
2119    char *endptr;
2120    int n = strtoul(option_data, &endptr, 10);
2121    if (*endptr != 0)
2122      {
2123      if (longop)
2124        {
2125        char *equals = strchr(op->long_name, '=');
2126        int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2127          equals - op->long_name;
2128        fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2129          option_data, nlen, op->long_name);
2130        }
2131      else
2132        fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2133          option_data, op->one_char);
2134      exit(usage(2));
2135      }
2136    *((int *)op->dataptr) = n;
2137    }
2138  }
2139
2140/* Options have been decoded. If -C was used, its value is used as a default
2141for -A and -B. */
2142
2143if (both_context > 0)
2144  {
2145  if (after_context == 0) after_context = both_context;
2146  if (before_context == 0) before_context = both_context;
2147  }
2148
2149/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2150However, the latter two set the only_matching flag. */
2151
2152if ((only_matching && (file_offsets || line_offsets)) ||
2153    (file_offsets && line_offsets))
2154  {
2155  fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2156    "and/or --line-offsets\n");
2157  exit(usage(2));
2158  }
2159
2160if (file_offsets || line_offsets) only_matching = TRUE;
2161
2162/* If a locale has not been provided as an option, see if the LC_ALL or
2163LC_CTYPE environment variable is set (in that order), and if so, use it. */
2164
2165if (locale == NULL)
2166  {
2167  locale = getenv("LC_ALL");
2168  locale_from = "LCC_ALL";
2169  }
2170
2171if (locale == NULL)
2172  {
2173  locale = getenv("LC_CTYPE");
2174  locale_from = "LC_CTYPE";
2175  }
2176
2177/* If a locale has been provided, set it, and generate the tables that PCRE
2178needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2179
2180if (locale != NULL)
2181  {
2182  if (setlocale(LC_CTYPE, locale) == NULL)
2183    {
2184    fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2185      locale, locale_from);
2186    return 2;
2187    }
2188  pcretables = pcre_maketables();
2189  }
2190
2191/* Sort out colouring */
2192
2193if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2194  {
2195  if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2196  else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2197  else
2198    {
2199    fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2200      colour_option);
2201    return 2;
2202    }
2203  if (do_colour)
2204    {
2205    char *cs = getenv("PCREGREP_COLOUR");
2206    if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2207    if (cs != NULL) colour_string = cs;
2208    }
2209  }
2210
2211/* Interpret the newline type; the default settings are Unix-like. */
2212
2213if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2214  {
2215  pcre_options |= PCRE_NEWLINE_CR;
2216  endlinetype = EL_CR;
2217  }
2218else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2219  {
2220  pcre_options |= PCRE_NEWLINE_LF;
2221  endlinetype = EL_LF;
2222  }
2223else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2224  {
2225  pcre_options |= PCRE_NEWLINE_CRLF;
2226  endlinetype = EL_CRLF;
2227  }
2228else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2229  {
2230  pcre_options |= PCRE_NEWLINE_ANY;
2231  endlinetype = EL_ANY;
2232  }
2233else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2234  {
2235  pcre_options |= PCRE_NEWLINE_ANYCRLF;
2236  endlinetype = EL_ANYCRLF;
2237  }
2238else
2239  {
2240  fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2241  return 2;
2242  }
2243
2244/* Interpret the text values for -d and -D */
2245
2246if (dee_option != NULL)
2247  {
2248  if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2249  else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2250  else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2251  else
2252    {
2253    fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2254    return 2;
2255    }
2256  }
2257
2258if (DEE_option != NULL)
2259  {
2260  if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2261  else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2262  else
2263    {
2264    fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2265    return 2;
2266    }
2267  }
2268
2269/* Check the values for Jeffrey Friedl's debugging options. */
2270
2271#ifdef JFRIEDL_DEBUG
2272if (S_arg > 9)
2273  {
2274  fprintf(stderr, "pcregrep: bad value for -S option\n");
2275  return 2;
2276  }
2277if (jfriedl_XT != 0 || jfriedl_XR != 0)
2278  {
2279  if (jfriedl_XT == 0) jfriedl_XT = 1;
2280  if (jfriedl_XR == 0) jfriedl_XR = 1;
2281  }
2282#endif
2283
2284/* Get memory to store the pattern and hints lists. */
2285
2286pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2287hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2288
2289if (pattern_list == NULL || hints_list == NULL)
2290  {
2291  fprintf(stderr, "pcregrep: malloc failed\n");
2292  goto EXIT2;
2293  }
2294
2295/* If no patterns were provided by -e, and there is no file provided by -f,
2296the first argument is the one and only pattern, and it must exist. */
2297
2298if (cmd_pattern_count == 0 && pattern_filename == NULL)
2299  {
2300  if (i >= argc) return usage(2);
2301  patterns[cmd_pattern_count++] = argv[i++];
2302  }
2303
2304/* Compile the patterns that were provided on the command line, either by
2305multiple uses of -e or as a single unkeyed pattern. */
2306
2307for (j = 0; j < cmd_pattern_count; j++)
2308  {
2309  if (!compile_pattern(patterns[j], pcre_options, NULL,
2310       (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2311    goto EXIT2;
2312  }
2313
2314/* Compile the regular expressions that are provided in a file. */
2315
2316if (pattern_filename != NULL)
2317  {
2318  int linenumber = 0;
2319  FILE *f;
2320  char *filename;
2321  char buffer[MBUFTHIRD];
2322
2323  if (strcmp(pattern_filename, "-") == 0)
2324    {
2325    f = stdin;
2326    filename = stdin_name;
2327    }
2328  else
2329    {
2330    f = fopen(pattern_filename, "r");
2331    if (f == NULL)
2332      {
2333      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2334        strerror(errno));
2335      goto EXIT2;
2336      }
2337    filename = pattern_filename;
2338    }
2339
2340  while (fgets(buffer, MBUFTHIRD, f) != NULL)
2341    {
2342    char *s = buffer + (int)strlen(buffer);
2343    while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2344    *s = 0;
2345    linenumber++;
2346    if (buffer[0] == 0) continue;   /* Skip blank lines */
2347    if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2348      goto EXIT2;
2349    }
2350
2351  if (f != stdin) fclose(f);
2352  }
2353
2354/* Study the regular expressions, as we will be running them many times */
2355
2356for (j = 0; j < pattern_count; j++)
2357  {
2358  hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2359  if (error != NULL)
2360    {
2361    char s[16];
2362    if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2363    fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2364    goto EXIT2;
2365    }
2366  hint_count++;
2367  }
2368
2369/* If there are include or exclude patterns, compile them. */
2370
2371if (exclude_pattern != NULL)
2372  {
2373  exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2374    pcretables);
2375  if (exclude_compiled == NULL)
2376    {
2377    fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2378      errptr, error);
2379    goto EXIT2;
2380    }
2381  }
2382
2383if (include_pattern != NULL)
2384  {
2385  include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2386    pcretables);
2387  if (include_compiled == NULL)
2388    {
2389    fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2390      errptr, error);
2391    goto EXIT2;
2392    }
2393  }
2394
2395if (exclude_dir_pattern != NULL)
2396  {
2397  exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2398    pcretables);
2399  if (exclude_dir_compiled == NULL)
2400    {
2401    fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2402      errptr, error);
2403    goto EXIT2;
2404    }
2405  }
2406
2407if (include_dir_pattern != NULL)
2408  {
2409  include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2410    pcretables);
2411  if (include_dir_compiled == NULL)
2412    {
2413    fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2414      errptr, error);
2415    goto EXIT2;
2416    }
2417  }
2418
2419/* If there are no further arguments, do the business on stdin and exit. */
2420
2421if (i >= argc)
2422  {
2423  rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2424  goto EXIT;
2425  }
2426
2427/* Otherwise, work through the remaining arguments as files or directories.
2428Pass in the fact that there is only one argument at top level - this suppresses
2429the file name if the argument is not a directory and filenames are not
2430otherwise forced. */
2431
2432only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2433
2434for (; i < argc; i++)
2435  {
2436  int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2437    only_one_at_top);
2438  if (frc > 1) rc = frc;
2439    else if (frc == 0 && rc == 1) rc = 0;
2440  }
2441
2442EXIT:
2443if (pattern_list != NULL)
2444  {
2445  for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2446  free(pattern_list);
2447  }
2448if (hints_list != NULL)
2449  {
2450  for (i = 0; i < hint_count; i++) free(hints_list[i]);
2451  free(hints_list);
2452  }
2453return rc;
2454
2455EXIT2:
2456rc = 2;
2457goto EXIT;
2458}
2459
2460/* End of pcregrep */
2461