1/*
2  agrep.c - Approximate grep
3
4  This software is released under a BSD-style license.
5  See the file LICENSE for details and copyright.
6
7*/
8
9#ifdef HAVE_CONFIG_H
10#include <config.h>
11#endif /* HAVE_CONFIG_H */
12#include <stdio.h>
13#include <stdlib.h>
14#include <locale.h>
15#include <string.h>
16#include <sys/types.h>
17#include <sys/stat.h>
18#include <fcntl.h>
19#include <errno.h>
20#include <assert.h>
21#include <limits.h>
22#include <unistd.h>
23#ifdef HAVE_GETOPT_H
24#include <getopt.h>
25#endif /* HAVE_GETOPT_H */
26#include "regex.h"
27
28#ifdef HAVE_GETTEXT
29#include <libintl.h>
30#else
31#define gettext(s) s
32#define bindtextdomain(p, d)
33#define textdomain(p)
34#endif
35
36#define _(String) gettext(String)
37
38#undef MAX
39#undef MIN
40#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
41#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
42
/* Short options understood by getopt().  A letter followed by `:' takes
   an argument.  The pieces below are concatenated by the compiler into a
   single option string. */
static char const short_options[] =
  "cd:e:hiklnqrsvwy"	/* lower-case letter options */
  "BD:E:HI:MS:V"	/* upper-case letter options */
  "0123456789"		/* -# : maximum number of errors, one digit */
  "-:";			/* `--name' seen by plain getopt(): option `-',
			   argument `name' */

/* Becomes 1 when --help has been given on the command line. */
static int show_help;

/* Name used as the prefix of diagnostics; main() points it at the last
   path component of argv[0]. */
char *program_name;
49
50#ifdef HAVE_GETOPT_LONG
51/* Long options that have no corresponding short equivalents. */
52enum {
53  COLOR_OPTION = CHAR_MAX + 1,
54  SHOW_POSITION_OPTION
55};
56
57/* Long option equivalences. */
58static struct option const long_options[] =
59{
60  {"best-match", no_argument, NULL, 'B'},
61  {"color", no_argument, NULL, COLOR_OPTION},
62  {"colour", no_argument, NULL, COLOR_OPTION},
63  {"count", no_argument, NULL, 'c'},
64  {"delete-cost", required_argument, NULL, 'D'},
65  {"delimiter", no_argument, NULL, 'd'},
66  {"delimiter-after", no_argument, NULL, 'M'},
67  {"files-with-matches", no_argument, NULL, 'l'},
68  {"help", no_argument, &show_help, 1},
69  {"ignore-case", no_argument, NULL, 'i'},
70  {"insert-cost", required_argument, NULL, 'I'},
71  {"invert-match", no_argument, NULL, 'v'},
72  {"line-number", no_argument, NULL, 'n'},
73  {"literal", no_argument, NULL, 'k'},
74  {"max-errors", required_argument, NULL, 'E'},
75  {"no-filename", no_argument, NULL, 'h'},
76  {"nothing", no_argument, NULL, 'y'},
77  {"quiet", no_argument, NULL, 'q'},
78  {"record-number", no_argument, NULL, 'n'},
79  {"recursive", no_argument, NULL, 'r'},
80  {"regexp", required_argument, NULL, 'e'},
81  {"show-cost", no_argument, NULL, 's'},
82  {"show-position", no_argument, NULL, SHOW_POSITION_OPTION},
83  {"silent", no_argument, NULL, 'q'},
84  {"substitute-cost", required_argument, NULL, 'S'},
85  {"version", no_argument, NULL, 'V'},
86  {"with-filename", no_argument, NULL, 'H'},
87  {"word-regexp", no_argument, NULL, 'w'},
88  {0, 0, 0, 0}
89};
90#endif /* HAVE_GETOPT_LONG */
91
92__dead static void
93tre_agrep_usage(int status)
94{
95  if (status != 0)
96    {
97      fprintf(stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"),
98	      program_name);
99      fprintf(stderr, _("Try `%s --help' for more information.\n"),
100              program_name);
101    }
102  else
103    {
104      printf(_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name);
105      printf(_("\
106Searches for approximate matches of PATTERN in each FILE or standard input.\n\
107Example: `%s -2 optimize foo.txt' outputs all lines in file `foo.txt' that\n\
108match \"optimize\" within two errors.  E.g. lines which contain \"optimise\",\n\
109\"optmise\", and \"opitmize\" all match.\n"), program_name);
110      printf("\n");
111      printf(_("\
112Regexp selection and interpretation:\n\
113  -e, --regexp=PATTERN	    use PATTERN as a regular expression\n\
114  -i, --ignore-case	    ignore case distinctions\n\
115  -k, --literal		    PATTERN is a literal string\n\
116  -w, --word-regexp	    force PATTERN to match only whole words\n\
117\n\
118Approximate matching settings:\n\
119  -D, --delete-cost=NUM	    set cost of missing characters\n\
120  -I, --insert-cost=NUM	    set cost of extra characters\n\
121  -S, --substitute-cost=NUM set cost of wrong characters\n\
122  -E, --max-errors=NUM	    select records that have at most NUM errors\n\
123  -#			    select records that have at most # errors (# is a\n\
124			    digit between 0 and 9)\n\
125\n\
126Miscellaneous:\n\
127  -d, --delimiter=PATTERN   set the record delimiter regular expression\n\
128  -v, --invert-match	    select non-matching records\n\
129  -V, --version		    print version information and exit\n\
130  -r, --recursive           also search in any subdirectories\n\
131  -y, --nothing		    does nothing (for compatibility with the non-free\n\
132			    agrep program)\n\
133      --help		    display this help and exit\n\
134\n\
135Output control:\n\
136  -B, --best-match	    only output records with least errors\n\
137  -c, --count		    only print a count of matching records per FILE\n\
138  -h, --no-filename	    suppress the prefixing filename on output\n\
139  -H, --with-filename	    print the filename for each match\n\
140  -l, --files-with-matches  only print FILE names containing matches\n\
141  -M, --delimiter-after     print record delimiter after record if -d is used\n\
142  -n, --record-number	    print record number with output\n\
143      --line-number         same as -n\n\
144  -q, --quiet, --silent	    suppress all normal output\n\
145  -s, --show-cost	    print match cost with output\n\
146      --colour, --color     use markers to distinguish the matching \
147strings\n\
148      --show-position       prefix each output record with start and end\n\
149                            position of the first match within the record\n"));
150      printf("\n");
151      printf(_("\
152With no FILE, or when FILE is -, reads standard input.  If less than two\n\
153FILEs are given, -h is assumed.  Exit status is 0 if a match is found, 1 for\n\
154no match, and 2 if there were errors.  If -E or -# is not specified, only\n\
155exact matches are selected.\n"));
156      printf("\n");
157      printf(_("\
158PATTERN is a POSIX extended regular expression (ERE) with the TRE extensions.\n\
159See tre(7) for a complete description.\n"));
160      printf("\n");
161      printf(_("Report bugs to: "));
162      printf("%s.\n", PACKAGE_BUGREPORT);
163    }
164  exit(status);
165}
166
167static regex_t preg;	  /* Compiled pattern to search for. */
168static regex_t delim;	  /* Compiled record delimiter pattern. */
169
170#define INITIAL_BUF_SIZE 10240	/* Initial size of the buffer. */
171static char *buf;	   /* Buffer for scanning text. */
172static int buf_size;	   /* Current size of the buffer. */
173static int data_len;	   /* Amount of data in the buffer. */
174static char *record;	   /* Start of current record. */
175static char *next_record;  /* Start of next record. */
176static int record_len;	   /* Length of current record. */
177static int delim_len;      /* Length of delimiter before record. */
178static int next_delim_len; /* Length of delimiter after record. */
179static int delim_after = 1;/* If true, print the delimiter after the record. */
180static int at_eof;
181static int have_matches;   /* If true, matches have been found. */
182static int is_binary;      /* -1 unknown, 0 ascii, 1 binary */
183
184static int invert_match;   /* Show only non-matching records. */
185static int print_filename; /* Output filename. */
186static int print_recnum;   /* Output record number. */
187static int print_cost;	   /* Output match cost. */
188static int count_matches;  /* Count matching records. */
189static int list_files;	   /* List matching files. */
190static int color_option;   /* Highlight matches. */
191static int print_position;  /* Show start and end offsets for matches. */
192static int recursive;       /* Search in subdirectories too */
193
194static int best_match;	     /* Output only best matches. */
195static int best_cost;	     /* Best match cost found so far. */
196static int be_silent;	     /* Never output anything */
197
198static regaparams_t match_params;
199
200/* The color string used with the --color option.  If set, the
201   environment variable GREP_COLOR overrides this default value. */
202static const char *highlight = "01;31";
203
204static int
205isbinaryfile(void)
206{
207	return buf != NULL && memchr(buf, '\0', data_len) != NULL;
208}
209
210/* Sets `record' to the next complete record from file `fd', and `record_len'
211   to the length of the record.	 Returns 1 when there are no more records,
212   0 otherwise. */
213static inline int
214tre_agrep_get_next_record(int fd, const char *filename)
215{
216  if (at_eof)
217    return 1;
218
219  while (1)
220    {
221      int errcode;
222      regmatch_t pmatch[1];
223
224      if (next_record == NULL)
225	{
226	  int r;
227	  int read_size = buf_size - data_len;
228
229	  if (read_size <= 0)
230	    {
231	      /* The buffer is full and no record delimiter found yet,
232		 we need to grow the buffer.  We double the size to
233		 avoid rescanning the data too many times when the
234		 records are very large. */
235	      buf_size *= 2;
236	      buf = realloc(buf, buf_size);
237	      if (buf == NULL)
238		{
239		  fprintf(stderr, "%s: %s\n", program_name, _("Out of memory"));
240		  exit(2);
241		}
242	      read_size = buf_size - data_len;
243	    }
244
245	  r = read(fd, buf + data_len, read_size);
246	  if (r < 0)
247	    {
248	      /* Read error. */
249	      char *err;
250	      if (errno == EINTR)
251		continue;
252	      err = strerror(errno);
253	      fprintf(stderr, "%s: ", program_name);
254	      fprintf(stderr, _("Error reading from %s: %s\n"), filename, err);
255	      return 1;
256	    }
257
258	  if (r == 0)
259	    {
260	      /* End of file.  Return the last record. */
261	      record = buf;
262	      record_len = data_len;
263	      at_eof = 1;
264	      /* The empty string after a trailing delimiter is not considered
265		 to be a record. */
266	      if (record_len == 0)
267		return 1;
268	      return 0;
269	    }
270	  data_len += r;
271	  next_record = buf;
272
273	  if (is_binary < 0)
274	    is_binary = isbinaryfile();
275	}
276
277      /* Find the next record delimiter. */
278      errcode = tre_regnexec(&delim, next_record, data_len - (next_record - buf),
279			 1, pmatch, 0);
280
281
282      switch (errcode)
283	{
284	case REG_OK:
285	  /* Record delimiter found, now we know how long the current
286	     record is. */
287	  record = next_record;
288	  record_len = pmatch[0].rm_so;
289	  delim_len = next_delim_len;
290
291	  next_delim_len = pmatch[0].rm_eo - pmatch[0].rm_so;
292	  next_record = next_record + pmatch[0].rm_eo;
293	  return 0;
294	  break;
295
296	case REG_NOMATCH:
297	  if (next_record == buf)
298	    {
299	      next_record = NULL;
300	      continue;
301	    }
302
303	  /* Move the data to start of the buffer and read more
304	     data. */
305	  memmove(buf, next_record, buf + data_len - next_record);
306	  data_len = buf + data_len - next_record;
307	  next_record = NULL;
308	  continue;
309	  break;
310
311	case REG_ESPACE:
312	  fprintf(stderr, "%s: %s\n", program_name, _("Out of memory"));
313	  exit(2);
314	  break;
315
316	default:
317	  assert(0);
318	  break;
319	}
320    }
321}
322
323#include <dirent.h>
324
325static int tre_agrep_handle_file(const char */*filename*/);
326
327static int
328tre_agrep_handle_dirent(const char *ent)
329{
330	struct dirent	 storage;
331	struct dirent	*dp;
332	struct stat	 st;
333	char		 path[8192];
334	DIR		*dirp;
335	int		 ret;
336	int		 ok;
337
338	if (ent == NULL || strcmp(ent, "-") == 0) {
339		return tre_agrep_handle_file(ent);
340	}
341	if (lstat(ent, &st) < 0) {
342		return tre_agrep_handle_file(ent);
343	}
344	if ((st.st_mode & S_IFMT) == S_IFDIR && recursive) {
345		if ((dirp = opendir(ent)) == NULL) {
346			fprintf(stderr, "can't open directory '%s'\n", ent);
347			return 0;
348		}
349		for (ret = 0 ; readdir_r(dirp, &storage, &dp) == 0 && dp != NULL ; ) {
350			if (strcmp(dp->d_name, ".") == 0 ||
351			    strcmp(dp->d_name, "..") == 0) {
352				continue;
353			}
354			snprintf(path, sizeof(path), "%s/%s", ent, dp->d_name);
355			if ((ok = tre_agrep_handle_dirent(path)) != 0) {
356				ret = ok;
357			}
358		}
359		closedir(dirp);
360		return ret;
361	}
362	return tre_agrep_handle_file(ent);
363}
364
365static int
366tre_agrep_handle_file(const char *filename)
367{
368  int fd;
369  int count = 0;
370  int recnum = 0;
371
372  is_binary = -1;
373
374  /* Allocate the initial buffer. */
375  if (buf == NULL)
376    {
377      buf = malloc(INITIAL_BUF_SIZE);
378      if (buf == NULL)
379	{
380	  fprintf(stderr, "%s: %s\n", program_name, _("Out of memory"));
381	  exit(2);
382	}
383      buf_size = INITIAL_BUF_SIZE;
384    }
385
386  /* Reset read buffer state. */
387  next_record = NULL;
388  data_len = 0;
389
390  if (!filename || strcmp(filename, "-") == 0)
391    {
392      if (best_match)
393	{
394	  fprintf(stderr, "%s: %s\n", program_name,
395		  _("Cannot use -B when reading from standard input."));
396	  return 2;
397	}
398      fd = 0;
399      filename = _("(standard input)");
400    }
401  else
402    {
403      fd = open(filename, O_RDONLY);
404    }
405
406  if (fd < 0)
407    {
408      fprintf(stderr, "%s: %s: %s\n", program_name, filename, strerror(errno));
409      return 1;
410    }
411
412
413  /* Go through all records and output the matching ones, or the non-matching
414     ones if `invert_match' is true. */
415  at_eof = 0;
416  while (!tre_agrep_get_next_record(fd, filename))
417    {
418      int errcode;
419      regamatch_t match;
420      regmatch_t pmatch[1];
421      recnum++;
422      memset(&match, 0, sizeof(match));
423      if (best_match)
424	match_params.max_cost = best_cost;
425      if (color_option || print_position)
426	{
427	  match.pmatch = pmatch;
428	  match.nmatch = 1;
429	}
430
431      /* Stop searching for better matches if an exact match is found. */
432      if (best_match == 1 && best_cost == 0)
433	break;
434
435      /* See if the record matches. */
436      errcode = tre_reganexec(&preg, record, record_len, &match, match_params, 0);
437      if ((!invert_match && errcode == REG_OK)
438	  || (invert_match && errcode == REG_NOMATCH))
439	{
440	  if (be_silent)
441	    exit(0);
442
443	  count++;
444	  have_matches = 1;
445	  if (best_match)
446	    {
447	      if (best_match == 1)
448		{
449		  /* First best match pass. */
450		  if (match.cost < best_cost)
451		    best_cost = match.cost;
452		  continue;
453		}
454	      /* Second best match pass. */
455	      if (match.cost > best_cost)
456		continue;
457	    }
458
459	  if (list_files)
460	    {
461	      printf("%s\n", filename);
462	      break;
463	    }
464	  else if (!count_matches && is_binary > 0)
465	    {
466	      if (print_filename)
467		printf("%s:", filename);
468	      printf("Binary file matches\n");
469	      break;
470	    }
471	  else if (!count_matches)
472	    {
473	      if (print_filename)
474		printf("%s:", filename);
475	      if (print_recnum)
476		printf("%d:", recnum);
477	      if (print_cost)
478		printf("%d:", match.cost);
479	      if (print_position)
480		printf("%d-%d:",
481		       invert_match ? 0 : (int)pmatch[0].rm_so,
482		       invert_match ? record_len : (int)pmatch[0].rm_eo);
483
484	      /* Adjust record boundaries so we print the delimiter
485		 before or after the record. */
486	      if (delim_after)
487		{
488		  record_len += next_delim_len;
489		}
490	      else
491		{
492		  record -= delim_len;
493		  record_len += delim_len;
494		  pmatch[0].rm_so += delim_len;
495		  pmatch[0].rm_eo += delim_len;
496		}
497
498	      if (color_option && !invert_match)
499		{
500		  printf("%.*s", (int)pmatch[0].rm_so, record);
501		  printf("\33[%sm", highlight);
502		  printf("%.*s", (int)(pmatch[0].rm_eo - pmatch[0].rm_so),
503			 record + pmatch[0].rm_so);
504		  fputs("\33[00m", stdout);
505		  printf("%.*s", (int)(record_len - pmatch[0].rm_eo),
506			 record + pmatch[0].rm_eo);
507		}
508	      else
509		{
510		  printf("%.*s", record_len, record);
511		}
512	    }
513	}
514    }
515
516  if (count_matches && !best_match && !be_silent)
517    {
518      if (print_filename)
519	printf("%s:", filename);
520      printf("%d\n", count);
521    }
522
523  if (fd)
524    close(fd);
525
526  return 0;
527}
528
529
530
531int
532main(int argc, char **argv)
533{
534  int c, errcode;
535  int comp_flags = REG_EXTENDED;
536  char *tmp_str;
537  char *regexp = NULL;
538  const char *delim_regexp = "\n";
539  int word_regexp = 0;
540  int literal_string = 0;
541  int max_cost_set = 0;
542
543  setlocale (LC_ALL, "");
544  bindtextdomain (PACKAGE, LOCALEDIR);
545  textdomain (PACKAGE);
546
547  /* Get the program name without the path (for error messages etc). */
548  program_name = argv[0];
549  if (program_name)
550    {
551      tmp_str = strrchr(program_name, '/');
552      if (tmp_str)
553	program_name = tmp_str + 1;
554    }
555
556  /* Defaults. */
557  print_filename = -1;
558  print_cost = 0;
559  be_silent = 0;
560  tre_regaparams_default(&match_params);
561  match_params.max_cost = 0;
562
563  /* Parse command line options. */
564  while (1)
565    {
566#ifdef HAVE_GETOPT_LONG
567      c = getopt_long(argc, argv, short_options, long_options, NULL);
568#else /* !HAVE_GETOPT_LONG */
569      c = getopt(argc, argv, short_options);
570#endif /* !HAVE_GETOPT_LONG */
571      if (c == -1)
572	break;
573
574      switch (c)
575	{
576	case 'c':
577	  /* Count number of matching records. */
578	  count_matches = 1;
579	  break;
580	case 'd':
581	  /* Set record delimiter regexp. */
582	  delim_regexp = optarg;
583	  if (delim_after == 1)
584	    delim_after = 0;
585	  break;
586	case 'e':
587	  /* Regexp to use. */
588	  regexp = optarg;
589	  break;
590	case 'h':
591	  /* Don't prefix filename on output if there are multiple files. */
592	  print_filename = 0;
593	  break;
594	case 'i':
595	  /* Ignore case. */
596	  comp_flags |= REG_ICASE;
597	  break;
598	case 'k':
599	  /* The pattern is a literal string. */
600	  literal_string = 1;
601	  break;
602	case 'l':
603	  /* Only print files that contain matches. */
604	  list_files = 1;
605	  break;
606	case 'n':
607	  /* Print record number of matching record. */
608	  print_recnum = 1;
609	  break;
610	case 'q':
611	  be_silent = 1;
612	  break;
613	case 'r':
614	  /* also search in sub-directories */
615	  recursive = 1;
616	  print_filename = 1;
617	  break;
618	case 's':
619	  /* Print match cost of matching record. */
620	  print_cost = 1;
621	  break;
622	case 'v':
623	  /* Select non-matching records. */
624	  invert_match = 1;
625	  break;
626	case 'w':
627	  /* Match only whole words. */
628	  word_regexp = 1;
629	  break;
630	case 'y':
631	  /* Compatibility option, does nothing. */
632	  break;
633	case 'B':
634	  /* Select only the records which have the best match. */
635	  best_match = 1;
636	  break;
637	case 'D':
638	  /* Set the cost of a deletion. */
639	  match_params.cost_del = atoi(optarg);
640	  break;
641	case 'E':
642	  /* Set the maximum number of errors allowed for a record to match. */
643	  match_params.max_cost = atoi(optarg);
644	  max_cost_set = 1;
645	  break;
646	case 'H':
647	  /* Always print filename prefix on output. */
648	  print_filename = 1;
649	  break;
650	case 'I':
651	  /* Set the cost of an insertion. */
652	  match_params.cost_ins = atoi(optarg);
653	  break;
654	case 'M':
655	  /* Print delimiters after matches instead of before. */
656	  delim_after = 2;
657	  break;
658	case 'S':
659	  /* Set the cost of a substitution. */
660	  match_params.cost_subst = atoi(optarg);
661	  break;
662	case 'V':
663	  {
664	    /* Print version string and exit. */
665	    char *version;
666	    tre_config(TRE_CONFIG_VERSION, &version);
667	    printf("%s (TRE agrep) %s\n\n", program_name, version);
668	    printf(_("\
669Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>.\n"));
670	    printf("\n");
671	    exit(0);
672	    break;
673	  }
674	case '?':
675	  /* Ambiguous match or extraneous parameter. */
676	  break;
677
678	case '-':
679	  /* Emulate some long options on systems which don't
680	     have getopt_long. */
681	  if (strcmp(optarg, "color") == 0
682	      || strcmp(optarg, "colour") == 0)
683	    color_option = 1;
684	  else if (strcmp(optarg, "show-position") == 0)
685	    print_position = 1;
686	  else if (strcmp(optarg, "help") == 0)
687	    show_help = 1;
688	  else
689	    {
690	      fprintf(stderr, _("%s: invalid option --%s\n"),
691		      program_name, optarg);
692	      exit(2);
693	    }
694	  break;
695
696#ifdef HAVE_GETOPT_LONG
697	case COLOR_OPTION:
698	  color_option = 1;
699	  break;
700	case SHOW_POSITION_OPTION:
701	  print_position = 1;
702	  break;
703#endif /* HAVE_GETOPT_LONG */
704	case 0:
705	  /* Long options without corresponding short options. */
706	  break;
707
708	default:
709	  if (c >= '0' && c <= '9')
710	    match_params.max_cost = c - '0';
711	  else
712	    tre_agrep_usage(2);
713	  max_cost_set = 1;
714	  break;
715	}
716    }
717
718  if (show_help)
719    tre_agrep_usage(0);
720
721  if (color_option)
722    {
723      char *user_highlight = getenv("GREP_COLOR");
724      if (user_highlight && *user_highlight != '\0')
725	highlight = user_highlight;
726    }
727
728  /* Get the pattern. */
729  if (regexp == NULL)
730    {
731      if (optind >= argc)
732	tre_agrep_usage(2);
733      regexp = argv[optind++];
734    }
735
736  /* If -k is specified, make the regexp literal.  This uses
737     the \Q and \E extensions.	If the string already contains
738     occurrences of \E, we need to handle them separately.  This is a
739     pain, but can't really be avoided if we want to create a regexp
740     which works together with -w (see below). */
741  if (literal_string)
742    {
743      char *next_pos = regexp;
744      char *new_re, *new_re_end;
745      int n = 0;
746      int len;
747
748      next_pos = regexp;
749      while (next_pos)
750	{
751	  next_pos = strstr(next_pos, "\\E");
752	  if (next_pos)
753	    {
754	      n++;
755	      next_pos += 2;
756	    }
757	}
758
759      len = strlen(regexp);
760      new_re = malloc(len + 5 + n * 7);
761      if (!new_re)
762	{
763	  fprintf(stderr, "%s: %s\n", program_name, _("Out of memory"));
764	  return 2;
765	}
766
767      next_pos = regexp;
768      new_re_end = new_re;
769      strcpy(new_re_end, "\\Q");
770      new_re_end += 2;
771      while (next_pos)
772	{
773	  char *start = next_pos;
774	  next_pos = strstr(next_pos, "\\E");
775	  if (next_pos)
776	    {
777	      strncpy(new_re_end, start, next_pos - start);
778	      new_re_end += next_pos - start;
779	      strcpy(new_re_end, "\\E\\\\E\\Q");
780	      new_re_end += 7;
781	      next_pos += 2;
782	    }
783	  else
784	    {
785	      strcpy(new_re_end, start);
786	      new_re_end += strlen(start);
787	    }
788	}
789      strcpy(new_re_end, "\\E");
790      regexp = new_re;
791    }
792
793  /* If -w is specified, prepend beginning-of-word and end-of-word
794     assertions to the regexp before compiling. */
795  if (word_regexp)
796    {
797      char *tmp = regexp;
798      int len = strlen(tmp);
799      regexp = malloc(len + 7);
800      if (regexp == NULL)
801	{
802	  fprintf(stderr, "%s: %s\n", program_name, _("Out of memory"));
803	  return 2;
804	}
805      strcpy(regexp, "\\<(");
806      strcpy(regexp + 3, tmp);
807      strcpy(regexp + len + 3, ")\\>");
808    }
809
810  /* Compile the pattern. */
811  errcode = tre_regcomp(&preg, regexp, comp_flags);
812  if (errcode)
813    {
814      char errbuf[256];
815      tre_regerror(errcode, &preg, errbuf, sizeof(errbuf));
816      fprintf(stderr, "%s: %s: %s\n",
817	      program_name, _("Error in search pattern"), errbuf);
818      return 2;
819    }
820
821  /* Compile the record delimiter pattern. */
822  errcode = tre_regcomp(&delim, delim_regexp, REG_EXTENDED | REG_NEWLINE);
823  if (errcode)
824    {
825      char errbuf[256];
826      tre_regerror(errcode, &preg, errbuf, sizeof(errbuf));
827      fprintf(stderr, "%s: %s: %s\n",
828	      program_name, _("Error in record delimiter pattern"), errbuf);
829      return 2;
830    }
831
832  if (tre_regexec(&delim, "", 0, NULL, 0) == REG_OK)
833    {
834      fprintf(stderr, "%s: %s\n", program_name,
835	      _("Record delimiter pattern must not match an empty string"));
836      return 2;
837    }
838
839  /* The rest of the arguments are file(s) to match. */
840
841  /* If -h or -H were not specified, print filenames if there are more
842     than one files specified. */
843  if (print_filename == -1)
844    {
845      if (argc - optind <= 1)
846	print_filename = 0;
847      else
848	print_filename = 1;
849    }
850
851  if (optind >= argc)
852    {
853      /* There are no files specified, read from stdin. */
854      tre_agrep_handle_file(NULL);
855    }
856  else if (best_match)
857    {
858      int first_ind = optind;
859
860      /* Best match mode.  Set up the limits first. */
861      if (!max_cost_set)
862	match_params.max_cost = INT_MAX;
863      best_cost = INT_MAX;
864
865      /* Scan all files once without outputting anything, searching
866	 for the best matches. */
867      while (optind < argc)
868	tre_agrep_handle_dirent(argv[optind++]);
869
870      /* If there were no matches, bail out now. */
871      if (best_cost == INT_MAX)
872	return 1;
873
874      /* Otherwise, rescan the files with max_cost set to the cost
875	 of the best match found previously, this time outputting
876	 the matches. */
877      match_params.max_cost = best_cost;
878      best_match = 2;
879      optind = first_ind;
880      while (optind < argc)
881	tre_agrep_handle_dirent(argv[optind++]);
882    }
883  else
884    {
885      /* Normal mode. */
886      while (optind < argc)
887	tre_agrep_handle_dirent(argv[optind++]);
888    }
889
890  return have_matches == 0;
891}
892