1/* 2 agrep.c - Approximate grep 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7*/ 8 9#ifdef HAVE_CONFIG_H 10#include <config.h> 11#endif /* HAVE_CONFIG_H */ 12#include <stdio.h> 13#include <stdlib.h> 14#include <locale.h> 15#include <string.h> 16#include <sys/types.h> 17#include <sys/stat.h> 18#include <fcntl.h> 19#include <errno.h> 20#include <assert.h> 21#include <limits.h> 22#include <unistd.h> 23#ifdef HAVE_GETOPT_H 24#include <getopt.h> 25#endif /* HAVE_GETOPT_H */ 26#include "regex.h" 27 28#ifdef HAVE_GETTEXT 29#include <libintl.h> 30#else 31#define gettext(s) s 32#define bindtextdomain(p, d) 33#define textdomain(p) 34#endif 35 36#define _(String) gettext(String) 37 38#undef MAX 39#undef MIN 40#define MAX(a, b) (((a) >= (b)) ? (a) : (b)) 41#define MIN(a, b) (((a) <= (b)) ? (a) : (b)) 42 43/* Short options. */ 44static char const short_options[] = 45"cd:e:hiklnqrsvwyBD:E:HI:MS:V0123456789-:"; 46 47static int show_help; 48char *program_name; 49 50#ifdef HAVE_GETOPT_LONG 51/* Long options that have no corresponding short equivalents. */ 52enum { 53 COLOR_OPTION = CHAR_MAX + 1, 54 SHOW_POSITION_OPTION 55}; 56 57/* Long option equivalences. */ 58static struct option const long_options[] = 59{ 60 {"best-match", no_argument, NULL, 'B'}, 61 {"color", no_argument, NULL, COLOR_OPTION}, 62 {"colour", no_argument, NULL, COLOR_OPTION}, 63 {"count", no_argument, NULL, 'c'}, 64 {"delete-cost", required_argument, NULL, 'D'}, 65 {"delimiter", no_argument, NULL, 'd'}, 66 {"delimiter-after", no_argument, NULL, 'M'}, 67 {"files-with-matches", no_argument, NULL, 'l'}, 68 {"help", no_argument, &show_help, 1}, 69 {"ignore-case", no_argument, NULL, 'i'}, 70 {"insert-cost", required_argument, NULL, 'I'}, 71 {"invert-match", no_argument, NULL, 'v'}, 72 {"line-number", no_argument, NULL, 'n'}, 73 {"literal", no_argument, NULL, 'k'}, 74 {"max-errors", required_argument, NULL, 'E'}, 75 {"no-filename", no_argument, NULL, 'h'}, 76 {"nothing", no_argument, NULL, 'y'}, 77 {"quiet", no_argument, NULL, 'q'}, 78 {"record-number", no_argument, NULL, 'n'}, 79 {"recursive", no_argument, NULL, 'r'}, 80 {"regexp", required_argument, NULL, 'e'}, 81 {"show-cost", no_argument, NULL, 's'}, 82 {"show-position", no_argument, NULL, SHOW_POSITION_OPTION}, 83 {"silent", no_argument, NULL, 'q'}, 84 {"substitute-cost", required_argument, NULL, 'S'}, 85 {"version", no_argument, NULL, 'V'}, 86 {"with-filename", no_argument, NULL, 'H'}, 87 {"word-regexp", no_argument, NULL, 'w'}, 88 {0, 0, 0, 0} 89}; 90#endif /* HAVE_GETOPT_LONG */ 91 92__dead static void 93tre_agrep_usage(int status) 94{ 95 if (status != 0) 96 { 97 fprintf(stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), 98 program_name); 99 fprintf(stderr, _("Try `%s --help' for more information.\n"), 100 program_name); 101 } 102 else 103 { 104 printf(_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name); 105 printf(_("\ 106Searches for approximate matches of PATTERN in each FILE or standard input.\n\ 107Example: `%s -2 optimize foo.txt' outputs all lines in file `foo.txt' that\n\ 108match \"optimize\" within two errors. E.g. lines which contain \"optimise\",\n\ 109\"optmise\", and \"opitmize\" all match.\n"), program_name); 110 printf("\n"); 111 printf(_("\ 112Regexp selection and interpretation:\n\ 113 -e, --regexp=PATTERN use PATTERN as a regular expression\n\ 114 -i, --ignore-case ignore case distinctions\n\ 115 -k, --literal PATTERN is a literal string\n\ 116 -w, --word-regexp force PATTERN to match only whole words\n\ 117\n\ 118Approximate matching settings:\n\ 119 -D, --delete-cost=NUM set cost of missing characters\n\ 120 -I, --insert-cost=NUM set cost of extra characters\n\ 121 -S, --substitute-cost=NUM set cost of wrong characters\n\ 122 -E, --max-errors=NUM select records that have at most NUM errors\n\ 123 -# select records that have at most # errors (# is a\n\ 124 digit between 0 and 9)\n\ 125\n\ 126Miscellaneous:\n\ 127 -d, --delimiter=PATTERN set the record delimiter regular expression\n\ 128 -v, --invert-match select non-matching records\n\ 129 -V, --version print version information and exit\n\ 130 -r, --recursive also search in any subdirectories\n\ 131 -y, --nothing does nothing (for compatibility with the non-free\n\ 132 agrep program)\n\ 133 --help display this help and exit\n\ 134\n\ 135Output control:\n\ 136 -B, --best-match only output records with least errors\n\ 137 -c, --count only print a count of matching records per FILE\n\ 138 -h, --no-filename suppress the prefixing filename on output\n\ 139 -H, --with-filename print the filename for each match\n\ 140 -l, --files-with-matches only print FILE names containing matches\n\ 141 -M, --delimiter-after print record delimiter after record if -d is used\n\ 142 -n, --record-number print record number with output\n\ 143 --line-number same as -n\n\ 144 -q, --quiet, --silent suppress all normal output\n\ 145 -s, --show-cost print match cost with output\n\ 146 --colour, --color use markers to distinguish the matching \ 147strings\n\ 148 --show-position prefix each output record with start and end\n\ 149 position of the first match within the record\n")); 150 printf("\n"); 151 printf(_("\ 152With no FILE, or when FILE is -, reads standard input. If less than two\n\ 153FILEs are given, -h is assumed. Exit status is 0 if a match is found, 1 for\n\ 154no match, and 2 if there were errors. If -E or -# is not specified, only\n\ 155exact matches are selected.\n")); 156 printf("\n"); 157 printf(_("\ 158PATTERN is a POSIX extended regular expression (ERE) with the TRE extensions.\n\ 159See tre(7) for a complete description.\n")); 160 printf("\n"); 161 printf(_("Report bugs to: ")); 162 printf("%s.\n", PACKAGE_BUGREPORT); 163 } 164 exit(status); 165} 166 167static regex_t preg; /* Compiled pattern to search for. */ 168static regex_t delim; /* Compiled record delimiter pattern. */ 169 170#define INITIAL_BUF_SIZE 10240 /* Initial size of the buffer. */ 171static char *buf; /* Buffer for scanning text. */ 172static int buf_size; /* Current size of the buffer. */ 173static int data_len; /* Amount of data in the buffer. */ 174static char *record; /* Start of current record. */ 175static char *next_record; /* Start of next record. */ 176static int record_len; /* Length of current record. */ 177static int delim_len; /* Length of delimiter before record. */ 178static int next_delim_len; /* Length of delimiter after record. */ 179static int delim_after = 1;/* If true, print the delimiter after the record. */ 180static int at_eof; 181static int have_matches; /* If true, matches have been found. */ 182static int is_binary; /* -1 unknown, 0 ascii, 1 binary */ 183 184static int invert_match; /* Show only non-matching records. */ 185static int print_filename; /* Output filename. */ 186static int print_recnum; /* Output record number. */ 187static int print_cost; /* Output match cost. */ 188static int count_matches; /* Count matching records. */ 189static int list_files; /* List matching files. */ 190static int color_option; /* Highlight matches. */ 191static int print_position; /* Show start and end offsets for matches. */ 192static int recursive; /* Search in subdirectories too */ 193 194static int best_match; /* Output only best matches. */ 195static int best_cost; /* Best match cost found so far. */ 196static int be_silent; /* Never output anything */ 197 198static regaparams_t match_params; 199 200/* The color string used with the --color option. If set, the 201 environment variable GREP_COLOR overrides this default value. */ 202static const char *highlight = "01;31"; 203 204static int 205isbinaryfile(void) 206{ 207 return buf != NULL && memchr(buf, '\0', data_len) != NULL; 208} 209 210/* Sets `record' to the next complete record from file `fd', and `record_len' 211 to the length of the record. Returns 1 when there are no more records, 212 0 otherwise. */ 213static inline int 214tre_agrep_get_next_record(int fd, const char *filename) 215{ 216 if (at_eof) 217 return 1; 218 219 while (1) 220 { 221 int errcode; 222 regmatch_t pmatch[1]; 223 224 if (next_record == NULL) 225 { 226 int r; 227 int read_size = buf_size - data_len; 228 229 if (read_size <= 0) 230 { 231 /* The buffer is full and no record delimiter found yet, 232 we need to grow the buffer. We double the size to 233 avoid rescanning the data too many times when the 234 records are very large. */ 235 buf_size *= 2; 236 buf = realloc(buf, buf_size); 237 if (buf == NULL) 238 { 239 fprintf(stderr, "%s: %s\n", program_name, _("Out of memory")); 240 exit(2); 241 } 242 read_size = buf_size - data_len; 243 } 244 245 r = read(fd, buf + data_len, read_size); 246 if (r < 0) 247 { 248 /* Read error. */ 249 char *err; 250 if (errno == EINTR) 251 continue; 252 err = strerror(errno); 253 fprintf(stderr, "%s: ", program_name); 254 fprintf(stderr, _("Error reading from %s: %s\n"), filename, err); 255 return 1; 256 } 257 258 if (r == 0) 259 { 260 /* End of file. Return the last record. */ 261 record = buf; 262 record_len = data_len; 263 at_eof = 1; 264 /* The empty string after a trailing delimiter is not considered 265 to be a record. */ 266 if (record_len == 0) 267 return 1; 268 return 0; 269 } 270 data_len += r; 271 next_record = buf; 272 273 if (is_binary < 0) 274 is_binary = isbinaryfile(); 275 } 276 277 /* Find the next record delimiter. */ 278 errcode = tre_regnexec(&delim, next_record, data_len - (next_record - buf), 279 1, pmatch, 0); 280 281 282 switch (errcode) 283 { 284 case REG_OK: 285 /* Record delimiter found, now we know how long the current 286 record is. */ 287 record = next_record; 288 record_len = pmatch[0].rm_so; 289 delim_len = next_delim_len; 290 291 next_delim_len = pmatch[0].rm_eo - pmatch[0].rm_so; 292 next_record = next_record + pmatch[0].rm_eo; 293 return 0; 294 break; 295 296 case REG_NOMATCH: 297 if (next_record == buf) 298 { 299 next_record = NULL; 300 continue; 301 } 302 303 /* Move the data to start of the buffer and read more 304 data. */ 305 memmove(buf, next_record, buf + data_len - next_record); 306 data_len = buf + data_len - next_record; 307 next_record = NULL; 308 continue; 309 break; 310 311 case REG_ESPACE: 312 fprintf(stderr, "%s: %s\n", program_name, _("Out of memory")); 313 exit(2); 314 break; 315 316 default: 317 assert(0); 318 break; 319 } 320 } 321} 322 323#include <dirent.h> 324 325static int tre_agrep_handle_file(const char */*filename*/); 326 327static int 328tre_agrep_handle_dirent(const char *ent) 329{ 330 struct dirent storage; 331 struct dirent *dp; 332 struct stat st; 333 char path[8192]; 334 DIR *dirp; 335 int ret; 336 int ok; 337 338 if (ent == NULL || strcmp(ent, "-") == 0) { 339 return tre_agrep_handle_file(ent); 340 } 341 if (lstat(ent, &st) < 0) { 342 return tre_agrep_handle_file(ent); 343 } 344 if ((st.st_mode & S_IFMT) == S_IFDIR && recursive) { 345 if ((dirp = opendir(ent)) == NULL) { 346 fprintf(stderr, "can't open directory '%s'\n", ent); 347 return 0; 348 } 349 for (ret = 0 ; readdir_r(dirp, &storage, &dp) == 0 && dp != NULL ; ) { 350 if (strcmp(dp->d_name, ".") == 0 || 351 strcmp(dp->d_name, "..") == 0) { 352 continue; 353 } 354 snprintf(path, sizeof(path), "%s/%s", ent, dp->d_name); 355 if ((ok = tre_agrep_handle_dirent(path)) != 0) { 356 ret = ok; 357 } 358 } 359 closedir(dirp); 360 return ret; 361 } 362 return tre_agrep_handle_file(ent); 363} 364 365static int 366tre_agrep_handle_file(const char *filename) 367{ 368 int fd; 369 int count = 0; 370 int recnum = 0; 371 372 is_binary = -1; 373 374 /* Allocate the initial buffer. */ 375 if (buf == NULL) 376 { 377 buf = malloc(INITIAL_BUF_SIZE); 378 if (buf == NULL) 379 { 380 fprintf(stderr, "%s: %s\n", program_name, _("Out of memory")); 381 exit(2); 382 } 383 buf_size = INITIAL_BUF_SIZE; 384 } 385 386 /* Reset read buffer state. */ 387 next_record = NULL; 388 data_len = 0; 389 390 if (!filename || strcmp(filename, "-") == 0) 391 { 392 if (best_match) 393 { 394 fprintf(stderr, "%s: %s\n", program_name, 395 _("Cannot use -B when reading from standard input.")); 396 return 2; 397 } 398 fd = 0; 399 filename = _("(standard input)"); 400 } 401 else 402 { 403 fd = open(filename, O_RDONLY); 404 } 405 406 if (fd < 0) 407 { 408 fprintf(stderr, "%s: %s: %s\n", program_name, filename, strerror(errno)); 409 return 1; 410 } 411 412 413 /* Go through all records and output the matching ones, or the non-matching 414 ones if `invert_match' is true. */ 415 at_eof = 0; 416 while (!tre_agrep_get_next_record(fd, filename)) 417 { 418 int errcode; 419 regamatch_t match; 420 regmatch_t pmatch[1]; 421 recnum++; 422 memset(&match, 0, sizeof(match)); 423 if (best_match) 424 match_params.max_cost = best_cost; 425 if (color_option || print_position) 426 { 427 match.pmatch = pmatch; 428 match.nmatch = 1; 429 } 430 431 /* Stop searching for better matches if an exact match is found. */ 432 if (best_match == 1 && best_cost == 0) 433 break; 434 435 /* See if the record matches. */ 436 errcode = tre_reganexec(&preg, record, record_len, &match, match_params, 0); 437 if ((!invert_match && errcode == REG_OK) 438 || (invert_match && errcode == REG_NOMATCH)) 439 { 440 if (be_silent) 441 exit(0); 442 443 count++; 444 have_matches = 1; 445 if (best_match) 446 { 447 if (best_match == 1) 448 { 449 /* First best match pass. */ 450 if (match.cost < best_cost) 451 best_cost = match.cost; 452 continue; 453 } 454 /* Second best match pass. */ 455 if (match.cost > best_cost) 456 continue; 457 } 458 459 if (list_files) 460 { 461 printf("%s\n", filename); 462 break; 463 } 464 else if (!count_matches && is_binary > 0) 465 { 466 if (print_filename) 467 printf("%s:", filename); 468 printf("Binary file matches\n"); 469 break; 470 } 471 else if (!count_matches) 472 { 473 if (print_filename) 474 printf("%s:", filename); 475 if (print_recnum) 476 printf("%d:", recnum); 477 if (print_cost) 478 printf("%d:", match.cost); 479 if (print_position) 480 printf("%d-%d:", 481 invert_match ? 0 : (int)pmatch[0].rm_so, 482 invert_match ? record_len : (int)pmatch[0].rm_eo); 483 484 /* Adjust record boundaries so we print the delimiter 485 before or after the record. */ 486 if (delim_after) 487 { 488 record_len += next_delim_len; 489 } 490 else 491 { 492 record -= delim_len; 493 record_len += delim_len; 494 pmatch[0].rm_so += delim_len; 495 pmatch[0].rm_eo += delim_len; 496 } 497 498 if (color_option && !invert_match) 499 { 500 printf("%.*s", (int)pmatch[0].rm_so, record); 501 printf("\33[%sm", highlight); 502 printf("%.*s", (int)(pmatch[0].rm_eo - pmatch[0].rm_so), 503 record + pmatch[0].rm_so); 504 fputs("\33[00m", stdout); 505 printf("%.*s", (int)(record_len - pmatch[0].rm_eo), 506 record + pmatch[0].rm_eo); 507 } 508 else 509 { 510 printf("%.*s", record_len, record); 511 } 512 } 513 } 514 } 515 516 if (count_matches && !best_match && !be_silent) 517 { 518 if (print_filename) 519 printf("%s:", filename); 520 printf("%d\n", count); 521 } 522 523 if (fd) 524 close(fd); 525 526 return 0; 527} 528 529 530 531int 532main(int argc, char **argv) 533{ 534 int c, errcode; 535 int comp_flags = REG_EXTENDED; 536 char *tmp_str; 537 char *regexp = NULL; 538 const char *delim_regexp = "\n"; 539 int word_regexp = 0; 540 int literal_string = 0; 541 int max_cost_set = 0; 542 543 setlocale (LC_ALL, ""); 544 bindtextdomain (PACKAGE, LOCALEDIR); 545 textdomain (PACKAGE); 546 547 /* Get the program name without the path (for error messages etc). */ 548 program_name = argv[0]; 549 if (program_name) 550 { 551 tmp_str = strrchr(program_name, '/'); 552 if (tmp_str) 553 program_name = tmp_str + 1; 554 } 555 556 /* Defaults. */ 557 print_filename = -1; 558 print_cost = 0; 559 be_silent = 0; 560 tre_regaparams_default(&match_params); 561 match_params.max_cost = 0; 562 563 /* Parse command line options. */ 564 while (1) 565 { 566#ifdef HAVE_GETOPT_LONG 567 c = getopt_long(argc, argv, short_options, long_options, NULL); 568#else /* !HAVE_GETOPT_LONG */ 569 c = getopt(argc, argv, short_options); 570#endif /* !HAVE_GETOPT_LONG */ 571 if (c == -1) 572 break; 573 574 switch (c) 575 { 576 case 'c': 577 /* Count number of matching records. */ 578 count_matches = 1; 579 break; 580 case 'd': 581 /* Set record delimiter regexp. */ 582 delim_regexp = optarg; 583 if (delim_after == 1) 584 delim_after = 0; 585 break; 586 case 'e': 587 /* Regexp to use. */ 588 regexp = optarg; 589 break; 590 case 'h': 591 /* Don't prefix filename on output if there are multiple files. */ 592 print_filename = 0; 593 break; 594 case 'i': 595 /* Ignore case. */ 596 comp_flags |= REG_ICASE; 597 break; 598 case 'k': 599 /* The pattern is a literal string. */ 600 literal_string = 1; 601 break; 602 case 'l': 603 /* Only print files that contain matches. */ 604 list_files = 1; 605 break; 606 case 'n': 607 /* Print record number of matching record. */ 608 print_recnum = 1; 609 break; 610 case 'q': 611 be_silent = 1; 612 break; 613 case 'r': 614 /* also search in sub-directories */ 615 recursive = 1; 616 print_filename = 1; 617 break; 618 case 's': 619 /* Print match cost of matching record. */ 620 print_cost = 1; 621 break; 622 case 'v': 623 /* Select non-matching records. */ 624 invert_match = 1; 625 break; 626 case 'w': 627 /* Match only whole words. */ 628 word_regexp = 1; 629 break; 630 case 'y': 631 /* Compatibility option, does nothing. */ 632 break; 633 case 'B': 634 /* Select only the records which have the best match. */ 635 best_match = 1; 636 break; 637 case 'D': 638 /* Set the cost of a deletion. */ 639 match_params.cost_del = atoi(optarg); 640 break; 641 case 'E': 642 /* Set the maximum number of errors allowed for a record to match. */ 643 match_params.max_cost = atoi(optarg); 644 max_cost_set = 1; 645 break; 646 case 'H': 647 /* Always print filename prefix on output. */ 648 print_filename = 1; 649 break; 650 case 'I': 651 /* Set the cost of an insertion. */ 652 match_params.cost_ins = atoi(optarg); 653 break; 654 case 'M': 655 /* Print delimiters after matches instead of before. */ 656 delim_after = 2; 657 break; 658 case 'S': 659 /* Set the cost of a substitution. */ 660 match_params.cost_subst = atoi(optarg); 661 break; 662 case 'V': 663 { 664 /* Print version string and exit. */ 665 char *version; 666 tre_config(TRE_CONFIG_VERSION, &version); 667 printf("%s (TRE agrep) %s\n\n", program_name, version); 668 printf(_("\ 669Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>.\n")); 670 printf("\n"); 671 exit(0); 672 break; 673 } 674 case '?': 675 /* Ambiguous match or extraneous parameter. */ 676 break; 677 678 case '-': 679 /* Emulate some long options on systems which don't 680 have getopt_long. */ 681 if (strcmp(optarg, "color") == 0 682 || strcmp(optarg, "colour") == 0) 683 color_option = 1; 684 else if (strcmp(optarg, "show-position") == 0) 685 print_position = 1; 686 else if (strcmp(optarg, "help") == 0) 687 show_help = 1; 688 else 689 { 690 fprintf(stderr, _("%s: invalid option --%s\n"), 691 program_name, optarg); 692 exit(2); 693 } 694 break; 695 696#ifdef HAVE_GETOPT_LONG 697 case COLOR_OPTION: 698 color_option = 1; 699 break; 700 case SHOW_POSITION_OPTION: 701 print_position = 1; 702 break; 703#endif /* HAVE_GETOPT_LONG */ 704 case 0: 705 /* Long options without corresponding short options. */ 706 break; 707 708 default: 709 if (c >= '0' && c <= '9') 710 match_params.max_cost = c - '0'; 711 else 712 tre_agrep_usage(2); 713 max_cost_set = 1; 714 break; 715 } 716 } 717 718 if (show_help) 719 tre_agrep_usage(0); 720 721 if (color_option) 722 { 723 char *user_highlight = getenv("GREP_COLOR"); 724 if (user_highlight && *user_highlight != '\0') 725 highlight = user_highlight; 726 } 727 728 /* Get the pattern. */ 729 if (regexp == NULL) 730 { 731 if (optind >= argc) 732 tre_agrep_usage(2); 733 regexp = argv[optind++]; 734 } 735 736 /* If -k is specified, make the regexp literal. This uses 737 the \Q and \E extensions. If the string already contains 738 occurrences of \E, we need to handle them separately. This is a 739 pain, but can't really be avoided if we want to create a regexp 740 which works together with -w (see below). */ 741 if (literal_string) 742 { 743 char *next_pos = regexp; 744 char *new_re, *new_re_end; 745 int n = 0; 746 int len; 747 748 next_pos = regexp; 749 while (next_pos) 750 { 751 next_pos = strstr(next_pos, "\\E"); 752 if (next_pos) 753 { 754 n++; 755 next_pos += 2; 756 } 757 } 758 759 len = strlen(regexp); 760 new_re = malloc(len + 5 + n * 7); 761 if (!new_re) 762 { 763 fprintf(stderr, "%s: %s\n", program_name, _("Out of memory")); 764 return 2; 765 } 766 767 next_pos = regexp; 768 new_re_end = new_re; 769 strcpy(new_re_end, "\\Q"); 770 new_re_end += 2; 771 while (next_pos) 772 { 773 char *start = next_pos; 774 next_pos = strstr(next_pos, "\\E"); 775 if (next_pos) 776 { 777 strncpy(new_re_end, start, next_pos - start); 778 new_re_end += next_pos - start; 779 strcpy(new_re_end, "\\E\\\\E\\Q"); 780 new_re_end += 7; 781 next_pos += 2; 782 } 783 else 784 { 785 strcpy(new_re_end, start); 786 new_re_end += strlen(start); 787 } 788 } 789 strcpy(new_re_end, "\\E"); 790 regexp = new_re; 791 } 792 793 /* If -w is specified, prepend beginning-of-word and end-of-word 794 assertions to the regexp before compiling. */ 795 if (word_regexp) 796 { 797 char *tmp = regexp; 798 int len = strlen(tmp); 799 regexp = malloc(len + 7); 800 if (regexp == NULL) 801 { 802 fprintf(stderr, "%s: %s\n", program_name, _("Out of memory")); 803 return 2; 804 } 805 strcpy(regexp, "\\<("); 806 strcpy(regexp + 3, tmp); 807 strcpy(regexp + len + 3, ")\\>"); 808 } 809 810 /* Compile the pattern. */ 811 errcode = tre_regcomp(&preg, regexp, comp_flags); 812 if (errcode) 813 { 814 char errbuf[256]; 815 tre_regerror(errcode, &preg, errbuf, sizeof(errbuf)); 816 fprintf(stderr, "%s: %s: %s\n", 817 program_name, _("Error in search pattern"), errbuf); 818 return 2; 819 } 820 821 /* Compile the record delimiter pattern. */ 822 errcode = tre_regcomp(&delim, delim_regexp, REG_EXTENDED | REG_NEWLINE); 823 if (errcode) 824 { 825 char errbuf[256]; 826 tre_regerror(errcode, &preg, errbuf, sizeof(errbuf)); 827 fprintf(stderr, "%s: %s: %s\n", 828 program_name, _("Error in record delimiter pattern"), errbuf); 829 return 2; 830 } 831 832 if (tre_regexec(&delim, "", 0, NULL, 0) == REG_OK) 833 { 834 fprintf(stderr, "%s: %s\n", program_name, 835 _("Record delimiter pattern must not match an empty string")); 836 return 2; 837 } 838 839 /* The rest of the arguments are file(s) to match. */ 840 841 /* If -h or -H were not specified, print filenames if there are more 842 than one files specified. */ 843 if (print_filename == -1) 844 { 845 if (argc - optind <= 1) 846 print_filename = 0; 847 else 848 print_filename = 1; 849 } 850 851 if (optind >= argc) 852 { 853 /* There are no files specified, read from stdin. */ 854 tre_agrep_handle_file(NULL); 855 } 856 else if (best_match) 857 { 858 int first_ind = optind; 859 860 /* Best match mode. Set up the limits first. */ 861 if (!max_cost_set) 862 match_params.max_cost = INT_MAX; 863 best_cost = INT_MAX; 864 865 /* Scan all files once without outputting anything, searching 866 for the best matches. */ 867 while (optind < argc) 868 tre_agrep_handle_dirent(argv[optind++]); 869 870 /* If there were no matches, bail out now. */ 871 if (best_cost == INT_MAX) 872 return 1; 873 874 /* Otherwise, rescan the files with max_cost set to the cost 875 of the best match found previously, this time outputting 876 the matches. */ 877 match_params.max_cost = best_cost; 878 best_match = 2; 879 optind = first_ind; 880 while (optind < argc) 881 tre_agrep_handle_dirent(argv[optind++]); 882 } 883 else 884 { 885 /* Normal mode. */ 886 while (optind < argc) 887 tre_agrep_handle_dirent(argv[optind++]); 888 } 889 890 return have_matches == 0; 891} 892