1/************************************************* 2* pcregrep program * 3*************************************************/ 4 5/* This is a grep program that uses the PCRE regular expression library to do 6its pattern matching. On a Unix or Win32 system it can recurse into 7directories. 8 9 Copyright (c) 1997-2009 University of Cambridge 10 11----------------------------------------------------------------------------- 12Redistribution and use in source and binary forms, with or without 13modification, are permitted provided that the following conditions are met: 14 15 * Redistributions of source code must retain the above copyright notice, 16 this list of conditions and the following disclaimer. 17 18 * Redistributions in binary form must reproduce the above copyright 19 notice, this list of conditions and the following disclaimer in the 20 documentation and/or other materials provided with the distribution. 21 22 * Neither the name of the University of Cambridge nor the names of its 23 contributors may be used to endorse or promote products derived from 24 this software without specific prior written permission. 25 26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36POSSIBILITY OF SUCH DAMAGE. 
37----------------------------------------------------------------------------- 38*/ 39 40#ifdef HAVE_CONFIG_H 41#include "config.h" 42#endif 43 44#include <ctype.h> 45#include <locale.h> 46#include <stdio.h> 47#include <string.h> 48#include <stdlib.h> 49#include <errno.h> 50 51#include <sys/types.h> 52#include <sys/stat.h> 53 54#ifdef HAVE_UNISTD_H 55#include <unistd.h> 56#endif 57 58#ifdef SUPPORT_LIBZ 59#include <zlib.h> 60#endif 61 62#ifdef SUPPORT_LIBBZ2 63#include <bzlib.h> 64#endif 65 66#include "pcre.h" 67 68#define FALSE 0 69#define TRUE 1 70 71typedef int BOOL; 72 73#define MAX_PATTERN_COUNT 100 74#define OFFSET_SIZE 99 75 76#if BUFSIZ > 8192 77#define MBUFTHIRD BUFSIZ 78#else 79#define MBUFTHIRD 8192 80#endif 81 82/* Values for the "filenames" variable, which specifies options for file name 83output. The order is important; it is assumed that a file name is wanted for 84all values greater than FN_DEFAULT. */ 85 86enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE }; 87 88/* File reading styles */ 89 90enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 }; 91 92/* Actions for the -d and -D options */ 93 94enum { dee_READ, dee_SKIP, dee_RECURSE }; 95enum { DEE_READ, DEE_SKIP }; 96 97/* Actions for special processing options (flag bits) */ 98 99#define PO_WORD_MATCH 0x0001 100#define PO_LINE_MATCH 0x0002 101#define PO_FIXED_STRINGS 0x0004 102 103/* Line ending types */ 104 105enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF }; 106 107 108 109/************************************************* 110* Global variables * 111*************************************************/ 112 113/* Jeffrey Friedl has some debugging requirements that are not part of the 114regular code. 
*/ 115 116#ifdef JFRIEDL_DEBUG 117static int S_arg = -1; 118static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */ 119static unsigned int jfriedl_XT = 0; /* replicate text this many times */ 120static const char *jfriedl_prefix = ""; 121static const char *jfriedl_postfix = ""; 122#endif 123 124static int endlinetype; 125 126static char *colour_string = (char *)"1;31"; 127static char *colour_option = NULL; 128static char *dee_option = NULL; 129static char *DEE_option = NULL; 130static char *newline = NULL; 131static char *pattern_filename = NULL; 132static char *stdin_name = (char *)"(standard input)"; 133static char *locale = NULL; 134 135static const unsigned char *pcretables = NULL; 136 137static int pattern_count = 0; 138static pcre **pattern_list = NULL; 139static pcre_extra **hints_list = NULL; 140 141static char *include_pattern = NULL; 142static char *exclude_pattern = NULL; 143static char *include_dir_pattern = NULL; 144static char *exclude_dir_pattern = NULL; 145 146static pcre *include_compiled = NULL; 147static pcre *exclude_compiled = NULL; 148static pcre *include_dir_compiled = NULL; 149static pcre *exclude_dir_compiled = NULL; 150 151static int after_context = 0; 152static int before_context = 0; 153static int both_context = 0; 154static int dee_action = dee_READ; 155static int DEE_action = DEE_READ; 156static int error_count = 0; 157static int filenames = FN_DEFAULT; 158static int process_options = 0; 159 160static BOOL count_only = FALSE; 161static BOOL do_colour = FALSE; 162static BOOL file_offsets = FALSE; 163static BOOL hyphenpending = FALSE; 164static BOOL invert = FALSE; 165static BOOL line_offsets = FALSE; 166static BOOL multiline = FALSE; 167static BOOL number = FALSE; 168static BOOL omit_zero_count = FALSE; 169static BOOL only_matching = FALSE; 170static BOOL quiet = FALSE; 171static BOOL silent = FALSE; 172static BOOL utf8 = FALSE; 173 174/* Structure for options and list of them */ 175 176enum { OP_NODATA, 
OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER, 177 OP_PATLIST }; 178 179typedef struct option_item { 180 int type; 181 int one_char; 182 void *dataptr; 183 const char *long_name; 184 const char *help_text; 185} option_item; 186 187/* Options without a single-letter equivalent get a negative value. This can be 188used to identify them. */ 189 190#define N_COLOUR (-1) 191#define N_EXCLUDE (-2) 192#define N_EXCLUDE_DIR (-3) 193#define N_HELP (-4) 194#define N_INCLUDE (-5) 195#define N_INCLUDE_DIR (-6) 196#define N_LABEL (-7) 197#define N_LOCALE (-8) 198#define N_NULL (-9) 199#define N_LOFFSETS (-10) 200#define N_FOFFSETS (-11) 201 202static option_item optionlist[] = { 203 { OP_NODATA, N_NULL, NULL, "", " terminate options" }, 204 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" }, 205 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" }, 206 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, 207 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" }, 208 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, 209 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" }, 210 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" }, 211 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" }, 212 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" }, 213 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" }, 214 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" }, 215 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" }, 216 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" }, 217 { OP_NODATA, 'H', NULL, "with-filename", "force 
the prefixing filename on output" }, 218 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, 219 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, 220 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, 221 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, 222 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, 223 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, 224 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, 225 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, 226 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, 227 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, 228 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" }, 229 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, 230 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, 231 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" }, 232 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" }, 233 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" }, 234 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" }, 235#ifdef JFRIEDL_DEBUG 236 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, 237#endif 238 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, 239 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" }, 240 { OP_NODATA, 'V', NULL, "version", "print version information and exit" }, 241 { OP_NODATA, 'v', NULL, "invert-match", 
"select non-matching lines" }, 242 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" }, 243 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, 244 { OP_NODATA, 0, NULL, NULL, NULL } 245}; 246 247/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F 248options. These set the 1, 2, and 4 bits in process_options, respectively. Note 249that the combination of -w and -x has the same effect as -x on its own, so we 250can treat them as the same. */ 251 252static const char *prefix[] = { 253 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" }; 254 255static const char *suffix[] = { 256 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" }; 257 258/* UTF-8 tables - used only when the newline setting is "any". */ 259 260const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; 261 262const char utf8_table4[] = { 263 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 264 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 265 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 266 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; 267 268 269 270/************************************************* 271* OS-specific functions * 272*************************************************/ 273 274/* These functions are defined so that they can be made system specific, 275although at present the only ones are for Unix, Win32, and for "no support". */ 276 277 278/************* Directory scanning in Unix ***********/ 279 280#if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H 281#include <sys/types.h> 282#include <sys/stat.h> 283#include <dirent.h> 284 285typedef DIR directory_type; 286 287static int 288isdirectory(char *filename) 289{ 290struct stat statbuf; 291if (stat(filename, &statbuf) < 0) 292 return 0; /* In the expectation that opening as a file will fail */ 293return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? 
'/' : 0; 294} 295 296static directory_type * 297opendirectory(char *filename) 298{ 299return opendir(filename); 300} 301 302static char * 303readdirectory(directory_type *dir) 304{ 305for (;;) 306 { 307 struct dirent *dent = readdir(dir); 308 if (dent == NULL) return NULL; 309 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) 310 return dent->d_name; 311 } 312/* Control never reaches here */ 313} 314 315static void 316closedirectory(directory_type *dir) 317{ 318closedir(dir); 319} 320 321 322/************* Test for regular file in Unix **********/ 323 324static int 325isregfile(char *filename) 326{ 327struct stat statbuf; 328if (stat(filename, &statbuf) < 0) 329 return 1; /* In the expectation that opening as a file will fail */ 330return (statbuf.st_mode & S_IFMT) == S_IFREG; 331} 332 333 334/************* Test stdout for being a terminal in Unix **********/ 335 336static BOOL 337is_stdout_tty(void) 338{ 339return isatty(fileno(stdout)); 340} 341 342 343/************* Directory scanning in Win32 ***********/ 344 345/* I (Philip Hazel) have no means of testing this code. It was contributed by 346Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES 347when it did not exist. David Byron added a patch that moved the #include of 348<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. 349*/ 350 351#elif HAVE_WINDOWS_H 352 353#ifndef STRICT 354# define STRICT 355#endif 356#ifndef WIN32_LEAN_AND_MEAN 357# define WIN32_LEAN_AND_MEAN 358#endif 359 360#include <windows.h> 361 362#ifndef INVALID_FILE_ATTRIBUTES 363#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF 364#endif 365 366typedef struct directory_type 367{ 368HANDLE handle; 369BOOL first; 370WIN32_FIND_DATA data; 371} directory_type; 372 373int 374isdirectory(char *filename) 375{ 376DWORD attr = GetFileAttributes(filename); 377if (attr == INVALID_FILE_ATTRIBUTES) 378 return 0; 379return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? 
'/' : 0; 380} 381 382directory_type * 383opendirectory(char *filename) 384{ 385size_t len; 386char *pattern; 387directory_type *dir; 388DWORD err; 389len = strlen(filename); 390pattern = (char *) malloc(len + 3); 391dir = (directory_type *) malloc(sizeof(*dir)); 392if ((pattern == NULL) || (dir == NULL)) 393 { 394 fprintf(stderr, "pcregrep: malloc failed\n"); 395 exit(2); 396 } 397memcpy(pattern, filename, len); 398memcpy(&(pattern[len]), "\\*", 3); 399dir->handle = FindFirstFile(pattern, &(dir->data)); 400if (dir->handle != INVALID_HANDLE_VALUE) 401 { 402 free(pattern); 403 dir->first = TRUE; 404 return dir; 405 } 406err = GetLastError(); 407free(pattern); 408free(dir); 409errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT; 410return NULL; 411} 412 413char * 414readdirectory(directory_type *dir) 415{ 416for (;;) 417 { 418 if (!dir->first) 419 { 420 if (!FindNextFile(dir->handle, &(dir->data))) 421 return NULL; 422 } 423 else 424 { 425 dir->first = FALSE; 426 } 427 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0) 428 return dir->data.cFileName; 429 } 430#ifndef _MSC_VER 431return NULL; /* Keep compiler happy; never executed */ 432#endif 433} 434 435void 436closedirectory(directory_type *dir) 437{ 438FindClose(dir->handle); 439free(dir); 440} 441 442 443/************* Test for regular file in Win32 **********/ 444 445/* I don't know how to do this, or if it can be done; assume all paths are 446regular if they are not directories. */ 447 448int isregfile(char *filename) 449{ 450return !isdirectory(filename); 451} 452 453 454/************* Test stdout for being a terminal in Win32 **********/ 455 456/* I don't know how to do this; assume never */ 457 458static BOOL 459is_stdout_tty(void) 460{ 461return FALSE; 462} 463 464 465/************* Directory scanning when we can't do it ***********/ 466 467/* The type is void, and apart from isdirectory(), the functions do nothing. 
*/ 468 469#else 470 471typedef void directory_type; 472 473int isdirectory(char *filename) { return 0; } 474directory_type * opendirectory(char *filename) { return (directory_type*)0;} 475char *readdirectory(directory_type *dir) { return (char*)0;} 476void closedirectory(directory_type *dir) {} 477 478 479/************* Test for regular when we can't do it **********/ 480 481/* Assume all files are regular. */ 482 483int isregfile(char *filename) { return 1; } 484 485 486/************* Test stdout for being a terminal when we can't do it **********/ 487 488static BOOL 489is_stdout_tty(void) 490{ 491return FALSE; 492} 493 494 495#endif 496 497 498 499#ifndef HAVE_STRERROR 500/************************************************* 501* Provide strerror() for non-ANSI libraries * 502*************************************************/ 503 504/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() 505in their libraries, but can provide the same facility by this simple 506alternative function. */ 507 508extern int sys_nerr; 509extern char *sys_errlist[]; 510 511char * 512strerror(int n) 513{ 514if (n < 0 || n >= sys_nerr) return "unknown error number"; 515return sys_errlist[n]; 516} 517#endif /* HAVE_STRERROR */ 518 519 520 521/************************************************* 522* Find end of line * 523*************************************************/ 524 525/* The length of the endline sequence that is found is set via lenptr. This may 526be zero at the very end of the file if there is no line-ending sequence there. 
527 528Arguments: 529 p current position in line 530 endptr end of available data 531 lenptr where to put the length of the eol sequence 532 533Returns: pointer to the last byte of the line 534*/ 535 536static char * 537end_of_line(char *p, char *endptr, int *lenptr) 538{ 539switch(endlinetype) 540 { 541 default: /* Just in case */ 542 case EL_LF: 543 while (p < endptr && *p != '\n') p++; 544 if (p < endptr) 545 { 546 *lenptr = 1; 547 return p + 1; 548 } 549 *lenptr = 0; 550 return endptr; 551 552 case EL_CR: 553 while (p < endptr && *p != '\r') p++; 554 if (p < endptr) 555 { 556 *lenptr = 1; 557 return p + 1; 558 } 559 *lenptr = 0; 560 return endptr; 561 562 case EL_CRLF: 563 for (;;) 564 { 565 while (p < endptr && *p != '\r') p++; 566 if (++p >= endptr) 567 { 568 *lenptr = 0; 569 return endptr; 570 } 571 if (*p == '\n') 572 { 573 *lenptr = 2; 574 return p + 1; 575 } 576 } 577 break; 578 579 case EL_ANYCRLF: 580 while (p < endptr) 581 { 582 int extra = 0; 583 register int c = *((unsigned char *)p); 584 585 if (utf8 && c >= 0xc0) 586 { 587 int gcii, gcss; 588 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 589 gcss = 6*extra; 590 c = (c & utf8_table3[extra]) << gcss; 591 for (gcii = 1; gcii <= extra; gcii++) 592 { 593 gcss -= 6; 594 c |= (p[gcii] & 0x3f) << gcss; 595 } 596 } 597 598 p += 1 + extra; 599 600 switch (c) 601 { 602 case 0x0a: /* LF */ 603 *lenptr = 1; 604 return p; 605 606 case 0x0d: /* CR */ 607 if (p < endptr && *p == 0x0a) 608 { 609 *lenptr = 2; 610 p++; 611 } 612 else *lenptr = 1; 613 return p; 614 615 default: 616 break; 617 } 618 } /* End of loop for ANYCRLF case */ 619 620 *lenptr = 0; /* Must have hit the end */ 621 return endptr; 622 623 case EL_ANY: 624 while (p < endptr) 625 { 626 int extra = 0; 627 register int c = *((unsigned char *)p); 628 629 if (utf8 && c >= 0xc0) 630 { 631 int gcii, gcss; 632 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 633 gcss = 6*extra; 634 c = (c & utf8_table3[extra]) << gcss; 635 
for (gcii = 1; gcii <= extra; gcii++) 636 { 637 gcss -= 6; 638 c |= (p[gcii] & 0x3f) << gcss; 639 } 640 } 641 642 p += 1 + extra; 643 644 switch (c) 645 { 646 case 0x0a: /* LF */ 647 case 0x0b: /* VT */ 648 case 0x0c: /* FF */ 649 *lenptr = 1; 650 return p; 651 652 case 0x0d: /* CR */ 653 if (p < endptr && *p == 0x0a) 654 { 655 *lenptr = 2; 656 p++; 657 } 658 else *lenptr = 1; 659 return p; 660 661 case 0x85: /* NEL */ 662 *lenptr = utf8? 2 : 1; 663 return p; 664 665 case 0x2028: /* LS */ 666 case 0x2029: /* PS */ 667 *lenptr = 3; 668 return p; 669 670 default: 671 break; 672 } 673 } /* End of loop for ANY case */ 674 675 *lenptr = 0; /* Must have hit the end */ 676 return endptr; 677 } /* End of overall switch */ 678} 679 680 681 682/************************************************* 683* Find start of previous line * 684*************************************************/ 685 686/* This is called when looking back for before lines to print. 687 688Arguments: 689 p start of the subsequent line 690 startptr start of available data 691 692Returns: pointer to the start of the previous line 693*/ 694 695static char * 696previous_line(char *p, char *startptr) 697{ 698switch(endlinetype) 699 { 700 default: /* Just in case */ 701 case EL_LF: 702 p--; 703 while (p > startptr && p[-1] != '\n') p--; 704 return p; 705 706 case EL_CR: 707 p--; 708 while (p > startptr && p[-1] != '\n') p--; 709 return p; 710 711 case EL_CRLF: 712 for (;;) 713 { 714 p -= 2; 715 while (p > startptr && p[-1] != '\n') p--; 716 if (p <= startptr + 1 || p[-2] == '\r') return p; 717 } 718 return p; /* But control should never get here */ 719 720 case EL_ANY: 721 case EL_ANYCRLF: 722 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; 723 if (utf8) while ((*p & 0xc0) == 0x80) p--; 724 725 while (p > startptr) 726 { 727 register int c; 728 char *pp = p - 1; 729 730 if (utf8) 731 { 732 int extra = 0; 733 while ((*pp & 0xc0) == 0x80) pp--; 734 c = *((unsigned char *)pp); 735 if (c >= 0xc0) 736 { 737 
int gcii, gcss; 738 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 739 gcss = 6*extra; 740 c = (c & utf8_table3[extra]) << gcss; 741 for (gcii = 1; gcii <= extra; gcii++) 742 { 743 gcss -= 6; 744 c |= (pp[gcii] & 0x3f) << gcss; 745 } 746 } 747 } 748 else c = *((unsigned char *)pp); 749 750 if (endlinetype == EL_ANYCRLF) switch (c) 751 { 752 case 0x0a: /* LF */ 753 case 0x0d: /* CR */ 754 return p; 755 756 default: 757 break; 758 } 759 760 else switch (c) 761 { 762 case 0x0a: /* LF */ 763 case 0x0b: /* VT */ 764 case 0x0c: /* FF */ 765 case 0x0d: /* CR */ 766 case 0x85: /* NEL */ 767 case 0x2028: /* LS */ 768 case 0x2029: /* PS */ 769 return p; 770 771 default: 772 break; 773 } 774 775 p = pp; /* Back one character */ 776 } /* End of loop for ANY case */ 777 778 return startptr; /* Hit start of data */ 779 } /* End of overall switch */ 780} 781 782 783 784 785 786/************************************************* 787* Print the previous "after" lines * 788*************************************************/ 789 790/* This is called if we are about to lose said lines because of buffer filling, 791and at the end of the file. The data in the line is written using fwrite() so 792that a binary zero does not terminate it. 
793 794Arguments: 795 lastmatchnumber the number of the last matching line, plus one 796 lastmatchrestart where we restarted after the last match 797 endptr end of available data 798 printname filename for printing 799 800Returns: nothing 801*/ 802 803static void do_after_lines(int lastmatchnumber, char *lastmatchrestart, 804 char *endptr, char *printname) 805{ 806if (after_context > 0 && lastmatchnumber > 0) 807 { 808 int count = 0; 809 while (lastmatchrestart < endptr && count++ < after_context) 810 { 811 int ellength; 812 char *pp = lastmatchrestart; 813 if (printname != NULL) fprintf(stdout, "%s-", printname); 814 if (number) fprintf(stdout, "%d-", lastmatchnumber++); 815 pp = end_of_line(pp, endptr, &ellength); 816 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout); 817 lastmatchrestart = pp; 818 } 819 hyphenpending = TRUE; 820 } 821} 822 823 824 825/************************************************* 826* Apply patterns to subject till one matches * 827*************************************************/ 828 829/* This function is called to run through all patterns, looking for a match. It 830is used multiple times for the same subject when colouring is enabled, in order 831to find all possible matches. 
832 833Arguments: 834 matchptr the start of the subject 835 length the length of the subject to match 836 offsets the offets vector to fill in 837 mrc address of where to put the result of pcre_exec() 838 839Returns: TRUE if there was a match 840 FALSE if there was no match 841 invert if there was a non-fatal error 842*/ 843 844static BOOL 845match_patterns(char *matchptr, size_t length, int *offsets, int *mrc) 846{ 847int i; 848for (i = 0; i < pattern_count; i++) 849 { 850 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 851 PCRE_NOTEMPTY, offsets, OFFSET_SIZE); 852 if (*mrc >= 0) return TRUE; 853 if (*mrc == PCRE_ERROR_NOMATCH) continue; 854 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc); 855 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1); 856 fprintf(stderr, "this text:\n"); 857 fwrite(matchptr, 1, length, stderr); /* In case binary zero included */ 858 fprintf(stderr, "\n"); 859 if (error_count == 0 && 860 (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)) 861 { 862 fprintf(stderr, "pcregrep: error %d means that a resource limit " 863 "was exceeded\n", *mrc); 864 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n"); 865 } 866 if (error_count++ > 20) 867 { 868 fprintf(stderr, "pcregrep: too many errors - abandoned\n"); 869 exit(2); 870 } 871 return invert; /* No more matching; don't show the line again */ 872 } 873 874return FALSE; /* No match, no errors */ 875} 876 877 878 879/************************************************* 880* Grep an individual file * 881*************************************************/ 882 883/* This is called from grep_or_recurse() below. It uses a buffer that is three 884times the value of MBUFTHIRD. The matching point is never allowed to stray into 885the top third of the buffer, thus keeping more of the file available for 886context printing or for multiline scanning. 
For large files, the pointer will 887be in the middle third most of the time, so the bottom third is available for 888"before" context printing. 889 890Arguments: 891 handle the fopened FILE stream for a normal file 892 the gzFile pointer when reading is via libz 893 the BZFILE pointer when reading is via libbz2 894 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2 895 printname the file name if it is to be printed for each match 896 or NULL if the file name is not to be printed 897 it cannot be NULL if filenames[_nomatch]_only is set 898 899Returns: 0 if there was at least one match 900 1 otherwise (no matches) 901 2 if there is a read error on a .bz2 file 902*/ 903 904static int 905pcregrep(void *handle, int frtype, char *printname) 906{ 907int rc = 1; 908int linenumber = 1; 909int lastmatchnumber = 0; 910int count = 0; 911int filepos = 0; 912int offsets[OFFSET_SIZE]; 913char *lastmatchrestart = NULL; 914char buffer[3*MBUFTHIRD]; 915char *ptr = buffer; 916char *endptr; 917size_t bufflength; 918BOOL endhyphenpending = FALSE; 919FILE *in = NULL; /* Ensure initialized */ 920 921#ifdef SUPPORT_LIBZ 922gzFile ingz = NULL; 923#endif 924 925#ifdef SUPPORT_LIBBZ2 926BZFILE *inbz2 = NULL; 927#endif 928 929 930/* Do the first read into the start of the buffer and set up the pointer to end 931of what we have. In the case of libz, a non-zipped .gz file will be read as a 932plain file. However, if a .bz2 file isn't actually bzipped, the first read will 933fail. */ 934 935#ifdef SUPPORT_LIBZ 936if (frtype == FR_LIBZ) 937 { 938 ingz = (gzFile)handle; 939 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD); 940 } 941else 942#endif 943 944#ifdef SUPPORT_LIBBZ2 945if (frtype == FR_LIBBZ2) 946 { 947 inbz2 = (BZFILE *)handle; 948 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD); 949 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ 950 } /* without the cast it is unsigned. 
*/ 951else 952#endif 953 954 { 955 in = (FILE *)handle; 956 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in); 957 } 958 959endptr = buffer + bufflength; 960 961/* Loop while the current pointer is not at the end of the file. For large 962files, endptr will be at the end of the buffer when we are in the middle of the 963file, but ptr will never get there, because as soon as it gets over 2/3 of the 964way, the buffer is shifted left and re-filled. */ 965 966while (ptr < endptr) 967 { 968 int endlinelength; 969 int mrc = 0; 970 BOOL match; 971 char *matchptr = ptr; 972 char *t = ptr; 973 size_t length, linelength; 974 975 /* At this point, ptr is at the start of a line. We need to find the length 976 of the subject string to pass to pcre_exec(). In multiline mode, it is the 977 length remainder of the data in the buffer. Otherwise, it is the length of 978 the next line, excluding the terminating newline. After matching, we always 979 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE 980 option is used for compiling, so that any match is constrained to be in the 981 first line. */ 982 983 t = end_of_line(t, endptr, &endlinelength); 984 linelength = t - ptr - endlinelength; 985 length = multiline? (size_t)(endptr - ptr) : linelength; 986 987 /* Extra processing for Jeffrey Friedl's debugging. 
*/ 988 989#ifdef JFRIEDL_DEBUG 990 if (jfriedl_XT || jfriedl_XR) 991 { 992 #include <sys/time.h> 993 #include <time.h> 994 struct timeval start_time, end_time; 995 struct timezone dummy; 996 int i; 997 998 if (jfriedl_XT) 999 { 1000 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix); 1001 const char *orig = ptr; 1002 ptr = malloc(newlen + 1); 1003 if (!ptr) { 1004 printf("out of memory"); 1005 exit(2); 1006 } 1007 endptr = ptr; 1008 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix); 1009 for (i = 0; i < jfriedl_XT; i++) { 1010 strncpy(endptr, orig, length); 1011 endptr += length; 1012 } 1013 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix); 1014 length = newlen; 1015 } 1016 1017 if (gettimeofday(&start_time, &dummy) != 0) 1018 perror("bad gettimeofday"); 1019 1020 1021 for (i = 0; i < jfriedl_XR; i++) 1022 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 1023 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0); 1024 1025 if (gettimeofday(&end_time, &dummy) != 0) 1026 perror("bad gettimeofday"); 1027 1028 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0)) 1029 - 1030 (start_time.tv_sec + (start_time.tv_usec / 1000000.0))); 1031 1032 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta); 1033 return 0; 1034 } 1035#endif 1036 1037 /* We come back here after a match when the -o option (only_matching) is set, 1038 in order to find any further matches in the same line. */ 1039 1040 ONLY_MATCHING_RESTART: 1041 1042 /* Run through all the patterns until one matches or there is an error other 1043 than NOMATCH. This code is in a subroutine so that it can be re-used for 1044 finding subsequent matches when colouring matched lines. */ 1045 1046 match = match_patterns(matchptr, length, offsets, &mrc); 1047 1048 /* If it's a match or a not-match (as required), do what's wanted. 
*/ 1049 1050 if (match != invert) 1051 { 1052 BOOL hyphenprinted = FALSE; 1053 1054 /* We've failed if we want a file that doesn't have any matches. */ 1055 1056 if (filenames == FN_NOMATCH_ONLY) return 1; 1057 1058 /* Just count if just counting is wanted. */ 1059 1060 if (count_only) count++; 1061 1062 /* If all we want is a file name, there is no need to scan any more lines 1063 in the file. */ 1064 1065 else if (filenames == FN_MATCH_ONLY) 1066 { 1067 fprintf(stdout, "%s\n", printname); 1068 return 0; 1069 } 1070 1071 /* Likewise, if all we want is a yes/no answer. */ 1072 1073 else if (quiet) return 0; 1074 1075 /* The --only-matching option prints just the substring that matched, and 1076 the --file-offsets and --line-offsets options output offsets for the 1077 matching substring (they both force --only-matching). None of these options 1078 prints any context. Afterwards, adjust the start and length, and then jump 1079 back to look for further matches in the same line. If we are in invert 1080 mode, however, nothing is printed - this could be still useful because the 1081 return code is set. */ 1082 1083 else if (only_matching) 1084 { 1085 if (!invert) 1086 { 1087 if (printname != NULL) fprintf(stdout, "%s:", printname); 1088 if (number) fprintf(stdout, "%d:", linenumber); 1089 if (line_offsets) 1090 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr), 1091 offsets[1] - offsets[0]); 1092 else if (file_offsets) 1093 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr), 1094 offsets[1] - offsets[0]); 1095 else 1096 { 1097 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); 1098 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); 1099 if (do_colour) fprintf(stdout, "%c[00m", 0x1b); 1100 } 1101 fprintf(stdout, "\n"); 1102 matchptr += offsets[1]; 1103 length -= offsets[1]; 1104 match = FALSE; 1105 goto ONLY_MATCHING_RESTART; 1106 } 1107 } 1108 1109 /* This is the default case when none of the above options is set. 
We print 1110 the matching lines(s), possibly preceded and/or followed by other lines of 1111 context. */ 1112 1113 else 1114 { 1115 /* See if there is a requirement to print some "after" lines from a 1116 previous match. We never print any overlaps. */ 1117 1118 if (after_context > 0 && lastmatchnumber > 0) 1119 { 1120 int ellength; 1121 int linecount = 0; 1122 char *p = lastmatchrestart; 1123 1124 while (p < ptr && linecount < after_context) 1125 { 1126 p = end_of_line(p, ptr, &ellength); 1127 linecount++; 1128 } 1129 1130 /* It is important to advance lastmatchrestart during this printing so 1131 that it interacts correctly with any "before" printing below. Print 1132 each line's data using fwrite() in case there are binary zeroes. */ 1133 1134 while (lastmatchrestart < p) 1135 { 1136 char *pp = lastmatchrestart; 1137 if (printname != NULL) fprintf(stdout, "%s-", printname); 1138 if (number) fprintf(stdout, "%d-", lastmatchnumber++); 1139 pp = end_of_line(pp, endptr, &ellength); 1140 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout); 1141 lastmatchrestart = pp; 1142 } 1143 if (lastmatchrestart != ptr) hyphenpending = TRUE; 1144 } 1145 1146 /* If there were non-contiguous lines printed above, insert hyphens. */ 1147 1148 if (hyphenpending) 1149 { 1150 fprintf(stdout, "--\n"); 1151 hyphenpending = FALSE; 1152 hyphenprinted = TRUE; 1153 } 1154 1155 /* See if there is a requirement to print some "before" lines for this 1156 match. Again, don't print overlaps. 
*/ 1157 1158 if (before_context > 0) 1159 { 1160 int linecount = 0; 1161 char *p = ptr; 1162 1163 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && 1164 linecount < before_context) 1165 { 1166 linecount++; 1167 p = previous_line(p, buffer); 1168 } 1169 1170 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) 1171 fprintf(stdout, "--\n"); 1172 1173 while (p < ptr) 1174 { 1175 int ellength; 1176 char *pp = p; 1177 if (printname != NULL) fprintf(stdout, "%s-", printname); 1178 if (number) fprintf(stdout, "%d-", linenumber - linecount--); 1179 pp = end_of_line(pp, endptr, &ellength); 1180 fwrite(p, 1, pp - p, stdout); 1181 p = pp; 1182 } 1183 } 1184 1185 /* Now print the matching line(s); ensure we set hyphenpending at the end 1186 of the file if any context lines are being output. */ 1187 1188 if (after_context > 0 || before_context > 0) 1189 endhyphenpending = TRUE; 1190 1191 if (printname != NULL) fprintf(stdout, "%s:", printname); 1192 if (number) fprintf(stdout, "%d:", linenumber); 1193 1194 /* In multiline mode, we want to print to the end of the line in which 1195 the end of the matched string is found, so we adjust linelength and the 1196 line number appropriately, but only when there actually was a match 1197 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of 1198 the match will always be before the first newline sequence. */ 1199 1200 if (multiline) 1201 { 1202 int ellength; 1203 char *endmatch = ptr; 1204 if (!invert) 1205 { 1206 endmatch += offsets[1]; 1207 t = ptr; 1208 while (t < endmatch) 1209 { 1210 t = end_of_line(t, endptr, &ellength); 1211 if (t <= endmatch) linenumber++; else break; 1212 } 1213 } 1214 endmatch = end_of_line(endmatch, endptr, &ellength); 1215 linelength = endmatch - ptr - ellength; 1216 } 1217 1218 /*** NOTE: Use only fwrite() to output the data line, so that binary 1219 zeroes are treated as just another data character. 
*/ 1220 1221 /* This extra option, for Jeffrey Friedl's debugging requirements, 1222 replaces the matched string, or a specific captured string if it exists, 1223 with X. When this happens, colouring is ignored. */ 1224 1225#ifdef JFRIEDL_DEBUG 1226 if (S_arg >= 0 && S_arg < mrc) 1227 { 1228 int first = S_arg * 2; 1229 int last = first + 1; 1230 fwrite(ptr, 1, offsets[first], stdout); 1231 fprintf(stdout, "X"); 1232 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout); 1233 } 1234 else 1235#endif 1236 1237 /* We have to split the line(s) up if colouring, and search for further 1238 matches. */ 1239 1240 if (do_colour) 1241 { 1242 int last_offset = 0; 1243 fwrite(ptr, 1, offsets[0], stdout); 1244 fprintf(stdout, "%c[%sm", 0x1b, colour_string); 1245 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); 1246 fprintf(stdout, "%c[00m", 0x1b); 1247 for (;;) 1248 { 1249 last_offset += offsets[1]; 1250 matchptr += offsets[1]; 1251 length -= offsets[1]; 1252 if (!match_patterns(matchptr, length, offsets, &mrc)) break; 1253 fwrite(matchptr, 1, offsets[0], stdout); 1254 fprintf(stdout, "%c[%sm", 0x1b, colour_string); 1255 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); 1256 fprintf(stdout, "%c[00m", 0x1b); 1257 } 1258 fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset, 1259 stdout); 1260 } 1261 1262 /* Not colouring; no need to search for further matches */ 1263 1264 else fwrite(ptr, 1, linelength + endlinelength, stdout); 1265 } 1266 1267 /* End of doing what has to be done for a match */ 1268 1269 rc = 0; /* Had some success */ 1270 1271 /* Remember where the last match happened for after_context. We remember 1272 where we are about to restart, and that line's number. 
*/ 1273 1274 lastmatchrestart = ptr + linelength + endlinelength; 1275 lastmatchnumber = linenumber + 1; 1276 } 1277 1278 /* For a match in multiline inverted mode (which of course did not cause 1279 anything to be printed), we have to move on to the end of the match before 1280 proceeding. */ 1281 1282 if (multiline && invert && match) 1283 { 1284 int ellength; 1285 char *endmatch = ptr + offsets[1]; 1286 t = ptr; 1287 while (t < endmatch) 1288 { 1289 t = end_of_line(t, endptr, &ellength); 1290 if (t <= endmatch) linenumber++; else break; 1291 } 1292 endmatch = end_of_line(endmatch, endptr, &ellength); 1293 linelength = endmatch - ptr - ellength; 1294 } 1295 1296 /* Advance to after the newline and increment the line number. The file 1297 offset to the current line is maintained in filepos. */ 1298 1299 ptr += linelength + endlinelength; 1300 filepos += linelength + endlinelength; 1301 linenumber++; 1302 1303 /* If we haven't yet reached the end of the file (the buffer is full), and 1304 the current point is in the top 1/3 of the buffer, slide the buffer down by 1305 1/3 and refill it. Before we do this, if some unprinted "after" lines are 1306 about to be lost, print them. 
*/ 1307 1308 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD) 1309 { 1310 if (after_context > 0 && 1311 lastmatchnumber > 0 && 1312 lastmatchrestart < buffer + MBUFTHIRD) 1313 { 1314 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); 1315 lastmatchnumber = 0; 1316 } 1317 1318 /* Now do the shuffle */ 1319 1320 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD); 1321 ptr -= MBUFTHIRD; 1322 1323#ifdef SUPPORT_LIBZ 1324 if (frtype == FR_LIBZ) 1325 bufflength = 2*MBUFTHIRD + 1326 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD); 1327 else 1328#endif 1329 1330#ifdef SUPPORT_LIBBZ2 1331 if (frtype == FR_LIBBZ2) 1332 bufflength = 2*MBUFTHIRD + 1333 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD); 1334 else 1335#endif 1336 1337 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in); 1338 1339 endptr = buffer + bufflength; 1340 1341 /* Adjust any last match point */ 1342 1343 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD; 1344 } 1345 } /* Loop through the whole file */ 1346 1347/* End of file; print final "after" lines if wanted; do_after_lines sets 1348hyphenpending if it prints something. */ 1349 1350if (!only_matching && !count_only) 1351 { 1352 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); 1353 hyphenpending |= endhyphenpending; 1354 } 1355 1356/* Print the file name if we are looking for those without matches and there 1357were none. If we found a match, we won't have got this far. 
*/ 1358 1359if (filenames == FN_NOMATCH_ONLY) 1360 { 1361 fprintf(stdout, "%s\n", printname); 1362 return 0; 1363 } 1364 1365/* Print the match count if wanted */ 1366 1367if (count_only) 1368 { 1369 if (count > 0 || !omit_zero_count) 1370 { 1371 if (printname != NULL && filenames != FN_NONE) 1372 fprintf(stdout, "%s:", printname); 1373 fprintf(stdout, "%d\n", count); 1374 } 1375 } 1376 1377return rc; 1378} 1379 1380 1381 1382/************************************************* 1383* Grep a file or recurse into a directory * 1384*************************************************/ 1385 1386/* Given a path name, if it's a directory, scan all the files if we are 1387recursing; if it's a file, grep it. 1388 1389Arguments: 1390 pathname the path to investigate 1391 dir_recurse TRUE if recursing is wanted (-r or -drecurse) 1392 only_one_at_top TRUE if the path is the only one at toplevel 1393 1394Returns: 0 if there was at least one match 1395 1 if there were no matches 1396 2 there was some kind of error 1397 1398However, file opening failures are suppressed if "silent" is set. 1399*/ 1400 1401static int 1402grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top) 1403{ 1404int rc = 1; 1405int sep; 1406int frtype; 1407int pathlen; 1408void *handle; 1409FILE *in = NULL; /* Ensure initialized */ 1410 1411#ifdef SUPPORT_LIBZ 1412gzFile ingz = NULL; 1413#endif 1414 1415#ifdef SUPPORT_LIBBZ2 1416BZFILE *inbz2 = NULL; 1417#endif 1418 1419/* If the file name is "-" we scan stdin */ 1420 1421if (strcmp(pathname, "-") == 0) 1422 { 1423 return pcregrep(stdin, FR_PLAIN, 1424 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? 1425 stdin_name : NULL); 1426 } 1427 1428/* If the file is a directory, skip if skipping or if we are recursing, scan 1429each file and directory within it, subject to any include or exclude patterns 1430that were set. The scanning code is localized so it can be made 1431system-specific. 
*/ 1432 1433if ((sep = isdirectory(pathname)) != 0) 1434 { 1435 if (dee_action == dee_SKIP) return 1; 1436 if (dee_action == dee_RECURSE) 1437 { 1438 char buffer[1024]; 1439 char *nextfile; 1440 directory_type *dir = opendirectory(pathname); 1441 1442 if (dir == NULL) 1443 { 1444 if (!silent) 1445 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname, 1446 strerror(errno)); 1447 return 2; 1448 } 1449 1450 while ((nextfile = readdirectory(dir)) != NULL) 1451 { 1452 int frc, nflen; 1453 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile); 1454 nflen = strlen(nextfile); 1455 1456 if (isdirectory(buffer)) 1457 { 1458 if (exclude_dir_compiled != NULL && 1459 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0) 1460 continue; 1461 1462 if (include_dir_compiled != NULL && 1463 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0) 1464 continue; 1465 } 1466 else 1467 { 1468 if (exclude_compiled != NULL && 1469 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0) 1470 continue; 1471 1472 if (include_compiled != NULL && 1473 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0) 1474 continue; 1475 } 1476 1477 frc = grep_or_recurse(buffer, dir_recurse, FALSE); 1478 if (frc > 1) rc = frc; 1479 else if (frc == 0 && rc == 1) rc = 0; 1480 } 1481 1482 closedirectory(dir); 1483 return rc; 1484 } 1485 } 1486 1487/* If the file is not a directory and not a regular file, skip it if that's 1488been requested. */ 1489 1490else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1; 1491 1492/* Control reaches here if we have a regular file, or if we have a directory 1493and recursion or skipping was not requested, or if we have anything else and 1494skipping was not requested. The scan proceeds. If this is the first and only 1495argument at top level, we don't show the file name, unless we are only showing 1496the file name, or the filename was forced (-H). 
*/ 1497 1498pathlen = strlen(pathname); 1499 1500/* Open using zlib if it is supported and the file name ends with .gz. */ 1501 1502#ifdef SUPPORT_LIBZ 1503if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0) 1504 { 1505 ingz = gzopen(pathname, "rb"); 1506 if (ingz == NULL) 1507 { 1508 if (!silent) 1509 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, 1510 strerror(errno)); 1511 return 2; 1512 } 1513 handle = (void *)ingz; 1514 frtype = FR_LIBZ; 1515 } 1516else 1517#endif 1518 1519/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */ 1520 1521#ifdef SUPPORT_LIBBZ2 1522if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0) 1523 { 1524 inbz2 = BZ2_bzopen(pathname, "rb"); 1525 handle = (void *)inbz2; 1526 frtype = FR_LIBBZ2; 1527 } 1528else 1529#endif 1530 1531/* Otherwise use plain fopen(). The label is so that we can come back here if 1532an attempt to read a .bz2 file indicates that it really is a plain file. */ 1533 1534#ifdef SUPPORT_LIBBZ2 1535PLAIN_FILE: 1536#endif 1537 { 1538 in = fopen(pathname, "rb"); 1539 handle = (void *)in; 1540 frtype = FR_PLAIN; 1541 } 1542 1543/* All the opening methods return errno when they fail. */ 1544 1545if (handle == NULL) 1546 { 1547 if (!silent) 1548 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, 1549 strerror(errno)); 1550 return 2; 1551 } 1552 1553/* Now grep the file */ 1554 1555rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT || 1556 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); 1557 1558/* Close in an appropriate manner. */ 1559 1560#ifdef SUPPORT_LIBZ 1561if (frtype == FR_LIBZ) 1562 gzclose(ingz); 1563else 1564#endif 1565 1566/* If it is a .bz2 file and the result is 2, it means that the first attempt to 1567read failed. If the error indicates that the file isn't in fact bzipped, try 1568again as a normal file. 
*/ 1569 1570#ifdef SUPPORT_LIBBZ2 1571if (frtype == FR_LIBBZ2) 1572 { 1573 if (rc == 2) 1574 { 1575 int errnum; 1576 const char *err = BZ2_bzerror(inbz2, &errnum); 1577 if (errnum == BZ_DATA_ERROR_MAGIC) 1578 { 1579 BZ2_bzclose(inbz2); 1580 goto PLAIN_FILE; 1581 } 1582 else if (!silent) 1583 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n", 1584 pathname, err); 1585 } 1586 BZ2_bzclose(inbz2); 1587 } 1588else 1589#endif 1590 1591/* Normal file close */ 1592 1593fclose(in); 1594 1595/* Pass back the yield from pcregrep(). */ 1596 1597return rc; 1598} 1599 1600 1601 1602 1603/************************************************* 1604* Usage function * 1605*************************************************/ 1606 1607static int 1608usage(int rc) 1609{ 1610option_item *op; 1611fprintf(stderr, "Usage: pcregrep [-"); 1612for (op = optionlist; op->one_char != 0; op++) 1613 { 1614 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); 1615 } 1616fprintf(stderr, "] [long options] [pattern] [files]\n"); 1617fprintf(stderr, "Type `pcregrep --help' for more information and the long " 1618 "options.\n"); 1619return rc; 1620} 1621 1622 1623 1624 1625/************************************************* 1626* Help function * 1627*************************************************/ 1628 1629static void 1630help(void) 1631{ 1632option_item *op; 1633 1634printf("Usage: pcregrep [OPTION]... 
/*************************************************
*                Help function                   *
*************************************************/

/* Print the full help text on stdout: a fixed introduction (which mentions
.gz and .bz2 handling only when the corresponding support is compiled in), one
line per entry in optionlist, and some closing notes. Takes no arguments and
returns nothing. */

static void
help(void)
{
option_item *op;

printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
printf("Search for PATTERN in each FILE or standard input.\n");
printf("PATTERN must be present if neither -e nor -f is used.\n");
printf("\"-\" can be used as a file name to mean STDIN.\n");

#ifdef SUPPORT_LIBZ
printf("Files whose names end in .gz are read using zlib.\n");
#endif

#ifdef SUPPORT_LIBBZ2
printf("Files whose names end in .bz2 are read using bzlib2.\n");
#endif

#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
printf("Other files and the standard input are read as plain files.\n\n");
#else
printf("All files are read as plain files, without any interpretation.\n\n");
#endif

printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
printf("Options:\n");

/* One line per option. The list ends at the entry whose one_char is zero.
Entries with one_char > 0 show their single-letter form before the long name;
the others get a blank placeholder instead. */

for (op = optionlist; op->one_char != 0; op++)
  {
  int n;
  char s[4];      /* Room for "-c," plus the terminating zero */
  if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");

  /* printf() returns the number of characters written, so n is the amount
  of padding still needed to reach column 30, with a minimum of one. */

  n = 30 - printf(" %s --%s", s, op->long_name);
  if (n < 1) n = 1;

  /* The padding is produced by printing at most n characters of a string of
  spaces. NOTE(review): the space-only literals in this loop look as if runs
  of blanks were collapsed in this copy of the file; confirm their widths
  against the upstream source, because the column alignment depends on them. */

  printf("%.*s%s\n", n, " ", op->help_text);
  }

printf("\nWhen reading patterns from a file instead of using a command line option,\n");
printf("trailing white space is removed and blank lines are ignored.\n");
printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);

printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
}
If fewer than two FILEs given, assume -h.\n"); 1671printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); 1672} 1673 1674 1675 1676 1677/************************************************* 1678* Handle a single-letter, no data option * 1679*************************************************/ 1680 1681static int 1682handle_option(int letter, int options) 1683{ 1684switch(letter) 1685 { 1686 case N_FOFFSETS: file_offsets = TRUE; break; 1687 case N_HELP: help(); exit(0); 1688 case N_LOFFSETS: line_offsets = number = TRUE; break; 1689 case 'c': count_only = TRUE; break; 1690 case 'F': process_options |= PO_FIXED_STRINGS; break; 1691 case 'H': filenames = FN_FORCE; break; 1692 case 'h': filenames = FN_NONE; break; 1693 case 'i': options |= PCRE_CASELESS; break; 1694 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break; 1695 case 'L': filenames = FN_NOMATCH_ONLY; break; 1696 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break; 1697 case 'n': number = TRUE; break; 1698 case 'o': only_matching = TRUE; break; 1699 case 'q': quiet = TRUE; break; 1700 case 'r': dee_action = dee_RECURSE; break; 1701 case 's': silent = TRUE; break; 1702 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break; 1703 case 'v': invert = TRUE; break; 1704 case 'w': process_options |= PO_WORD_MATCH; break; 1705 case 'x': process_options |= PO_LINE_MATCH; break; 1706 1707 case 'V': 1708 fprintf(stderr, "pcregrep version %s\n", pcre_version()); 1709 exit(0); 1710 break; 1711 1712 default: 1713 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter); 1714 exit(usage(2)); 1715 } 1716 1717return options; 1718} 1719 1720 1721 1722 1723/************************************************* 1724* Construct printed ordinal * 1725*************************************************/ 1726 1727/* This turns a number into "1st", "3rd", etc. 
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose last
two digits are 11, 12, or 13 take "th" ("11th", "112th"), not the suffix that
their final digit alone would suggest. Negative numbers always get "th".

Argument:   n   the number to convert
Returns:    pointer to a static buffer holding the zero-terminated text; the
              buffer is overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of
any int; the extra four cover a minus sign, the two-letter suffix, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *suffix = "th";
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
"command-line " : "", MAX_PATTERN_COUNT); 1777 return FALSE; 1778 } 1779 1780sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern, 1781 suffix[process_options]); 1782pattern_list[pattern_count] = 1783 pcre_compile(buffer, options, &error, &errptr, pcretables); 1784if (pattern_list[pattern_count] != NULL) 1785 { 1786 pattern_count++; 1787 return TRUE; 1788 } 1789 1790/* Handle compile errors */ 1791 1792errptr -= (int)strlen(prefix[process_options]); 1793if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern); 1794 1795if (filename == NULL) 1796 { 1797 if (count == 0) 1798 fprintf(stderr, "pcregrep: Error in command-line regex " 1799 "at offset %d: %s\n", errptr, error); 1800 else 1801 fprintf(stderr, "pcregrep: Error in %s command-line regex " 1802 "at offset %d: %s\n", ordin(count), errptr, error); 1803 } 1804else 1805 { 1806 fprintf(stderr, "pcregrep: Error in regex in line %d of %s " 1807 "at offset %d: %s\n", count, filename, errptr, error); 1808 } 1809 1810return FALSE; 1811} 1812 1813 1814 1815/************************************************* 1816* Compile one supplied pattern * 1817*************************************************/ 1818 1819/* When the -F option has been used, each string may be a list of strings, 1820separated by line breaks. They will be matched literally. 
1821 1822Arguments: 1823 pattern the pattern string 1824 options the PCRE options 1825 filename the file name, or NULL for a command-line pattern 1826 count 0 if this is the only command line pattern, or 1827 number of the command line pattern, or 1828 linenumber for a pattern from a file 1829 1830Returns: TRUE on success, FALSE after an error 1831*/ 1832 1833static BOOL 1834compile_pattern(char *pattern, int options, char *filename, int count) 1835{ 1836if ((process_options & PO_FIXED_STRINGS) != 0) 1837 { 1838 char *eop = pattern + strlen(pattern); 1839 char buffer[MBUFTHIRD]; 1840 for(;;) 1841 { 1842 int ellength; 1843 char *p = end_of_line(pattern, eop, &ellength); 1844 if (ellength == 0) 1845 return compile_single_pattern(pattern, options, filename, count); 1846 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern); 1847 pattern = p; 1848 if (!compile_single_pattern(buffer, options, filename, count)) 1849 return FALSE; 1850 } 1851 } 1852else return compile_single_pattern(pattern, options, filename, count); 1853} 1854 1855 1856 1857/************************************************* 1858* Main program * 1859*************************************************/ 1860 1861/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */ 1862 1863int 1864main(int argc, char **argv) 1865{ 1866int i, j; 1867int rc = 1; 1868int pcre_options = 0; 1869int cmd_pattern_count = 0; 1870int hint_count = 0; 1871int errptr; 1872BOOL only_one_at_top; 1873char *patterns[MAX_PATTERN_COUNT]; 1874const char *locale_from = "--locale"; 1875const char *error; 1876 1877/* Set the default line ending value from the default in the PCRE library; 1878"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf". 1879Note that the return values from pcre_config(), though derived from the ASCII 1880codes, are the same in EBCDIC environments, so we must use the actual values 1881rather than escapes such as as '\r'. 
*/ 1882 1883(void)pcre_config(PCRE_CONFIG_NEWLINE, &i); 1884switch(i) 1885 { 1886 default: newline = (char *)"lf"; break; 1887 case 13: newline = (char *)"cr"; break; 1888 case (13 << 8) | 10: newline = (char *)"crlf"; break; 1889 case -1: newline = (char *)"any"; break; 1890 case -2: newline = (char *)"anycrlf"; break; 1891 } 1892 1893/* Process the options */ 1894 1895for (i = 1; i < argc; i++) 1896 { 1897 option_item *op = NULL; 1898 char *option_data = (char *)""; /* default to keep compiler happy */ 1899 BOOL longop; 1900 BOOL longopwasequals = FALSE; 1901 1902 if (argv[i][0] != '-') break; 1903 1904 /* If we hit an argument that is just "-", it may be a reference to STDIN, 1905 but only if we have previously had -e or -f to define the patterns. */ 1906 1907 if (argv[i][1] == 0) 1908 { 1909 if (pattern_filename != NULL || pattern_count > 0) break; 1910 else exit(usage(2)); 1911 } 1912 1913 /* Handle a long name option, or -- to terminate the options */ 1914 1915 if (argv[i][1] == '-') 1916 { 1917 char *arg = argv[i] + 2; 1918 char *argequals = strchr(arg, '='); 1919 1920 if (*arg == 0) /* -- terminates options */ 1921 { 1922 i++; 1923 break; /* out of the options-handling loop */ 1924 } 1925 1926 longop = TRUE; 1927 1928 /* Some long options have data that follows after =, for example file=name. 1929 Some options have variations in the long name spelling: specifically, we 1930 allow "regexp" because GNU grep allows it, though I personally go along 1931 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p". 1932 These options are entered in the table as "regex(p)". Options can be in 1933 both these categories. 
*/ 1934 1935 for (op = optionlist; op->one_char != 0; op++) 1936 { 1937 char *opbra = strchr(op->long_name, '('); 1938 char *equals = strchr(op->long_name, '='); 1939 1940 /* Handle options with only one spelling of the name */ 1941 1942 if (opbra == NULL) /* Does not contain '(' */ 1943 { 1944 if (equals == NULL) /* Not thing=data case */ 1945 { 1946 if (strcmp(arg, op->long_name) == 0) break; 1947 } 1948 else /* Special case xxx=data */ 1949 { 1950 int oplen = equals - op->long_name; 1951 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg; 1952 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0) 1953 { 1954 option_data = arg + arglen; 1955 if (*option_data == '=') 1956 { 1957 option_data++; 1958 longopwasequals = TRUE; 1959 } 1960 break; 1961 } 1962 } 1963 } 1964 1965 /* Handle options with an alternate spelling of the name */ 1966 1967 else 1968 { 1969 char buff1[24]; 1970 char buff2[24]; 1971 1972 int baselen = opbra - op->long_name; 1973 int fulllen = strchr(op->long_name, ')') - op->long_name + 1; 1974 int arglen = (argequals == NULL || equals == NULL)? 1975 (int)strlen(arg) : argequals - arg; 1976 1977 sprintf(buff1, "%.*s", baselen, op->long_name); 1978 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1); 1979 1980 if (strncmp(arg, buff1, arglen) == 0 || 1981 strncmp(arg, buff2, arglen) == 0) 1982 { 1983 if (equals != NULL && argequals != NULL) 1984 { 1985 option_data = argequals; 1986 if (*option_data == '=') 1987 { 1988 option_data++; 1989 longopwasequals = TRUE; 1990 } 1991 } 1992 break; 1993 } 1994 } 1995 } 1996 1997 if (op->one_char == 0) 1998 { 1999 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]); 2000 exit(usage(2)); 2001 } 2002 } 2003 2004 /* Jeffrey Friedl's debugging harness uses these additional options which 2005 are not in the right form for putting in the option table because they use 2006 only one hyphen, yet are more than one character long. 
By putting them 2007 separately here, they will not get displayed as part of the help() output, 2008 but I don't think Jeffrey will care about that. */ 2009 2010#ifdef JFRIEDL_DEBUG 2011 else if (strcmp(argv[i], "-pre") == 0) { 2012 jfriedl_prefix = argv[++i]; 2013 continue; 2014 } else if (strcmp(argv[i], "-post") == 0) { 2015 jfriedl_postfix = argv[++i]; 2016 continue; 2017 } else if (strcmp(argv[i], "-XT") == 0) { 2018 sscanf(argv[++i], "%d", &jfriedl_XT); 2019 continue; 2020 } else if (strcmp(argv[i], "-XR") == 0) { 2021 sscanf(argv[++i], "%d", &jfriedl_XR); 2022 continue; 2023 } 2024#endif 2025 2026 2027 /* One-char options; many that have no data may be in a single argument; we 2028 continue till we hit the last one or one that needs data. */ 2029 2030 else 2031 { 2032 char *s = argv[i] + 1; 2033 longop = FALSE; 2034 while (*s != 0) 2035 { 2036 for (op = optionlist; op->one_char != 0; op++) 2037 { if (*s == op->one_char) break; } 2038 if (op->one_char == 0) 2039 { 2040 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n", 2041 *s, argv[i]); 2042 exit(usage(2)); 2043 } 2044 if (op->type != OP_NODATA || s[1] == 0) 2045 { 2046 option_data = s+1; 2047 break; 2048 } 2049 pcre_options = handle_option(*s++, pcre_options); 2050 } 2051 } 2052 2053 /* At this point we should have op pointing to a matched option. If the type 2054 is NO_DATA, it means that there is no data, and the option might set 2055 something in the PCRE options. */ 2056 2057 if (op->type == OP_NODATA) 2058 { 2059 pcre_options = handle_option(op->one_char, pcre_options); 2060 continue; 2061 } 2062 2063 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that 2064 either has a value or defaults to something. It cannot have data in a 2065 separate item. At the moment, the only such options are "colo(u)r" and 2066 Jeffrey Friedl's special -S debugging option. 
*/ 2067 2068 if (*option_data == 0 && 2069 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER)) 2070 { 2071 switch (op->one_char) 2072 { 2073 case N_COLOUR: 2074 colour_option = (char *)"auto"; 2075 break; 2076#ifdef JFRIEDL_DEBUG 2077 case 'S': 2078 S_arg = 0; 2079 break; 2080#endif 2081 } 2082 continue; 2083 } 2084 2085 /* Otherwise, find the data string for the option. */ 2086 2087 if (*option_data == 0) 2088 { 2089 if (i >= argc - 1 || longopwasequals) 2090 { 2091 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]); 2092 exit(usage(2)); 2093 } 2094 option_data = argv[++i]; 2095 } 2096 2097 /* If the option type is OP_PATLIST, it's the -e option, which can be called 2098 multiple times to create a list of patterns. */ 2099 2100 if (op->type == OP_PATLIST) 2101 { 2102 if (cmd_pattern_count >= MAX_PATTERN_COUNT) 2103 { 2104 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n", 2105 MAX_PATTERN_COUNT); 2106 return 2; 2107 } 2108 patterns[cmd_pattern_count++] = option_data; 2109 } 2110 2111 /* Otherwise, deal with single string or numeric data values. */ 2112 2113 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER) 2114 { 2115 *((char **)op->dataptr) = option_data; 2116 } 2117 else 2118 { 2119 char *endptr; 2120 int n = strtoul(option_data, &endptr, 10); 2121 if (*endptr != 0) 2122 { 2123 if (longop) 2124 { 2125 char *equals = strchr(op->long_name, '='); 2126 int nlen = (equals == NULL)? (int)strlen(op->long_name) : 2127 equals - op->long_name; 2128 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n", 2129 option_data, nlen, op->long_name); 2130 } 2131 else 2132 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n", 2133 option_data, op->one_char); 2134 exit(usage(2)); 2135 } 2136 *((int *)op->dataptr) = n; 2137 } 2138 } 2139 2140/* Options have been decoded. If -C was used, its value is used as a default 2141for -A and -B. 
*/ 2142 2143if (both_context > 0) 2144 { 2145 if (after_context == 0) after_context = both_context; 2146 if (before_context == 0) before_context = both_context; 2147 } 2148 2149/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted. 2150However, the latter two set the only_matching flag. */ 2151 2152if ((only_matching && (file_offsets || line_offsets)) || 2153 (file_offsets && line_offsets)) 2154 { 2155 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets " 2156 "and/or --line-offsets\n"); 2157 exit(usage(2)); 2158 } 2159 2160if (file_offsets || line_offsets) only_matching = TRUE; 2161 2162/* If a locale has not been provided as an option, see if the LC_CTYPE or 2163LC_ALL environment variable is set, and if so, use it. */ 2164 2165if (locale == NULL) 2166 { 2167 locale = getenv("LC_ALL"); 2168 locale_from = "LCC_ALL"; 2169 } 2170 2171if (locale == NULL) 2172 { 2173 locale = getenv("LC_CTYPE"); 2174 locale_from = "LC_CTYPE"; 2175 } 2176 2177/* If a locale has been provided, set it, and generate the tables the PCRE 2178needs. Otherwise, pcretables==NULL, which causes the use of default tables. 
*/ 2179 2180if (locale != NULL) 2181 { 2182 if (setlocale(LC_CTYPE, locale) == NULL) 2183 { 2184 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n", 2185 locale, locale_from); 2186 return 2; 2187 } 2188 pcretables = pcre_maketables(); 2189 } 2190 2191/* Sort out colouring */ 2192 2193if (colour_option != NULL && strcmp(colour_option, "never") != 0) 2194 { 2195 if (strcmp(colour_option, "always") == 0) do_colour = TRUE; 2196 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); 2197 else 2198 { 2199 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n", 2200 colour_option); 2201 return 2; 2202 } 2203 if (do_colour) 2204 { 2205 char *cs = getenv("PCREGREP_COLOUR"); 2206 if (cs == NULL) cs = getenv("PCREGREP_COLOR"); 2207 if (cs != NULL) colour_string = cs; 2208 } 2209 } 2210 2211/* Interpret the newline type; the default settings are Unix-like. */ 2212 2213if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0) 2214 { 2215 pcre_options |= PCRE_NEWLINE_CR; 2216 endlinetype = EL_CR; 2217 } 2218else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0) 2219 { 2220 pcre_options |= PCRE_NEWLINE_LF; 2221 endlinetype = EL_LF; 2222 } 2223else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0) 2224 { 2225 pcre_options |= PCRE_NEWLINE_CRLF; 2226 endlinetype = EL_CRLF; 2227 } 2228else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0) 2229 { 2230 pcre_options |= PCRE_NEWLINE_ANY; 2231 endlinetype = EL_ANY; 2232 } 2233else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0) 2234 { 2235 pcre_options |= PCRE_NEWLINE_ANYCRLF; 2236 endlinetype = EL_ANYCRLF; 2237 } 2238else 2239 { 2240 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline); 2241 return 2; 2242 } 2243 2244/* Interpret the text values for -d and -D */ 2245 2246if (dee_option != NULL) 2247 { 2248 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ; 2249 else if (strcmp(dee_option, 
"recurse") == 0) dee_action = dee_RECURSE; 2250 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP; 2251 else 2252 { 2253 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option); 2254 return 2; 2255 } 2256 } 2257 2258if (DEE_option != NULL) 2259 { 2260 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ; 2261 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP; 2262 else 2263 { 2264 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option); 2265 return 2; 2266 } 2267 } 2268 2269/* Check the values for Jeffrey Friedl's debugging options. */ 2270 2271#ifdef JFRIEDL_DEBUG 2272if (S_arg > 9) 2273 { 2274 fprintf(stderr, "pcregrep: bad value for -S option\n"); 2275 return 2; 2276 } 2277if (jfriedl_XT != 0 || jfriedl_XR != 0) 2278 { 2279 if (jfriedl_XT == 0) jfriedl_XT = 1; 2280 if (jfriedl_XR == 0) jfriedl_XR = 1; 2281 } 2282#endif 2283 2284/* Get memory to store the pattern and hints lists. */ 2285 2286pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *)); 2287hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); 2288 2289if (pattern_list == NULL || hints_list == NULL) 2290 { 2291 fprintf(stderr, "pcregrep: malloc failed\n"); 2292 goto EXIT2; 2293 } 2294 2295/* If no patterns were provided by -e, and there is no file provided by -f, 2296the first argument is the one and only pattern, and it must exist. */ 2297 2298if (cmd_pattern_count == 0 && pattern_filename == NULL) 2299 { 2300 if (i >= argc) return usage(2); 2301 patterns[cmd_pattern_count++] = argv[i++]; 2302 } 2303 2304/* Compile the patterns that were provided on the command line, either by 2305multiple uses of -e or as a single unkeyed pattern. */ 2306 2307for (j = 0; j < cmd_pattern_count; j++) 2308 { 2309 if (!compile_pattern(patterns[j], pcre_options, NULL, 2310 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1)) 2311 goto EXIT2; 2312 } 2313 2314/* Compile the regular expressions that are provided in a file. 
*/ 2315 2316if (pattern_filename != NULL) 2317 { 2318 int linenumber = 0; 2319 FILE *f; 2320 char *filename; 2321 char buffer[MBUFTHIRD]; 2322 2323 if (strcmp(pattern_filename, "-") == 0) 2324 { 2325 f = stdin; 2326 filename = stdin_name; 2327 } 2328 else 2329 { 2330 f = fopen(pattern_filename, "r"); 2331 if (f == NULL) 2332 { 2333 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, 2334 strerror(errno)); 2335 goto EXIT2; 2336 } 2337 filename = pattern_filename; 2338 } 2339 2340 while (fgets(buffer, MBUFTHIRD, f) != NULL) 2341 { 2342 char *s = buffer + (int)strlen(buffer); 2343 while (s > buffer && isspace((unsigned char)(s[-1]))) s--; 2344 *s = 0; 2345 linenumber++; 2346 if (buffer[0] == 0) continue; /* Skip blank lines */ 2347 if (!compile_pattern(buffer, pcre_options, filename, linenumber)) 2348 goto EXIT2; 2349 } 2350 2351 if (f != stdin) fclose(f); 2352 } 2353 2354/* Study the regular expressions, as we will be running them many times */ 2355 2356for (j = 0; j < pattern_count; j++) 2357 { 2358 hints_list[j] = pcre_study(pattern_list[j], 0, &error); 2359 if (error != NULL) 2360 { 2361 char s[16]; 2362 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j); 2363 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); 2364 goto EXIT2; 2365 } 2366 hint_count++; 2367 } 2368 2369/* If there are include or exclude patterns, compile them. 
*/ 2370 2371if (exclude_pattern != NULL) 2372 { 2373 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, 2374 pcretables); 2375 if (exclude_compiled == NULL) 2376 { 2377 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n", 2378 errptr, error); 2379 goto EXIT2; 2380 } 2381 } 2382 2383if (include_pattern != NULL) 2384 { 2385 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, 2386 pcretables); 2387 if (include_compiled == NULL) 2388 { 2389 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n", 2390 errptr, error); 2391 goto EXIT2; 2392 } 2393 } 2394 2395if (exclude_dir_pattern != NULL) 2396 { 2397 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr, 2398 pcretables); 2399 if (exclude_dir_compiled == NULL) 2400 { 2401 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n", 2402 errptr, error); 2403 goto EXIT2; 2404 } 2405 } 2406 2407if (include_dir_pattern != NULL) 2408 { 2409 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr, 2410 pcretables); 2411 if (include_dir_compiled == NULL) 2412 { 2413 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n", 2414 errptr, error); 2415 goto EXIT2; 2416 } 2417 } 2418 2419/* If there are no further arguments, do the business on stdin and exit. */ 2420 2421if (i >= argc) 2422 { 2423 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL); 2424 goto EXIT; 2425 } 2426 2427/* Otherwise, work through the remaining arguments as files or directories. 2428Pass in the fact that there is only one argument at top level - this suppresses 2429the file name if the argument is not a directory and filenames are not 2430otherwise forced. 
*/ 2431 2432only_one_at_top = i == argc - 1; /* Catch initial value of i */ 2433 2434for (; i < argc; i++) 2435 { 2436 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE, 2437 only_one_at_top); 2438 if (frc > 1) rc = frc; 2439 else if (frc == 0 && rc == 1) rc = 0; 2440 } 2441 2442EXIT: 2443if (pattern_list != NULL) 2444 { 2445 for (i = 0; i < pattern_count; i++) free(pattern_list[i]); 2446 free(pattern_list); 2447 } 2448if (hints_list != NULL) 2449 { 2450 for (i = 0; i < hint_count; i++) free(hints_list[i]); 2451 free(hints_list); 2452 } 2453return rc; 2454 2455EXIT2: 2456rc = 2; 2457goto EXIT; 2458} 2459 2460/* End of pcregrep */ 2461