/*************************************************
*             PCRE testing program               *
*************************************************/

/* This program was hacked up as a tester for PCRE. I really should have
written it more tidily in the first place. Will I ever learn? It has grown and
been extended and consequently is now rather untidy in places.

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
35----------------------------------------------------------------------------- 36*/ 37 38 39#include <ctype.h> 40#include <stdio.h> 41#include <string.h> 42#include <stdlib.h> 43#include <time.h> 44#include <locale.h> 45#include <errno.h> 46 47/* We need the internal info for displaying the results of pcre_study(). Also 48for getting the opcodes for showing compiled code. */ 49 50#define PCRE_SPY /* For Win32 build, import data, not export */ 51#include "internal.h" 52 53/* It is possible to compile this test program without including support for 54testing the POSIX interface, though this is not available via the standard 55Makefile. */ 56 57#if !defined NOPOSIX 58#include "pcreposix.h" 59#endif 60 61#ifndef CLOCKS_PER_SEC 62#ifdef CLK_TCK 63#define CLOCKS_PER_SEC CLK_TCK 64#else 65#define CLOCKS_PER_SEC 100 66#endif 67#endif 68 69#define LOOPREPEAT 500000 70 71#define BUFFER_SIZE 30000 72#define PBUFFER_SIZE BUFFER_SIZE 73#define DBUFFER_SIZE BUFFER_SIZE 74 75 76static FILE *outfile; 77static int log_store = 0; 78static int callout_count; 79static int callout_extra; 80static int callout_fail_count; 81static int callout_fail_id; 82static int first_callout; 83static int show_malloc; 84static int use_utf8; 85static size_t gotten_store; 86 87static uschar *pbuffer = NULL; 88 89 90static const int utf8_table1[] = { 91 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff}; 92 93static const int utf8_table2[] = { 94 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; 95 96static const int utf8_table3[] = { 97 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; 98 99 100 101/************************************************* 102* Print compiled regex * 103*************************************************/ 104 105/* The code for doing this is held in a separate file that is also included in 106pcre.c when it is compiled with the debug switch. 
It defines a function called 107print_internals(), which uses a table of opcode lengths defined by the macro 108OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates 109Unicode property names to numbers; this is kept in a separate file. */ 110 111static uschar OP_lengths[] = { OP_LENGTHS }; 112 113#include "ucp.h" 114#include "ucptypetable.c" 115#include "printint.c" 116 117 118 119/************************************************* 120* Read number from string * 121*************************************************/ 122 123/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess 124around with conditional compilation, just do the job by hand. It is only used 125for unpicking the -o argument, so just keep it simple. 126 127Arguments: 128 str string to be converted 129 endptr where to put the end pointer 130 131Returns: the unsigned long 132*/ 133 134static int 135get_value(unsigned char *str, unsigned char **endptr) 136{ 137int result = 0; 138while(*str != 0 && isspace(*str)) str++; 139while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0'); 140*endptr = str; 141return(result); 142} 143 144 145 146/************************************************* 147* Convert character value to UTF-8 * 148*************************************************/ 149 150/* This function takes an integer value in the range 0 - 0x7fffffff 151and encodes it as a UTF-8 character in 0 to 6 bytes. 
152 153Arguments: 154 cvalue the character value 155 buffer pointer to buffer for result - at least 6 bytes long 156 157Returns: number of characters placed in the buffer 158 -1 if input character is negative 159 0 if input character is positive but too big (only when 160 int is longer than 32 bits) 161*/ 162 163static int 164ord2utf8(int cvalue, unsigned char *buffer) 165{ 166register int i, j; 167for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) 168 if (cvalue <= utf8_table1[i]) break; 169if (i >= sizeof(utf8_table1)/sizeof(int)) return 0; 170if (cvalue < 0) return -1; 171 172buffer += i; 173for (j = i; j > 0; j--) 174 { 175 *buffer-- = 0x80 | (cvalue & 0x3f); 176 cvalue >>= 6; 177 } 178*buffer = utf8_table2[i] | cvalue; 179return i + 1; 180} 181 182 183/************************************************* 184* Convert UTF-8 string to value * 185*************************************************/ 186 187/* This function takes one or more bytes that represents a UTF-8 character, 188and returns the value of the character. 
189 190Argument: 191 buffer a pointer to the byte vector 192 vptr a pointer to an int to receive the value 193 194Returns: > 0 => the number of bytes consumed 195 -6 to 0 => malformed UTF-8 character at offset = (-return) 196*/ 197 198static int 199utf82ord(unsigned char *buffer, int *vptr) 200{ 201int c = *buffer++; 202int d = c; 203int i, j, s; 204 205for (i = -1; i < 6; i++) /* i is number of additional bytes */ 206 { 207 if ((d & 0x80) == 0) break; 208 d <<= 1; 209 } 210 211if (i == -1) { *vptr = c; return 1; } /* ascii character */ 212if (i == 0 || i == 6) return 0; /* invalid UTF-8 */ 213 214/* i now has a value in the range 1-5 */ 215 216s = 6*i; 217d = (c & utf8_table3[i]) << s; 218 219for (j = 0; j < i; j++) 220 { 221 c = *buffer++; 222 if ((c & 0xc0) != 0x80) return -(j+1); 223 s -= 6; 224 d |= (c & 0x3f) << s; 225 } 226 227/* Check that encoding was the correct unique one */ 228 229for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++) 230 if (d <= utf8_table1[j]) break; 231if (j != i) return -(i+1); 232 233/* Valid value */ 234 235*vptr = d; 236return i+1; 237} 238 239 240 241/************************************************* 242* Print character string * 243*************************************************/ 244 245/* Character string printing function. Must handle UTF-8 strings in utf8 246mode. Yields number of characters printed. If handed a NULL file, just counts 247chars without printing. 
*/ 248 249static int pchars(unsigned char *p, int length, FILE *f) 250{ 251int c; 252int yield = 0; 253 254while (length-- > 0) 255 { 256 if (use_utf8) 257 { 258 int rc = utf82ord(p, &c); 259 260 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */ 261 { 262 length -= rc - 1; 263 p += rc; 264 if (c < 256 && isprint(c)) 265 { 266 if (f != NULL) fprintf(f, "%c", c); 267 yield++; 268 } 269 else 270 { 271 int n; 272 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n); 273 yield += n; 274 } 275 continue; 276 } 277 } 278 279 /* Not UTF-8, or malformed UTF-8 */ 280 281 if (isprint(c = *(p++))) 282 { 283 if (f != NULL) fprintf(f, "%c", c); 284 yield++; 285 } 286 else 287 { 288 if (f != NULL) fprintf(f, "\\x%02x", c); 289 yield += 4; 290 } 291 } 292 293return yield; 294} 295 296 297 298/************************************************* 299* Callout function * 300*************************************************/ 301 302/* Called from PCRE as a result of the (?C) item. We print out where we are in 303the match. Yield zero unless more callouts than the fail count, or the callout 304data is not zero. */ 305 306static int callout(pcre_callout_block *cb) 307{ 308FILE *f = (first_callout | callout_extra)? outfile : NULL; 309int i, pre_start, post_start, subject_length; 310 311if (callout_extra) 312 { 313 fprintf(f, "Callout %d: last capture = %d\n", 314 cb->callout_number, cb->capture_last); 315 316 for (i = 0; i < cb->capture_top * 2; i += 2) 317 { 318 if (cb->offset_vector[i] < 0) 319 fprintf(f, "%2d: <unset>\n", i/2); 320 else 321 { 322 fprintf(f, "%2d: ", i/2); 323 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i], 324 cb->offset_vector[i+1] - cb->offset_vector[i], f); 325 fprintf(f, "\n"); 326 } 327 } 328 } 329 330/* Re-print the subject in canonical form, the first time or if giving full 331datails. On subsequent calls in the same match, we use pchars just to find the 332printed lengths of the substrings. 
*/ 333 334if (f != NULL) fprintf(f, "--->"); 335 336pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f); 337post_start = pchars((unsigned char *)(cb->subject + cb->start_match), 338 cb->current_position - cb->start_match, f); 339 340subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL); 341 342(void)pchars((unsigned char *)(cb->subject + cb->current_position), 343 cb->subject_length - cb->current_position, f); 344 345if (f != NULL) fprintf(f, "\n"); 346 347/* Always print appropriate indicators, with callout number if not already 348shown. For automatic callouts, show the pattern offset. */ 349 350if (cb->callout_number == 255) 351 { 352 fprintf(outfile, "%+3d ", cb->pattern_position); 353 if (cb->pattern_position > 99) fprintf(outfile, "\n "); 354 } 355else 356 { 357 if (callout_extra) fprintf(outfile, " "); 358 else fprintf(outfile, "%3d ", cb->callout_number); 359 } 360 361for (i = 0; i < pre_start; i++) fprintf(outfile, " "); 362fprintf(outfile, "^"); 363 364if (post_start > 0) 365 { 366 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " "); 367 fprintf(outfile, "^"); 368 } 369 370for (i = 0; i < subject_length - pre_start - post_start + 4; i++) 371 fprintf(outfile, " "); 372 373fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length, 374 pbuffer + cb->pattern_position); 375 376fprintf(outfile, "\n"); 377first_callout = 0; 378 379if (cb->callout_data != NULL) 380 { 381 int callout_data = *((int *)(cb->callout_data)); 382 if (callout_data != 0) 383 { 384 fprintf(outfile, "Callout data = %d\n", callout_data); 385 return callout_data; 386 } 387 } 388 389return (cb->callout_number != callout_fail_id)? 0 : 390 (++callout_count >= callout_fail_count)? 
1 : 0; 391} 392 393 394/************************************************* 395* Local malloc functions * 396*************************************************/ 397 398/* Alternative malloc function, to test functionality and show the size of the 399compiled re. */ 400 401static void *new_malloc(size_t size) 402{ 403void *block = malloc(size); 404gotten_store = size; 405if (show_malloc) 406 fprintf(outfile, "malloc %3d %p\n", size, block); 407return block; 408} 409 410static void new_free(void *block) 411{ 412if (show_malloc) 413 fprintf(outfile, "free %p\n", block); 414free(block); 415} 416 417 418/* For recursion malloc/free, to test stacking calls */ 419 420static void *stack_malloc(size_t size) 421{ 422void *block = malloc(size); 423if (show_malloc) 424 fprintf(outfile, "stack_malloc %3d %p\n", size, block); 425return block; 426} 427 428static void stack_free(void *block) 429{ 430if (show_malloc) 431 fprintf(outfile, "stack_free %p\n", block); 432free(block); 433} 434 435 436/************************************************* 437* Call pcre_fullinfo() * 438*************************************************/ 439 440/* Get one piece of information from the pcre_fullinfo() function */ 441 442static void new_info(pcre *re, pcre_extra *study, int option, void *ptr) 443{ 444int rc; 445if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0) 446 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option); 447} 448 449 450 451/************************************************* 452* Byte flipping function * 453*************************************************/ 454 455static long int 456byteflip(long int value, int n) 457{ 458if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8); 459return ((value & 0x000000ff) << 24) | 460 ((value & 0x0000ff00) << 8) | 461 ((value & 0x00ff0000) >> 8) | 462 ((value & 0xff000000) >> 24); 463} 464 465 466 467 468/************************************************* 469* Main Program * 
470*************************************************/ 471 472/* Read lines from named file or stdin and write to named file or stdout; lines 473consist of a regular expression, in delimiters and optionally followed by 474options, followed by a set of test data, terminated by an empty line. */ 475 476int main(int argc, char **argv) 477{ 478FILE *infile = stdin; 479int options = 0; 480int study_options = 0; 481int op = 1; 482int timeit = 0; 483int showinfo = 0; 484int showstore = 0; 485int size_offsets = 45; 486int size_offsets_max; 487int *offsets; 488#if !defined NOPOSIX 489int posix = 0; 490#endif 491int debug = 0; 492int done = 0; 493 494unsigned char *buffer; 495unsigned char *dbuffer; 496 497/* Get buffers from malloc() so that Electric Fence will check their misuse 498when I am debugging. */ 499 500buffer = (unsigned char *)malloc(BUFFER_SIZE); 501dbuffer = (unsigned char *)malloc(DBUFFER_SIZE); 502pbuffer = (unsigned char *)malloc(PBUFFER_SIZE); 503 504/* The outfile variable is static so that new_malloc can use it. The _setmode() 505stuff is some magic that I don't understand, but which apparently does good 506things in Windows. It's related to line terminations. 
*/ 507 508#if defined(_WIN32) || defined(WIN32) 509_setmode( _fileno( stdout ), 0x8000 ); 510#endif /* defined(_WIN32) || defined(WIN32) */ 511 512outfile = stdout; 513 514/* Scan options */ 515 516while (argc > 1 && argv[op][0] == '-') 517 { 518 unsigned char *endptr; 519 520 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0) 521 showstore = 1; 522 else if (strcmp(argv[op], "-t") == 0) timeit = 1; 523 else if (strcmp(argv[op], "-i") == 0) showinfo = 1; 524 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1; 525 else if (strcmp(argv[op], "-o") == 0 && argc > 2 && 526 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)), 527 *endptr == 0)) 528 { 529 op++; 530 argc--; 531 } 532#if !defined NOPOSIX 533 else if (strcmp(argv[op], "-p") == 0) posix = 1; 534#endif 535 else if (strcmp(argv[op], "-C") == 0) 536 { 537 int rc; 538 printf("PCRE version %s\n", pcre_version()); 539 printf("Compiled with\n"); 540 (void)pcre_config(PCRE_CONFIG_UTF8, &rc); 541 printf(" %sUTF-8 support\n", rc? "" : "No "); 542 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); 543 printf(" %sUnicode properties support\n", rc? "" : "No "); 544 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc); 545 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF"); 546 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc); 547 printf(" Internal link size = %d\n", rc); 548 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc); 549 printf(" POSIX malloc threshold = %d\n", rc); 550 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc); 551 printf(" Default match limit = %d\n", rc); 552 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc); 553 printf(" Match recursion uses %s\n", rc? 
"stack" : "heap"); 554 exit(0); 555 } 556 else 557 { 558 printf("** Unknown or malformed option %s\n", argv[op]); 559 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n"); 560 printf(" -C show PCRE compile-time options and exit\n"); 561 printf(" -d debug: show compiled code; implies -i\n" 562 " -i show information about compiled pattern\n" 563 " -m output memory used information\n" 564 " -o <n> set size of offsets vector to <n>\n"); 565#if !defined NOPOSIX 566 printf(" -p use POSIX interface\n"); 567#endif 568 printf(" -s output store (memory) used information\n" 569 " -t time compilation and execution\n"); 570 return 1; 571 } 572 op++; 573 argc--; 574 } 575 576/* Get the store for the offsets vector, and remember what it was */ 577 578size_offsets_max = size_offsets; 579offsets = (int *)malloc(size_offsets_max * sizeof(int)); 580if (offsets == NULL) 581 { 582 printf("** Failed to get %d bytes of memory for offsets vector\n", 583 size_offsets_max * sizeof(int)); 584 return 1; 585 } 586 587/* Sort out the input and output files */ 588 589if (argc > 1) 590 { 591 infile = fopen(argv[op], "rb"); 592 if (infile == NULL) 593 { 594 printf("** Failed to open %s\n", argv[op]); 595 return 1; 596 } 597 } 598 599if (argc > 2) 600 { 601 outfile = fopen(argv[op+1], "wb"); 602 if (outfile == NULL) 603 { 604 printf("** Failed to open %s\n", argv[op+1]); 605 return 1; 606 } 607 } 608 609/* Set alternative malloc function */ 610 611pcre_malloc = new_malloc; 612pcre_free = new_free; 613pcre_stack_malloc = stack_malloc; 614pcre_stack_free = stack_free; 615 616/* Heading line, then prompt for first regex if stdin */ 617 618fprintf(outfile, "PCRE version %s\n\n", pcre_version()); 619 620/* Main loop */ 621 622while (!done) 623 { 624 pcre *re = NULL; 625 pcre_extra *extra = NULL; 626 627#if !defined NOPOSIX /* There are still compilers that require no indent */ 628 regex_t preg; 629 int do_posix = 0; 630#endif 631 632 const char *error; 633 unsigned char *p, 
*pp, *ppp; 634 unsigned char *to_file = NULL; 635 const unsigned char *tables = NULL; 636 unsigned long int true_size, true_study_size = 0; 637 size_t size, regex_gotten_store; 638 int do_study = 0; 639 int do_debug = debug; 640 int do_G = 0; 641 int do_g = 0; 642 int do_showinfo = showinfo; 643 int do_showrest = 0; 644 int do_flip = 0; 645 int erroroffset, len, delimiter; 646 647 use_utf8 = 0; 648 649 if (infile == stdin) printf(" re> "); 650 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break; 651 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); 652 fflush(outfile); 653 654 p = buffer; 655 while (isspace(*p)) p++; 656 if (*p == 0) continue; 657 658 /* See if the pattern is to be loaded pre-compiled from a file. */ 659 660 if (*p == '<' && strchr((char *)(p+1), '<') == NULL) 661 { 662 unsigned long int magic; 663 uschar sbuf[8]; 664 FILE *f; 665 666 p++; 667 pp = p + (int)strlen((char *)p); 668 while (isspace(pp[-1])) pp--; 669 *pp = 0; 670 671 f = fopen((char *)p, "rb"); 672 if (f == NULL) 673 { 674 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno)); 675 continue; 676 } 677 678 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ; 679 680 true_size = 681 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3]; 682 true_study_size = 683 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7]; 684 685 re = (real_pcre *)new_malloc(true_size); 686 regex_gotten_store = gotten_store; 687 688 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ; 689 690 magic = ((real_pcre *)re)->magic_number; 691 if (magic != MAGIC_NUMBER) 692 { 693 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER) 694 { 695 do_flip = 1; 696 } 697 else 698 { 699 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p); 700 fclose(f); 701 continue; 702 } 703 } 704 705 fprintf(outfile, "Compiled regex%s loaded from %s\n", 706 do_flip? 
" (byte-inverted)" : "", p); 707 708 /* Need to know if UTF-8 for printing data strings */ 709 710 new_info(re, NULL, PCRE_INFO_OPTIONS, &options); 711 use_utf8 = (options & PCRE_UTF8) != 0; 712 713 /* Now see if there is any following study data */ 714 715 if (true_study_size != 0) 716 { 717 pcre_study_data *psd; 718 719 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size); 720 extra->flags = PCRE_EXTRA_STUDY_DATA; 721 722 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra)); 723 extra->study_data = psd; 724 725 if (fread(psd, 1, true_study_size, f) != true_study_size) 726 { 727 FAIL_READ: 728 fprintf(outfile, "Failed to read data from %s\n", p); 729 if (extra != NULL) new_free(extra); 730 if (re != NULL) new_free(re); 731 fclose(f); 732 continue; 733 } 734 fprintf(outfile, "Study data loaded from %s\n", p); 735 do_study = 1; /* To get the data output if requested */ 736 } 737 else fprintf(outfile, "No study data\n"); 738 739 fclose(f); 740 goto SHOW_INFO; 741 } 742 743 /* In-line pattern (the usual case). Get the delimiter and seek the end of 744 the pattern; if is isn't complete, read more. 
*/ 745 746 delimiter = *p++; 747 748 if (isalnum(delimiter) || delimiter == '\\') 749 { 750 fprintf(outfile, "** Delimiter must not be alphameric or \\\n"); 751 goto SKIP_DATA; 752 } 753 754 pp = p; 755 756 for(;;) 757 { 758 while (*pp != 0) 759 { 760 if (*pp == '\\' && pp[1] != 0) pp++; 761 else if (*pp == delimiter) break; 762 pp++; 763 } 764 if (*pp != 0) break; 765 766 len = BUFFER_SIZE - (pp - buffer); 767 if (len < 256) 768 { 769 fprintf(outfile, "** Expression too long - missing delimiter?\n"); 770 goto SKIP_DATA; 771 } 772 773 if (infile == stdin) printf(" > "); 774 if (fgets((char *)pp, len, infile) == NULL) 775 { 776 fprintf(outfile, "** Unexpected EOF\n"); 777 done = 1; 778 goto CONTINUE; 779 } 780 if (infile != stdin) fprintf(outfile, "%s", (char *)pp); 781 } 782 783 /* If the first character after the delimiter is backslash, make 784 the pattern end with backslash. This is purely to provide a way 785 of testing for the error message when a pattern ends with backslash. */ 786 787 if (pp[1] == '\\') *pp++ = '\\'; 788 789 /* Terminate the pattern at the delimiter, and save a copy of the pattern 790 for callouts. 
*/ 791 792 *pp++ = 0; 793 strcpy((char *)pbuffer, (char *)p); 794 795 /* Look for options after final delimiter */ 796 797 options = 0; 798 study_options = 0; 799 log_store = showstore; /* default from command line */ 800 801 while (*pp != 0) 802 { 803 switch (*pp++) 804 { 805 case 'g': do_g = 1; break; 806 case 'i': options |= PCRE_CASELESS; break; 807 case 'm': options |= PCRE_MULTILINE; break; 808 case 's': options |= PCRE_DOTALL; break; 809 case 'x': options |= PCRE_EXTENDED; break; 810 811 case '+': do_showrest = 1; break; 812 case 'A': options |= PCRE_ANCHORED; break; 813 case 'C': options |= PCRE_AUTO_CALLOUT; break; 814 case 'D': do_debug = do_showinfo = 1; break; 815 case 'E': options |= PCRE_DOLLAR_ENDONLY; break; 816 case 'F': do_flip = 1; break; 817 case 'G': do_G = 1; break; 818 case 'I': do_showinfo = 1; break; 819 case 'M': log_store = 1; break; 820 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; 821 822#if !defined NOPOSIX 823 case 'P': do_posix = 1; break; 824#endif 825 826 case 'S': do_study = 1; break; 827 case 'U': options |= PCRE_UNGREEDY; break; 828 case 'X': options |= PCRE_EXTRA; break; 829 case '8': options |= PCRE_UTF8; use_utf8 = 1; break; 830 case '?': options |= PCRE_NO_UTF8_CHECK; break; 831 832 case 'L': 833 ppp = pp; 834 while (*ppp != '\n' && *ppp != ' ') ppp++; 835 *ppp = 0; 836 if (setlocale(LC_CTYPE, (const char *)pp) == NULL) 837 { 838 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp); 839 goto SKIP_DATA; 840 } 841 tables = pcre_maketables(); 842 pp = ppp; 843 break; 844 845 case '>': 846 to_file = pp; 847 while (*pp != 0) pp++; 848 while (isspace(pp[-1])) pp--; 849 *pp = 0; 850 break; 851 852 case '\n': case ' ': break; 853 854 default: 855 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]); 856 goto SKIP_DATA; 857 } 858 } 859 860 /* Handle compiling via the POSIX interface, which doesn't support the 861 timing, showing, or debugging options, nor the ability to pass over 862 local character tables. 
*/ 863 864#if !defined NOPOSIX 865 if (posix || do_posix) 866 { 867 int rc; 868 int cflags = 0; 869 870 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE; 871 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE; 872 rc = regcomp(&preg, (char *)p, cflags); 873 874 /* Compilation failed; go back for another re, skipping to blank line 875 if non-interactive. */ 876 877 if (rc != 0) 878 { 879 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE); 880 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer); 881 goto SKIP_DATA; 882 } 883 } 884 885 /* Handle compiling via the native interface */ 886 887 else 888#endif /* !defined NOPOSIX */ 889 890 { 891 if (timeit) 892 { 893 register int i; 894 clock_t time_taken; 895 clock_t start_time = clock(); 896 for (i = 0; i < LOOPREPEAT; i++) 897 { 898 re = pcre_compile((char *)p, options, &error, &erroroffset, tables); 899 if (re != NULL) free(re); 900 } 901 time_taken = clock() - start_time; 902 fprintf(outfile, "Compile time %.3f milliseconds\n", 903 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) / 904 (double)CLOCKS_PER_SEC); 905 } 906 907 re = pcre_compile((char *)p, options, &error, &erroroffset, tables); 908 909 /* Compilation failed; go back for another re, skipping to blank line 910 if non-interactive. */ 911 912 if (re == NULL) 913 { 914 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset); 915 SKIP_DATA: 916 if (infile != stdin) 917 { 918 for (;;) 919 { 920 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) 921 { 922 done = 1; 923 goto CONTINUE; 924 } 925 len = (int)strlen((char *)buffer); 926 while (len > 0 && isspace(buffer[len-1])) len--; 927 if (len == 0) break; 928 } 929 fprintf(outfile, "\n"); 930 } 931 goto CONTINUE; 932 } 933 934 /* Compilation succeeded; print data if required. There are now two 935 info-returning functions. The old one has a limited interface and 936 returns only limited data. Check that it agrees with the newer one. 
*/ 937 938 if (log_store) 939 fprintf(outfile, "Memory allocation (code space): %d\n", 940 (int)(gotten_store - 941 sizeof(real_pcre) - 942 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size)); 943 944 /* Extract the size for possible writing before possibly flipping it, 945 and remember the store that was got. */ 946 947 true_size = ((real_pcre *)re)->size; 948 regex_gotten_store = gotten_store; 949 950 /* If /S was present, study the regexp to generate additional info to 951 help with the matching. */ 952 953 if (do_study) 954 { 955 if (timeit) 956 { 957 register int i; 958 clock_t time_taken; 959 clock_t start_time = clock(); 960 for (i = 0; i < LOOPREPEAT; i++) 961 extra = pcre_study(re, study_options, &error); 962 time_taken = clock() - start_time; 963 if (extra != NULL) free(extra); 964 fprintf(outfile, " Study time %.3f milliseconds\n", 965 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) / 966 (double)CLOCKS_PER_SEC); 967 } 968 extra = pcre_study(re, study_options, &error); 969 if (error != NULL) 970 fprintf(outfile, "Failed to study: %s\n", error); 971 else if (extra != NULL) 972 true_study_size = ((pcre_study_data *)(extra->study_data))->size; 973 } 974 975 /* If the 'F' option was present, we flip the bytes of all the integer 976 fields in the regex data block and the study block. This is to make it 977 possible to test PCRE's handling of byte-flipped patterns, e.g. those 978 compiled on a different architecture. 
*/ 979 980 if (do_flip) 981 { 982 real_pcre *rre = (real_pcre *)re; 983 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number)); 984 rre->size = byteflip(rre->size, sizeof(rre->size)); 985 rre->options = byteflip(rre->options, sizeof(rre->options)); 986 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket)); 987 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref)); 988 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte)); 989 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte)); 990 rre->name_table_offset = byteflip(rre->name_table_offset, 991 sizeof(rre->name_table_offset)); 992 rre->name_entry_size = byteflip(rre->name_entry_size, 993 sizeof(rre->name_entry_size)); 994 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count)); 995 996 if (extra != NULL) 997 { 998 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data); 999 rsd->size = byteflip(rsd->size, sizeof(rsd->size)); 1000 rsd->options = byteflip(rsd->options, sizeof(rsd->options)); 1001 } 1002 } 1003 1004 /* Extract information from the compiled data if required */ 1005 1006 SHOW_INFO: 1007 1008 if (do_showinfo) 1009 { 1010 unsigned long int get_options, all_options; 1011 int old_first_char, old_options, old_count; 1012 int count, backrefmax, first_char, need_char; 1013 int nameentrysize, namecount; 1014 const uschar *nametable; 1015 1016 if (do_debug) 1017 { 1018 fprintf(outfile, "------------------------------------------------------------------\n"); 1019 print_internals(re, outfile); 1020 } 1021 1022 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options); 1023 new_info(re, NULL, PCRE_INFO_SIZE, &size); 1024 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count); 1025 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax); 1026 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char); 1027 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char); 1028 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); 1029 
new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount); 1030 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable); 1031 1032 old_count = pcre_info(re, &old_options, &old_first_char); 1033 if (count < 0) fprintf(outfile, 1034 "Error %d from pcre_info()\n", count); 1035 else 1036 { 1037 if (old_count != count) fprintf(outfile, 1038 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count, 1039 old_count); 1040 1041 if (old_first_char != first_char) fprintf(outfile, 1042 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n", 1043 first_char, old_first_char); 1044 1045 if (old_options != (int)get_options) fprintf(outfile, 1046 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n", 1047 get_options, old_options); 1048 } 1049 1050 if (size != regex_gotten_store) fprintf(outfile, 1051 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n", 1052 size, regex_gotten_store); 1053 1054 fprintf(outfile, "Capturing subpattern count = %d\n", count); 1055 if (backrefmax > 0) 1056 fprintf(outfile, "Max back reference = %d\n", backrefmax); 1057 1058 if (namecount > 0) 1059 { 1060 fprintf(outfile, "Named capturing subpatterns:\n"); 1061 while (namecount-- > 0) 1062 { 1063 fprintf(outfile, " %s %*s%3d\n", nametable + 2, 1064 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "", 1065 GET2(nametable, 0)); 1066 nametable += nameentrysize; 1067 } 1068 } 1069 1070 /* The NOPARTIAL bit is a private bit in the options, so we have 1071 to fish it out via out back door */ 1072 1073 all_options = ((real_pcre *)re)->options; 1074 if (do_flip) 1075 { 1076 all_options = byteflip(all_options, sizeof(all_options)); 1077 } 1078 1079 if ((all_options & PCRE_NOPARTIAL) != 0) 1080 fprintf(outfile, "Partial matching not supported\n"); 1081 1082 if (get_options == 0) fprintf(outfile, "No options\n"); 1083 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n", 1084 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "", 1085 ((get_options & PCRE_CASELESS) != 0)? 
" caseless" : "", 1086 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "", 1087 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "", 1088 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "", 1089 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", 1090 ((get_options & PCRE_EXTRA) != 0)? " extra" : "", 1091 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "", 1092 ((get_options & PCRE_UTF8) != 0)? " utf8" : "", 1093 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : ""); 1094 1095 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0) 1096 fprintf(outfile, "Case state changes\n"); 1097 1098 if (first_char == -1) 1099 { 1100 fprintf(outfile, "First char at start or follows \\n\n"); 1101 } 1102 else if (first_char < 0) 1103 { 1104 fprintf(outfile, "No first char\n"); 1105 } 1106 else 1107 { 1108 int ch = first_char & 255; 1109 const char *caseless = ((first_char & REQ_CASELESS) == 0)? 1110 "" : " (caseless)"; 1111 if (isprint(ch)) 1112 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless); 1113 else 1114 fprintf(outfile, "First char = %d%s\n", ch, caseless); 1115 } 1116 1117 if (need_char < 0) 1118 { 1119 fprintf(outfile, "No need char\n"); 1120 } 1121 else 1122 { 1123 int ch = need_char & 255; 1124 const char *caseless = ((need_char & REQ_CASELESS) == 0)? 1125 "" : " (caseless)"; 1126 if (isprint(ch)) 1127 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless); 1128 else 1129 fprintf(outfile, "Need char = %d%s\n", ch, caseless); 1130 } 1131 1132 /* Don't output study size; at present it is in any case a fixed 1133 value, but it varies, depending on the computer architecture, and 1134 so messes up the test suite. (And with the /F option, it might be 1135 flipped.) 
*/ 1136 1137 if (do_study) 1138 { 1139 if (extra == NULL) 1140 fprintf(outfile, "Study returned NULL\n"); 1141 else 1142 { 1143 uschar *start_bits = NULL; 1144 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits); 1145 1146 if (start_bits == NULL) 1147 fprintf(outfile, "No starting byte set\n"); 1148 else 1149 { 1150 int i; 1151 int c = 24; 1152 fprintf(outfile, "Starting byte set: "); 1153 for (i = 0; i < 256; i++) 1154 { 1155 if ((start_bits[i/8] & (1<<(i&7))) != 0) 1156 { 1157 if (c > 75) 1158 { 1159 fprintf(outfile, "\n "); 1160 c = 2; 1161 } 1162 if (isprint(i) && i != ' ') 1163 { 1164 fprintf(outfile, "%c ", i); 1165 c += 2; 1166 } 1167 else 1168 { 1169 fprintf(outfile, "\\x%02x ", i); 1170 c += 5; 1171 } 1172 } 1173 } 1174 fprintf(outfile, "\n"); 1175 } 1176 } 1177 } 1178 } 1179 1180 /* If the '>' option was present, we write out the regex to a file, and 1181 that is all. The first 8 bytes of the file are the regex length and then 1182 the study length, in big-endian order. */ 1183 1184 if (to_file != NULL) 1185 { 1186 FILE *f = fopen((char *)to_file, "wb"); 1187 if (f == NULL) 1188 { 1189 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno)); 1190 } 1191 else 1192 { 1193 uschar sbuf[8]; 1194 sbuf[0] = (true_size >> 24) & 255; 1195 sbuf[1] = (true_size >> 16) & 255; 1196 sbuf[2] = (true_size >> 8) & 255; 1197 sbuf[3] = (true_size) & 255; 1198 1199 sbuf[4] = (true_study_size >> 24) & 255; 1200 sbuf[5] = (true_study_size >> 16) & 255; 1201 sbuf[6] = (true_study_size >> 8) & 255; 1202 sbuf[7] = (true_study_size) & 255; 1203 1204 if (fwrite(sbuf, 1, 8, f) < 8 || 1205 fwrite(re, 1, true_size, f) < true_size) 1206 { 1207 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno)); 1208 } 1209 else 1210 { 1211 fprintf(outfile, "Compiled regex written to %s\n", to_file); 1212 if (extra != NULL) 1213 { 1214 if (fwrite(extra->study_data, 1, true_study_size, f) < 1215 true_study_size) 1216 { 1217 fprintf(outfile, "Write error on %s: %s\n", 
to_file, 1218 strerror(errno)); 1219 } 1220 else fprintf(outfile, "Study data written to %s\n", to_file); 1221 } 1222 } 1223 fclose(f); 1224 } 1225 continue; /* With next regex */ 1226 } 1227 } /* End of non-POSIX compile */ 1228 1229 /* Read data lines and test them */ 1230 1231 for (;;) 1232 { 1233 unsigned char *q; 1234 unsigned char *bptr = dbuffer; 1235 int *use_offsets = offsets; 1236 int use_size_offsets = size_offsets; 1237 int callout_data = 0; 1238 int callout_data_set = 0; 1239 int count, c; 1240 int copystrings = 0; 1241 int find_match_limit = 0; 1242 int getstrings = 0; 1243 int getlist = 0; 1244 int gmatched = 0; 1245 int start_offset = 0; 1246 int g_notempty = 0; 1247 1248 options = 0; 1249 1250 pcre_callout = callout; 1251 first_callout = 1; 1252 callout_extra = 0; 1253 callout_count = 0; 1254 callout_fail_count = 999999; 1255 callout_fail_id = -1; 1256 show_malloc = 0; 1257 1258 if (infile == stdin) printf("data> "); 1259 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) 1260 { 1261 done = 1; 1262 goto CONTINUE; 1263 } 1264 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); 1265 1266 len = (int)strlen((char *)buffer); 1267 while (len > 0 && isspace(buffer[len-1])) len--; 1268 buffer[len] = 0; 1269 if (len == 0) break; 1270 1271 p = buffer; 1272 while (isspace(*p)) p++; 1273 1274 q = dbuffer; 1275 while ((c = *p++) != 0) 1276 { 1277 int i = 0; 1278 int n = 0; 1279 1280 if (c == '\\') switch ((c = *p++)) 1281 { 1282 case 'a': c = 7; break; 1283 case 'b': c = '\b'; break; 1284 case 'e': c = 27; break; 1285 case 'f': c = '\f'; break; 1286 case 'n': c = '\n'; break; 1287 case 'r': c = '\r'; break; 1288 case 't': c = '\t'; break; 1289 case 'v': c = '\v'; break; 1290 1291 case '0': case '1': case '2': case '3': 1292 case '4': case '5': case '6': case '7': 1293 c -= '0'; 1294 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') 1295 c = c * 8 + *p++ - '0'; 1296 break; 1297 1298 case 'x': 1299 1300 /* Handle \x{..} specially - new Perl 
thing for utf8 */ 1301 1302 if (*p == '{') 1303 { 1304 unsigned char *pt = p; 1305 c = 0; 1306 while (isxdigit(*(++pt))) 1307 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W'); 1308 if (*pt == '}') 1309 { 1310 unsigned char buff8[8]; 1311 int ii, utn; 1312 utn = ord2utf8(c, buff8); 1313 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii]; 1314 c = buff8[ii]; /* Last byte */ 1315 p = pt + 1; 1316 break; 1317 } 1318 /* Not correct form; fall through */ 1319 } 1320 1321 /* Ordinary \x */ 1322 1323 c = 0; 1324 while (i++ < 2 && isxdigit(*p)) 1325 { 1326 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W'); 1327 p++; 1328 } 1329 break; 1330 1331 case 0: /* \ followed by EOF allows for an empty line */ 1332 p--; 1333 continue; 1334 1335 case '>': 1336 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0'; 1337 continue; 1338 1339 case 'A': /* Option setting */ 1340 options |= PCRE_ANCHORED; 1341 continue; 1342 1343 case 'B': 1344 options |= PCRE_NOTBOL; 1345 continue; 1346 1347 case 'C': 1348 if (isdigit(*p)) /* Set copy string */ 1349 { 1350 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 1351 copystrings |= 1 << n; 1352 } 1353 else if (isalnum(*p)) 1354 { 1355 uschar name[256]; 1356 uschar *npp = name; 1357 while (isalnum(*p)) *npp++ = *p++; 1358 *npp = 0; 1359 n = pcre_get_stringnumber(re, (char *)name); 1360 if (n < 0) 1361 fprintf(outfile, "no parentheses with name \"%s\"\n", name); 1362 else copystrings |= 1 << n; 1363 } 1364 else if (*p == '+') 1365 { 1366 callout_extra = 1; 1367 p++; 1368 } 1369 else if (*p == '-') 1370 { 1371 pcre_callout = NULL; 1372 p++; 1373 } 1374 else if (*p == '!') 1375 { 1376 callout_fail_id = 0; 1377 p++; 1378 while(isdigit(*p)) 1379 callout_fail_id = callout_fail_id * 10 + *p++ - '0'; 1380 callout_fail_count = 0; 1381 if (*p == '!') 1382 { 1383 p++; 1384 while(isdigit(*p)) 1385 callout_fail_count = callout_fail_count * 10 + *p++ - '0'; 1386 } 1387 } 1388 else if (*p == '*') 1389 { 1390 int sign = 1; 1391 callout_data = 0; 
1392 if (*(++p) == '-') { sign = -1; p++; } 1393 while(isdigit(*p)) 1394 callout_data = callout_data * 10 + *p++ - '0'; 1395 callout_data *= sign; 1396 callout_data_set = 1; 1397 } 1398 continue; 1399 1400 case 'G': 1401 if (isdigit(*p)) 1402 { 1403 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 1404 getstrings |= 1 << n; 1405 } 1406 else if (isalnum(*p)) 1407 { 1408 uschar name[256]; 1409 uschar *npp = name; 1410 while (isalnum(*p)) *npp++ = *p++; 1411 *npp = 0; 1412 n = pcre_get_stringnumber(re, (char *)name); 1413 if (n < 0) 1414 fprintf(outfile, "no parentheses with name \"%s\"\n", name); 1415 else getstrings |= 1 << n; 1416 } 1417 continue; 1418 1419 case 'L': 1420 getlist = 1; 1421 continue; 1422 1423 case 'M': 1424 find_match_limit = 1; 1425 continue; 1426 1427 case 'N': 1428 options |= PCRE_NOTEMPTY; 1429 continue; 1430 1431 case 'O': 1432 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 1433 if (n > size_offsets_max) 1434 { 1435 size_offsets_max = n; 1436 free(offsets); 1437 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int)); 1438 if (offsets == NULL) 1439 { 1440 printf("** Failed to get %d bytes of memory for offsets vector\n", 1441 size_offsets_max * sizeof(int)); 1442 return 1; 1443 } 1444 } 1445 use_size_offsets = n; 1446 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */ 1447 continue; 1448 1449 case 'P': 1450 options |= PCRE_PARTIAL; 1451 continue; 1452 1453 case 'S': 1454 show_malloc = 1; 1455 continue; 1456 1457 case 'Z': 1458 options |= PCRE_NOTEOL; 1459 continue; 1460 1461 case '?': 1462 options |= PCRE_NO_UTF8_CHECK; 1463 continue; 1464 } 1465 *q++ = c; 1466 } 1467 *q = 0; 1468 len = q - dbuffer; 1469 1470 /* Handle matching via the POSIX interface, which does not 1471 support timing or playing with the match limit or callout data. 
*/ 1472 1473#if !defined NOPOSIX 1474 if (posix || do_posix) 1475 { 1476 int rc; 1477 int eflags = 0; 1478 regmatch_t *pmatch = NULL; 1479 if (use_size_offsets > 0) 1480 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets); 1481 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL; 1482 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL; 1483 1484 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags); 1485 1486 if (rc != 0) 1487 { 1488 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE); 1489 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer); 1490 } 1491 else 1492 { 1493 size_t i; 1494 for (i = 0; i < (size_t)use_size_offsets; i++) 1495 { 1496 if (pmatch[i].rm_so >= 0) 1497 { 1498 fprintf(outfile, "%2d: ", (int)i); 1499 (void)pchars(dbuffer + pmatch[i].rm_so, 1500 pmatch[i].rm_eo - pmatch[i].rm_so, outfile); 1501 fprintf(outfile, "\n"); 1502 if (i == 0 && do_showrest) 1503 { 1504 fprintf(outfile, " 0+ "); 1505 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, 1506 outfile); 1507 fprintf(outfile, "\n"); 1508 } 1509 } 1510 } 1511 } 1512 free(pmatch); 1513 } 1514 1515 /* Handle matching via the native interface - repeats for /g and /G */ 1516 1517 else 1518#endif /* !defined NOPOSIX */ 1519 1520 for (;; gmatched++) /* Loop for /g or /G */ 1521 { 1522 if (timeit) 1523 { 1524 register int i; 1525 clock_t time_taken; 1526 clock_t start_time = clock(); 1527 for (i = 0; i < LOOPREPEAT; i++) 1528 count = pcre_exec(re, extra, (char *)bptr, len, 1529 start_offset, options | g_notempty, use_offsets, use_size_offsets); 1530 time_taken = clock() - start_time; 1531 fprintf(outfile, "Execute time %.3f milliseconds\n", 1532 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) / 1533 (double)CLOCKS_PER_SEC); 1534 } 1535 1536 /* If find_match_limit is set, we want to do repeated matches with 1537 varying limits in order to find the minimum value. 
*/ 1538 1539 if (find_match_limit) 1540 { 1541 int min = 0; 1542 int mid = 64; 1543 int max = -1; 1544 1545 if (extra == NULL) 1546 { 1547 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 1548 extra->flags = 0; 1549 } 1550 extra->flags |= PCRE_EXTRA_MATCH_LIMIT; 1551 1552 for (;;) 1553 { 1554 extra->match_limit = mid; 1555 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, 1556 options | g_notempty, use_offsets, use_size_offsets); 1557 if (count == PCRE_ERROR_MATCHLIMIT) 1558 { 1559 /* fprintf(outfile, "Testing match limit = %d\n", mid); */ 1560 min = mid; 1561 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2; 1562 } 1563 else if (count >= 0 || count == PCRE_ERROR_NOMATCH || 1564 count == PCRE_ERROR_PARTIAL) 1565 { 1566 if (mid == min + 1) 1567 { 1568 fprintf(outfile, "Minimum match limit = %d\n", mid); 1569 break; 1570 } 1571 /* fprintf(outfile, "Testing match limit = %d\n", mid); */ 1572 max = mid; 1573 mid = (min + mid)/2; 1574 } 1575 else break; /* Some other error */ 1576 } 1577 1578 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT; 1579 } 1580 1581 /* If callout_data is set, use the interface with additional data */ 1582 1583 else if (callout_data_set) 1584 { 1585 if (extra == NULL) 1586 { 1587 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 1588 extra->flags = 0; 1589 } 1590 extra->flags |= PCRE_EXTRA_CALLOUT_DATA; 1591 extra->callout_data = &callout_data; 1592 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, 1593 options | g_notempty, use_offsets, use_size_offsets); 1594 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA; 1595 } 1596 1597 /* The normal case is just to do the match once, with the default 1598 value of match_limit. 
*/ 1599 1600 else 1601 { 1602 count = pcre_exec(re, extra, (char *)bptr, len, 1603 start_offset, options | g_notempty, use_offsets, use_size_offsets); 1604 } 1605 1606 if (count == 0) 1607 { 1608 fprintf(outfile, "Matched, but too many substrings\n"); 1609 count = use_size_offsets/3; 1610 } 1611 1612 /* Matched */ 1613 1614 if (count >= 0) 1615 { 1616 int i; 1617 for (i = 0; i < count * 2; i += 2) 1618 { 1619 if (use_offsets[i] < 0) 1620 fprintf(outfile, "%2d: <unset>\n", i/2); 1621 else 1622 { 1623 fprintf(outfile, "%2d: ", i/2); 1624 (void)pchars(bptr + use_offsets[i], 1625 use_offsets[i+1] - use_offsets[i], outfile); 1626 fprintf(outfile, "\n"); 1627 if (i == 0) 1628 { 1629 if (do_showrest) 1630 { 1631 fprintf(outfile, " 0+ "); 1632 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], 1633 outfile); 1634 fprintf(outfile, "\n"); 1635 } 1636 } 1637 } 1638 } 1639 1640 for (i = 0; i < 32; i++) 1641 { 1642 if ((copystrings & (1 << i)) != 0) 1643 { 1644 char copybuffer[16]; 1645 int rc = pcre_copy_substring((char *)bptr, use_offsets, count, 1646 i, copybuffer, sizeof(copybuffer)); 1647 if (rc < 0) 1648 fprintf(outfile, "copy substring %d failed %d\n", i, rc); 1649 else 1650 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc); 1651 } 1652 } 1653 1654 for (i = 0; i < 32; i++) 1655 { 1656 if ((getstrings & (1 << i)) != 0) 1657 { 1658 const char *substring; 1659 int rc = pcre_get_substring((char *)bptr, use_offsets, count, 1660 i, &substring); 1661 if (rc < 0) 1662 fprintf(outfile, "get substring %d failed %d\n", i, rc); 1663 else 1664 { 1665 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc); 1666 /* free((void *)substring); */ 1667 pcre_free_substring(substring); 1668 } 1669 } 1670 } 1671 1672 if (getlist) 1673 { 1674 const char **stringlist; 1675 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count, 1676 &stringlist); 1677 if (rc < 0) 1678 fprintf(outfile, "get substring list failed %d\n", rc); 1679 else 1680 { 1681 for (i = 0; i < count; i++) 
1682 fprintf(outfile, "%2dL %s\n", i, stringlist[i]); 1683 if (stringlist[i] != NULL) 1684 fprintf(outfile, "string list not terminated by NULL\n"); 1685 /* free((void *)stringlist); */ 1686 pcre_free_substring_list(stringlist); 1687 } 1688 } 1689 } 1690 1691 /* There was a partial match */ 1692 1693 else if (count == PCRE_ERROR_PARTIAL) 1694 { 1695 fprintf(outfile, "Partial match\n"); 1696 break; /* Out of the /g loop */ 1697 } 1698 1699 /* Failed to match. If this is a /g or /G loop and we previously set 1700 g_notempty after a null match, this is not necessarily the end. 1701 We want to advance the start offset, and continue. In the case of UTF-8 1702 matching, the advance must be one character, not one byte. Fudge the 1703 offset values to achieve this. We won't be at the end of the string - 1704 that was checked before setting g_notempty. */ 1705 1706 else 1707 { 1708 if (g_notempty != 0) 1709 { 1710 int onechar = 1; 1711 use_offsets[0] = start_offset; 1712 if (use_utf8) 1713 { 1714 while (start_offset + onechar < len) 1715 { 1716 int tb = bptr[start_offset+onechar]; 1717 if (tb <= 127) break; 1718 tb &= 0xc0; 1719 if (tb != 0 && tb != 0xc0) onechar++; 1720 } 1721 } 1722 use_offsets[1] = start_offset + onechar; 1723 } 1724 else 1725 { 1726 if (count == PCRE_ERROR_NOMATCH) 1727 { 1728 if (gmatched == 0) fprintf(outfile, "No match\n"); 1729 } 1730 else fprintf(outfile, "Error %d\n", count); 1731 break; /* Out of the /g loop */ 1732 } 1733 } 1734 1735 /* If not /g or /G we are done */ 1736 1737 if (!do_g && !do_G) break; 1738 1739 /* If we have matched an empty string, first check to see if we are at 1740 the end of the subject. If so, the /g loop is over. Otherwise, mimic 1741 what Perl's /g options does. This turns out to be rather cunning. First 1742 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the 1743 same point. If this fails (picked up above) we advance to the next 1744 character. 
*/ 1745 1746 g_notempty = 0; 1747 if (use_offsets[0] == use_offsets[1]) 1748 { 1749 if (use_offsets[0] == len) break; 1750 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; 1751 } 1752 1753 /* For /g, update the start offset, leaving the rest alone */ 1754 1755 if (do_g) start_offset = use_offsets[1]; 1756 1757 /* For /G, update the pointer and length */ 1758 1759 else 1760 { 1761 bptr += use_offsets[1]; 1762 len -= use_offsets[1]; 1763 } 1764 } /* End of loop for /g and /G */ 1765 } /* End of loop for data lines */ 1766 1767 CONTINUE: 1768 1769#if !defined NOPOSIX 1770 if (posix || do_posix) regfree(&preg); 1771#endif 1772 1773 if (re != NULL) free(re); 1774 if (extra != NULL) free(extra); 1775 if (tables != NULL) 1776 { 1777 free((void *)tables); 1778 setlocale(LC_CTYPE, "C"); 1779 } 1780 } 1781 1782if (infile == stdin) fprintf(outfile, "\n"); 1783return 0; 1784} 1785 1786/* End */ 1787