1/************************************************* 2* PCRE testing program * 3*************************************************/ 4 5/* This program was hacked up as a tester for PCRE. I really should have 6written it more tidily in the first place. Will I ever learn? It has grown and 7been extended and consequently is now rather, er, *very* untidy in places. The 8addition of 16-bit support has made it even worse. :-( 9 10----------------------------------------------------------------------------- 11Redistribution and use in source and binary forms, with or without 12modification, are permitted provided that the following conditions are met: 13 14 * Redistributions of source code must retain the above copyright notice, 15 this list of conditions and the following disclaimer. 16 17 * Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in the 19 documentation and/or other materials provided with the distribution. 20 21 * Neither the name of the University of Cambridge nor the names of its 22 contributors may be used to endorse or promote products derived from 23 this software without specific prior written permission. 24 25THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 29LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35POSSIBILITY OF SUCH DAMAGE. 
36----------------------------------------------------------------------------- 37*/ 38 39/* This program now supports the testing of both the 8-bit and 16-bit PCRE 40libraries in a single program. This is different from the modules such as 41pcre_compile.c in the library itself, which are compiled separately for each 42mode. If both modes are enabled, for example, pcre_compile.c is compiled twice 43(the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is 44compiled only once. Therefore, it must not make use of any of the macros from 45pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does, 46however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls 47only supported library functions. */ 48 49#ifdef HAVE_CONFIG_H 50#include "config.h" 51#endif 52 53#include <ctype.h> 54#include <stdio.h> 55#include <string.h> 56#include <stdlib.h> 57#include <time.h> 58#include <locale.h> 59#include <errno.h> 60 61/* Both libreadline and libedit are optionally supported. The user-supplied 62original patch uses readline/readline.h for libedit, but in at least one system 63it is installed as editline/readline.h, so the configuration code now looks for 64that first, falling back to readline/readline.h. */ 65 66#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 67#ifdef HAVE_UNISTD_H 68#include <unistd.h> 69#endif 70#if defined(SUPPORT_LIBREADLINE) 71#include <readline/readline.h> 72#include <readline/history.h> 73#else 74#if defined(HAVE_EDITLINE_READLINE_H) 75#include <editline/readline.h> 76#else 77#include <readline/readline.h> 78#endif 79#endif 80#endif 81 82/* A number of things vary for Windows builds. Originally, pcretest opened its 83input and output without "b"; then I was told that "b" was needed in some 84environments, so it was added for release 5.0 to both the input and output. (It 85makes no difference on Unix-like systems.) Later I was told that it is wrong 86for the input on Windows. 
I've now abstracted the modes into two macros that 87are set here, to make it easier to fiddle with them, and removed "b" from the 88input mode under Windows. */ 89 90#if defined(_WIN32) || defined(WIN32) 91#include <io.h> /* For _setmode() */ 92#include <fcntl.h> /* For _O_BINARY */ 93#define INPUT_MODE "r" 94#define OUTPUT_MODE "wb" 95 96#ifndef isatty 97#define isatty _isatty /* This is what Windows calls them, I'm told, */ 98#endif /* though in some environments they seem to */ 99 /* be already defined, hence the #ifndefs. */ 100#ifndef fileno 101#define fileno _fileno 102#endif 103 104/* A user sent this fix for Borland Builder 5 under Windows. */ 105 106#ifdef __BORLANDC__ 107#define _setmode(handle, mode) setmode(handle, mode) 108#endif 109 110/* Not Windows */ 111 112#else 113#include <sys/time.h> /* These two includes are needed */ 114#include <sys/resource.h> /* for setrlimit(). */ 115#define INPUT_MODE "rb" 116#define OUTPUT_MODE "wb" 117#endif 118 119#define PRIV(name) name 120 121/* We have to include pcre_internal.h because we need the internal info for 122displaying the results of pcre_study() and we also need to know about the 123internal macros, structures, and other internal data values; pcretest has 124"inside information" compared to a program that strictly follows the PCRE API. 125 126Although pcre_internal.h does itself include pcre.h, we explicitly include it 127here before pcre_internal.h so that the PCRE_EXP_xxx macros get set 128appropriately for an application, not for building PCRE. */ 129 130#include "pcre.h" 131 132#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 133/* Configure internal macros to 16 bit mode. 
*/ 134#define COMPILE_PCRE16 135#endif 136 137#include "pcre_internal.h" 138 139/* The pcre_printint() function, which prints the internal form of a compiled 140regex, is held in a separate file so that (a) it can be compiled in either 1418-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c 142when that is compiled in debug mode. */ 143 144#ifdef SUPPORT_PCRE8 145void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths); 146#endif 147#ifdef SUPPORT_PCRE16 148void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); 149#endif 150 151/* We need access to some of the data tables that PCRE uses. So as not to have 152to keep two copies, we include the source file here, changing the names of the 153external symbols to prevent clashes. */ 154 155#define PCRE_INCLUDED 156 157#include "pcre_tables.c" 158 159/* The definition of the macro PRINTABLE, which determines whether to print an 160output character as-is or as a hex value when showing compiled patterns, is 161the same as in the printint.src file. We uses it here in cases when the locale 162has not been explicitly changed, so as to get consistent output from systems 163that differ in their output from isprint() even in the "C" locale. */ 164 165#ifdef EBCDIC 166#define PRINTABLE(c) ((c) >= 64 && (c) < 255) 167#else 168#define PRINTABLE(c) ((c) >= 32 && (c) < 127) 169#endif 170 171#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c)) 172 173/* Posix support is disabled in 16 bit only mode. */ 174#if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX 175#define NOPOSIX 176#endif 177 178/* It is possible to compile this test program without including support for 179testing the POSIX interface, though this is not available via the standard 180Makefile. 
*/ 181 182#if !defined NOPOSIX 183#include "pcreposix.h" 184#endif 185 186/* It is also possible, originally for the benefit of a version that was 187imported into Exim, to build pcretest without support for UTF8 or UTF16 (define 188NOUTF), without the interface to the DFA matcher (NODFA). In fact, we 189automatically cut out the UTF support if PCRE is built without it. */ 190 191#ifndef SUPPORT_UTF 192#ifndef NOUTF 193#define NOUTF 194#endif 195#endif 196 197/* To make the code a bit tidier for 8-bit and 16-bit support, we define macros 198for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called 199only from one place and is handled differently). I couldn't dream up any way of 200using a single macro to do this in a generic way, because of the many different 201argument requirements. We know that at least one of SUPPORT_PCRE8 and 202SUPPORT_PCRE16 must be set. First define macros for each individual mode; then 203use these in the definitions of generic macros. 204 205**** Special note about the PCHARSxxx macros: the address of the string to be 206printed is always given as two arguments: a base address followed by an offset. 207The base address is cast to the correct data size for 8 or 16 bit data; the 208offset is in units of this size. If the string were given as base+offset in one 209argument, the casting might be incorrectly applied. 
*/ 210 211#ifdef SUPPORT_PCRE8 212 213#define PCHARS8(lv, p, offset, len, f) \ 214 lv = pchars((pcre_uint8 *)(p) + offset, len, f) 215 216#define PCHARSV8(p, offset, len, f) \ 217 (void)pchars((pcre_uint8 *)(p) + offset, len, f) 218 219#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \ 220 p = read_capture_name8(p, cn8, re) 221 222#define STRLEN8(p) ((int)strlen((char *)p)) 223 224#define SET_PCRE_CALLOUT8(callout) \ 225 pcre_callout = callout 226 227#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \ 228 pcre_assign_jit_stack(extra, callback, userdata) 229 230#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \ 231 re = pcre_compile((char *)pat, options, error, erroffset, tables) 232 233#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ 234 namesptr, cbuffer, size) \ 235 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \ 236 (char *)namesptr, cbuffer, size) 237 238#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \ 239 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size) 240 241#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \ 242 offsets, size_offsets, workspace, size_workspace) \ 243 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \ 244 offsets, size_offsets, workspace, size_workspace) 245 246#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \ 247 offsets, size_offsets) \ 248 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \ 249 offsets, size_offsets) 250 251#define PCRE_FREE_STUDY8(extra) \ 252 pcre_free_study(extra) 253 254#define PCRE_FREE_SUBSTRING8(substring) \ 255 pcre_free_substring(substring) 256 257#define PCRE_FREE_SUBSTRING_LIST8(listptr) \ 258 pcre_free_substring_list(listptr) 259 260#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ 261 getnamesptr, subsptr) \ 262 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \ 263 (char 
*)getnamesptr, subsptr) 264 265#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \ 266 n = pcre_get_stringnumber(re, (char *)ptr) 267 268#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \ 269 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr) 270 271#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \ 272 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr) 273 274#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \ 275 rc = pcre_pattern_to_host_byte_order(re, extra, tables) 276 277#define PCRE_PRINTINT8(re, outfile, debug_lengths) \ 278 pcre_printint(re, outfile, debug_lengths) 279 280#define PCRE_STUDY8(extra, re, options, error) \ 281 extra = pcre_study(re, options, error) 282 283#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \ 284 pcre_jit_stack_alloc(startsize, maxsize) 285 286#define PCRE_JIT_STACK_FREE8(stack) \ 287 pcre_jit_stack_free(stack) 288 289#endif /* SUPPORT_PCRE8 */ 290 291/* -----------------------------------------------------------*/ 292 293#ifdef SUPPORT_PCRE16 294 295#define PCHARS16(lv, p, offset, len, f) \ 296 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f) 297 298#define PCHARSV16(p, offset, len, f) \ 299 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f) 300 301#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \ 302 p = read_capture_name16(p, cn16, re) 303 304#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p)) 305 306#define SET_PCRE_CALLOUT16(callout) \ 307 pcre16_callout = (int (*)(pcre16_callout_block *))callout 308 309#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \ 310 pcre16_assign_jit_stack((pcre16_extra *)extra, \ 311 (pcre16_jit_callback)callback, userdata) 312 313#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \ 314 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \ 315 tables) 316 317#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ 318 namesptr, cbuffer, size) \ 319 rc = 
pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \ 320 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2) 321 322#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \ 323 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \ 324 (PCRE_UCHAR16 *)cbuffer, size/2) 325 326#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \ 327 offsets, size_offsets, workspace, size_workspace) \ 328 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \ 329 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \ 330 workspace, size_workspace) 331 332#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \ 333 offsets, size_offsets) \ 334 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \ 335 len, start_offset, options, offsets, size_offsets) 336 337#define PCRE_FREE_STUDY16(extra) \ 338 pcre16_free_study((pcre16_extra *)extra) 339 340#define PCRE_FREE_SUBSTRING16(substring) \ 341 pcre16_free_substring((PCRE_SPTR16)substring) 342 343#define PCRE_FREE_SUBSTRING_LIST16(listptr) \ 344 pcre16_free_substring_list((PCRE_SPTR16 *)listptr) 345 346#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ 347 getnamesptr, subsptr) \ 348 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \ 349 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr) 350 351#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \ 352 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr) 353 354#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \ 355 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \ 356 (PCRE_SPTR16 *)(void*)subsptr) 357 358#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \ 359 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \ 360 (PCRE_SPTR16 **)(void*)listptr) 361 362#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \ 363 rc = 
pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \ 364 tables) 365 366#define PCRE_PRINTINT16(re, outfile, debug_lengths) \ 367 pcre16_printint(re, outfile, debug_lengths) 368 369#define PCRE_STUDY16(extra, re, options, error) \ 370 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error) 371 372#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \ 373 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize) 374 375#define PCRE_JIT_STACK_FREE16(stack) \ 376 pcre16_jit_stack_free((pcre16_jit_stack *)stack) 377 378#endif /* SUPPORT_PCRE16 */ 379 380 381/* ----- Both modes are supported; a runtime test is needed, except for 382pcre_config(), and the JIT stack functions, when it doesn't matter which 383version is called. ----- */ 384 385#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 386 387#define CHAR_SIZE (use_pcre16? 2:1) 388 389#define PCHARS(lv, p, offset, len, f) \ 390 if (use_pcre16) \ 391 PCHARS16(lv, p, offset, len, f); \ 392 else \ 393 PCHARS8(lv, p, offset, len, f) 394 395#define PCHARSV(p, offset, len, f) \ 396 if (use_pcre16) \ 397 PCHARSV16(p, offset, len, f); \ 398 else \ 399 PCHARSV8(p, offset, len, f) 400 401#define READ_CAPTURE_NAME(p, cn8, cn16, re) \ 402 if (use_pcre16) \ 403 READ_CAPTURE_NAME16(p, cn8, cn16, re); \ 404 else \ 405 READ_CAPTURE_NAME8(p, cn8, cn16, re) 406 407#define SET_PCRE_CALLOUT(callout) \ 408 if (use_pcre16) \ 409 SET_PCRE_CALLOUT16(callout); \ 410 else \ 411 SET_PCRE_CALLOUT8(callout) 412 413#define STRLEN(p) (use_pcre16? 
STRLEN16(p) : STRLEN8(p)) 414 415#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \ 416 if (use_pcre16) \ 417 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \ 418 else \ 419 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) 420 421#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \ 422 if (use_pcre16) \ 423 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \ 424 else \ 425 PCRE_COMPILE8(re, pat, options, error, erroffset, tables) 426 427#define PCRE_CONFIG pcre_config 428 429#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \ 430 namesptr, cbuffer, size) \ 431 if (use_pcre16) \ 432 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ 433 namesptr, cbuffer, size); \ 434 else \ 435 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ 436 namesptr, cbuffer, size) 437 438#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \ 439 if (use_pcre16) \ 440 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \ 441 else \ 442 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) 443 444#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \ 445 offsets, size_offsets, workspace, size_workspace) \ 446 if (use_pcre16) \ 447 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \ 448 offsets, size_offsets, workspace, size_workspace); \ 449 else \ 450 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \ 451 offsets, size_offsets, workspace, size_workspace) 452 453#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \ 454 offsets, size_offsets) \ 455 if (use_pcre16) \ 456 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \ 457 offsets, size_offsets); \ 458 else \ 459 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \ 460 offsets, size_offsets) 461 462#define PCRE_FREE_STUDY(extra) \ 463 if (use_pcre16) \ 464 PCRE_FREE_STUDY16(extra); \ 465 else \ 466 PCRE_FREE_STUDY8(extra) 467 
468#define PCRE_FREE_SUBSTRING(substring) \ 469 if (use_pcre16) \ 470 PCRE_FREE_SUBSTRING16(substring); \ 471 else \ 472 PCRE_FREE_SUBSTRING8(substring) 473 474#define PCRE_FREE_SUBSTRING_LIST(listptr) \ 475 if (use_pcre16) \ 476 PCRE_FREE_SUBSTRING_LIST16(listptr); \ 477 else \ 478 PCRE_FREE_SUBSTRING_LIST8(listptr) 479 480#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \ 481 getnamesptr, subsptr) \ 482 if (use_pcre16) \ 483 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ 484 getnamesptr, subsptr); \ 485 else \ 486 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ 487 getnamesptr, subsptr) 488 489#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \ 490 if (use_pcre16) \ 491 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \ 492 else \ 493 PCRE_GET_STRINGNUMBER8(n, rc, ptr) 494 495#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \ 496 if (use_pcre16) \ 497 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \ 498 else \ 499 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr) 500 501#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \ 502 if (use_pcre16) \ 503 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \ 504 else \ 505 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) 506 507#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \ 508 (use_pcre16 ? \ 509 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \ 510 :PCRE_JIT_STACK_ALLOC8(startsize, maxsize)) 511 512#define PCRE_JIT_STACK_FREE(stack) \ 513 if (use_pcre16) \ 514 PCRE_JIT_STACK_FREE16(stack); \ 515 else \ 516 PCRE_JIT_STACK_FREE8(stack) 517 518#define PCRE_MAKETABLES \ 519 (use_pcre16? 
pcre16_maketables() : pcre_maketables()) 520 521#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \ 522 if (use_pcre16) \ 523 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \ 524 else \ 525 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) 526 527#define PCRE_PRINTINT(re, outfile, debug_lengths) \ 528 if (use_pcre16) \ 529 PCRE_PRINTINT16(re, outfile, debug_lengths); \ 530 else \ 531 PCRE_PRINTINT8(re, outfile, debug_lengths) 532 533#define PCRE_STUDY(extra, re, options, error) \ 534 if (use_pcre16) \ 535 PCRE_STUDY16(extra, re, options, error); \ 536 else \ 537 PCRE_STUDY8(extra, re, options, error) 538 539/* ----- Only 8-bit mode is supported ----- */ 540 541#elif defined SUPPORT_PCRE8 542#define CHAR_SIZE 1 543#define PCHARS PCHARS8 544#define PCHARSV PCHARSV8 545#define READ_CAPTURE_NAME READ_CAPTURE_NAME8 546#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8 547#define STRLEN STRLEN8 548#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8 549#define PCRE_COMPILE PCRE_COMPILE8 550#define PCRE_CONFIG pcre_config 551#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8 552#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8 553#define PCRE_DFA_EXEC PCRE_DFA_EXEC8 554#define PCRE_EXEC PCRE_EXEC8 555#define PCRE_FREE_STUDY PCRE_FREE_STUDY8 556#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8 557#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8 558#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8 559#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8 560#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8 561#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8 562#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8 563#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8 564#define PCRE_MAKETABLES pcre_maketables() 565#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8 566#define PCRE_PRINTINT PCRE_PRINTINT8 567#define PCRE_STUDY PCRE_STUDY8 568 569/* ----- Only 16-bit mode is supported ----- */ 570 571#else 
572#define CHAR_SIZE 2 573#define PCHARS PCHARS16 574#define PCHARSV PCHARSV16 575#define READ_CAPTURE_NAME READ_CAPTURE_NAME16 576#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16 577#define STRLEN STRLEN16 578#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16 579#define PCRE_COMPILE PCRE_COMPILE16 580#define PCRE_CONFIG pcre16_config 581#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16 582#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16 583#define PCRE_DFA_EXEC PCRE_DFA_EXEC16 584#define PCRE_EXEC PCRE_EXEC16 585#define PCRE_FREE_STUDY PCRE_FREE_STUDY16 586#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16 587#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16 588#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16 589#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16 590#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16 591#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16 592#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16 593#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16 594#define PCRE_MAKETABLES pcre16_maketables() 595#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16 596#define PCRE_PRINTINT PCRE_PRINTINT16 597#define PCRE_STUDY PCRE_STUDY16 598#endif 599 600/* ----- End of mode-specific function call macros ----- */ 601 602 603/* Other parameters */ 604 605#ifndef CLOCKS_PER_SEC 606#ifdef CLK_TCK 607#define CLOCKS_PER_SEC CLK_TCK 608#else 609#define CLOCKS_PER_SEC 100 610#endif 611#endif 612 613#if !defined NODFA 614#define DFA_WS_DIMENSION 1000 615#endif 616 617/* This is the default loop count for timing. 
*/ 618 619#define LOOPREPEAT 500000 620 621/* Static variables */ 622 623static FILE *outfile; 624static int log_store = 0; 625static int callout_count; 626static int callout_extra; 627static int callout_fail_count; 628static int callout_fail_id; 629static int debug_lengths; 630static int first_callout; 631static int jit_was_used; 632static int locale_set = 0; 633static int show_malloc; 634static int use_utf; 635static size_t gotten_store; 636static size_t first_gotten_store = 0; 637static const unsigned char *last_callout_mark = NULL; 638 639/* The buffers grow automatically if very long input lines are encountered. */ 640 641static int buffer_size = 50000; 642static pcre_uint8 *buffer = NULL; 643static pcre_uint8 *dbuffer = NULL; 644static pcre_uint8 *pbuffer = NULL; 645 646/* Another buffer is needed translation to 16-bit character strings. It will 647obtained and extended as required. */ 648 649#ifdef SUPPORT_PCRE16 650static int buffer16_size = 0; 651static pcre_uint16 *buffer16 = NULL; 652 653#ifdef SUPPORT_PCRE8 654 655/* We need the table of operator lengths that is used for 16-bit compiling, in 656order to swap bytes in a pattern for saving/reloading testing. Luckily, the 657data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted 658appropriately for the 16-bit world. Just as a safety check, make sure that 659COMPILE_PCRE16 is *not* set. 
*/ 660 661#ifdef COMPILE_PCRE16 662#error COMPILE_PCRE16 must not be set when compiling pcretest.c 663#endif 664 665#if LINK_SIZE == 2 666#undef LINK_SIZE 667#define LINK_SIZE 1 668#elif LINK_SIZE == 3 || LINK_SIZE == 4 669#undef LINK_SIZE 670#define LINK_SIZE 2 671#else 672#error LINK_SIZE must be either 2, 3, or 4 673#endif 674 675#undef IMM2_SIZE 676#define IMM2_SIZE 1 677 678#endif /* SUPPORT_PCRE8 */ 679 680static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS }; 681#endif /* SUPPORT_PCRE16 */ 682 683/* If we have 8-bit support, default use_pcre16 to false; if there is also 68416-bit support, it can be changed by an option. If there is no 8-bit support, 685there must be 16-bit support, so default it to 1. */ 686 687#ifdef SUPPORT_PCRE8 688static int use_pcre16 = 0; 689#else 690static int use_pcre16 = 1; 691#endif 692 693/* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */ 694 695static int jit_study_bits[] = 696 { 697 PCRE_STUDY_JIT_COMPILE, 698 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, 699 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, 700 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, 701 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, 702 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, 703 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + 704 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 705}; 706 707/* Textual explanations for runtime error codes */ 708 709static const char *errtexts[] = { 710 NULL, /* 0 is no error */ 711 NULL, /* NOMATCH is handled specially */ 712 "NULL argument passed", 713 "bad option value", 714 "magic number missing", 715 "unknown opcode - pattern overwritten?", 716 "no more memory", 717 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */ 718 "match limit exceeded", 719 "callout error code", 720 NULL, /* BADUTF8/16 is handled specially */ 721 NULL, /* BADUTF8/16 offset is handled specially */ 722 NULL, /* PARTIAL is handled specially */ 723 "not used - internal 
error", 724 "internal error - pattern overwritten?", 725 "bad count value", 726 "item unsupported for DFA matching", 727 "backreference condition or recursion test not supported for DFA matching", 728 "match limit not supported for DFA matching", 729 "workspace size exceeded in DFA matching", 730 "too much recursion for DFA matching", 731 "recursion limit exceeded", 732 "not used - internal error", 733 "invalid combination of newline options", 734 "bad offset value", 735 NULL, /* SHORTUTF8/16 is handled specially */ 736 "nested recursion at the same subject position", 737 "JIT stack limit reached", 738 "pattern compiled in wrong mode: 8-bit/16-bit error", 739 "pattern compiled with other endianness", 740 "invalid data in workspace for DFA restart" 741}; 742 743 744/************************************************* 745* Alternate character tables * 746*************************************************/ 747 748/* By default, the "tables" pointer when calling PCRE is set to NULL, thereby 749using the default tables of the library. However, the T option can be used to 750select alternate sets of tables, for different kinds of testing. Note also that 751the L (locale) option also adjusts the tables. */ 752 753/* This is the set of tables distributed as default with PCRE. It recognizes 754only ASCII characters. */ 755 756static const pcre_uint8 tables0[] = { 757 758/* This table is a lower casing table. 
*/ 759 760 0, 1, 2, 3, 4, 5, 6, 7, 761 8, 9, 10, 11, 12, 13, 14, 15, 762 16, 17, 18, 19, 20, 21, 22, 23, 763 24, 25, 26, 27, 28, 29, 30, 31, 764 32, 33, 34, 35, 36, 37, 38, 39, 765 40, 41, 42, 43, 44, 45, 46, 47, 766 48, 49, 50, 51, 52, 53, 54, 55, 767 56, 57, 58, 59, 60, 61, 62, 63, 768 64, 97, 98, 99,100,101,102,103, 769 104,105,106,107,108,109,110,111, 770 112,113,114,115,116,117,118,119, 771 120,121,122, 91, 92, 93, 94, 95, 772 96, 97, 98, 99,100,101,102,103, 773 104,105,106,107,108,109,110,111, 774 112,113,114,115,116,117,118,119, 775 120,121,122,123,124,125,126,127, 776 128,129,130,131,132,133,134,135, 777 136,137,138,139,140,141,142,143, 778 144,145,146,147,148,149,150,151, 779 152,153,154,155,156,157,158,159, 780 160,161,162,163,164,165,166,167, 781 168,169,170,171,172,173,174,175, 782 176,177,178,179,180,181,182,183, 783 184,185,186,187,188,189,190,191, 784 192,193,194,195,196,197,198,199, 785 200,201,202,203,204,205,206,207, 786 208,209,210,211,212,213,214,215, 787 216,217,218,219,220,221,222,223, 788 224,225,226,227,228,229,230,231, 789 232,233,234,235,236,237,238,239, 790 240,241,242,243,244,245,246,247, 791 248,249,250,251,252,253,254,255, 792 793/* This table is a case flipping table. 
*/ 794 795 0, 1, 2, 3, 4, 5, 6, 7, 796 8, 9, 10, 11, 12, 13, 14, 15, 797 16, 17, 18, 19, 20, 21, 22, 23, 798 24, 25, 26, 27, 28, 29, 30, 31, 799 32, 33, 34, 35, 36, 37, 38, 39, 800 40, 41, 42, 43, 44, 45, 46, 47, 801 48, 49, 50, 51, 52, 53, 54, 55, 802 56, 57, 58, 59, 60, 61, 62, 63, 803 64, 97, 98, 99,100,101,102,103, 804 104,105,106,107,108,109,110,111, 805 112,113,114,115,116,117,118,119, 806 120,121,122, 91, 92, 93, 94, 95, 807 96, 65, 66, 67, 68, 69, 70, 71, 808 72, 73, 74, 75, 76, 77, 78, 79, 809 80, 81, 82, 83, 84, 85, 86, 87, 810 88, 89, 90,123,124,125,126,127, 811 128,129,130,131,132,133,134,135, 812 136,137,138,139,140,141,142,143, 813 144,145,146,147,148,149,150,151, 814 152,153,154,155,156,157,158,159, 815 160,161,162,163,164,165,166,167, 816 168,169,170,171,172,173,174,175, 817 176,177,178,179,180,181,182,183, 818 184,185,186,187,188,189,190,191, 819 192,193,194,195,196,197,198,199, 820 200,201,202,203,204,205,206,207, 821 208,209,210,211,212,213,214,215, 822 216,217,218,219,220,221,222,223, 823 224,225,226,227,228,229,230,231, 824 232,233,234,235,236,237,238,239, 825 240,241,242,243,244,245,246,247, 826 248,249,250,251,252,253,254,255, 827 828/* This table contains bit maps for various character classes. Each map is 32 829bytes long and the bits run from the least significant end of each byte. The 830classes that have their own maps are: space, xdigit, digit, upper, lower, word, 831graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ 832 833 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, 834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 835 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 836 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 837 838 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 839 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, 840 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 841 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 842 843 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 845 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 846 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 847 848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 849 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, 850 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 851 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 852 853 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 854 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, 855 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 856 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 857 858 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 859 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, 860 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 861 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 862 863 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, 864 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 865 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 866 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 867 868 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, 869 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 870 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 871 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 872 873 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, 874 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, 875 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 876 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 877 878 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, 879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, 880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 881 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 882 883/* This table identifies various classes of character by individual bits: 884 0x01 white space character 885 0x02 letter 886 0x04 decimal digit 887 
0x08 hexadecimal digit 888 0x10 alphanumeric or '_' 889 0x80 regular expression metacharacter or binary zero 890*/ 891 892 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 893 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ 894 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ 895 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ 896 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ 897 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ 898 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ 899 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ 900 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ 901 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ 902 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ 903 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ 904 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ 905 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ 906 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ 907 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ 908 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ 909 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ 910 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ 911 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ 912 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ 913 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ 914 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ 915 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ 916 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ 917 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ 918 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ 919 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ 920 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ 921 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ 922 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ 923 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ 924 925/* This is a set 
of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc. */

static const pcre_uint8 tables1[] = {
/* First 256 entries: each character value mapped to its lower-case form.
Besides A-Z, values 192-222 (except 215) map to the value 32 higher. */
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* Next 256 entries: each character value mapped to its other-case form
(upper <-> lower); values with no counterpart map to themselves. */
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,
/* Next 320 entries: ten character-class bit maps of 32 bytes each, with bits
running from the least significant end of each byte. NOTE(review): presumably
in the same order as the maps of the default table above (space, xdigit,
digit, upper, lower, word, graph, print, punct, cntrl) - confirm. */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,
/* Final 256 entries: one byte of type flags per character value.
NOTE(review): presumably the same bit meanings as documented for the default
table above (0x01 space, 0x02 letter, 0x04 digit, 0x08 hex digit, 0x10 word
character, 0x80 metacharacter or zero) - confirm. */
128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
10570,20,18,0,0,0,0,0, 105818,18,18,18,18,18,18,18, 105918,18,18,18,18,18,18,18, 106018,18,18,18,18,18,18,0, 106118,18,18,18,18,18,18,18, 106218,18,18,18,18,18,18,18, 106318,18,18,18,18,18,18,18, 106418,18,18,18,18,18,18,0, 106518,18,18,18,18,18,18,18 1066}; 1067 1068 1069 1070 1071#ifndef HAVE_STRERROR 1072/************************************************* 1073* Provide strerror() for non-ANSI libraries * 1074*************************************************/ 1075 1076/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() 1077in their libraries, but can provide the same facility by this simple 1078alternative function. */ 1079 1080extern int sys_nerr; 1081extern char *sys_errlist[]; 1082 1083char * 1084strerror(int n) 1085{ 1086if (n < 0 || n >= sys_nerr) return "unknown error number"; 1087return sys_errlist[n]; 1088} 1089#endif /* HAVE_STRERROR */ 1090 1091 1092/************************************************* 1093* JIT memory callback * 1094*************************************************/ 1095 1096static pcre_jit_stack* jit_callback(void *arg) 1097{ 1098jit_was_used = TRUE; 1099return (pcre_jit_stack *)arg; 1100} 1101 1102 1103#if !defined NOUTF || defined SUPPORT_PCRE16 1104/************************************************* 1105* Convert UTF-8 string to value * 1106*************************************************/ 1107 1108/* This function takes one or more bytes that represents a UTF-8 character, 1109and returns the value of the character. 
1110 1111Argument: 1112 utf8bytes a pointer to the byte vector 1113 vptr a pointer to an int to receive the value 1114 1115Returns: > 0 => the number of bytes consumed 1116 -6 to 0 => malformed UTF-8 character at offset = (-return) 1117*/ 1118 1119static int 1120utf82ord(pcre_uint8 *utf8bytes, int *vptr) 1121{ 1122int c = *utf8bytes++; 1123int d = c; 1124int i, j, s; 1125 1126for (i = -1; i < 6; i++) /* i is number of additional bytes */ 1127 { 1128 if ((d & 0x80) == 0) break; 1129 d <<= 1; 1130 } 1131 1132if (i == -1) { *vptr = c; return 1; } /* ascii character */ 1133if (i == 0 || i == 6) return 0; /* invalid UTF-8 */ 1134 1135/* i now has a value in the range 1-5 */ 1136 1137s = 6*i; 1138d = (c & utf8_table3[i]) << s; 1139 1140for (j = 0; j < i; j++) 1141 { 1142 c = *utf8bytes++; 1143 if ((c & 0xc0) != 0x80) return -(j+1); 1144 s -= 6; 1145 d |= (c & 0x3f) << s; 1146 } 1147 1148/* Check that encoding was the correct unique one */ 1149 1150for (j = 0; j < utf8_table1_size; j++) 1151 if (d <= utf8_table1[j]) break; 1152if (j != i) return -(i+1); 1153 1154/* Valid value */ 1155 1156*vptr = d; 1157return i+1; 1158} 1159#endif /* NOUTF || SUPPORT_PCRE16 */ 1160 1161 1162 1163#if !defined NOUTF || defined SUPPORT_PCRE16 1164/************************************************* 1165* Convert character value to UTF-8 * 1166*************************************************/ 1167 1168/* This function takes an integer value in the range 0 - 0x7fffffff 1169and encodes it as a UTF-8 character in 0 to 6 bytes. 
1170 1171Arguments: 1172 cvalue the character value 1173 utf8bytes pointer to buffer for result - at least 6 bytes long 1174 1175Returns: number of characters placed in the buffer 1176*/ 1177 1178static int 1179ord2utf8(int cvalue, pcre_uint8 *utf8bytes) 1180{ 1181register int i, j; 1182for (i = 0; i < utf8_table1_size; i++) 1183 if (cvalue <= utf8_table1[i]) break; 1184utf8bytes += i; 1185for (j = i; j > 0; j--) 1186 { 1187 *utf8bytes-- = 0x80 | (cvalue & 0x3f); 1188 cvalue >>= 6; 1189 } 1190*utf8bytes = utf8_table2[i] | cvalue; 1191return i + 1; 1192} 1193#endif 1194 1195 1196#ifdef SUPPORT_PCRE16 1197/************************************************* 1198* Convert a string to 16-bit * 1199*************************************************/ 1200 1201/* In non-UTF mode, the space needed for a 16-bit string is exactly double the 12028-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than 1203double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4 1204in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The 1205result is always left in buffer16. 1206 1207Note that this function does not object to surrogate values. This is 1208deliberate; it makes it possible to construct UTF-16 strings that are invalid, 1209for the purpose of testing that they are correctly faulted. 1210 1211Patterns to be converted are either plain ASCII or UTF-8; data lines are always 1212in UTF-8 so that values greater than 255 can be handled. 
1213 1214Arguments: 1215 data TRUE if converting a data line; FALSE for a regex 1216 p points to a byte string 1217 utf true if UTF-8 (to be converted to UTF-16) 1218 len number of bytes in the string (excluding trailing zero) 1219 1220Returns: number of 16-bit data items used (excluding trailing zero) 1221 OR -1 if a UTF-8 string is malformed 1222 OR -2 if a value > 0x10ffff is encountered 1223 OR -3 if a value > 0xffff is encountered when not in UTF mode 1224*/ 1225 1226static int 1227to16(int data, pcre_uint8 *p, int utf, int len) 1228{ 1229pcre_uint16 *pp; 1230 1231if (buffer16_size < 2*len + 2) 1232 { 1233 if (buffer16 != NULL) free(buffer16); 1234 buffer16_size = 2*len + 2; 1235 buffer16 = (pcre_uint16 *)malloc(buffer16_size); 1236 if (buffer16 == NULL) 1237 { 1238 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size); 1239 exit(1); 1240 } 1241 } 1242 1243pp = buffer16; 1244 1245if (!utf && !data) 1246 { 1247 while (len-- > 0) *pp++ = *p++; 1248 } 1249 1250else 1251 { 1252 int c = 0; 1253 while (len > 0) 1254 { 1255 int chlen = utf82ord(p, &c); 1256 if (chlen <= 0) return -1; 1257 if (c > 0x10ffff) return -2; 1258 p += chlen; 1259 len -= chlen; 1260 if (c < 0x10000) *pp++ = c; else 1261 { 1262 if (!utf) return -3; 1263 c -= 0x10000; 1264 *pp++ = 0xD800 | (c >> 10); 1265 *pp++ = 0xDC00 | (c & 0x3ff); 1266 } 1267 } 1268 } 1269 1270*pp = 0; 1271return pp - buffer16; 1272} 1273#endif 1274 1275 1276/************************************************* 1277* Read or extend an input line * 1278*************************************************/ 1279 1280/* Input lines are read into buffer, but both patterns and data lines can be 1281continued over multiple input lines. In addition, if the buffer fills up, we 1282want to automatically expand it so as to be able to handle extremely large 1283lines that are needed for certain stress tests. 
When the input buffer is 1284expanded, the other two buffers must also be expanded likewise, and the 1285contents of pbuffer, which are a copy of the input for callouts, must be 1286preserved (for when expansion happens for a data line). This is not the most 1287optimal way of handling this, but hey, this is just a test program! 1288 1289Arguments: 1290 f the file to read 1291 start where in buffer to start (this *must* be within buffer) 1292 prompt for stdin or readline() 1293 1294Returns: pointer to the start of new data 1295 could be a copy of start, or could be moved 1296 NULL if no data read and EOF reached 1297*/ 1298 1299static pcre_uint8 * 1300extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt) 1301{ 1302pcre_uint8 *here = start; 1303 1304for (;;) 1305 { 1306 size_t rlen = (size_t)(buffer_size - (here - buffer)); 1307 1308 if (rlen > 1000) 1309 { 1310 int dlen; 1311 1312 /* If libreadline or libedit support is required, use readline() to read a 1313 line if the input is a terminal. Note that readline() removes the trailing 1314 newline, so we must put it back again, to be compatible with fgets(). */ 1315 1316#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 1317 if (isatty(fileno(f))) 1318 { 1319 size_t len; 1320 char *s = readline(prompt); 1321 if (s == NULL) return (here == start)? NULL : start; 1322 len = strlen(s); 1323 if (len > 0) add_history(s); 1324 if (len > rlen - 1) len = rlen - 1; 1325 memcpy(here, s, len); 1326 here[len] = '\n'; 1327 here[len+1] = 0; 1328 free(s); 1329 } 1330 else 1331#endif 1332 1333 /* Read the next line by normal means, prompting if the file is stdin. */ 1334 1335 { 1336 if (f == stdin) printf("%s", prompt); 1337 if (fgets((char *)here, rlen, f) == NULL) 1338 return (here == start)? 
NULL : start; 1339 } 1340 1341 dlen = (int)strlen((char *)here); 1342 if (dlen > 0 && here[dlen - 1] == '\n') return start; 1343 here += dlen; 1344 } 1345 1346 else 1347 { 1348 int new_buffer_size = 2*buffer_size; 1349 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size); 1350 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size); 1351 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size); 1352 1353 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL) 1354 { 1355 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size); 1356 exit(1); 1357 } 1358 1359 memcpy(new_buffer, buffer, buffer_size); 1360 memcpy(new_pbuffer, pbuffer, buffer_size); 1361 1362 buffer_size = new_buffer_size; 1363 1364 start = new_buffer + (start - buffer); 1365 here = new_buffer + (here - buffer); 1366 1367 free(buffer); 1368 free(dbuffer); 1369 free(pbuffer); 1370 1371 buffer = new_buffer; 1372 dbuffer = new_dbuffer; 1373 pbuffer = new_pbuffer; 1374 } 1375 } 1376 1377return NULL; /* Control never gets here */ 1378} 1379 1380 1381 1382/************************************************* 1383* Read number from string * 1384*************************************************/ 1385 1386/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess 1387around with conditional compilation, just do the job by hand. It is only used 1388for unpicking arguments, so just keep it simple. 
1389 1390Arguments: 1391 str string to be converted 1392 endptr where to put the end pointer 1393 1394Returns: the unsigned long 1395*/ 1396 1397static int 1398get_value(pcre_uint8 *str, pcre_uint8 **endptr) 1399{ 1400int result = 0; 1401while(*str != 0 && isspace(*str)) str++; 1402while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0'); 1403*endptr = str; 1404return(result); 1405} 1406 1407 1408 1409/************************************************* 1410* Print one character * 1411*************************************************/ 1412 1413/* Print a single character either literally, or as a hex escape. */ 1414 1415static int pchar(int c, FILE *f) 1416{ 1417if (PRINTOK(c)) 1418 { 1419 if (f != NULL) fprintf(f, "%c", c); 1420 return 1; 1421 } 1422 1423if (c < 0x100) 1424 { 1425 if (use_utf) 1426 { 1427 if (f != NULL) fprintf(f, "\\x{%02x}", c); 1428 return 6; 1429 } 1430 else 1431 { 1432 if (f != NULL) fprintf(f, "\\x%02x", c); 1433 return 4; 1434 } 1435 } 1436 1437if (f != NULL) fprintf(f, "\\x{%02x}", c); 1438return (c <= 0x000000ff)? 6 : 1439 (c <= 0x00000fff)? 7 : 1440 (c <= 0x0000ffff)? 8 : 1441 (c <= 0x000fffff)? 9 : 10; 1442} 1443 1444 1445 1446#ifdef SUPPORT_PCRE8 1447/************************************************* 1448* Print 8-bit character string * 1449*************************************************/ 1450 1451/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed. 1452If handed a NULL file, just counts chars without printing. 
*/ 1453 1454static int pchars(pcre_uint8 *p, int length, FILE *f) 1455{ 1456int c = 0; 1457int yield = 0; 1458 1459if (length < 0) 1460 length = strlen((char *)p); 1461 1462while (length-- > 0) 1463 { 1464#if !defined NOUTF 1465 if (use_utf) 1466 { 1467 int rc = utf82ord(p, &c); 1468 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */ 1469 { 1470 length -= rc - 1; 1471 p += rc; 1472 yield += pchar(c, f); 1473 continue; 1474 } 1475 } 1476#endif 1477 c = *p++; 1478 yield += pchar(c, f); 1479 } 1480 1481return yield; 1482} 1483#endif 1484 1485 1486 1487#ifdef SUPPORT_PCRE16 1488/************************************************* 1489* Find length of 0-terminated 16-bit string * 1490*************************************************/ 1491 1492static int strlen16(PCRE_SPTR16 p) 1493{ 1494int len = 0; 1495while (*p++ != 0) len++; 1496return len; 1497} 1498#endif /* SUPPORT_PCRE16 */ 1499 1500 1501#ifdef SUPPORT_PCRE16 1502/************************************************* 1503* Print 16-bit character string * 1504*************************************************/ 1505 1506/* Must handle UTF-16 strings in utf mode. Yields number of characters printed. 1507If handed a NULL file, just counts chars without printing. 
*/ 1508 1509static int pchars16(PCRE_SPTR16 p, int length, FILE *f) 1510{ 1511int yield = 0; 1512 1513if (length < 0) 1514 length = strlen16(p); 1515 1516while (length-- > 0) 1517 { 1518 int c = *p++ & 0xffff; 1519#if !defined NOUTF 1520 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0) 1521 { 1522 int d = *p & 0xffff; 1523 if (d >= 0xDC00 && d < 0xDFFF) 1524 { 1525 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000; 1526 length--; 1527 p++; 1528 } 1529 } 1530#endif 1531 yield += pchar(c, f); 1532 } 1533 1534return yield; 1535} 1536#endif /* SUPPORT_PCRE16 */ 1537 1538 1539 1540#ifdef SUPPORT_PCRE8 1541/************************************************* 1542* Read a capture name (8-bit) and check it * 1543*************************************************/ 1544 1545static pcre_uint8 * 1546read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re) 1547{ 1548pcre_uint8 *npp = *pp; 1549while (isalnum(*p)) *npp++ = *p++; 1550*npp++ = 0; 1551*npp = 0; 1552if (pcre_get_stringnumber(re, (char *)(*pp)) < 0) 1553 { 1554 fprintf(outfile, "no parentheses with name \""); 1555 PCHARSV(*pp, 0, -1, outfile); 1556 fprintf(outfile, "\"\n"); 1557 } 1558 1559*pp = npp; 1560return p; 1561} 1562#endif /* SUPPORT_PCRE8 */ 1563 1564 1565 1566#ifdef SUPPORT_PCRE16 1567/************************************************* 1568* Read a capture name (16-bit) and check it * 1569*************************************************/ 1570 1571/* Note that the text being read is 8-bit. 
*/ 1572 1573static pcre_uint8 * 1574read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re) 1575{ 1576pcre_uint16 *npp = *pp; 1577while (isalnum(*p)) *npp++ = *p++; 1578*npp++ = 0; 1579*npp = 0; 1580if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0) 1581 { 1582 fprintf(outfile, "no parentheses with name \""); 1583 PCHARSV(*pp, 0, -1, outfile); 1584 fprintf(outfile, "\"\n"); 1585 } 1586*pp = npp; 1587return p; 1588} 1589#endif /* SUPPORT_PCRE16 */ 1590 1591 1592 1593/************************************************* 1594* Callout function * 1595*************************************************/ 1596 1597/* Called from PCRE as a result of the (?C) item. We print out where we are in 1598the match. Yield zero unless more callouts than the fail count, or the callout 1599data is not zero. */ 1600 1601static int callout(pcre_callout_block *cb) 1602{ 1603FILE *f = (first_callout | callout_extra)? outfile : NULL; 1604int i, pre_start, post_start, subject_length; 1605 1606if (callout_extra) 1607 { 1608 fprintf(f, "Callout %d: last capture = %d\n", 1609 cb->callout_number, cb->capture_last); 1610 1611 for (i = 0; i < cb->capture_top * 2; i += 2) 1612 { 1613 if (cb->offset_vector[i] < 0) 1614 fprintf(f, "%2d: <unset>\n", i/2); 1615 else 1616 { 1617 fprintf(f, "%2d: ", i/2); 1618 PCHARSV(cb->subject, cb->offset_vector[i], 1619 cb->offset_vector[i+1] - cb->offset_vector[i], f); 1620 fprintf(f, "\n"); 1621 } 1622 } 1623 } 1624 1625/* Re-print the subject in canonical form, the first time or if giving full 1626datails. On subsequent calls in the same match, we use pchars just to find the 1627printed lengths of the substrings. 
*/ 1628 1629if (f != NULL) fprintf(f, "--->"); 1630 1631PCHARS(pre_start, cb->subject, 0, cb->start_match, f); 1632PCHARS(post_start, cb->subject, cb->start_match, 1633 cb->current_position - cb->start_match, f); 1634 1635PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL); 1636 1637PCHARSV(cb->subject, cb->current_position, 1638 cb->subject_length - cb->current_position, f); 1639 1640if (f != NULL) fprintf(f, "\n"); 1641 1642/* Always print appropriate indicators, with callout number if not already 1643shown. For automatic callouts, show the pattern offset. */ 1644 1645if (cb->callout_number == 255) 1646 { 1647 fprintf(outfile, "%+3d ", cb->pattern_position); 1648 if (cb->pattern_position > 99) fprintf(outfile, "\n "); 1649 } 1650else 1651 { 1652 if (callout_extra) fprintf(outfile, " "); 1653 else fprintf(outfile, "%3d ", cb->callout_number); 1654 } 1655 1656for (i = 0; i < pre_start; i++) fprintf(outfile, " "); 1657fprintf(outfile, "^"); 1658 1659if (post_start > 0) 1660 { 1661 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " "); 1662 fprintf(outfile, "^"); 1663 } 1664 1665for (i = 0; i < subject_length - pre_start - post_start + 4; i++) 1666 fprintf(outfile, " "); 1667 1668fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length, 1669 pbuffer + cb->pattern_position); 1670 1671fprintf(outfile, "\n"); 1672first_callout = 0; 1673 1674if (cb->mark != last_callout_mark) 1675 { 1676 if (cb->mark == NULL) 1677 fprintf(outfile, "Latest Mark: <unset>\n"); 1678 else 1679 { 1680 fprintf(outfile, "Latest Mark: "); 1681 PCHARSV(cb->mark, 0, -1, outfile); 1682 putc('\n', outfile); 1683 } 1684 last_callout_mark = cb->mark; 1685 } 1686 1687if (cb->callout_data != NULL) 1688 { 1689 int callout_data = *((int *)(cb->callout_data)); 1690 if (callout_data != 0) 1691 { 1692 fprintf(outfile, "Callout data = %d\n", callout_data); 1693 return callout_data; 1694 } 1695 } 1696 1697return (cb->callout_number != callout_fail_id)? 
0 : 1698 (++callout_count >= callout_fail_count)? 1 : 0; 1699} 1700 1701 1702/************************************************* 1703* Local malloc functions * 1704*************************************************/ 1705 1706/* Alternative malloc function, to test functionality and save the size of a 1707compiled re, which is the first store request that pcre_compile() makes. The 1708show_malloc variable is set only during matching. */ 1709 1710static void *new_malloc(size_t size) 1711{ 1712void *block = malloc(size); 1713gotten_store = size; 1714if (first_gotten_store == 0) first_gotten_store = size; 1715if (show_malloc) 1716 fprintf(outfile, "malloc %3d %p\n", (int)size, block); 1717return block; 1718} 1719 1720static void new_free(void *block) 1721{ 1722if (show_malloc) 1723 fprintf(outfile, "free %p\n", block); 1724free(block); 1725} 1726 1727/* For recursion malloc/free, to test stacking calls */ 1728 1729static void *stack_malloc(size_t size) 1730{ 1731void *block = malloc(size); 1732if (show_malloc) 1733 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block); 1734return block; 1735} 1736 1737static void stack_free(void *block) 1738{ 1739if (show_malloc) 1740 fprintf(outfile, "stack_free %p\n", block); 1741free(block); 1742} 1743 1744 1745/************************************************* 1746* Call pcre_fullinfo() * 1747*************************************************/ 1748 1749/* Get one piece of information from the pcre_fullinfo() function. When only 1750one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct 1751value, but the code is defensive. 
1752 1753Arguments: 1754 re compiled regex 1755 study study data 1756 option PCRE_INFO_xxx option 1757 ptr where to put the data 1758 1759Returns: 0 when OK, < 0 on error 1760*/ 1761 1762static int 1763new_info(pcre *re, pcre_extra *study, int option, void *ptr) 1764{ 1765int rc; 1766 1767if (use_pcre16) 1768#ifdef SUPPORT_PCRE16 1769 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr); 1770#else 1771 rc = PCRE_ERROR_BADMODE; 1772#endif 1773else 1774#ifdef SUPPORT_PCRE8 1775 rc = pcre_fullinfo(re, study, option, ptr); 1776#else 1777 rc = PCRE_ERROR_BADMODE; 1778#endif 1779 1780if (rc < 0) 1781 { 1782 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, 1783 use_pcre16? "16" : "", option); 1784 if (rc == PCRE_ERROR_BADMODE) 1785 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in " 1786 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16"); 1787 } 1788 1789return rc; 1790} 1791 1792 1793 1794/************************************************* 1795* Swap byte functions * 1796*************************************************/ 1797 1798/* The following functions swap the bytes of a pcre_uint16 and pcre_uint32 1799value, respectively. 1800 1801Arguments: 1802 value any number 1803 1804Returns: the byte swapped value 1805*/ 1806 1807static pcre_uint32 1808swap_uint32(pcre_uint32 value) 1809{ 1810return ((value & 0x000000ff) << 24) | 1811 ((value & 0x0000ff00) << 8) | 1812 ((value & 0x00ff0000) >> 8) | 1813 (value >> 24); 1814} 1815 1816static pcre_uint16 1817swap_uint16(pcre_uint16 value) 1818{ 1819return (value >> 8) | (value << 8); 1820} 1821 1822 1823 1824/************************************************* 1825* Flip bytes in a compiled pattern * 1826*************************************************/ 1827 1828/* This function is called if the 'F' option was present on a pattern that is 1829to be written to a file. We flip the bytes of all the integer fields in the 1830regex data block and the study block. 
In 16-bit mode this also flips relevant 1831bytes in the pattern itself. This is to make it possible to test PCRE's 1832ability to reload byte-flipped patterns, e.g. those compiled on a different 1833architecture. */ 1834 1835static void 1836regexflip(pcre *ere, pcre_extra *extra) 1837{ 1838REAL_PCRE *re = (REAL_PCRE *)ere; 1839#ifdef SUPPORT_PCRE16 1840int op; 1841pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset; 1842int length = re->name_count * re->name_entry_size; 1843#ifdef SUPPORT_UTF 1844BOOL utf = (re->options & PCRE_UTF16) != 0; 1845BOOL utf16_char = FALSE; 1846#endif /* SUPPORT_UTF */ 1847#endif /* SUPPORT_PCRE16 */ 1848 1849/* Always flip the bytes in the main data block and study blocks. */ 1850 1851re->magic_number = REVERSED_MAGIC_NUMBER; 1852re->size = swap_uint32(re->size); 1853re->options = swap_uint32(re->options); 1854re->flags = swap_uint16(re->flags); 1855re->top_bracket = swap_uint16(re->top_bracket); 1856re->top_backref = swap_uint16(re->top_backref); 1857re->first_char = swap_uint16(re->first_char); 1858re->req_char = swap_uint16(re->req_char); 1859re->name_table_offset = swap_uint16(re->name_table_offset); 1860re->name_entry_size = swap_uint16(re->name_entry_size); 1861re->name_count = swap_uint16(re->name_count); 1862 1863if (extra != NULL) 1864 { 1865 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data); 1866 rsd->size = swap_uint32(rsd->size); 1867 rsd->flags = swap_uint32(rsd->flags); 1868 rsd->minlength = swap_uint32(rsd->minlength); 1869 } 1870 1871/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes 1872in the name table, if present, and then in the pattern itself. */ 1873 1874#ifdef SUPPORT_PCRE16 1875if (!use_pcre16) return; 1876 1877while(TRUE) 1878 { 1879 /* Swap previous characters. 
*/ 1880 while (length-- > 0) 1881 { 1882 *ptr = swap_uint16(*ptr); 1883 ptr++; 1884 } 1885#ifdef SUPPORT_UTF 1886 if (utf16_char) 1887 { 1888 if ((ptr[-1] & 0xfc00) == 0xd800) 1889 { 1890 /* We know that there is only one extra character in UTF-16. */ 1891 *ptr = swap_uint16(*ptr); 1892 ptr++; 1893 } 1894 } 1895 utf16_char = FALSE; 1896#endif /* SUPPORT_UTF */ 1897 1898 /* Get next opcode. */ 1899 1900 length = 0; 1901 op = *ptr; 1902 *ptr++ = swap_uint16(op); 1903 1904 switch (op) 1905 { 1906 case OP_END: 1907 return; 1908 1909#ifdef SUPPORT_UTF 1910 case OP_CHAR: 1911 case OP_CHARI: 1912 case OP_NOT: 1913 case OP_NOTI: 1914 case OP_STAR: 1915 case OP_MINSTAR: 1916 case OP_PLUS: 1917 case OP_MINPLUS: 1918 case OP_QUERY: 1919 case OP_MINQUERY: 1920 case OP_UPTO: 1921 case OP_MINUPTO: 1922 case OP_EXACT: 1923 case OP_POSSTAR: 1924 case OP_POSPLUS: 1925 case OP_POSQUERY: 1926 case OP_POSUPTO: 1927 case OP_STARI: 1928 case OP_MINSTARI: 1929 case OP_PLUSI: 1930 case OP_MINPLUSI: 1931 case OP_QUERYI: 1932 case OP_MINQUERYI: 1933 case OP_UPTOI: 1934 case OP_MINUPTOI: 1935 case OP_EXACTI: 1936 case OP_POSSTARI: 1937 case OP_POSPLUSI: 1938 case OP_POSQUERYI: 1939 case OP_POSUPTOI: 1940 case OP_NOTSTAR: 1941 case OP_NOTMINSTAR: 1942 case OP_NOTPLUS: 1943 case OP_NOTMINPLUS: 1944 case OP_NOTQUERY: 1945 case OP_NOTMINQUERY: 1946 case OP_NOTUPTO: 1947 case OP_NOTMINUPTO: 1948 case OP_NOTEXACT: 1949 case OP_NOTPOSSTAR: 1950 case OP_NOTPOSPLUS: 1951 case OP_NOTPOSQUERY: 1952 case OP_NOTPOSUPTO: 1953 case OP_NOTSTARI: 1954 case OP_NOTMINSTARI: 1955 case OP_NOTPLUSI: 1956 case OP_NOTMINPLUSI: 1957 case OP_NOTQUERYI: 1958 case OP_NOTMINQUERYI: 1959 case OP_NOTUPTOI: 1960 case OP_NOTMINUPTOI: 1961 case OP_NOTEXACTI: 1962 case OP_NOTPOSSTARI: 1963 case OP_NOTPOSPLUSI: 1964 case OP_NOTPOSQUERYI: 1965 case OP_NOTPOSUPTOI: 1966 if (utf) utf16_char = TRUE; 1967#endif 1968 /* Fall through. 
*/ 1969 1970 default: 1971 length = OP_lengths16[op] - 1; 1972 break; 1973 1974 case OP_CLASS: 1975 case OP_NCLASS: 1976 /* Skip the character bit map. */ 1977 ptr += 32/sizeof(pcre_uint16); 1978 length = 0; 1979 break; 1980 1981 case OP_XCLASS: 1982 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */ 1983 if (LINK_SIZE > 1) 1984 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1])) 1985 - (1 + LINK_SIZE + 1)); 1986 else 1987 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1)); 1988 1989 /* Reverse the size of the XCLASS instance. */ 1990 *ptr = swap_uint16(*ptr); 1991 ptr++; 1992 if (LINK_SIZE > 1) 1993 { 1994 *ptr = swap_uint16(*ptr); 1995 ptr++; 1996 } 1997 1998 op = *ptr; 1999 *ptr = swap_uint16(op); 2000 ptr++; 2001 if ((op & XCL_MAP) != 0) 2002 { 2003 /* Skip the character bit map. */ 2004 ptr += 32/sizeof(pcre_uint16); 2005 length -= 32/sizeof(pcre_uint16); 2006 } 2007 break; 2008 } 2009 } 2010/* Control should never reach here in 16 bit mode. */ 2011#endif /* SUPPORT_PCRE16 */ 2012} 2013 2014 2015 2016/************************************************* 2017* Check match or recursion limit * 2018*************************************************/ 2019 2020static int 2021check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len, 2022 int start_offset, int options, int *use_offsets, int use_size_offsets, 2023 int flag, unsigned long int *limit, int errnumber, const char *msg) 2024{ 2025int count; 2026int min = 0; 2027int mid = 64; 2028int max = -1; 2029 2030extra->flags |= flag; 2031 2032for (;;) 2033 { 2034 *limit = mid; 2035 2036 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, 2037 use_offsets, use_size_offsets); 2038 2039 if (count == errnumber) 2040 { 2041 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */ 2042 min = mid; 2043 mid = (mid == max - 1)? max : (max > 0)? 
(min + max)/2 : mid*2; 2044 } 2045 2046 else if (count >= 0 || count == PCRE_ERROR_NOMATCH || 2047 count == PCRE_ERROR_PARTIAL) 2048 { 2049 if (mid == min + 1) 2050 { 2051 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid); 2052 break; 2053 } 2054 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */ 2055 max = mid; 2056 mid = (min + mid)/2; 2057 } 2058 else break; /* Some other error */ 2059 } 2060 2061extra->flags &= ~flag; 2062return count; 2063} 2064 2065 2066 2067/************************************************* 2068* Case-independent strncmp() function * 2069*************************************************/ 2070 2071/* 2072Arguments: 2073 s first string 2074 t second string 2075 n number of characters to compare 2076 2077Returns: < 0, = 0, or > 0, according to the comparison 2078*/ 2079 2080static int 2081strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n) 2082{ 2083while (n--) 2084 { 2085 int c = tolower(*s++) - tolower(*t++); 2086 if (c) return c; 2087 } 2088return 0; 2089} 2090 2091 2092 2093/************************************************* 2094* Check newline indicator * 2095*************************************************/ 2096 2097/* This is used both at compile and run-time to check for <xxx> escapes. Print 2098a message and return 0 if there is no match. 
2099 2100Arguments: 2101 p points after the leading '<' 2102 f file for error message 2103 2104Returns: appropriate PCRE_NEWLINE_xxx flags, or 0 2105*/ 2106 2107static int 2108check_newline(pcre_uint8 *p, FILE *f) 2109{ 2110if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR; 2111if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF; 2112if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF; 2113if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF; 2114if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY; 2115if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF; 2116if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE; 2117fprintf(f, "Unknown newline type at: <%s\n", p); 2118return 0; 2119} 2120 2121 2122 2123/************************************************* 2124* Usage function * 2125*************************************************/ 2126 2127static void 2128usage(void) 2129{ 2130printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n"); 2131printf("Input and output default to stdin and stdout.\n"); 2132#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 2133printf("If input is a terminal, readline() is used to read from it.\n"); 2134#else 2135printf("This version of pcretest is not linked with readline().\n"); 2136#endif 2137printf("\nOptions:\n"); 2138#ifdef SUPPORT_PCRE16 2139printf(" -16 use the 16-bit library\n"); 2140#endif 2141printf(" -b show compiled code\n"); 2142printf(" -C show PCRE compile-time options and exit\n"); 2143printf(" -C arg show a specific compile-time option\n"); 2144printf(" and exit with its value. 
The arg can be:\n"); 2145printf(" linksize internal link size [2, 3, 4]\n"); 2146printf(" pcre8 8 bit library support enabled [0, 1]\n"); 2147printf(" pcre16 16 bit library support enabled [0, 1]\n"); 2148printf(" utf Unicode Transformation Format supported [0, 1]\n"); 2149printf(" ucp Unicode Properties supported [0, 1]\n"); 2150printf(" jit Just-in-time compiler supported [0, 1]\n"); 2151printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"); 2152printf(" -d debug: show compiled code and information (-b and -i)\n"); 2153#if !defined NODFA 2154printf(" -dfa force DFA matching for all subjects\n"); 2155#endif 2156printf(" -help show usage information\n"); 2157printf(" -i show information about compiled patterns\n" 2158 " -M find MATCH_LIMIT minimum for each subject\n" 2159 " -m output memory used information\n" 2160 " -o <n> set size of offsets vector to <n>\n"); 2161#if !defined NOPOSIX 2162printf(" -p use POSIX interface\n"); 2163#endif 2164printf(" -q quiet: do not output PCRE version number at start\n"); 2165printf(" -S <n> set stack size to <n> megabytes\n"); 2166printf(" -s force each pattern to be studied at basic level\n" 2167 " -s+ force each pattern to be studied, using JIT if available\n" 2168 " -s++ ditto, verifying when JIT was actually used\n" 2169 " -s+n force each pattern to be studied, using JIT if available,\n" 2170 " where 1 <= n <= 7 selects JIT options\n" 2171 " -s++n ditto, verifying when JIT was actually used\n" 2172 " -t time compilation and execution\n"); 2173printf(" -t <n> time compilation and execution, repeating <n> times\n"); 2174printf(" -tm time execution (matching) only\n"); 2175printf(" -tm <n> time execution (matching) only, repeating <n> times\n"); 2176} 2177 2178 2179 2180/************************************************* 2181* Main Program * 2182*************************************************/ 2183 2184/* Read lines from named file or stdin and write to named file or stdout; lines 2185consist of a regular 
expression, in delimiters and optionally followed by 2186options, followed by a set of test data, terminated by an empty line. */ 2187 2188int main(int argc, char **argv) 2189{ 2190FILE *infile = stdin; 2191const char *version; 2192int options = 0; 2193int study_options = 0; 2194int default_find_match_limit = FALSE; 2195int op = 1; 2196int timeit = 0; 2197int timeitm = 0; 2198int showinfo = 0; 2199int showstore = 0; 2200int force_study = -1; 2201int force_study_options = 0; 2202int quiet = 0; 2203int size_offsets = 45; 2204int size_offsets_max; 2205int *offsets = NULL; 2206int debug = 0; 2207int done = 0; 2208int all_use_dfa = 0; 2209int verify_jit = 0; 2210int yield = 0; 2211int stack_size; 2212 2213#if !defined NOPOSIX 2214int posix = 0; 2215#endif 2216#if !defined NODFA 2217int *dfa_workspace = NULL; 2218#endif 2219 2220pcre_jit_stack *jit_stack = NULL; 2221 2222/* These vectors store, end-to-end, a list of zero-terminated captured 2223substring names, each list itself being terminated by an empty name. Assume 2224that 1024 is plenty long enough for the few names we'll be testing. It is 2225easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version 2226for the actual memory, to ensure alignment. */ 2227 2228pcre_uint16 copynames[1024]; 2229pcre_uint16 getnames[1024]; 2230 2231#ifdef SUPPORT_PCRE16 2232pcre_uint16 *cn16ptr; 2233pcre_uint16 *gn16ptr; 2234#endif 2235 2236#ifdef SUPPORT_PCRE8 2237pcre_uint8 *copynames8 = (pcre_uint8 *)copynames; 2238pcre_uint8 *getnames8 = (pcre_uint8 *)getnames; 2239pcre_uint8 *cn8ptr; 2240pcre_uint8 *gn8ptr; 2241#endif 2242 2243/* Get buffers from malloc() so that valgrind will check their misuse when 2244debugging. They grow automatically when very long lines are read. The 16-bit 2245buffer (buffer16) is obtained only if needed. 
*/ 2246 2247buffer = (pcre_uint8 *)malloc(buffer_size); 2248dbuffer = (pcre_uint8 *)malloc(buffer_size); 2249pbuffer = (pcre_uint8 *)malloc(buffer_size); 2250 2251/* The outfile variable is static so that new_malloc can use it. */ 2252 2253outfile = stdout; 2254 2255/* The following _setmode() stuff is some Windows magic that tells its runtime 2256library to translate CRLF into a single LF character. At least, that's what 2257I've been told: never having used Windows I take this all on trust. Originally 2258it set 0x8000, but then I was advised that _O_BINARY was better. */ 2259 2260#if defined(_WIN32) || defined(WIN32) 2261_setmode( _fileno( stdout ), _O_BINARY ); 2262#endif 2263 2264/* Get the version number: both pcre_version() and pcre16_version() give the 2265same answer. We just need to ensure that we call one that is available. */ 2266 2267#ifdef SUPPORT_PCRE8 2268version = pcre_version(); 2269#else 2270version = pcre16_version(); 2271#endif 2272 2273/* Scan options */ 2274 2275while (argc > 1 && argv[op][0] == '-') 2276 { 2277 pcre_uint8 *endptr; 2278 char *arg = argv[op]; 2279 2280 if (strcmp(arg, "-m") == 0) showstore = 1; 2281 else if (strcmp(arg, "-s") == 0) force_study = 0; 2282 2283 else if (strncmp(arg, "-s+", 3) == 0) 2284 { 2285 arg += 3; 2286 if (*arg == '+') { arg++; verify_jit = TRUE; } 2287 force_study = 1; 2288 if (*arg == 0) 2289 force_study_options = jit_study_bits[6]; 2290 else if (*arg >= '1' && *arg <= '7') 2291 force_study_options = jit_study_bits[*arg - '1']; 2292 else goto BAD_ARG; 2293 } 2294 else if (strcmp(arg, "-16") == 0) 2295 { 2296#ifdef SUPPORT_PCRE16 2297 use_pcre16 = 1; 2298#else 2299 printf("** This version of PCRE was built without 16-bit support\n"); 2300 exit(1); 2301#endif 2302 } 2303 else if (strcmp(arg, "-q") == 0) quiet = 1; 2304 else if (strcmp(arg, "-b") == 0) debug = 1; 2305 else if (strcmp(arg, "-i") == 0) showinfo = 1; 2306 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1; 2307 else if (strcmp(arg, "-M") == 
0) default_find_match_limit = TRUE; 2308#if !defined NODFA 2309 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1; 2310#endif 2311 else if (strcmp(arg, "-o") == 0 && argc > 2 && 2312 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)), 2313 *endptr == 0)) 2314 { 2315 op++; 2316 argc--; 2317 } 2318 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0) 2319 { 2320 int both = arg[2] == 0; 2321 int temp; 2322 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr), 2323 *endptr == 0)) 2324 { 2325 timeitm = temp; 2326 op++; 2327 argc--; 2328 } 2329 else timeitm = LOOPREPEAT; 2330 if (both) timeit = timeitm; 2331 } 2332 else if (strcmp(arg, "-S") == 0 && argc > 2 && 2333 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)), 2334 *endptr == 0)) 2335 { 2336#if defined(_WIN32) || defined(WIN32) || defined(__minix) 2337 printf("PCRE: -S not supported on this OS\n"); 2338 exit(1); 2339#else 2340 int rc; 2341 struct rlimit rlim; 2342 getrlimit(RLIMIT_STACK, &rlim); 2343 rlim.rlim_cur = stack_size * 1024 * 1024; 2344 rc = setrlimit(RLIMIT_STACK, &rlim); 2345 if (rc != 0) 2346 { 2347 printf("PCRE: setrlimit() failed with error %d\n", rc); 2348 exit(1); 2349 } 2350 op++; 2351 argc--; 2352#endif 2353 } 2354#if !defined NOPOSIX 2355 else if (strcmp(arg, "-p") == 0) posix = 1; 2356#endif 2357 else if (strcmp(arg, "-C") == 0) 2358 { 2359 int rc; 2360 unsigned long int lrc; 2361 2362 if (argc > 2) 2363 { 2364 if (strcmp(argv[op + 1], "linksize") == 0) 2365 { 2366 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc); 2367 printf("%d\n", rc); 2368 yield = rc; 2369 goto EXIT; 2370 } 2371 if (strcmp(argv[op + 1], "pcre8") == 0) 2372 { 2373#ifdef SUPPORT_PCRE8 2374 printf("1\n"); 2375 yield = 1; 2376#else 2377 printf("0\n"); 2378 yield = 0; 2379#endif 2380 goto EXIT; 2381 } 2382 if (strcmp(argv[op + 1], "pcre16") == 0) 2383 { 2384#ifdef SUPPORT_PCRE16 2385 printf("1\n"); 2386 yield = 1; 2387#else 2388 printf("0\n"); 2389 yield = 0; 2390#endif 2391 goto EXIT; 
2392 } 2393 if (strcmp(argv[op + 1], "utf") == 0) 2394 { 2395#ifdef SUPPORT_PCRE8 2396 (void)pcre_config(PCRE_CONFIG_UTF8, &rc); 2397 printf("%d\n", rc); 2398 yield = rc; 2399#else 2400 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc); 2401 printf("%d\n", rc); 2402 yield = rc; 2403#endif 2404 goto EXIT; 2405 } 2406 if (strcmp(argv[op + 1], "ucp") == 0) 2407 { 2408 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); 2409 printf("%d\n", rc); 2410 yield = rc; 2411 goto EXIT; 2412 } 2413 if (strcmp(argv[op + 1], "jit") == 0) 2414 { 2415 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc); 2416 printf("%d\n", rc); 2417 yield = rc; 2418 goto EXIT; 2419 } 2420 if (strcmp(argv[op + 1], "newline") == 0) 2421 { 2422 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc); 2423 /* Note that these values are always the ASCII values, even 2424 in EBCDIC environments. CR is 13 and NL is 10. */ 2425 printf("%s\n", (rc == 13)? "CR" : 2426 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" : 2427 (rc == -2)? "ANYCRLF" : 2428 (rc == -1)? "ANY" : "???"); 2429 goto EXIT; 2430 } 2431 printf("Unknown -C option: %s\n", argv[op + 1]); 2432 goto EXIT; 2433 } 2434 2435 printf("PCRE version %s\n", version); 2436 printf("Compiled with\n"); 2437 2438/* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both 2439are set, either both UTFs are supported or both are not supported. */ 2440 2441#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 2442 printf(" 8-bit and 16-bit support\n"); 2443 (void)pcre_config(PCRE_CONFIG_UTF8, &rc); 2444 if (rc) 2445 printf(" UTF-8 and UTF-16 support\n"); 2446 else 2447 printf(" No UTF-8 or UTF-16 support\n"); 2448#elif defined SUPPORT_PCRE8 2449 printf(" 8-bit support only\n"); 2450 (void)pcre_config(PCRE_CONFIG_UTF8, &rc); 2451 printf(" %sUTF-8 support\n", rc? "" : "No "); 2452#else 2453 printf(" 16-bit support only\n"); 2454 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc); 2455 printf(" %sUTF-16 support\n", rc? 
"" : "No "); 2456#endif 2457 2458 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); 2459 printf(" %sUnicode properties support\n", rc? "" : "No "); 2460 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc); 2461 if (rc) 2462 { 2463 const char *arch; 2464 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch)); 2465 printf(" Just-in-time compiler support: %s\n", arch); 2466 } 2467 else 2468 printf(" No just-in-time compiler support\n"); 2469 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc); 2470 /* Note that these values are always the ASCII values, even 2471 in EBCDIC environments. CR is 13 and NL is 10. */ 2472 printf(" Newline sequence is %s\n", (rc == 13)? "CR" : 2473 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" : 2474 (rc == -2)? "ANYCRLF" : 2475 (rc == -1)? "ANY" : "???"); 2476 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc); 2477 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" : 2478 "all Unicode newlines"); 2479 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc); 2480 printf(" Internal link size = %d\n", rc); 2481 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc); 2482 printf(" POSIX malloc threshold = %d\n", rc); 2483 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc); 2484 printf(" Default match limit = %ld\n", lrc); 2485 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc); 2486 printf(" Default recursion depth limit = %ld\n", lrc); 2487 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc); 2488 printf(" Match recursion uses %s", rc? "stack" : "heap"); 2489 if (showstore) 2490 { 2491 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0); 2492 printf(": %sframe size = %d bytes", rc? 
"approximate " : "", -stack_size); 2493 } 2494 printf("\n"); 2495 goto EXIT; 2496 } 2497 else if (strcmp(arg, "-help") == 0 || 2498 strcmp(arg, "--help") == 0) 2499 { 2500 usage(); 2501 goto EXIT; 2502 } 2503 else 2504 { 2505 BAD_ARG: 2506 printf("** Unknown or malformed option %s\n", arg); 2507 usage(); 2508 yield = 1; 2509 goto EXIT; 2510 } 2511 op++; 2512 argc--; 2513 } 2514 2515/* Get the store for the offsets vector, and remember what it was */ 2516 2517size_offsets_max = size_offsets; 2518offsets = (int *)malloc(size_offsets_max * sizeof(int)); 2519if (offsets == NULL) 2520 { 2521 printf("** Failed to get %d bytes of memory for offsets vector\n", 2522 (int)(size_offsets_max * sizeof(int))); 2523 yield = 1; 2524 goto EXIT; 2525 } 2526 2527/* Sort out the input and output files */ 2528 2529if (argc > 1) 2530 { 2531 infile = fopen(argv[op], INPUT_MODE); 2532 if (infile == NULL) 2533 { 2534 printf("** Failed to open %s\n", argv[op]); 2535 yield = 1; 2536 goto EXIT; 2537 } 2538 } 2539 2540if (argc > 2) 2541 { 2542 outfile = fopen(argv[op+1], OUTPUT_MODE); 2543 if (outfile == NULL) 2544 { 2545 printf("** Failed to open %s\n", argv[op+1]); 2546 yield = 1; 2547 goto EXIT; 2548 } 2549 } 2550 2551/* Set alternative malloc function */ 2552 2553#ifdef SUPPORT_PCRE8 2554pcre_malloc = new_malloc; 2555pcre_free = new_free; 2556pcre_stack_malloc = stack_malloc; 2557pcre_stack_free = stack_free; 2558#endif 2559 2560#ifdef SUPPORT_PCRE16 2561pcre16_malloc = new_malloc; 2562pcre16_free = new_free; 2563pcre16_stack_malloc = stack_malloc; 2564pcre16_stack_free = stack_free; 2565#endif 2566 2567/* Heading line unless quiet, then prompt for first regex if stdin */ 2568 2569if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version); 2570 2571/* Main loop */ 2572 2573while (!done) 2574 { 2575 pcre *re = NULL; 2576 pcre_extra *extra = NULL; 2577 2578#if !defined NOPOSIX /* There are still compilers that require no indent */ 2579 regex_t preg; 2580 int do_posix = 0; 2581#endif 2582 
2583 const char *error; 2584 pcre_uint8 *markptr; 2585 pcre_uint8 *p, *pp, *ppp; 2586 pcre_uint8 *to_file = NULL; 2587 const pcre_uint8 *tables = NULL; 2588 unsigned long int get_options; 2589 unsigned long int true_size, true_study_size = 0; 2590 size_t size, regex_gotten_store; 2591 int do_allcaps = 0; 2592 int do_mark = 0; 2593 int do_study = 0; 2594 int no_force_study = 0; 2595 int do_debug = debug; 2596 int do_G = 0; 2597 int do_g = 0; 2598 int do_showinfo = showinfo; 2599 int do_showrest = 0; 2600 int do_showcaprest = 0; 2601 int do_flip = 0; 2602 int erroroffset, len, delimiter, poffset; 2603 2604#if !defined NODFA 2605 int dfa_matched = 0; 2606#endif 2607 2608 use_utf = 0; 2609 debug_lengths = 1; 2610 2611 if (extend_inputline(infile, buffer, " re> ") == NULL) break; 2612 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); 2613 fflush(outfile); 2614 2615 p = buffer; 2616 while (isspace(*p)) p++; 2617 if (*p == 0) continue; 2618 2619 /* See if the pattern is to be loaded pre-compiled from a file. 
*/ 2620 2621 if (*p == '<' && strchr((char *)(p+1), '<') == NULL) 2622 { 2623 pcre_uint32 magic; 2624 pcre_uint8 sbuf[8]; 2625 FILE *f; 2626 2627 p++; 2628 if (*p == '!') 2629 { 2630 do_debug = TRUE; 2631 do_showinfo = TRUE; 2632 p++; 2633 } 2634 2635 pp = p + (int)strlen((char *)p); 2636 while (isspace(pp[-1])) pp--; 2637 *pp = 0; 2638 2639 f = fopen((char *)p, "rb"); 2640 if (f == NULL) 2641 { 2642 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno)); 2643 continue; 2644 } 2645 2646 first_gotten_store = 0; 2647 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ; 2648 2649 true_size = 2650 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3]; 2651 true_study_size = 2652 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7]; 2653 2654 re = (pcre *)new_malloc(true_size); 2655 regex_gotten_store = first_gotten_store; 2656 2657 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ; 2658 2659 magic = ((REAL_PCRE *)re)->magic_number; 2660 if (magic != MAGIC_NUMBER) 2661 { 2662 if (swap_uint32(magic) == MAGIC_NUMBER) 2663 { 2664 do_flip = 1; 2665 } 2666 else 2667 { 2668 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p); 2669 fclose(f); 2670 continue; 2671 } 2672 } 2673 2674 /* We hide the byte-invert info for little and big endian tests. */ 2675 fprintf(outfile, "Compiled pattern%s loaded from %s\n", 2676 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p); 2677 2678 /* Now see if there is any following study data. 
*/ 2679 2680 if (true_study_size != 0) 2681 { 2682 pcre_study_data *psd; 2683 2684 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size); 2685 extra->flags = PCRE_EXTRA_STUDY_DATA; 2686 2687 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra)); 2688 extra->study_data = psd; 2689 2690 if (fread(psd, 1, true_study_size, f) != true_study_size) 2691 { 2692 FAIL_READ: 2693 fprintf(outfile, "Failed to read data from %s\n", p); 2694 if (extra != NULL) 2695 { 2696 PCRE_FREE_STUDY(extra); 2697 } 2698 if (re != NULL) new_free(re); 2699 fclose(f); 2700 continue; 2701 } 2702 fprintf(outfile, "Study data loaded from %s\n", p); 2703 do_study = 1; /* To get the data output if requested */ 2704 } 2705 else fprintf(outfile, "No study data\n"); 2706 2707 /* Flip the necessary bytes. */ 2708 if (do_flip) 2709 { 2710 int rc; 2711 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL); 2712 if (rc == PCRE_ERROR_BADMODE) 2713 { 2714 /* Simulate the result of the function call below. */ 2715 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, 2716 use_pcre16? "16" : "", PCRE_INFO_OPTIONS); 2717 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in " 2718 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16"); 2719 continue; 2720 } 2721 } 2722 2723 /* Need to know if UTF-8 for printing data strings. */ 2724 2725 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue; 2726 use_utf = (get_options & PCRE_UTF8) != 0; 2727 2728 fclose(f); 2729 goto SHOW_INFO; 2730 } 2731 2732 /* In-line pattern (the usual case). Get the delimiter and seek the end of 2733 the pattern; if it isn't complete, read more. 
*/ 2734 2735 delimiter = *p++; 2736 2737 if (isalnum(delimiter) || delimiter == '\\') 2738 { 2739 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n"); 2740 goto SKIP_DATA; 2741 } 2742 2743 pp = p; 2744 poffset = (int)(p - buffer); 2745 2746 for(;;) 2747 { 2748 while (*pp != 0) 2749 { 2750 if (*pp == '\\' && pp[1] != 0) pp++; 2751 else if (*pp == delimiter) break; 2752 pp++; 2753 } 2754 if (*pp != 0) break; 2755 if ((pp = extend_inputline(infile, pp, " > ")) == NULL) 2756 { 2757 fprintf(outfile, "** Unexpected EOF\n"); 2758 done = 1; 2759 goto CONTINUE; 2760 } 2761 if (infile != stdin) fprintf(outfile, "%s", (char *)pp); 2762 } 2763 2764 /* The buffer may have moved while being extended; reset the start of data 2765 pointer to the correct relative point in the buffer. */ 2766 2767 p = buffer + poffset; 2768 2769 /* If the first character after the delimiter is backslash, make 2770 the pattern end with backslash. This is purely to provide a way 2771 of testing for the error message when a pattern ends with backslash. */ 2772 2773 if (pp[1] == '\\') *pp++ = '\\'; 2774 2775 /* Terminate the pattern at the delimiter, and save a copy of the pattern 2776 for callouts. 
*/ 2777 2778 *pp++ = 0; 2779 strcpy((char *)pbuffer, (char *)p); 2780 2781 /* Look for options after final delimiter */ 2782 2783 options = 0; 2784 study_options = 0; 2785 log_store = showstore; /* default from command line */ 2786 2787 while (*pp != 0) 2788 { 2789 switch (*pp++) 2790 { 2791 case 'f': options |= PCRE_FIRSTLINE; break; 2792 case 'g': do_g = 1; break; 2793 case 'i': options |= PCRE_CASELESS; break; 2794 case 'm': options |= PCRE_MULTILINE; break; 2795 case 's': options |= PCRE_DOTALL; break; 2796 case 'x': options |= PCRE_EXTENDED; break; 2797 2798 case '+': 2799 if (do_showrest) do_showcaprest = 1; else do_showrest = 1; 2800 break; 2801 2802 case '=': do_allcaps = 1; break; 2803 case 'A': options |= PCRE_ANCHORED; break; 2804 case 'B': do_debug = 1; break; 2805 case 'C': options |= PCRE_AUTO_CALLOUT; break; 2806 case 'D': do_debug = do_showinfo = 1; break; 2807 case 'E': options |= PCRE_DOLLAR_ENDONLY; break; 2808 case 'F': do_flip = 1; break; 2809 case 'G': do_G = 1; break; 2810 case 'I': do_showinfo = 1; break; 2811 case 'J': options |= PCRE_DUPNAMES; break; 2812 case 'K': do_mark = 1; break; 2813 case 'M': log_store = 1; break; 2814 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; 2815 2816#if !defined NOPOSIX 2817 case 'P': do_posix = 1; break; 2818#endif 2819 2820 case 'S': 2821 if (do_study == 0) 2822 { 2823 do_study = 1; 2824 if (*pp == '+') 2825 { 2826 if (*(++pp) == '+') 2827 { 2828 verify_jit = TRUE; 2829 pp++; 2830 } 2831 if (*pp >= '1' && *pp <= '7') 2832 study_options |= jit_study_bits[*pp++ - '1']; 2833 else 2834 study_options |= jit_study_bits[6]; 2835 } 2836 } 2837 else 2838 { 2839 do_study = 0; 2840 no_force_study = 1; 2841 } 2842 break; 2843 2844 case 'U': options |= PCRE_UNGREEDY; break; 2845 case 'W': options |= PCRE_UCP; break; 2846 case 'X': options |= PCRE_EXTRA; break; 2847 case 'Y': options |= PCRE_NO_START_OPTIMISE; break; 2848 case 'Z': debug_lengths = 0; break; 2849 case '8': options |= PCRE_UTF8; use_utf = 1; break; 
2850 case '?': options |= PCRE_NO_UTF8_CHECK; break; 2851 2852 case 'T': 2853 switch (*pp++) 2854 { 2855 case '0': tables = tables0; break; 2856 case '1': tables = tables1; break; 2857 2858 case '\r': 2859 case '\n': 2860 case ' ': 2861 case 0: 2862 fprintf(outfile, "** Missing table number after /T\n"); 2863 goto SKIP_DATA; 2864 2865 default: 2866 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]); 2867 goto SKIP_DATA; 2868 } 2869 break; 2870 2871 case 'L': 2872 ppp = pp; 2873 /* The '\r' test here is so that it works on Windows. */ 2874 /* The '0' test is just in case this is an unterminated line. */ 2875 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++; 2876 *ppp = 0; 2877 if (setlocale(LC_CTYPE, (const char *)pp) == NULL) 2878 { 2879 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp); 2880 goto SKIP_DATA; 2881 } 2882 locale_set = 1; 2883 tables = PCRE_MAKETABLES; 2884 pp = ppp; 2885 break; 2886 2887 case '>': 2888 to_file = pp; 2889 while (*pp != 0) pp++; 2890 while (isspace(pp[-1])) pp--; 2891 *pp = 0; 2892 break; 2893 2894 case '<': 2895 { 2896 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0) 2897 { 2898 options |= PCRE_JAVASCRIPT_COMPAT; 2899 pp += 3; 2900 } 2901 else 2902 { 2903 int x = check_newline(pp, outfile); 2904 if (x == 0) goto SKIP_DATA; 2905 options |= x; 2906 while (*pp++ != '>'); 2907 } 2908 } 2909 break; 2910 2911 case '\r': /* So that it works in Windows */ 2912 case '\n': 2913 case ' ': 2914 break; 2915 2916 default: 2917 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]); 2918 goto SKIP_DATA; 2919 } 2920 } 2921 2922 /* Handle compiling via the POSIX interface, which doesn't support the 2923 timing, showing, or debugging options, nor the ability to pass over 2924 local character tables. Neither does it have 16-bit support. 
*/ 2925 2926#if !defined NOPOSIX 2927 if (posix || do_posix) 2928 { 2929 int rc; 2930 int cflags = 0; 2931 2932 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE; 2933 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE; 2934 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL; 2935 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB; 2936 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8; 2937 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP; 2938 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY; 2939 2940 first_gotten_store = 0; 2941 rc = regcomp(&preg, (char *)p, cflags); 2942 2943 /* Compilation failed; go back for another re, skipping to blank line 2944 if non-interactive. */ 2945 2946 if (rc != 0) 2947 { 2948 (void)regerror(rc, &preg, (char *)buffer, buffer_size); 2949 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer); 2950 goto SKIP_DATA; 2951 } 2952 } 2953 2954 /* Handle compiling via the native interface */ 2955 2956 else 2957#endif /* !defined NOPOSIX */ 2958 2959 { 2960 /* In 16-bit mode, convert the input. 
*/ 2961 2962#ifdef SUPPORT_PCRE16 2963 if (use_pcre16) 2964 { 2965 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p))) 2966 { 2967 case -1: 2968 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be " 2969 "converted to UTF-16\n"); 2970 goto SKIP_DATA; 2971 2972 case -2: 2973 fprintf(outfile, "**Failed: character value greater than 0x10ffff " 2974 "cannot be converted to UTF-16\n"); 2975 goto SKIP_DATA; 2976 2977 case -3: /* "Impossible error" when to16 is called arg1 FALSE */ 2978 fprintf(outfile, "**Failed: character value greater than 0xffff " 2979 "cannot be converted to 16-bit in non-UTF mode\n"); 2980 goto SKIP_DATA; 2981 2982 default: 2983 break; 2984 } 2985 p = (pcre_uint8 *)buffer16; 2986 } 2987#endif 2988 2989 /* Compile many times when timing */ 2990 2991 if (timeit > 0) 2992 { 2993 register int i; 2994 clock_t time_taken; 2995 clock_t start_time = clock(); 2996 for (i = 0; i < timeit; i++) 2997 { 2998 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables); 2999 if (re != NULL) free(re); 3000 } 3001 time_taken = clock() - start_time; 3002 fprintf(outfile, "Compile time %.4f milliseconds\n", 3003 (((double)time_taken * 1000.0) / (double)timeit) / 3004 (double)CLOCKS_PER_SEC); 3005 } 3006 3007 first_gotten_store = 0; 3008 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables); 3009 3010 /* Compilation failed; go back for another re, skipping to blank line 3011 if non-interactive. */ 3012 3013 if (re == NULL) 3014 { 3015 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset); 3016 SKIP_DATA: 3017 if (infile != stdin) 3018 { 3019 for (;;) 3020 { 3021 if (extend_inputline(infile, buffer, NULL) == NULL) 3022 { 3023 done = 1; 3024 goto CONTINUE; 3025 } 3026 len = (int)strlen((char *)buffer); 3027 while (len > 0 && isspace(buffer[len-1])) len--; 3028 if (len == 0) break; 3029 } 3030 fprintf(outfile, "\n"); 3031 } 3032 goto CONTINUE; 3033 } 3034 3035 /* Compilation succeeded. 
It is now possible to set the UTF-8 option from 3036 within the regex; check for this so that we know how to process the data 3037 lines. */ 3038 3039 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) 3040 goto SKIP_DATA; 3041 if ((get_options & PCRE_UTF8) != 0) use_utf = 1; 3042 3043 /* Extract the size for possible writing before possibly flipping it, 3044 and remember the store that was got. */ 3045 3046 true_size = ((REAL_PCRE *)re)->size; 3047 regex_gotten_store = first_gotten_store; 3048 3049 /* Output code size information if requested */ 3050 3051 if (log_store) 3052 fprintf(outfile, "Memory allocation (code space): %d\n", 3053 (int)(first_gotten_store - 3054 sizeof(REAL_PCRE) - 3055 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size)); 3056 3057 /* If -s or /S was present, study the regex to generate additional info to 3058 help with the matching, unless the pattern has the SS option, which 3059 suppresses the effect of /S (used for a few test patterns where studying is 3060 never sensible). 
*/ 3061 3062 if (do_study || (force_study >= 0 && !no_force_study)) 3063 { 3064 if (timeit > 0) 3065 { 3066 register int i; 3067 clock_t time_taken; 3068 clock_t start_time = clock(); 3069 for (i = 0; i < timeit; i++) 3070 { 3071 PCRE_STUDY(extra, re, study_options | force_study_options, &error); 3072 } 3073 time_taken = clock() - start_time; 3074 if (extra != NULL) 3075 { 3076 PCRE_FREE_STUDY(extra); 3077 } 3078 fprintf(outfile, " Study time %.4f milliseconds\n", 3079 (((double)time_taken * 1000.0) / (double)timeit) / 3080 (double)CLOCKS_PER_SEC); 3081 } 3082 PCRE_STUDY(extra, re, study_options | force_study_options, &error); 3083 if (error != NULL) 3084 fprintf(outfile, "Failed to study: %s\n", error); 3085 else if (extra != NULL) 3086 { 3087 true_study_size = ((pcre_study_data *)(extra->study_data))->size; 3088 if (log_store) 3089 { 3090 size_t jitsize; 3091 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 && 3092 jitsize != 0) 3093 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize); 3094 } 3095 } 3096 } 3097 3098 /* If /K was present, we set up for handling MARK data. */ 3099 3100 if (do_mark) 3101 { 3102 if (extra == NULL) 3103 { 3104 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 3105 extra->flags = 0; 3106 } 3107 extra->mark = &markptr; 3108 extra->flags |= PCRE_EXTRA_MARK; 3109 } 3110 3111 /* Extract and display information from the compiled data if required. 
*/ 3112 3113 SHOW_INFO: 3114 3115 if (do_debug) 3116 { 3117 fprintf(outfile, "------------------------------------------------------------------\n"); 3118 PCRE_PRINTINT(re, outfile, debug_lengths); 3119 } 3120 3121 /* We already have the options in get_options (see above) */ 3122 3123 if (do_showinfo) 3124 { 3125 unsigned long int all_options; 3126 int count, backrefmax, first_char, need_char, okpartial, jchanged, 3127 hascrorlf, maxlookbehind; 3128 int nameentrysize, namecount; 3129 const pcre_uint8 *nametable; 3130 3131 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) + 3132 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) + 3133 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) + 3134 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) + 3135 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) + 3136 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) + 3137 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) + 3138 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) + 3139 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) + 3140 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) + 3141 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) + 3142 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind) 3143 != 0) 3144 goto SKIP_DATA; 3145 3146 if (size != regex_gotten_store) fprintf(outfile, 3147 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n", 3148 (int)size, (int)regex_gotten_store); 3149 3150 fprintf(outfile, "Capturing subpattern count = %d\n", count); 3151 if (backrefmax > 0) 3152 fprintf(outfile, "Max back reference = %d\n", backrefmax); 3153 3154 if (namecount > 0) 3155 { 3156 fprintf(outfile, "Named capturing subpatterns:\n"); 3157 while (namecount-- > 0) 3158 { 3159#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 3160 int imm2_size = use_pcre16 ? 
1 : 2; 3161#else 3162 int imm2_size = IMM2_SIZE; 3163#endif 3164 int length = (int)STRLEN(nametable + imm2_size); 3165 fprintf(outfile, " "); 3166 PCHARSV(nametable, imm2_size, length, outfile); 3167 while (length++ < nameentrysize - imm2_size) putc(' ', outfile); 3168#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 3169 fprintf(outfile, "%3d\n", use_pcre16? 3170 (int)(((PCRE_SPTR16)nametable)[0]) 3171 :((int)nametable[0] << 8) | (int)nametable[1]); 3172 nametable += nameentrysize * (use_pcre16 ? 2 : 1); 3173#else 3174 fprintf(outfile, "%3d\n", GET2(nametable, 0)); 3175#ifdef SUPPORT_PCRE8 3176 nametable += nameentrysize; 3177#else 3178 nametable += nameentrysize * 2; 3179#endif 3180#endif 3181 } 3182 } 3183 3184 if (!okpartial) fprintf(outfile, "Partial matching not supported\n"); 3185 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); 3186 3187 all_options = ((REAL_PCRE *)re)->options; 3188 if (do_flip) all_options = swap_uint32(all_options); 3189 3190 if (get_options == 0) fprintf(outfile, "No options\n"); 3191 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", 3192 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "", 3193 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "", 3194 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "", 3195 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "", 3196 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "", 3197 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "", 3198 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "", 3199 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "", 3200 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", 3201 ((get_options & PCRE_EXTRA) != 0)? " extra" : "", 3202 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "", 3203 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "", 3204 ((get_options & PCRE_UTF8) != 0)? " utf" : "", 3205 ((get_options & PCRE_UCP) != 0)? 
" ucp" : "", 3206 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "", 3207 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "", 3208 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : ""); 3209 3210 if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); 3211 3212 switch (get_options & PCRE_NEWLINE_BITS) 3213 { 3214 case PCRE_NEWLINE_CR: 3215 fprintf(outfile, "Forced newline sequence: CR\n"); 3216 break; 3217 3218 case PCRE_NEWLINE_LF: 3219 fprintf(outfile, "Forced newline sequence: LF\n"); 3220 break; 3221 3222 case PCRE_NEWLINE_CRLF: 3223 fprintf(outfile, "Forced newline sequence: CRLF\n"); 3224 break; 3225 3226 case PCRE_NEWLINE_ANYCRLF: 3227 fprintf(outfile, "Forced newline sequence: ANYCRLF\n"); 3228 break; 3229 3230 case PCRE_NEWLINE_ANY: 3231 fprintf(outfile, "Forced newline sequence: ANY\n"); 3232 break; 3233 3234 default: 3235 break; 3236 } 3237 3238 if (first_char == -1) 3239 { 3240 fprintf(outfile, "First char at start or follows newline\n"); 3241 } 3242 else if (first_char < 0) 3243 { 3244 fprintf(outfile, "No first char\n"); 3245 } 3246 else 3247 { 3248 const char *caseless = 3249 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)? 3250 "" : " (caseless)"; 3251 3252 if (PRINTOK(first_char)) 3253 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless); 3254 else 3255 { 3256 fprintf(outfile, "First char = "); 3257 pchar(first_char, outfile); 3258 fprintf(outfile, "%s\n", caseless); 3259 } 3260 } 3261 3262 if (need_char < 0) 3263 { 3264 fprintf(outfile, "No need char\n"); 3265 } 3266 else 3267 { 3268 const char *caseless = 3269 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)? 
3270 "" : " (caseless)"; 3271 3272 if (PRINTOK(need_char)) 3273 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless); 3274 else 3275 { 3276 fprintf(outfile, "Need char = "); 3277 pchar(need_char, outfile); 3278 fprintf(outfile, "%s\n", caseless); 3279 } 3280 } 3281 3282 if (maxlookbehind > 0) 3283 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); 3284 3285 /* Don't output study size; at present it is in any case a fixed 3286 value, but it varies, depending on the computer architecture, and 3287 so messes up the test suite. (And with the /F option, it might be 3288 flipped.) If study was forced by an external -s, don't show this 3289 information unless -i or -d was also present. This means that, except 3290 when auto-callouts are involved, the output from runs with and without 3291 -s should be identical. */ 3292 3293 if (do_study || (force_study >= 0 && showinfo && !no_force_study)) 3294 { 3295 if (extra == NULL) 3296 fprintf(outfile, "Study returned NULL\n"); 3297 else 3298 { 3299 pcre_uint8 *start_bits = NULL; 3300 int minlength; 3301 3302 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0) 3303 fprintf(outfile, "Subject length lower bound = %d\n", minlength); 3304 3305 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0) 3306 { 3307 if (start_bits == NULL) 3308 fprintf(outfile, "No set of starting bytes\n"); 3309 else 3310 { 3311 int i; 3312 int c = 24; 3313 fprintf(outfile, "Starting byte set: "); 3314 for (i = 0; i < 256; i++) 3315 { 3316 if ((start_bits[i/8] & (1<<(i&7))) != 0) 3317 { 3318 if (c > 75) 3319 { 3320 fprintf(outfile, "\n "); 3321 c = 2; 3322 } 3323 if (PRINTOK(i) && i != ' ') 3324 { 3325 fprintf(outfile, "%c ", i); 3326 c += 2; 3327 } 3328 else 3329 { 3330 fprintf(outfile, "\\x%02x ", i); 3331 c += 5; 3332 } 3333 } 3334 } 3335 fprintf(outfile, "\n"); 3336 } 3337 } 3338 } 3339 3340 /* Show this only if the JIT was set by /S, not by -s. 
*/ 3341 3342 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0) 3343 { 3344 int jit; 3345 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0) 3346 { 3347 if (jit) 3348 fprintf(outfile, "JIT study was successful\n"); 3349 else 3350#ifdef SUPPORT_JIT 3351 fprintf(outfile, "JIT study was not successful\n"); 3352#else 3353 fprintf(outfile, "JIT support is not available in this version of PCRE\n"); 3354#endif 3355 } 3356 } 3357 } 3358 } 3359 3360 /* If the '>' option was present, we write out the regex to a file, and 3361 that is all. The first 8 bytes of the file are the regex length and then 3362 the study length, in big-endian order. */ 3363 3364 if (to_file != NULL) 3365 { 3366 FILE *f = fopen((char *)to_file, "wb"); 3367 if (f == NULL) 3368 { 3369 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno)); 3370 } 3371 else 3372 { 3373 pcre_uint8 sbuf[8]; 3374 3375 if (do_flip) regexflip(re, extra); 3376 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255); 3377 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255); 3378 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255); 3379 sbuf[3] = (pcre_uint8)((true_size) & 255); 3380 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255); 3381 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255); 3382 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255); 3383 sbuf[7] = (pcre_uint8)((true_study_size) & 255); 3384 3385 if (fwrite(sbuf, 1, 8, f) < 8 || 3386 fwrite(re, 1, true_size, f) < true_size) 3387 { 3388 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno)); 3389 } 3390 else 3391 { 3392 fprintf(outfile, "Compiled pattern written to %s\n", to_file); 3393 3394 /* If there is study data, write it. 
*/ 3395 3396 if (extra != NULL) 3397 { 3398 if (fwrite(extra->study_data, 1, true_study_size, f) < 3399 true_study_size) 3400 { 3401 fprintf(outfile, "Write error on %s: %s\n", to_file, 3402 strerror(errno)); 3403 } 3404 else fprintf(outfile, "Study data written to %s\n", to_file); 3405 } 3406 } 3407 fclose(f); 3408 } 3409 3410 new_free(re); 3411 if (extra != NULL) 3412 { 3413 PCRE_FREE_STUDY(extra); 3414 } 3415 if (locale_set) 3416 { 3417 new_free((void *)tables); 3418 setlocale(LC_CTYPE, "C"); 3419 locale_set = 0; 3420 } 3421 continue; /* With next regex */ 3422 } 3423 } /* End of non-POSIX compile */ 3424 3425 /* Read data lines and test them */ 3426 3427 for (;;) 3428 { 3429 pcre_uint8 *q; 3430 pcre_uint8 *bptr; 3431 int *use_offsets = offsets; 3432 int use_size_offsets = size_offsets; 3433 int callout_data = 0; 3434 int callout_data_set = 0; 3435 int count, c; 3436 int copystrings = 0; 3437 int find_match_limit = default_find_match_limit; 3438 int getstrings = 0; 3439 int getlist = 0; 3440 int gmatched = 0; 3441 int start_offset = 0; 3442 int start_offset_sign = 1; 3443 int g_notempty = 0; 3444 int use_dfa = 0; 3445 3446 *copynames = 0; 3447 *getnames = 0; 3448 3449#ifdef SUPPORT_PCRE16 3450 cn16ptr = copynames; 3451 gn16ptr = getnames; 3452#endif 3453#ifdef SUPPORT_PCRE8 3454 cn8ptr = copynames8; 3455 gn8ptr = getnames8; 3456#endif 3457 3458 SET_PCRE_CALLOUT(callout); 3459 first_callout = 1; 3460 last_callout_mark = NULL; 3461 callout_extra = 0; 3462 callout_count = 0; 3463 callout_fail_count = 999999; 3464 callout_fail_id = -1; 3465 show_malloc = 0; 3466 options = 0; 3467 3468 if (extra != NULL) extra->flags &= 3469 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION); 3470 3471 len = 0; 3472 for (;;) 3473 { 3474 if (extend_inputline(infile, buffer + len, "data> ") == NULL) 3475 { 3476 if (len > 0) /* Reached EOF without hitting a newline */ 3477 { 3478 fprintf(outfile, "\n"); 3479 break; 3480 } 3481 done = 1; 3482 goto CONTINUE; 3483 } 3484 if (infile 
!= stdin) fprintf(outfile, "%s", (char *)buffer); 3485 len = (int)strlen((char *)buffer); 3486 if (buffer[len-1] == '\n') break; 3487 } 3488 3489 while (len > 0 && isspace(buffer[len-1])) len--; 3490 buffer[len] = 0; 3491 if (len == 0) break; 3492 3493 p = buffer; 3494 while (isspace(*p)) p++; 3495 3496 bptr = q = dbuffer; 3497 while ((c = *p++) != 0) 3498 { 3499 int i = 0; 3500 int n = 0; 3501 3502 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes. 3503 In non-UTF mode, allow the value of the byte to fall through to later, 3504 where values greater than 127 are turned into UTF-8 when running in 3505 16-bit mode. */ 3506 3507 if (c != '\\') 3508 { 3509 if (use_utf) 3510 { 3511 *q++ = c; 3512 continue; 3513 } 3514 } 3515 3516 /* Handle backslash escapes */ 3517 3518 else switch ((c = *p++)) 3519 { 3520 case 'a': c = 7; break; 3521 case 'b': c = '\b'; break; 3522 case 'e': c = 27; break; 3523 case 'f': c = '\f'; break; 3524 case 'n': c = '\n'; break; 3525 case 'r': c = '\r'; break; 3526 case 't': c = '\t'; break; 3527 case 'v': c = '\v'; break; 3528 3529 case '0': case '1': case '2': case '3': 3530 case '4': case '5': case '6': case '7': 3531 c -= '0'; 3532 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') 3533 c = c * 8 + *p++ - '0'; 3534 break; 3535 3536 case 'x': 3537 if (*p == '{') 3538 { 3539 pcre_uint8 *pt = p; 3540 c = 0; 3541 3542 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails 3543 when isxdigit() is a macro that refers to its argument more than 3544 once. This is banned by the C Standard, but apparently happens in at 3545 least one MacOS environment. */ 3546 3547 for (pt++; isxdigit(*pt); pt++) 3548 { 3549 if (++i == 9) 3550 fprintf(outfile, "** Too many hex digits in \\x{...} item; " 3551 "using only the first eight.\n"); 3552 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? 
'0' : 'a' - 10); 3553 } 3554 if (*pt == '}') 3555 { 3556 p = pt + 1; 3557 break; 3558 } 3559 /* Not correct form for \x{...}; fall through */ 3560 } 3561 3562 /* \x without {} always defines just one byte in 8-bit mode. This 3563 allows UTF-8 characters to be constructed byte by byte, and also allows 3564 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode. 3565 Otherwise, pass it down to later code so that it can be turned into 3566 UTF-8 when running in 16-bit mode. */ 3567 3568 c = 0; 3569 while (i++ < 2 && isxdigit(*p)) 3570 { 3571 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10); 3572 p++; 3573 } 3574 if (use_utf) 3575 { 3576 *q++ = c; 3577 continue; 3578 } 3579 break; 3580 3581 case 0: /* \ followed by EOF allows for an empty line */ 3582 p--; 3583 continue; 3584 3585 case '>': 3586 if (*p == '-') 3587 { 3588 start_offset_sign = -1; 3589 p++; 3590 } 3591 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0'; 3592 start_offset *= start_offset_sign; 3593 continue; 3594 3595 case 'A': /* Option setting */ 3596 options |= PCRE_ANCHORED; 3597 continue; 3598 3599 case 'B': 3600 options |= PCRE_NOTBOL; 3601 continue; 3602 3603 case 'C': 3604 if (isdigit(*p)) /* Set copy string */ 3605 { 3606 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3607 copystrings |= 1 << n; 3608 } 3609 else if (isalnum(*p)) 3610 { 3611 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re); 3612 } 3613 else if (*p == '+') 3614 { 3615 callout_extra = 1; 3616 p++; 3617 } 3618 else if (*p == '-') 3619 { 3620 SET_PCRE_CALLOUT(NULL); 3621 p++; 3622 } 3623 else if (*p == '!') 3624 { 3625 callout_fail_id = 0; 3626 p++; 3627 while(isdigit(*p)) 3628 callout_fail_id = callout_fail_id * 10 + *p++ - '0'; 3629 callout_fail_count = 0; 3630 if (*p == '!') 3631 { 3632 p++; 3633 while(isdigit(*p)) 3634 callout_fail_count = callout_fail_count * 10 + *p++ - '0'; 3635 } 3636 } 3637 else if (*p == '*') 3638 { 3639 int sign = 1; 3640 callout_data = 0; 3641 if (*(++p) == '-') { sign = -1; 
p++; } 3642 while(isdigit(*p)) 3643 callout_data = callout_data * 10 + *p++ - '0'; 3644 callout_data *= sign; 3645 callout_data_set = 1; 3646 } 3647 continue; 3648 3649#if !defined NODFA 3650 case 'D': 3651#if !defined NOPOSIX 3652 if (posix || do_posix) 3653 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n"); 3654 else 3655#endif 3656 use_dfa = 1; 3657 continue; 3658#endif 3659 3660#if !defined NODFA 3661 case 'F': 3662 options |= PCRE_DFA_SHORTEST; 3663 continue; 3664#endif 3665 3666 case 'G': 3667 if (isdigit(*p)) 3668 { 3669 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3670 getstrings |= 1 << n; 3671 } 3672 else if (isalnum(*p)) 3673 { 3674 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re); 3675 } 3676 continue; 3677 3678 case 'J': 3679 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3680 if (extra != NULL 3681 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 3682 && extra->executable_jit != NULL) 3683 { 3684 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); } 3685 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024); 3686 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); 3687 } 3688 continue; 3689 3690 case 'L': 3691 getlist = 1; 3692 continue; 3693 3694 case 'M': 3695 find_match_limit = 1; 3696 continue; 3697 3698 case 'N': 3699 if ((options & PCRE_NOTEMPTY) != 0) 3700 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART; 3701 else 3702 options |= PCRE_NOTEMPTY; 3703 continue; 3704 3705 case 'O': 3706 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3707 if (n > size_offsets_max) 3708 { 3709 size_offsets_max = n; 3710 free(offsets); 3711 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int)); 3712 if (offsets == NULL) 3713 { 3714 printf("** Failed to get %d bytes of memory for offsets vector\n", 3715 (int)(size_offsets_max * sizeof(int))); 3716 yield = 1; 3717 goto EXIT; 3718 } 3719 } 3720 use_size_offsets = n; 3721 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */ 3722 else use_offsets = offsets + size_offsets_max 
- n; /* To catch overruns */ 3723 continue; 3724 3725 case 'P': 3726 options |= ((options & PCRE_PARTIAL_SOFT) == 0)? 3727 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD; 3728 continue; 3729 3730 case 'Q': 3731 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3732 if (extra == NULL) 3733 { 3734 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 3735 extra->flags = 0; 3736 } 3737 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; 3738 extra->match_limit_recursion = n; 3739 continue; 3740 3741 case 'q': 3742 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 3743 if (extra == NULL) 3744 { 3745 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 3746 extra->flags = 0; 3747 } 3748 extra->flags |= PCRE_EXTRA_MATCH_LIMIT; 3749 extra->match_limit = n; 3750 continue; 3751 3752#if !defined NODFA 3753 case 'R': 3754 options |= PCRE_DFA_RESTART; 3755 continue; 3756#endif 3757 3758 case 'S': 3759 show_malloc = 1; 3760 continue; 3761 3762 case 'Y': 3763 options |= PCRE_NO_START_OPTIMIZE; 3764 continue; 3765 3766 case 'Z': 3767 options |= PCRE_NOTEOL; 3768 continue; 3769 3770 case '?': 3771 options |= PCRE_NO_UTF8_CHECK; 3772 continue; 3773 3774 case '<': 3775 { 3776 int x = check_newline(p, outfile); 3777 if (x == 0) goto NEXT_DATA; 3778 options |= x; 3779 while (*p++ != '>'); 3780 } 3781 continue; 3782 } 3783 3784 /* We now have a character value in c that may be greater than 255. In 3785 16-bit mode, we always convert characters to UTF-8 so that values greater 3786 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we 3787 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF 3788 mode must have come from \x{...} or octal constructs because values from 3789 \x.. get this far only in non-UTF mode. 
*/ 3790 3791#if !defined NOUTF || defined SUPPORT_PCRE16 3792 if (use_pcre16 || use_utf) 3793 { 3794 pcre_uint8 buff8[8]; 3795 int ii, utn; 3796 utn = ord2utf8(c, buff8); 3797 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii]; 3798 } 3799 else 3800#endif 3801 { 3802 if (c > 255) 3803 { 3804 fprintf(outfile, "** Character \\x{%x} is greater than 255 " 3805 "and UTF-8 mode is not enabled.\n", c); 3806 fprintf(outfile, "** Truncation will probably give the wrong " 3807 "result.\n"); 3808 } 3809 *q++ = c; 3810 } 3811 } 3812 3813 /* Reached end of subject string */ 3814 3815 *q = 0; 3816 len = (int)(q - dbuffer); 3817 3818 /* Move the data to the end of the buffer so that a read over the end of 3819 the buffer will be seen by valgrind, even if it doesn't cause a crash. If 3820 we are using the POSIX interface, we must include the terminating zero. */ 3821 3822#if !defined NOPOSIX 3823 if (posix || do_posix) 3824 { 3825 memmove(bptr + buffer_size - len - 1, bptr, len + 1); 3826 bptr += buffer_size - len - 1; 3827 } 3828 else 3829#endif 3830 { 3831 memmove(bptr + buffer_size - len, bptr, len); 3832 bptr += buffer_size - len; 3833 } 3834 3835 if ((all_use_dfa || use_dfa) && find_match_limit) 3836 { 3837 printf("**Match limit not relevant for DFA matching: ignored\n"); 3838 find_match_limit = 0; 3839 } 3840 3841 /* Handle matching via the POSIX interface, which does not 3842 support timing or playing with the match limit or callout data. 
*/ 3843 3844#if !defined NOPOSIX 3845 if (posix || do_posix) 3846 { 3847 int rc; 3848 int eflags = 0; 3849 regmatch_t *pmatch = NULL; 3850 if (use_size_offsets > 0) 3851 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets); 3852 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL; 3853 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL; 3854 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY; 3855 3856 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags); 3857 3858 if (rc != 0) 3859 { 3860 (void)regerror(rc, &preg, (char *)buffer, buffer_size); 3861 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer); 3862 } 3863 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE) 3864 != 0) 3865 { 3866 fprintf(outfile, "Matched with REG_NOSUB\n"); 3867 } 3868 else 3869 { 3870 size_t i; 3871 for (i = 0; i < (size_t)use_size_offsets; i++) 3872 { 3873 if (pmatch[i].rm_so >= 0) 3874 { 3875 fprintf(outfile, "%2d: ", (int)i); 3876 PCHARSV(dbuffer, pmatch[i].rm_so, 3877 pmatch[i].rm_eo - pmatch[i].rm_so, outfile); 3878 fprintf(outfile, "\n"); 3879 if (do_showcaprest || (i == 0 && do_showrest)) 3880 { 3881 fprintf(outfile, "%2d+ ", (int)i); 3882 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo, 3883 outfile); 3884 fprintf(outfile, "\n"); 3885 } 3886 } 3887 } 3888 } 3889 free(pmatch); 3890 goto NEXT_DATA; 3891 } 3892 3893#endif /* !defined NOPOSIX */ 3894 3895 /* Handle matching via the native interface - repeats for /g and /G */ 3896 3897#ifdef SUPPORT_PCRE16 3898 if (use_pcre16) 3899 { 3900 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len); 3901 switch(len) 3902 { 3903 case -1: 3904 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be " 3905 "converted to UTF-16\n"); 3906 goto NEXT_DATA; 3907 3908 case -2: 3909 fprintf(outfile, "**Failed: character value greater than 0x10ffff " 3910 "cannot be converted to UTF-16\n"); 3911 goto NEXT_DATA; 3912 3913 case -3: 3914 
fprintf(outfile, "**Failed: character value greater than 0xffff " 3915 "cannot be converted to 16-bit in non-UTF mode\n"); 3916 goto NEXT_DATA; 3917 3918 default: 3919 break; 3920 } 3921 bptr = (pcre_uint8 *)buffer16; 3922 } 3923#endif 3924 3925 /* Ensure that there is a JIT callback if we want to verify that JIT was 3926 actually used. If jit_stack == NULL, no stack has yet been assigned. */ 3927 3928 if (verify_jit && jit_stack == NULL && extra != NULL) 3929 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); } 3930 3931 for (;; gmatched++) /* Loop for /g or /G */ 3932 { 3933 markptr = NULL; 3934 jit_was_used = FALSE; 3935 3936 if (timeitm > 0) 3937 { 3938 register int i; 3939 clock_t time_taken; 3940 clock_t start_time = clock(); 3941 3942#if !defined NODFA 3943 if (all_use_dfa || use_dfa) 3944 { 3945 if ((options & PCRE_DFA_RESTART) != 0) 3946 { 3947 fprintf(outfile, "Timing DFA restarts is not supported\n"); 3948 break; 3949 } 3950 if (dfa_workspace == NULL) 3951 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); 3952 for (i = 0; i < timeitm; i++) 3953 { 3954 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, 3955 (options | g_notempty), use_offsets, use_size_offsets, 3956 dfa_workspace, DFA_WS_DIMENSION); 3957 } 3958 } 3959 else 3960#endif 3961 3962 for (i = 0; i < timeitm; i++) 3963 { 3964 PCRE_EXEC(count, re, extra, bptr, len, start_offset, 3965 (options | g_notempty), use_offsets, use_size_offsets); 3966 } 3967 time_taken = clock() - start_time; 3968 fprintf(outfile, "Execute time %.4f milliseconds\n", 3969 (((double)time_taken * 1000.0) / (double)timeitm) / 3970 (double)CLOCKS_PER_SEC); 3971 } 3972 3973 /* If find_match_limit is set, we want to do repeated matches with 3974 varying limits in order to find the minimum value for the match limit and 3975 for the recursion limit. The match limits are relevant only to the normal 3976 running of pcre_exec(), so disable the JIT optimization. 
This makes it 3977 possible to run the same set of tests with and without JIT externally 3978 requested. */ 3979 3980 if (find_match_limit) 3981 { 3982 if (extra == NULL) 3983 { 3984 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 3985 extra->flags = 0; 3986 } 3987 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT; 3988 3989 (void)check_match_limit(re, extra, bptr, len, start_offset, 3990 options|g_notempty, use_offsets, use_size_offsets, 3991 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit), 3992 PCRE_ERROR_MATCHLIMIT, "match()"); 3993 3994 count = check_match_limit(re, extra, bptr, len, start_offset, 3995 options|g_notempty, use_offsets, use_size_offsets, 3996 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion), 3997 PCRE_ERROR_RECURSIONLIMIT, "match() recursion"); 3998 } 3999 4000 /* If callout_data is set, use the interface with additional data */ 4001 4002 else if (callout_data_set) 4003 { 4004 if (extra == NULL) 4005 { 4006 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 4007 extra->flags = 0; 4008 } 4009 extra->flags |= PCRE_EXTRA_CALLOUT_DATA; 4010 extra->callout_data = &callout_data; 4011 PCRE_EXEC(count, re, extra, bptr, len, start_offset, 4012 options | g_notempty, use_offsets, use_size_offsets); 4013 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA; 4014 } 4015 4016 /* The normal case is just to do the match once, with the default 4017 value of match_limit. 
*/ 4018 4019#if !defined NODFA 4020 else if (all_use_dfa || use_dfa) 4021 { 4022 if (dfa_workspace == NULL) 4023 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); 4024 if (dfa_matched++ == 0) 4025 dfa_workspace[0] = -1; /* To catch bad restart */ 4026 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, 4027 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace, 4028 DFA_WS_DIMENSION); 4029 if (count == 0) 4030 { 4031 fprintf(outfile, "Matched, but too many subsidiary matches\n"); 4032 count = use_size_offsets/2; 4033 } 4034 } 4035#endif 4036 4037 else 4038 { 4039 PCRE_EXEC(count, re, extra, bptr, len, start_offset, 4040 options | g_notempty, use_offsets, use_size_offsets); 4041 if (count == 0) 4042 { 4043 fprintf(outfile, "Matched, but too many substrings\n"); 4044 count = use_size_offsets/3; 4045 } 4046 } 4047 4048 /* Matched */ 4049 4050 if (count >= 0) 4051 { 4052 int i, maxcount; 4053 void *cnptr, *gnptr; 4054 4055#if !defined NODFA 4056 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else 4057#endif 4058 maxcount = use_size_offsets/3; 4059 4060 /* This is a check against a lunatic return value. */ 4061 4062 if (count > maxcount) 4063 { 4064 fprintf(outfile, 4065 "** PCRE error: returned count %d is too big for offset size %d\n", 4066 count, use_size_offsets); 4067 count = use_size_offsets/3; 4068 if (do_g || do_G) 4069 { 4070 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G'); 4071 do_g = do_G = FALSE; /* Break g/G loop */ 4072 } 4073 } 4074 4075 /* do_allcaps requests showing of all captures in the pattern, to check 4076 unset ones at the end. 
*/ 4077 4078 if (do_allcaps) 4079 { 4080 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0) 4081 goto SKIP_DATA; 4082 count++; /* Allow for full match */ 4083 if (count * 2 > use_size_offsets) count = use_size_offsets/2; 4084 } 4085 4086 /* Output the captured substrings */ 4087 4088 for (i = 0; i < count * 2; i += 2) 4089 { 4090 if (use_offsets[i] < 0) 4091 { 4092 if (use_offsets[i] != -1) 4093 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n", 4094 use_offsets[i], i); 4095 if (use_offsets[i+1] != -1) 4096 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n", 4097 use_offsets[i+1], i+1); 4098 fprintf(outfile, "%2d: <unset>\n", i/2); 4099 } 4100 else 4101 { 4102 fprintf(outfile, "%2d: ", i/2); 4103 PCHARSV(bptr, use_offsets[i], 4104 use_offsets[i+1] - use_offsets[i], outfile); 4105 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)"); 4106 fprintf(outfile, "\n"); 4107 if (do_showcaprest || (i == 0 && do_showrest)) 4108 { 4109 fprintf(outfile, "%2d+ ", i/2); 4110 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1], 4111 outfile); 4112 fprintf(outfile, "\n"); 4113 } 4114 } 4115 } 4116 4117 if (markptr != NULL) 4118 { 4119 fprintf(outfile, "MK: "); 4120 PCHARSV(markptr, 0, -1, outfile); 4121 fprintf(outfile, "\n"); 4122 } 4123 4124 for (i = 0; i < 32; i++) 4125 { 4126 if ((copystrings & (1 << i)) != 0) 4127 { 4128 int rc; 4129 char copybuffer[256]; 4130 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i, 4131 copybuffer, sizeof(copybuffer)); 4132 if (rc < 0) 4133 fprintf(outfile, "copy substring %d failed %d\n", i, rc); 4134 else 4135 { 4136 fprintf(outfile, "%2dC ", i); 4137 PCHARSV(copybuffer, 0, rc, outfile); 4138 fprintf(outfile, " (%d)\n", rc); 4139 } 4140 } 4141 } 4142 4143 cnptr = copynames; 4144 for (;;) 4145 { 4146 int rc; 4147 char copybuffer[256]; 4148 4149 if (use_pcre16) 4150 { 4151 if (*(pcre_uint16 *)cnptr == 0) break; 4152 } 4153 else 4154 { 4155 if (*(pcre_uint8 *)cnptr == 0) break; 4156 } 4157 4158 
PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count, 4159 cnptr, copybuffer, sizeof(copybuffer)); 4160 4161 if (rc < 0) 4162 { 4163 fprintf(outfile, "copy substring "); 4164 PCHARSV(cnptr, 0, -1, outfile); 4165 fprintf(outfile, " failed %d\n", rc); 4166 } 4167 else 4168 { 4169 fprintf(outfile, " C "); 4170 PCHARSV(copybuffer, 0, rc, outfile); 4171 fprintf(outfile, " (%d) ", rc); 4172 PCHARSV(cnptr, 0, -1, outfile); 4173 putc('\n', outfile); 4174 } 4175 4176 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE; 4177 } 4178 4179 for (i = 0; i < 32; i++) 4180 { 4181 if ((getstrings & (1 << i)) != 0) 4182 { 4183 int rc; 4184 const char *substring; 4185 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring); 4186 if (rc < 0) 4187 fprintf(outfile, "get substring %d failed %d\n", i, rc); 4188 else 4189 { 4190 fprintf(outfile, "%2dG ", i); 4191 PCHARSV(substring, 0, rc, outfile); 4192 fprintf(outfile, " (%d)\n", rc); 4193 PCRE_FREE_SUBSTRING(substring); 4194 } 4195 } 4196 } 4197 4198 gnptr = getnames; 4199 for (;;) 4200 { 4201 int rc; 4202 const char *substring; 4203 4204 if (use_pcre16) 4205 { 4206 if (*(pcre_uint16 *)gnptr == 0) break; 4207 } 4208 else 4209 { 4210 if (*(pcre_uint8 *)gnptr == 0) break; 4211 } 4212 4213 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count, 4214 gnptr, &substring); 4215 if (rc < 0) 4216 { 4217 fprintf(outfile, "get substring "); 4218 PCHARSV(gnptr, 0, -1, outfile); 4219 fprintf(outfile, " failed %d\n", rc); 4220 } 4221 else 4222 { 4223 fprintf(outfile, " G "); 4224 PCHARSV(substring, 0, rc, outfile); 4225 fprintf(outfile, " (%d) ", rc); 4226 PCHARSV(gnptr, 0, -1, outfile); 4227 PCRE_FREE_SUBSTRING(substring); 4228 putc('\n', outfile); 4229 } 4230 4231 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE; 4232 } 4233 4234 if (getlist) 4235 { 4236 int rc; 4237 const char **stringlist; 4238 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist); 4239 if (rc < 0) 4240 fprintf(outfile, "get substring list 
failed %d\n", rc); 4241 else 4242 { 4243 for (i = 0; i < count; i++) 4244 { 4245 fprintf(outfile, "%2dL ", i); 4246 PCHARSV(stringlist[i], 0, -1, outfile); 4247 putc('\n', outfile); 4248 } 4249 if (stringlist[i] != NULL) 4250 fprintf(outfile, "string list not terminated by NULL\n"); 4251 PCRE_FREE_SUBSTRING_LIST(stringlist); 4252 } 4253 } 4254 } 4255 4256 /* There was a partial match */ 4257 4258 else if (count == PCRE_ERROR_PARTIAL) 4259 { 4260 if (markptr == NULL) fprintf(outfile, "Partial match"); 4261 else 4262 { 4263 fprintf(outfile, "Partial match, mark="); 4264 PCHARSV(markptr, 0, -1, outfile); 4265 } 4266 if (use_size_offsets > 1) 4267 { 4268 fprintf(outfile, ": "); 4269 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0], 4270 outfile); 4271 } 4272 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)"); 4273 fprintf(outfile, "\n"); 4274 break; /* Out of the /g loop */ 4275 } 4276 4277 /* Failed to match. If this is a /g or /G loop and we previously set 4278 g_notempty after a null match, this is not necessarily the end. We want 4279 to advance the start offset, and continue. We won't be at the end of the 4280 string - that was checked before setting g_notempty. 4281 4282 Complication arises in the case when the newline convention is "any", 4283 "crlf", or "anycrlf". If the previous match was at the end of a line 4284 terminated by CRLF, an advance of one character just passes the \r, 4285 whereas we should prefer the longer newline sequence, as does the code in 4286 pcre_exec(). Fudge the offset value to achieve this. We check for a 4287 newline setting in the pattern; if none was set, use PCRE_CONFIG() to 4288 find the default. 4289 4290 Otherwise, in the case of UTF-8 matching, the advance must be one 4291 character, not one byte. 
*/ 4292 4293 else 4294 { 4295 if (g_notempty != 0) 4296 { 4297 int onechar = 1; 4298 unsigned int obits = ((REAL_PCRE *)re)->options; 4299 use_offsets[0] = start_offset; 4300 if ((obits & PCRE_NEWLINE_BITS) == 0) 4301 { 4302 int d; 4303 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d); 4304 /* Note that these values are always the ASCII ones, even in 4305 EBCDIC environments. CR = 13, NL = 10. */ 4306 obits = (d == 13)? PCRE_NEWLINE_CR : 4307 (d == 10)? PCRE_NEWLINE_LF : 4308 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF : 4309 (d == -2)? PCRE_NEWLINE_ANYCRLF : 4310 (d == -1)? PCRE_NEWLINE_ANY : 0; 4311 } 4312 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY || 4313 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF || 4314 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF) 4315 && 4316 start_offset < len - 1 && 4317#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 4318 (use_pcre16? 4319 ((PCRE_SPTR16)bptr)[start_offset] == '\r' 4320 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n' 4321 : 4322 bptr[start_offset] == '\r' 4323 && bptr[start_offset + 1] == '\n') 4324#elif defined SUPPORT_PCRE16 4325 ((PCRE_SPTR16)bptr)[start_offset] == '\r' 4326 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n' 4327#else 4328 bptr[start_offset] == '\r' 4329 && bptr[start_offset + 1] == '\n' 4330#endif 4331 ) 4332 onechar++; 4333 else if (use_utf) 4334 { 4335 while (start_offset + onechar < len) 4336 { 4337 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break; 4338 onechar++; 4339 } 4340 } 4341 use_offsets[1] = start_offset + onechar; 4342 } 4343 else 4344 { 4345 switch(count) 4346 { 4347 case PCRE_ERROR_NOMATCH: 4348 if (gmatched == 0) 4349 { 4350 if (markptr == NULL) 4351 { 4352 fprintf(outfile, "No match"); 4353 } 4354 else 4355 { 4356 fprintf(outfile, "No match, mark = "); 4357 PCHARSV(markptr, 0, -1, outfile); 4358 } 4359 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)"); 4360 putc('\n', outfile); 4361 } 4362 break; 4363 4364 case PCRE_ERROR_BADUTF8: 4365 case 
PCRE_ERROR_SHORTUTF8: 4366 fprintf(outfile, "Error %d (%s UTF-%s string)", count, 4367 (count == PCRE_ERROR_BADUTF8)? "bad" : "short", 4368 use_pcre16? "16" : "8"); 4369 if (use_size_offsets >= 2) 4370 fprintf(outfile, " offset=%d reason=%d", use_offsets[0], 4371 use_offsets[1]); 4372 fprintf(outfile, "\n"); 4373 break; 4374 4375 case PCRE_ERROR_BADUTF8_OFFSET: 4376 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count, 4377 use_pcre16? "16" : "8"); 4378 break; 4379 4380 default: 4381 if (count < 0 && 4382 (-count) < (int)(sizeof(errtexts)/sizeof(const char *))) 4383 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]); 4384 else 4385 fprintf(outfile, "Error %d (Unexpected value)\n", count); 4386 break; 4387 } 4388 4389 break; /* Out of the /g loop */ 4390 } 4391 } 4392 4393 /* If not /g or /G we are done */ 4394 4395 if (!do_g && !do_G) break; 4396 4397 /* If we have matched an empty string, first check to see if we are at 4398 the end of the subject. If so, the /g loop is over. Otherwise, mimic what 4399 Perl's /g options does. This turns out to be rather cunning. First we set 4400 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the 4401 same point. If this fails (picked up above) we advance to the next 4402 character. 
*/ 4403 4404 g_notempty = 0; 4405 4406 if (use_offsets[0] == use_offsets[1]) 4407 { 4408 if (use_offsets[0] == len) break; 4409 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED; 4410 } 4411 4412 /* For /g, update the start offset, leaving the rest alone */ 4413 4414 if (do_g) start_offset = use_offsets[1]; 4415 4416 /* For /G, update the pointer and length */ 4417 4418 else 4419 { 4420 bptr += use_offsets[1] * CHAR_SIZE; 4421 len -= use_offsets[1]; 4422 } 4423 } /* End of loop for /g and /G */ 4424 4425 NEXT_DATA: continue; 4426 } /* End of loop for data lines */ 4427 4428 CONTINUE: 4429 4430#if !defined NOPOSIX 4431 if (posix || do_posix) regfree(&preg); 4432#endif 4433 4434 if (re != NULL) new_free(re); 4435 if (extra != NULL) 4436 { 4437 PCRE_FREE_STUDY(extra); 4438 } 4439 if (locale_set) 4440 { 4441 new_free((void *)tables); 4442 setlocale(LC_CTYPE, "C"); 4443 locale_set = 0; 4444 } 4445 if (jit_stack != NULL) 4446 { 4447 PCRE_JIT_STACK_FREE(jit_stack); 4448 jit_stack = NULL; 4449 } 4450 } 4451 4452if (infile == stdin) fprintf(outfile, "\n"); 4453 4454EXIT: 4455 4456if (infile != NULL && infile != stdin) fclose(infile); 4457if (outfile != NULL && outfile != stdout) fclose(outfile); 4458 4459free(buffer); 4460free(dbuffer); 4461free(pbuffer); 4462free(offsets); 4463 4464#ifdef SUPPORT_PCRE16 4465if (buffer16 != NULL) free(buffer16); 4466#endif 4467 4468return yield; 4469} 4470 4471/* End of pcretest.c */ 4472