1/*************************************************
2*             PCRE testing program               *
3*************************************************/
4
5/* This program was hacked up as a tester for PCRE. I really should have
6written it more tidily in the first place. Will I ever learn? It has grown and
7been extended and consequently is now rather, er, *very* untidy in places. The
8addition of 16-bit support has made it even worse. :-(
9
10-----------------------------------------------------------------------------
11Redistribution and use in source and binary forms, with or without
12modification, are permitted provided that the following conditions are met:
13
14    * Redistributions of source code must retain the above copyright notice,
15      this list of conditions and the following disclaimer.
16
17    * Redistributions in binary form must reproduce the above copyright
18      notice, this list of conditions and the following disclaimer in the
19      documentation and/or other materials provided with the distribution.
20
21    * Neither the name of the University of Cambridge nor the names of its
22      contributors may be used to endorse or promote products derived from
23      this software without specific prior written permission.
24
25THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35POSSIBILITY OF SUCH DAMAGE.
36-----------------------------------------------------------------------------
37*/
38
39/* This program now supports the testing of both the 8-bit and 16-bit PCRE
40libraries in a single program. This is different from the modules such as
41pcre_compile.c in the library itself, which are compiled separately for each
42mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43(the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44compiled only once. Therefore, it must not make use of any of the macros from
45pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47only supported library functions. */
48
49#ifdef HAVE_CONFIG_H
50#include "config.h"
51#endif
52
53#include <ctype.h>
54#include <stdio.h>
55#include <string.h>
56#include <stdlib.h>
57#include <time.h>
58#include <locale.h>
59#include <errno.h>
60
61/* Both libreadline and libedit are optionally supported. The user-supplied
62original patch uses readline/readline.h for libedit, but in at least one system
63it is installed as editline/readline.h, so the configuration code now looks for
64that first, falling back to readline/readline.h. */
65
66#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67#ifdef HAVE_UNISTD_H
68#include <unistd.h>
69#endif
70#if defined(SUPPORT_LIBREADLINE)
71#include <readline/readline.h>
72#include <readline/history.h>
73#else
74#if defined(HAVE_EDITLINE_READLINE_H)
75#include <editline/readline.h>
76#else
77#include <readline/readline.h>
78#endif
79#endif
80#endif
81
82/* A number of things vary for Windows builds. Originally, pcretest opened its
83input and output without "b"; then I was told that "b" was needed in some
84environments, so it was added for release 5.0 to both the input and output. (It
85makes no difference on Unix-like systems.) Later I was told that it is wrong
86for the input on Windows. I've now abstracted the modes into two macros that
87are set here, to make it easier to fiddle with them, and removed "b" from the
88input mode under Windows. */
89
#if !defined(_WIN32) && !defined(WIN32)

/* Not Windows: "b" makes no difference here, so it is used for both the
input and the output stream. */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"

#else  /* Windows */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

/* Under Windows the input is opened without "b"; the output keeps it. */

#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these functions with a leading underscore. Some environments
already define the plain names, so each one is mapped only when it is absent. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#endif  /* Windows / not Windows */
118
/* PRIV() expands to its argument unchanged, so any name wrapped in it in the
code compiled into this program is used exactly as written. */
#define PRIV(name) name
120
121/* We have to include pcre_internal.h because we need the internal info for
122displaying the results of pcre_study() and we also need to know about the
123internal macros, structures, and other internal data values; pcretest has
124"inside information" compared to a program that strictly follows the PCRE API.
125
126Although pcre_internal.h does itself include pcre.h, we explicitly include it
127here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
128appropriately for an application, not for building PCRE. */
129
130#include "pcre.h"
131
#if defined(SUPPORT_PCRE16) && !defined(SUPPORT_PCRE8)
/* Only the 16-bit library is supported: switch the internal macros to 16-bit
mode before the internal header is read. */
#define COMPILE_PCRE16
#endif
136
137#include "pcre_internal.h"
138
139/* The pcre_printint() function, which prints the internal form of a compiled
140regex, is held in a separate file so that (a) it can be compiled in either
1418-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
142when that is compiled in debug mode. */
143
/* Prototypes for the printing functions, one per supported library width.
Both take the compiled pattern as a (pcre *), the output stream, and a flag
that is passed through as print_lengths. */

#if defined(SUPPORT_PCRE8)
extern void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined(SUPPORT_PCRE16)
extern void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
150
151/* We need access to some of the data tables that PCRE uses. So as not to have
152to keep two copies, we include the source file here, changing the names of the
153external symbols to prevent clashes. */
154
155#define PCRE_INCLUDED
156
157#include "pcre_tables.c"
158
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
the same as in the printint.src file. We use it here in cases when the locale
has not been explicitly changed, so as to get consistent output from systems
that differ in their output from isprint() even in the "C" locale. */
164
/* PRINTABLE(c) is a locale-independent test for a printable character: code
points 64..254 when EBCDIC is defined, otherwise 32..126. The argument is
evaluated more than once, so it must not have side effects. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) yields non-zero if c should be output as-is. When locale_set is
zero the fixed PRINTABLE() ranges are used; otherwise isprint() decides.

The <ctype.h> functions are only defined for arguments that are representable
as an unsigned char (or are EOF), so isprint() is consulted only for values in
the range 0..255. Anything outside that range (a wide code point, or a
negative value from a signed char) is reported as not printable instead of
being passed to isprint(). The unsigned comparison rejects negative values and
large values in a single test. */

#define PRINTOK(c) \
  (locale_set? ((unsigned int)(c) <= 255u && isprint((int)(c))) : PRINTABLE(c))
172
/* The POSIX interface is switched off when only the 16-bit library is
supported, unless NOPOSIX has already been defined. */
#if defined(SUPPORT_PCRE16) && !defined(SUPPORT_PCRE8)
#ifndef NOPOSIX
#define NOPOSIX
#endif
#endif
177
178/* It is possible to compile this test program without including support for
179testing the POSIX interface, though this is not available via the standard
180Makefile. */
181
182#if !defined NOPOSIX
183#include "pcreposix.h"
184#endif
185
/* It is also possible, originally for the benefit of a version that was
imported into Exim, to build pcretest without support for UTF-8 or UTF-16
(define NOUTF), or without the interface to the DFA matcher (define NODFA). In
fact, we automatically cut out the UTF support if PCRE is built without it. */
190
/* Define NOUTF when SUPPORT_UTF is not defined, unless it is already set. */
#if !defined(SUPPORT_UTF) && !defined(NOUTF)
#define NOUTF
#endif
196
197/* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
198for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
199only from one place and is handled differently). I couldn't dream up any way of
200using a single macro to do this in a generic way, because of the many different
201argument requirements. We know that at least one of SUPPORT_PCRE8 and
202SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
203use these in the definitions of generic macros.
204
205**** Special note about the PCHARSxxx macros: the address of the string to be
206printed is always given as two arguments: a base address followed by an offset.
207The base address is cast to the correct data size for 8 or 16 bit data; the
208offset is in units of this size. If the string were given as base+offset in one
209argument, the casting might be incorrectly applied. */
210
211#ifdef SUPPORT_PCRE8
212
/* ---- String helpers (8-bit) ---- */

/* Call pchars() on the string at p + offset, where p is first cast to a
pointer to 8-bit units, and store the result in lv. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + offset, len, f)

/* As PCHARS8, but the result of pchars() is discarded. */

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + offset, len, f)

/* Only the 8-bit name buffer (cn8) is used; cn16 is accepted so that the 8-bit
and 16-bit forms take the same argument list. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

/* Length of a zero-terminated 8-bit string, as an int. */

#define STRLEN8(p) ((int)strlen((char *)p))

/* ---- Callout and JIT stack (8-bit) ---- */

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)

/* ---- Compiling, studying, printing (8-bit) ---- */

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

/* ---- Matching (8-bit) ---- */

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
            offsets, size_offsets)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
            offsets, size_offsets, workspace, size_workspace)

/* ---- Substring extraction (8-bit) ---- */

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
         (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
         (char *)getnamesptr, subsptr)

/* NOTE(review): the second parameter (rc) is not used in the expansion, which
instead refers to an identifier named re that must be in scope wherever the
macro is expanded. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)
288
289#endif /* SUPPORT_PCRE8 */
290
291/* -----------------------------------------------------------*/
292
293#ifdef SUPPORT_PCRE16
294
/* ---- String helpers (16-bit) ---- */

/* Call pchars16() on the string at p + offset, where p is first cast to a
pointer to 16-bit units so that offset counts 16-bit units, and store the
result in lv. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* As PCHARS16, but the result of pchars16() is discarded. */

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* Only the 16-bit name buffer (cn16) is used; cn8 is accepted so that the
8-bit and 16-bit forms take the same argument list. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

/* Result of strlen16() on p viewed as a 16-bit string, as an int. */

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

/* ---- Callout and JIT stack assignment (16-bit) ---- */

/* The callout function and the JIT callback are cast to the 16-bit function
pointer types. */

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

/* ---- Compiling (16-bit) ---- */

/* The compiled pattern is stored through a (pcre *) cast. */

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
         tables)

/* ---- Copying substrings (16-bit) ---- */

/* The library is passed size/2 as the buffer size, with the buffer viewed as
16-bit units. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
         count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
         (PCRE_UCHAR16 *)cbuffer, size/2)

/* ---- Matching (16-bit) ---- */

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
            (PCRE_SPTR16)bptr, len, start_offset, options, offsets, \
            size_offsets, workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, \
            (PCRE_SPTR16)bptr, len, start_offset, options, offsets, \
            size_offsets)

/* ---- Freeing (16-bit) ---- */

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

/* ---- Named substring extraction (16-bit) ---- */

/* The result pointer goes through (void*) before being cast to the 16-bit
pointer-to-pointer type. */

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
         count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
350
/* Look up the number of the named group ptr and store it in n.

The pattern is cast to (pcre16 *) as in every other 16-bit call macro, so that
a (pcre *) pattern variable does not reach pcre16_get_stringnumber() as an
incompatible pointer type.

NOTE(review): the second parameter (rc) is not used in the expansion, which
instead refers to an identifier named re that must be in scope wherever the
macro is expanded. This matches the 8-bit form and is left unchanged because
the call sites are not visible here. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)ptr)
353
/* ---- Substring extraction (16-bit) ---- */

/* The result pointers go through (void*) before being cast to the 16-bit
pointer types. */

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
         (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
         (PCRE_SPTR16 **)(void*)listptr)

/* ---- Byte order, printing, studying (16-bit) ---- */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, \
         (pcre16_extra *)extra, tables)

/* The pattern is passed without a cast. */

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

/* The study result is stored through a (pcre_extra *) cast. */

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

/* ---- JIT stack (16-bit) ---- */

/* The allocated stack is returned as a (pcre_jit_stack *); the free macro
casts it back to the 16-bit type. */

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
377
378#endif /* SUPPORT_PCRE16 */
379
380
381/* ----- Both modes are supported; a runtime test is needed, except for
382pcre_config(), and the JIT stack functions, when it doesn't matter which
383version is called. ----- */
384
385#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
386
/* Size in bytes of one code unit in the mode selected at run time. */

#define CHAR_SIZE (use_pcre16? 2:1)

/* The macros below select the 16-bit or the 8-bit form according to the
use_pcre16 flag. Those that act as statements are wrapped in
do { ... } while (0) so that each expands to exactly one statement; those that
yield a value (STRLEN, PCRE_JIT_STACK_ALLOC, PCRE_MAKETABLES) remain
expressions. */

/* ---- String helpers ---- */

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARS16(lv, p, offset, len, f); \
    else \
      PCHARS8(lv, p, offset, len, f); \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) \
      PCHARSV16(p, offset, len, f); \
    else \
      PCHARSV8(p, offset, len, f); \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) \
      READ_CAPTURE_NAME16(p, cn8, cn16, re); \
    else \
      READ_CAPTURE_NAME8(p, cn8, cn16, re); \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) \
      SET_PCRE_CALLOUT16(callout); \
    else \
      SET_PCRE_CALLOUT8(callout); \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

/* ---- Library calls ---- */

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (use_pcre16) \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    else \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
  } while (0)

/* No run-time selection here: the 8-bit function is always used. */

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    else \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    else \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING16(substring); \
    else \
      PCRE_FREE_SUBSTRING8(substring); \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    else \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
    else \
      PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    else \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
  } while (0)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (use_pcre16) \
      PCRE_JIT_STACK_FREE16(stack); \
    else \
      PCRE_JIT_STACK_FREE8(stack); \
  } while (0)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    else \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    else \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
  } while (0)
538
539/* ----- Only 8-bit mode is supported ----- */
540
541#elif defined SUPPORT_PCRE8
/* Each generic name is a direct alias for its 8-bit form. */

/* Code unit size and string helpers */
#define CHAR_SIZE                       1
#define STRLEN                          STRLEN8
#define PCHARS                          PCHARS8
#define PCHARSV                         PCHARSV8
#define READ_CAPTURE_NAME               READ_CAPTURE_NAME8

/* Configuration, tables, callout */
#define PCRE_CONFIG                     pcre_config
#define PCRE_MAKETABLES                 pcre_maketables()
#define SET_PCRE_CALLOUT                SET_PCRE_CALLOUT8

/* Compiling, studying, printing */
#define PCRE_COMPILE                    PCRE_COMPILE8
#define PCRE_STUDY                      PCRE_STUDY8
#define PCRE_FREE_STUDY                 PCRE_FREE_STUDY8
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT                   PCRE_PRINTINT8

/* Matching */
#define PCRE_EXEC                       PCRE_EXEC8
#define PCRE_DFA_EXEC                   PCRE_DFA_EXEC8

/* Substring extraction */
#define PCRE_COPY_SUBSTRING             PCRE_COPY_SUBSTRING8
#define PCRE_COPY_NAMED_SUBSTRING       PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_GET_SUBSTRING              PCRE_GET_SUBSTRING8
#define PCRE_GET_NAMED_SUBSTRING        PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST         PCRE_GET_SUBSTRING_LIST8
#define PCRE_GET_STRINGNUMBER           PCRE_GET_STRINGNUMBER8
#define PCRE_FREE_SUBSTRING             PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST        PCRE_FREE_SUBSTRING_LIST8

/* JIT stack */
#define PCRE_ASSIGN_JIT_STACK           PCRE_ASSIGN_JIT_STACK8
#define PCRE_JIT_STACK_ALLOC            PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE             PCRE_JIT_STACK_FREE8
568
569/* ----- Only 16-bit mode is supported ----- */
570
571#else
/* Each generic name is a direct alias for its 16-bit form. */

/* Code unit size and string helpers */
#define CHAR_SIZE                       2
#define STRLEN                          STRLEN16
#define PCHARS                          PCHARS16
#define PCHARSV                         PCHARSV16
#define READ_CAPTURE_NAME               READ_CAPTURE_NAME16

/* Configuration, tables, callout */
#define PCRE_CONFIG                     pcre16_config
#define PCRE_MAKETABLES                 pcre16_maketables()
#define SET_PCRE_CALLOUT                SET_PCRE_CALLOUT16

/* Compiling, studying, printing */
#define PCRE_COMPILE                    PCRE_COMPILE16
#define PCRE_STUDY                      PCRE_STUDY16
#define PCRE_FREE_STUDY                 PCRE_FREE_STUDY16
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT                   PCRE_PRINTINT16

/* Matching */
#define PCRE_EXEC                       PCRE_EXEC16
#define PCRE_DFA_EXEC                   PCRE_DFA_EXEC16

/* Substring extraction */
#define PCRE_COPY_SUBSTRING             PCRE_COPY_SUBSTRING16
#define PCRE_COPY_NAMED_SUBSTRING       PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_GET_SUBSTRING              PCRE_GET_SUBSTRING16
#define PCRE_GET_NAMED_SUBSTRING        PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST         PCRE_GET_SUBSTRING_LIST16
#define PCRE_GET_STRINGNUMBER           PCRE_GET_STRINGNUMBER16
#define PCRE_FREE_SUBSTRING             PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST        PCRE_FREE_SUBSTRING_LIST16

/* JIT stack */
#define PCRE_ASSIGN_JIT_STACK           PCRE_ASSIGN_JIT_STACK16
#define PCRE_JIT_STACK_ALLOC            PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE             PCRE_JIT_STACK_FREE16
598#endif
599
600/* ----- End of mode-specific function call macros ----- */
601
602
603/* Other parameters */
604
/* Supply CLOCKS_PER_SEC if it is missing: take CLK_TCK when that is defined,
otherwise fall back to 100. */
#if !defined(CLOCKS_PER_SEC)
#if defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif
612
/* DFA_WS_DIMENSION is defined only when the DFA interface is not excluded
by NODFA. */

#ifndef NODFA
#define DFA_WS_DIMENSION 1000
#endif

/* Default number of repetitions used when timing. */

#define LOOPREPEAT 500000
620
621/* Static variables */
622
/* File-scope state. Every item starts out as zero or NULL; the initializers
are written out explicitly. */

/* Output stream */
static FILE *outfile = NULL;

/* Callout bookkeeping */
static int callout_count = 0;
static int callout_extra = 0;
static int callout_fail_count = 0;
static int callout_fail_id = 0;
static int first_callout = 0;
static const unsigned char *last_callout_mark = NULL;

/* Flags and settings */
static int debug_lengths = 0;
static int jit_was_used = 0;
static int locale_set = 0;
static int log_store = 0;
static int show_malloc = 0;
static int use_utf = 0;

/* Store sizes */
static size_t gotten_store = 0;
static size_t first_gotten_store = 0;
638
639/* The buffers grow automatically if very long input lines are encountered. */
640
641static int buffer_size = 50000;
642static pcre_uint8 *buffer = NULL;
643static pcre_uint8 *dbuffer = NULL;
644static pcre_uint8 *pbuffer = NULL;
645
/* Another buffer is needed for translation to 16-bit character strings. It
will be obtained and extended as required. */
648
#if defined(SUPPORT_PCRE16)

/* The 16-bit buffer starts out empty and unallocated. */

static pcre_uint16 *buffer16 = NULL;
static int buffer16_size = 0;

#if defined(SUPPORT_PCRE8)

/* With both libraries supported, the internal header has been read in 8-bit
mode, but the operator length table built below must describe 16-bit compiled
code (it is needed to swap bytes in a pattern for the save/reload tests). The
table comes from the OP_LENGTHS macro, so LINK_SIZE and IMM2_SIZE are redefined
here to the values that apply in the 16-bit world before the macro is
expanded. COMPILE_PCRE16 itself must not be set in this configuration. */

#if defined(COMPILE_PCRE16)
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

/* Link sizes are remapped: 2 becomes 1, and 3 or 4 becomes 2. Any other value
is rejected. */

#if LINK_SIZE != 2 && LINK_SIZE != 3 && LINK_SIZE != 4
#error LINK_SIZE must be either 2, 3, or 4
#elif LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#else
#undef LINK_SIZE
#define LINK_SIZE 2
#endif

#undef IMM2_SIZE
#define IMM2_SIZE 1

#endif  /* SUPPORT_PCRE8 */

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };

#endif  /* SUPPORT_PCRE16 */
682
683/* If we have 8-bit support, default use_pcre16 to false; if there is also
68416-bit support, it can be changed by an option. If there is no 8-bit support,
685there must be 16-bit support, so default it to 1. */
686
#if !defined(SUPPORT_PCRE8)
static int use_pcre16 = 1;     /* No 8-bit support: start in 16-bit mode */
#else
static int use_pcre16 = 0;     /* 8-bit support present: start in 8-bit mode */
#endif
692
693/* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
694
695static int jit_study_bits[] =
696  {
697  PCRE_STUDY_JIT_COMPILE,
698  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
699  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
700  PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
701  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
702  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
703  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
704    PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
705};
706
707/* Textual explanations for runtime error codes */
708
/* Table of 31 message strings, shown here with their positions. A NULL entry
marks a position that has no text in this table. */
static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error",
  /* 29 */ "pattern compiled with other endianness",
  /* 30 */ "invalid data in workspace for DFA restart"
};
742
743
744/*************************************************
745*         Alternate character tables             *
746*************************************************/
747
748/* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
749using the default tables of the library. However, the T option can be used to
750select alternate sets of tables, for different kinds of testing. Note also that
751the L (locale) option also adjusts the tables. */
752
753/* This is the set of tables distributed as default with PCRE. It recognizes
754only ASCII characters. */
755
756static const pcre_uint8 tables0[] = {
757
758/* This table is a lower casing table. */
759
760    0,  1,  2,  3,  4,  5,  6,  7,
761    8,  9, 10, 11, 12, 13, 14, 15,
762   16, 17, 18, 19, 20, 21, 22, 23,
763   24, 25, 26, 27, 28, 29, 30, 31,
764   32, 33, 34, 35, 36, 37, 38, 39,
765   40, 41, 42, 43, 44, 45, 46, 47,
766   48, 49, 50, 51, 52, 53, 54, 55,
767   56, 57, 58, 59, 60, 61, 62, 63,
768   64, 97, 98, 99,100,101,102,103,
769  104,105,106,107,108,109,110,111,
770  112,113,114,115,116,117,118,119,
771  120,121,122, 91, 92, 93, 94, 95,
772   96, 97, 98, 99,100,101,102,103,
773  104,105,106,107,108,109,110,111,
774  112,113,114,115,116,117,118,119,
775  120,121,122,123,124,125,126,127,
776  128,129,130,131,132,133,134,135,
777  136,137,138,139,140,141,142,143,
778  144,145,146,147,148,149,150,151,
779  152,153,154,155,156,157,158,159,
780  160,161,162,163,164,165,166,167,
781  168,169,170,171,172,173,174,175,
782  176,177,178,179,180,181,182,183,
783  184,185,186,187,188,189,190,191,
784  192,193,194,195,196,197,198,199,
785  200,201,202,203,204,205,206,207,
786  208,209,210,211,212,213,214,215,
787  216,217,218,219,220,221,222,223,
788  224,225,226,227,228,229,230,231,
789  232,233,234,235,236,237,238,239,
790  240,241,242,243,244,245,246,247,
791  248,249,250,251,252,253,254,255,
792
793/* This table is a case flipping table. */
794
795    0,  1,  2,  3,  4,  5,  6,  7,
796    8,  9, 10, 11, 12, 13, 14, 15,
797   16, 17, 18, 19, 20, 21, 22, 23,
798   24, 25, 26, 27, 28, 29, 30, 31,
799   32, 33, 34, 35, 36, 37, 38, 39,
800   40, 41, 42, 43, 44, 45, 46, 47,
801   48, 49, 50, 51, 52, 53, 54, 55,
802   56, 57, 58, 59, 60, 61, 62, 63,
803   64, 97, 98, 99,100,101,102,103,
804  104,105,106,107,108,109,110,111,
805  112,113,114,115,116,117,118,119,
806  120,121,122, 91, 92, 93, 94, 95,
807   96, 65, 66, 67, 68, 69, 70, 71,
808   72, 73, 74, 75, 76, 77, 78, 79,
809   80, 81, 82, 83, 84, 85, 86, 87,
810   88, 89, 90,123,124,125,126,127,
811  128,129,130,131,132,133,134,135,
812  136,137,138,139,140,141,142,143,
813  144,145,146,147,148,149,150,151,
814  152,153,154,155,156,157,158,159,
815  160,161,162,163,164,165,166,167,
816  168,169,170,171,172,173,174,175,
817  176,177,178,179,180,181,182,183,
818  184,185,186,187,188,189,190,191,
819  192,193,194,195,196,197,198,199,
820  200,201,202,203,204,205,206,207,
821  208,209,210,211,212,213,214,215,
822  216,217,218,219,220,221,222,223,
823  224,225,226,227,228,229,230,231,
824  232,233,234,235,236,237,238,239,
825  240,241,242,243,244,245,246,247,
826  248,249,250,251,252,253,254,255,
827
828/* This table contains bit maps for various character classes. Each map is 32
829bytes long and the bits run from the least significant end of each byte. The
830classes that have their own maps are: space, xdigit, digit, upper, lower, word,
831graph, print, punct, and cntrl. Other classes are built from combinations. */
832
833  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
834  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
837
838  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
839  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
840  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
842
843  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
844  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
847
848  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
850  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
852
853  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
855  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
856  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857
858  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
859  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
860  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
861  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862
863  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
864  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
865  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
866  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
867
868  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
869  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
870  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
871  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
872
873  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
874  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
875  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
876  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
877
878  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
879  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
880  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
881  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
882
883/* This table identifies various classes of character by individual bits:
884  0x01   white space character
885  0x02   letter
886  0x04   decimal digit
887  0x08   hexadecimal digit
888  0x10   alphanumeric or '_'
889  0x80   regular expression metacharacter or binary zero
890*/
891
892  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
893  0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
894  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
895  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
896  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
897  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
898  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
899  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
900  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
901  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
902  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
903  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
904  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
905  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
906  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
907  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
908  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
909  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
910  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
911  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
912  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
913  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
914  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
915  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
916  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
917  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
918  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
919  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
920  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
921  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
922  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
923  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
924
/* This is a set of tables that came originally from a Windows user. It seems to
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc.

The vector holds four tables back to back, 8 values per source line:
  256 bytes   lower-casing table
  256 bytes   case-flipping table
  320 bytes   character class bit maps (ten maps of 32 bytes each)
  256 bytes   per-character type flags
The values are written in decimal here rather than hex. */

static const pcre_uint8 tables1[] = {

/* Lower-casing table: 'A'-'Z' (65-90) map to 'a'-'z' (97-122), and 192-214
and 216-222 map to 224-246 and 248-254. Everything else maps to itself. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case-flipping table: as above for the upper-case ranges, and in addition
each lower-case value maps back to its upper-case partner. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps: ten maps of 32 bytes (four source lines) each.
Presumably they come in the same order as the maps of the default table
earlier in this file: space, xdigit, digit, upper, lower, word, graph, print,
punct, cntrl. The ASCII halves of the first maps agree with that table. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Per-character type flags, one byte per character value. The ASCII half
holds the same values as the default table earlier in this file (28 = 0x1c,
26 = 0x1a, 18 = 0x12, 128 = 0x80), so the bits presumably carry the meanings
documented there; unlike that table, some values above 127 are flagged. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1067
1068
1069
1070
1071#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that have no strerror() of their own (SunOS4 is one
example). The message is looked up in the traditional sys_errlist[] vector,
whose number of entries is given by sys_nerr.

Argument:
  n         an error number

Returns:    the corresponding entry of sys_errlist[], or a fixed "unknown"
            string when n is outside the range 0 to sys_nerr-1
*/

extern char *sys_errlist[];
extern int   sys_nerr;

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1089#endif /* HAVE_STRERROR */
1090
1091
1092/*************************************************
1093*         JIT memory callback                    *
1094*************************************************/
1095
1096static pcre_jit_stack* jit_callback(void *arg)
1097{
1098jit_was_used = TRUE;
1099return (pcre_jit_stack *)arg;
1100}
1101
1102
1103#if !defined NOUTF || defined SUPPORT_PCRE16
1104/*************************************************
1105*            Convert UTF-8 string to value       *
1106*************************************************/
1107
1108/* This function takes one or more bytes that represents a UTF-8 character,
1109and returns the value of the character.
1110
1111Argument:
1112  utf8bytes   a pointer to the byte vector
1113  vptr        a pointer to an int to receive the value
1114
1115Returns:      >  0 => the number of bytes consumed
1116              -6 to 0 => malformed UTF-8 character at offset = (-return)
1117*/
1118
1119static int
1120utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1121{
1122int c = *utf8bytes++;
1123int d = c;
1124int i, j, s;
1125
1126for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1127  {
1128  if ((d & 0x80) == 0) break;
1129  d <<= 1;
1130  }
1131
1132if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1133if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1134
1135/* i now has a value in the range 1-5 */
1136
1137s = 6*i;
1138d = (c & utf8_table3[i]) << s;
1139
1140for (j = 0; j < i; j++)
1141  {
1142  c = *utf8bytes++;
1143  if ((c & 0xc0) != 0x80) return -(j+1);
1144  s -= 6;
1145  d |= (c & 0x3f) << s;
1146  }
1147
1148/* Check that encoding was the correct unique one */
1149
1150for (j = 0; j < utf8_table1_size; j++)
1151  if (d <= utf8_table1[j]) break;
1152if (j != i) return -(i+1);
1153
1154/* Valid value */
1155
1156*vptr = d;
1157return i+1;
1158}
1159#endif /* NOUTF || SUPPORT_PCRE16 */
1160
1161
1162
1163#if !defined NOUTF || defined SUPPORT_PCRE16
1164/*************************************************
1165*       Convert character value to UTF-8         *
1166*************************************************/
1167
1168/* This function takes an integer value in the range 0 - 0x7fffffff
1169and encodes it as a UTF-8 character in 0 to 6 bytes.
1170
1171Arguments:
1172  cvalue     the character value
1173  utf8bytes  pointer to buffer for result - at least 6 bytes long
1174
1175Returns:     number of characters placed in the buffer
1176*/
1177
1178static int
1179ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1180{
1181register int i, j;
1182for (i = 0; i < utf8_table1_size; i++)
1183  if (cvalue <= utf8_table1[i]) break;
1184utf8bytes += i;
1185for (j = i; j > 0; j--)
1186 {
1187 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1188 cvalue >>= 6;
1189 }
1190*utf8bytes = utf8_table2[i] | cvalue;
1191return i + 1;
1192}
1193#endif
1194
1195
1196#ifdef SUPPORT_PCRE16
1197/*************************************************
1198*         Convert a string to 16-bit             *
1199*************************************************/
1200
1201/* In non-UTF mode, the space needed for a 16-bit string is exactly double the
12028-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1203double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1204in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1205result is always left in buffer16.
1206
1207Note that this function does not object to surrogate values. This is
1208deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1209for the purpose of testing that they are correctly faulted.
1210
1211Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1212in UTF-8 so that values greater than 255 can be handled.
1213
1214Arguments:
1215  data       TRUE if converting a data line; FALSE for a regex
1216  p          points to a byte string
1217  utf        true if UTF-8 (to be converted to UTF-16)
1218  len        number of bytes in the string (excluding trailing zero)
1219
1220Returns:     number of 16-bit data items used (excluding trailing zero)
1221             OR -1 if a UTF-8 string is malformed
1222             OR -2 if a value > 0x10ffff is encountered
1223             OR -3 if a value > 0xffff is encountered when not in UTF mode
1224*/
1225
1226static int
1227to16(int data, pcre_uint8 *p, int utf, int len)
1228{
1229pcre_uint16 *pp;
1230
1231if (buffer16_size < 2*len + 2)
1232  {
1233  if (buffer16 != NULL) free(buffer16);
1234  buffer16_size = 2*len + 2;
1235  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1236  if (buffer16 == NULL)
1237    {
1238    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1239    exit(1);
1240    }
1241  }
1242
1243pp = buffer16;
1244
1245if (!utf && !data)
1246  {
1247  while (len-- > 0) *pp++ = *p++;
1248  }
1249
1250else
1251  {
1252  int c = 0;
1253  while (len > 0)
1254    {
1255    int chlen = utf82ord(p, &c);
1256    if (chlen <= 0) return -1;
1257    if (c > 0x10ffff) return -2;
1258    p += chlen;
1259    len -= chlen;
1260    if (c < 0x10000) *pp++ = c; else
1261      {
1262      if (!utf) return -3;
1263      c -= 0x10000;
1264      *pp++ = 0xD800 | (c >> 10);
1265      *pp++ = 0xDC00 | (c & 0x3ff);
1266      }
1267    }
1268  }
1269
1270*pp = 0;
1271return pp - buffer16;
1272}
1273#endif
1274
1275
1276/*************************************************
1277*        Read or extend an input line            *
1278*************************************************/
1279
1280/* Input lines are read into buffer, but both patterns and data lines can be
1281continued over multiple input lines. In addition, if the buffer fills up, we
1282want to automatically expand it so as to be able to handle extremely large
1283lines that are needed for certain stress tests. When the input buffer is
1284expanded, the other two buffers must also be expanded likewise, and the
1285contents of pbuffer, which are a copy of the input for callouts, must be
1286preserved (for when expansion happens for a data line). This is not the most
1287optimal way of handling this, but hey, this is just a test program!
1288
1289Arguments:
1290  f            the file to read
1291  start        where in buffer to start (this *must* be within buffer)
1292  prompt       for stdin or readline()
1293
1294Returns:       pointer to the start of new data
1295               could be a copy of start, or could be moved
1296               NULL if no data read and EOF reached
1297*/
1298
1299static pcre_uint8 *
1300extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1301{
1302pcre_uint8 *here = start;
1303
1304for (;;)
1305  {
1306  size_t rlen = (size_t)(buffer_size - (here - buffer));
1307
1308  if (rlen > 1000)
1309    {
1310    int dlen;
1311
1312    /* If libreadline or libedit support is required, use readline() to read a
1313    line if the input is a terminal. Note that readline() removes the trailing
1314    newline, so we must put it back again, to be compatible with fgets(). */
1315
1316#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1317    if (isatty(fileno(f)))
1318      {
1319      size_t len;
1320      char *s = readline(prompt);
1321      if (s == NULL) return (here == start)? NULL : start;
1322      len = strlen(s);
1323      if (len > 0) add_history(s);
1324      if (len > rlen - 1) len = rlen - 1;
1325      memcpy(here, s, len);
1326      here[len] = '\n';
1327      here[len+1] = 0;
1328      free(s);
1329      }
1330    else
1331#endif
1332
1333    /* Read the next line by normal means, prompting if the file is stdin. */
1334
1335      {
1336      if (f == stdin) printf("%s", prompt);
1337      if (fgets((char *)here, rlen,  f) == NULL)
1338        return (here == start)? NULL : start;
1339      }
1340
1341    dlen = (int)strlen((char *)here);
1342    if (dlen > 0 && here[dlen - 1] == '\n') return start;
1343    here += dlen;
1344    }
1345
1346  else
1347    {
1348    int new_buffer_size = 2*buffer_size;
1349    pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1350    pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1351    pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1352
1353    if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1354      {
1355      fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1356      exit(1);
1357      }
1358
1359    memcpy(new_buffer, buffer, buffer_size);
1360    memcpy(new_pbuffer, pbuffer, buffer_size);
1361
1362    buffer_size = new_buffer_size;
1363
1364    start = new_buffer + (start - buffer);
1365    here = new_buffer + (here - buffer);
1366
1367    free(buffer);
1368    free(dbuffer);
1369    free(pbuffer);
1370
1371    buffer = new_buffer;
1372    dbuffer = new_dbuffer;
1373    pbuffer = new_pbuffer;
1374    }
1375  }
1376
1377return NULL;  /* Control never gets here */
1378}
1379
1380
1381
1382/*************************************************
1383*          Read number from string               *
1384*************************************************/
1385
1386/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1387around with conditional compilation, just do the job by hand. It is only used
1388for unpicking arguments, so just keep it simple.
1389
1390Arguments:
1391  str           string to be converted
1392  endptr        where to put the end pointer
1393
1394Returns:        the unsigned long
1395*/
1396
1397static int
1398get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1399{
1400int result = 0;
1401while(*str != 0 && isspace(*str)) str++;
1402while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1403*endptr = str;
1404return(result);
1405}
1406
1407
1408
1409/*************************************************
1410*             Print one character                *
1411*************************************************/
1412
1413/* Print a single character either literally, or as a hex escape. */
1414
1415static int pchar(int c, FILE *f)
1416{
1417if (PRINTOK(c))
1418  {
1419  if (f != NULL) fprintf(f, "%c", c);
1420  return 1;
1421  }
1422
1423if (c < 0x100)
1424  {
1425  if (use_utf)
1426    {
1427    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1428    return 6;
1429    }
1430  else
1431    {
1432    if (f != NULL) fprintf(f, "\\x%02x", c);
1433    return 4;
1434    }
1435  }
1436
1437if (f != NULL) fprintf(f, "\\x{%02x}", c);
1438return (c <= 0x000000ff)? 6 :
1439       (c <= 0x00000fff)? 7 :
1440       (c <= 0x0000ffff)? 8 :
1441       (c <= 0x000fffff)? 9 : 10;
1442}
1443
1444
1445
1446#ifdef SUPPORT_PCRE8
1447/*************************************************
1448*         Print 8-bit character string           *
1449*************************************************/
1450
1451/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1452If handed a NULL file, just counts chars without printing. */
1453
1454static int pchars(pcre_uint8 *p, int length, FILE *f)
1455{
1456int c = 0;
1457int yield = 0;
1458
1459if (length < 0)
1460  length = strlen((char *)p);
1461
1462while (length-- > 0)
1463  {
1464#if !defined NOUTF
1465  if (use_utf)
1466    {
1467    int rc = utf82ord(p, &c);
1468    if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1469      {
1470      length -= rc - 1;
1471      p += rc;
1472      yield += pchar(c, f);
1473      continue;
1474      }
1475    }
1476#endif
1477  c = *p++;
1478  yield += pchar(c, f);
1479  }
1480
1481return yield;
1482}
1483#endif
1484
1485
1486
1487#ifdef SUPPORT_PCRE16
1488/*************************************************
1489*    Find length of 0-terminated 16-bit string   *
1490*************************************************/
1491
1492static int strlen16(PCRE_SPTR16 p)
1493{
1494int len = 0;
1495while (*p++ != 0) len++;
1496return len;
1497}
1498#endif  /* SUPPORT_PCRE16 */
1499
1500
1501#ifdef SUPPORT_PCRE16
1502/*************************************************
1503*           Print 16-bit character string        *
1504*************************************************/
1505
1506/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1507If handed a NULL file, just counts chars without printing. */
1508
1509static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1510{
1511int yield = 0;
1512
1513if (length < 0)
1514  length = strlen16(p);
1515
1516while (length-- > 0)
1517  {
1518  int c = *p++ & 0xffff;
1519#if !defined NOUTF
1520  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1521    {
1522    int d = *p & 0xffff;
1523    if (d >= 0xDC00 && d < 0xDFFF)
1524      {
1525      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1526      length--;
1527      p++;
1528      }
1529    }
1530#endif
1531  yield += pchar(c, f);
1532  }
1533
1534return yield;
1535}
1536#endif  /* SUPPORT_PCRE16 */
1537
1538
1539
1540#ifdef SUPPORT_PCRE8
1541/*************************************************
1542*     Read a capture name (8-bit) and check it   *
1543*************************************************/
1544
1545static pcre_uint8 *
1546read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1547{
1548pcre_uint8 *npp = *pp;
1549while (isalnum(*p)) *npp++ = *p++;
1550*npp++ = 0;
1551*npp = 0;
1552if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1553  {
1554  fprintf(outfile, "no parentheses with name \"");
1555  PCHARSV(*pp, 0, -1, outfile);
1556  fprintf(outfile, "\"\n");
1557  }
1558
1559*pp = npp;
1560return p;
1561}
1562#endif  /* SUPPORT_PCRE8 */
1563
1564
1565
1566#ifdef SUPPORT_PCRE16
1567/*************************************************
1568*     Read a capture name (16-bit) and check it  *
1569*************************************************/
1570
1571/* Note that the text being read is 8-bit. */
1572
1573static pcre_uint8 *
1574read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1575{
1576pcre_uint16 *npp = *pp;
1577while (isalnum(*p)) *npp++ = *p++;
1578*npp++ = 0;
1579*npp = 0;
1580if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1581  {
1582  fprintf(outfile, "no parentheses with name \"");
1583  PCHARSV(*pp, 0, -1, outfile);
1584  fprintf(outfile, "\"\n");
1585  }
1586*pp = npp;
1587return p;
1588}
1589#endif  /* SUPPORT_PCRE16 */
1590
1591
1592
1593/*************************************************
1594*              Callout function                  *
1595*************************************************/
1596
1597/* Called from PCRE as a result of the (?C) item. We print out where we are in
1598the match. Yield zero unless more callouts than the fail count, or the callout
1599data is not zero. */
1600
1601static int callout(pcre_callout_block *cb)
1602{
1603FILE *f = (first_callout | callout_extra)? outfile : NULL;
1604int i, pre_start, post_start, subject_length;
1605
1606if (callout_extra)
1607  {
1608  fprintf(f, "Callout %d: last capture = %d\n",
1609    cb->callout_number, cb->capture_last);
1610
1611  for (i = 0; i < cb->capture_top * 2; i += 2)
1612    {
1613    if (cb->offset_vector[i] < 0)
1614      fprintf(f, "%2d: <unset>\n", i/2);
1615    else
1616      {
1617      fprintf(f, "%2d: ", i/2);
1618      PCHARSV(cb->subject, cb->offset_vector[i],
1619        cb->offset_vector[i+1] - cb->offset_vector[i], f);
1620      fprintf(f, "\n");
1621      }
1622    }
1623  }
1624
1625/* Re-print the subject in canonical form, the first time or if giving full
1626datails. On subsequent calls in the same match, we use pchars just to find the
1627printed lengths of the substrings. */
1628
1629if (f != NULL) fprintf(f, "--->");
1630
1631PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1632PCHARS(post_start, cb->subject, cb->start_match,
1633  cb->current_position - cb->start_match, f);
1634
1635PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1636
1637PCHARSV(cb->subject, cb->current_position,
1638  cb->subject_length - cb->current_position, f);
1639
1640if (f != NULL) fprintf(f, "\n");
1641
1642/* Always print appropriate indicators, with callout number if not already
1643shown. For automatic callouts, show the pattern offset. */
1644
1645if (cb->callout_number == 255)
1646  {
1647  fprintf(outfile, "%+3d ", cb->pattern_position);
1648  if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1649  }
1650else
1651  {
1652  if (callout_extra) fprintf(outfile, "    ");
1653    else fprintf(outfile, "%3d ", cb->callout_number);
1654  }
1655
1656for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1657fprintf(outfile, "^");
1658
1659if (post_start > 0)
1660  {
1661  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1662  fprintf(outfile, "^");
1663  }
1664
1665for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1666  fprintf(outfile, " ");
1667
1668fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1669  pbuffer + cb->pattern_position);
1670
1671fprintf(outfile, "\n");
1672first_callout = 0;
1673
1674if (cb->mark != last_callout_mark)
1675  {
1676  if (cb->mark == NULL)
1677    fprintf(outfile, "Latest Mark: <unset>\n");
1678  else
1679    {
1680    fprintf(outfile, "Latest Mark: ");
1681    PCHARSV(cb->mark, 0, -1, outfile);
1682    putc('\n', outfile);
1683    }
1684  last_callout_mark = cb->mark;
1685  }
1686
1687if (cb->callout_data != NULL)
1688  {
1689  int callout_data = *((int *)(cb->callout_data));
1690  if (callout_data != 0)
1691    {
1692    fprintf(outfile, "Callout data = %d\n", callout_data);
1693    return callout_data;
1694    }
1695  }
1696
1697return (cb->callout_number != callout_fail_id)? 0 :
1698       (++callout_count >= callout_fail_count)? 1 : 0;
1699}
1700
1701
1702/*************************************************
1703*            Local malloc functions              *
1704*************************************************/
1705
1706/* Alternative malloc function, to test functionality and save the size of a
1707compiled re, which is the first store request that pcre_compile() makes. The
1708show_malloc variable is set only during matching. */
1709
1710static void *new_malloc(size_t size)
1711{
1712void *block = malloc(size);
1713gotten_store = size;
1714if (first_gotten_store == 0) first_gotten_store = size;
1715if (show_malloc)
1716  fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1717return block;
1718}
1719
1720static void new_free(void *block)
1721{
1722if (show_malloc)
1723  fprintf(outfile, "free             %p\n", block);
1724free(block);
1725}
1726
1727/* For recursion malloc/free, to test stacking calls */
1728
1729static void *stack_malloc(size_t size)
1730{
1731void *block = malloc(size);
1732if (show_malloc)
1733  fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1734return block;
1735}
1736
1737static void stack_free(void *block)
1738{
1739if (show_malloc)
1740  fprintf(outfile, "stack_free       %p\n", block);
1741free(block);
1742}
1743
1744
1745/*************************************************
1746*          Call pcre_fullinfo()                  *
1747*************************************************/
1748
1749/* Get one piece of information from the pcre_fullinfo() function. When only
1750one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1751value, but the code is defensive.
1752
1753Arguments:
1754  re        compiled regex
1755  study     study data
1756  option    PCRE_INFO_xxx option
1757  ptr       where to put the data
1758
1759Returns:    0 when OK, < 0 on error
1760*/
1761
1762static int
1763new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1764{
1765int rc;
1766
1767if (use_pcre16)
1768#ifdef SUPPORT_PCRE16
1769  rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1770#else
1771  rc = PCRE_ERROR_BADMODE;
1772#endif
1773else
1774#ifdef SUPPORT_PCRE8
1775  rc = pcre_fullinfo(re, study, option, ptr);
1776#else
1777  rc = PCRE_ERROR_BADMODE;
1778#endif
1779
1780if (rc < 0)
1781  {
1782  fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1783    use_pcre16? "16" : "", option);
1784  if (rc == PCRE_ERROR_BADMODE)
1785    fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1786      "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1787  }
1788
1789return rc;
1790}
1791
1792
1793
1794/*************************************************
1795*             Swap byte functions                *
1796*************************************************/
1797
1798/* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1799value, respectively.
1800
1801Arguments:
1802  value        any number
1803
1804Returns:       the byte swapped value
1805*/
1806
1807static pcre_uint32
1808swap_uint32(pcre_uint32 value)
1809{
1810return ((value & 0x000000ff) << 24) |
1811       ((value & 0x0000ff00) <<  8) |
1812       ((value & 0x00ff0000) >>  8) |
1813       (value >> 24);
1814}
1815
1816static pcre_uint16
1817swap_uint16(pcre_uint16 value)
1818{
1819return (value >> 8) | (value << 8);
1820}
1821
1822
1823
1824/*************************************************
1825*        Flip bytes in a compiled pattern        *
1826*************************************************/
1827
1828/* This function is called if the 'F' option was present on a pattern that is
1829to be written to a file. We flip the bytes of all the integer fields in the
1830regex data block and the study block. In 16-bit mode this also flips relevant
1831bytes in the pattern itself. This is to make it possible to test PCRE's
1832ability to reload byte-flipped patterns, e.g. those compiled on a different
1833architecture. */
1834
1835static void
1836regexflip(pcre *ere, pcre_extra *extra)
1837{
1838REAL_PCRE *re = (REAL_PCRE *)ere;
1839#ifdef SUPPORT_PCRE16
1840int op;
1841pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1842int length = re->name_count * re->name_entry_size;
1843#ifdef SUPPORT_UTF
1844BOOL utf = (re->options & PCRE_UTF16) != 0;
1845BOOL utf16_char = FALSE;
1846#endif /* SUPPORT_UTF */
1847#endif /* SUPPORT_PCRE16 */
1848
1849/* Always flip the bytes in the main data block and study blocks. */
1850
1851re->magic_number = REVERSED_MAGIC_NUMBER;
1852re->size = swap_uint32(re->size);
1853re->options = swap_uint32(re->options);
1854re->flags = swap_uint16(re->flags);
1855re->top_bracket = swap_uint16(re->top_bracket);
1856re->top_backref = swap_uint16(re->top_backref);
1857re->first_char = swap_uint16(re->first_char);
1858re->req_char = swap_uint16(re->req_char);
1859re->name_table_offset = swap_uint16(re->name_table_offset);
1860re->name_entry_size = swap_uint16(re->name_entry_size);
1861re->name_count = swap_uint16(re->name_count);
1862
1863if (extra != NULL)
1864  {
1865  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1866  rsd->size = swap_uint32(rsd->size);
1867  rsd->flags = swap_uint32(rsd->flags);
1868  rsd->minlength = swap_uint32(rsd->minlength);
1869  }
1870
1871/* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1872in the name table, if present, and then in the pattern itself. */
1873
1874#ifdef SUPPORT_PCRE16
1875if (!use_pcre16) return;
1876
1877while(TRUE)
1878  {
1879  /* Swap previous characters. */
1880  while (length-- > 0)
1881    {
1882    *ptr = swap_uint16(*ptr);
1883    ptr++;
1884    }
1885#ifdef SUPPORT_UTF
1886  if (utf16_char)
1887    {
1888    if ((ptr[-1] & 0xfc00) == 0xd800)
1889      {
1890      /* We know that there is only one extra character in UTF-16. */
1891      *ptr = swap_uint16(*ptr);
1892      ptr++;
1893      }
1894    }
1895  utf16_char = FALSE;
1896#endif /* SUPPORT_UTF */
1897
1898  /* Get next opcode. */
1899
1900  length = 0;
1901  op = *ptr;
1902  *ptr++ = swap_uint16(op);
1903
1904  switch (op)
1905    {
1906    case OP_END:
1907    return;
1908
1909#ifdef SUPPORT_UTF
1910    case OP_CHAR:
1911    case OP_CHARI:
1912    case OP_NOT:
1913    case OP_NOTI:
1914    case OP_STAR:
1915    case OP_MINSTAR:
1916    case OP_PLUS:
1917    case OP_MINPLUS:
1918    case OP_QUERY:
1919    case OP_MINQUERY:
1920    case OP_UPTO:
1921    case OP_MINUPTO:
1922    case OP_EXACT:
1923    case OP_POSSTAR:
1924    case OP_POSPLUS:
1925    case OP_POSQUERY:
1926    case OP_POSUPTO:
1927    case OP_STARI:
1928    case OP_MINSTARI:
1929    case OP_PLUSI:
1930    case OP_MINPLUSI:
1931    case OP_QUERYI:
1932    case OP_MINQUERYI:
1933    case OP_UPTOI:
1934    case OP_MINUPTOI:
1935    case OP_EXACTI:
1936    case OP_POSSTARI:
1937    case OP_POSPLUSI:
1938    case OP_POSQUERYI:
1939    case OP_POSUPTOI:
1940    case OP_NOTSTAR:
1941    case OP_NOTMINSTAR:
1942    case OP_NOTPLUS:
1943    case OP_NOTMINPLUS:
1944    case OP_NOTQUERY:
1945    case OP_NOTMINQUERY:
1946    case OP_NOTUPTO:
1947    case OP_NOTMINUPTO:
1948    case OP_NOTEXACT:
1949    case OP_NOTPOSSTAR:
1950    case OP_NOTPOSPLUS:
1951    case OP_NOTPOSQUERY:
1952    case OP_NOTPOSUPTO:
1953    case OP_NOTSTARI:
1954    case OP_NOTMINSTARI:
1955    case OP_NOTPLUSI:
1956    case OP_NOTMINPLUSI:
1957    case OP_NOTQUERYI:
1958    case OP_NOTMINQUERYI:
1959    case OP_NOTUPTOI:
1960    case OP_NOTMINUPTOI:
1961    case OP_NOTEXACTI:
1962    case OP_NOTPOSSTARI:
1963    case OP_NOTPOSPLUSI:
1964    case OP_NOTPOSQUERYI:
1965    case OP_NOTPOSUPTOI:
1966    if (utf) utf16_char = TRUE;
1967#endif
1968    /* Fall through. */
1969
1970    default:
1971    length = OP_lengths16[op] - 1;
1972    break;
1973
1974    case OP_CLASS:
1975    case OP_NCLASS:
1976    /* Skip the character bit map. */
1977    ptr += 32/sizeof(pcre_uint16);
1978    length = 0;
1979    break;
1980
1981    case OP_XCLASS:
1982    /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1983    if (LINK_SIZE > 1)
1984      length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1985        - (1 + LINK_SIZE + 1));
1986    else
1987      length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1988
1989    /* Reverse the size of the XCLASS instance. */
1990    *ptr = swap_uint16(*ptr);
1991    ptr++;
1992    if (LINK_SIZE > 1)
1993      {
1994      *ptr = swap_uint16(*ptr);
1995      ptr++;
1996      }
1997
1998    op = *ptr;
1999    *ptr = swap_uint16(op);
2000    ptr++;
2001    if ((op & XCL_MAP) != 0)
2002      {
2003      /* Skip the character bit map. */
2004      ptr += 32/sizeof(pcre_uint16);
2005      length -= 32/sizeof(pcre_uint16);
2006      }
2007    break;
2008    }
2009  }
2010/* Control should never reach here in 16 bit mode. */
2011#endif /* SUPPORT_PCRE16 */
2012}
2013
2014
2015
2016/*************************************************
2017*        Check match or recursion limit          *
2018*************************************************/
2019
2020static int
2021check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2022  int start_offset, int options, int *use_offsets, int use_size_offsets,
2023  int flag, unsigned long int *limit, int errnumber, const char *msg)
2024{
2025int count;
2026int min = 0;
2027int mid = 64;
2028int max = -1;
2029
2030extra->flags |= flag;
2031
2032for (;;)
2033  {
2034  *limit = mid;
2035
2036  PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2037    use_offsets, use_size_offsets);
2038
2039  if (count == errnumber)
2040    {
2041    /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2042    min = mid;
2043    mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2044    }
2045
2046  else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2047                         count == PCRE_ERROR_PARTIAL)
2048    {
2049    if (mid == min + 1)
2050      {
2051      fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2052      break;
2053      }
2054    /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2055    max = mid;
2056    mid = (min + mid)/2;
2057    }
2058  else break;    /* Some other error */
2059  }
2060
2061extra->flags &= ~flag;
2062return count;
2063}
2064
2065
2066
2067/*************************************************
2068*         Case-independent strncmp() function    *
2069*************************************************/
2070
2071/*
2072Arguments:
2073  s         first string
2074  t         second string
2075  n         number of characters to compare
2076
2077Returns:    < 0, = 0, or > 0, according to the comparison
2078*/
2079
2080static int
2081strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2082{
2083while (n--)
2084  {
2085  int c = tolower(*s++) - tolower(*t++);
2086  if (c) return c;
2087  }
2088return 0;
2089}
2090
2091
2092
2093/*************************************************
2094*         Check newline indicator                *
2095*************************************************/
2096
2097/* This is used both at compile and run-time to check for <xxx> escapes. Print
2098a message and return 0 if there is no match.
2099
2100Arguments:
2101  p           points after the leading '<'
2102  f           file for error message
2103
2104Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2105*/
2106
2107static int
2108check_newline(pcre_uint8 *p, FILE *f)
2109{
2110if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2111if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2112if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2113if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2114if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2115if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2116if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2117fprintf(f, "Unknown newline type at: <%s\n", p);
2118return 0;
2119}
2120
2121
2122
2123/*************************************************
2124*             Usage function                     *
2125*************************************************/
2126
/* Writes the command-line help text to stdout. Lines for optional features
are included only when the corresponding build-time macro allows them. The
text is output in several pieces to keep each string literal short. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs("  -16      use the 16-bit library\n", stdout);
#endif

fputs("  -b       show compiled code\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -C arg   show a specific compile-time option\n"
      "           and exit with its value. The arg can be:\n", stdout);

fputs("     linksize     internal link size [2, 3, 4]\n"
      "     pcre8        8 bit library support enabled [0, 1]\n"
      "     pcre16       16 bit library support enabled [0, 1]\n"
      "     utf          Unicode Transformation Format supported [0, 1]\n",
      stdout);

fputs("     ucp          Unicode Properties supported [0, 1]\n"
      "     jit          Just-in-time compiler supported [0, 1]\n"
      "     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n", stdout);

fputs("  -s       force each pattern to be studied at basic level\n"
      "  -s+      force each pattern to be studied, using JIT if available\n"
      "  -s++     ditto, verifying when JIT was actually used\n"
      "  -s+n     force each pattern to be studied, using JIT if available,\n"
      "             where 1 <= n <= 7 selects JIT options\n"
      "  -s++n    ditto, verifying when JIT was actually used\n", stdout);

fputs("  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
2177
2178
2179
2180/*************************************************
2181*                Main Program                    *
2182*************************************************/
2183
2184/* Read lines from named file or stdin and write to named file or stdout; lines
2185consist of a regular expression, in delimiters and optionally followed by
2186options, followed by a set of test data, terminated by an empty line. */
2187
2188int main(int argc, char **argv)
2189{
2190FILE *infile = stdin;
2191const char *version;
2192int options = 0;
2193int study_options = 0;
2194int default_find_match_limit = FALSE;
2195int op = 1;
2196int timeit = 0;
2197int timeitm = 0;
2198int showinfo = 0;
2199int showstore = 0;
2200int force_study = -1;
2201int force_study_options = 0;
2202int quiet = 0;
2203int size_offsets = 45;
2204int size_offsets_max;
2205int *offsets = NULL;
2206int debug = 0;
2207int done = 0;
2208int all_use_dfa = 0;
2209int verify_jit = 0;
2210int yield = 0;
2211int stack_size;
2212
2213#if !defined NOPOSIX
2214int posix = 0;
2215#endif
2216#if !defined NODFA
2217int *dfa_workspace = NULL;
2218#endif
2219
2220pcre_jit_stack *jit_stack = NULL;
2221
2222/* These vectors store, end-to-end, a list of zero-terminated captured
2223substring names, each list itself being terminated by an empty name. Assume
2224that 1024 is plenty long enough for the few names we'll be testing. It is
2225easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2226for the actual memory, to ensure alignment. */
2227
2228pcre_uint16 copynames[1024];
2229pcre_uint16 getnames[1024];
2230
2231#ifdef SUPPORT_PCRE16
2232pcre_uint16 *cn16ptr;
2233pcre_uint16 *gn16ptr;
2234#endif
2235
2236#ifdef SUPPORT_PCRE8
2237pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2238pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2239pcre_uint8 *cn8ptr;
2240pcre_uint8 *gn8ptr;
2241#endif
2242
2243/* Get buffers from malloc() so that valgrind will check their misuse when
2244debugging. They grow automatically when very long lines are read. The 16-bit
2245buffer (buffer16) is obtained only if needed. */
2246
2247buffer = (pcre_uint8 *)malloc(buffer_size);
2248dbuffer = (pcre_uint8 *)malloc(buffer_size);
2249pbuffer = (pcre_uint8 *)malloc(buffer_size);
2250
2251/* The outfile variable is static so that new_malloc can use it. */
2252
2253outfile = stdout;
2254
2255/* The following  _setmode() stuff is some Windows magic that tells its runtime
2256library to translate CRLF into a single LF character. At least, that's what
2257I've been told: never having used Windows I take this all on trust. Originally
2258it set 0x8000, but then I was advised that _O_BINARY was better. */
2259
2260#if defined(_WIN32) || defined(WIN32)
2261_setmode( _fileno( stdout ), _O_BINARY );
2262#endif
2263
2264/* Get the version number: both pcre_version() and pcre16_version() give the
2265same answer. We just need to ensure that we call one that is available. */
2266
2267#ifdef SUPPORT_PCRE8
2268version = pcre_version();
2269#else
2270version = pcre16_version();
2271#endif
2272
2273/* Scan options */
2274
2275while (argc > 1 && argv[op][0] == '-')
2276  {
2277  pcre_uint8 *endptr;
2278  char *arg = argv[op];
2279
2280  if (strcmp(arg, "-m") == 0) showstore = 1;
2281  else if (strcmp(arg, "-s") == 0) force_study = 0;
2282
2283  else if (strncmp(arg, "-s+", 3) == 0)
2284    {
2285    arg += 3;
2286    if (*arg == '+') { arg++; verify_jit = TRUE; }
2287    force_study = 1;
2288    if (*arg == 0)
2289      force_study_options = jit_study_bits[6];
2290    else if (*arg >= '1' && *arg <= '7')
2291      force_study_options = jit_study_bits[*arg - '1'];
2292    else goto BAD_ARG;
2293    }
2294  else if (strcmp(arg, "-16") == 0)
2295    {
2296#ifdef SUPPORT_PCRE16
2297    use_pcre16 = 1;
2298#else
2299    printf("** This version of PCRE was built without 16-bit support\n");
2300    exit(1);
2301#endif
2302    }
2303  else if (strcmp(arg, "-q") == 0) quiet = 1;
2304  else if (strcmp(arg, "-b") == 0) debug = 1;
2305  else if (strcmp(arg, "-i") == 0) showinfo = 1;
2306  else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2307  else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2308#if !defined NODFA
2309  else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2310#endif
2311  else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2312      ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2313        *endptr == 0))
2314    {
2315    op++;
2316    argc--;
2317    }
2318  else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2319    {
2320    int both = arg[2] == 0;
2321    int temp;
2322    if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2323                     *endptr == 0))
2324      {
2325      timeitm = temp;
2326      op++;
2327      argc--;
2328      }
2329    else timeitm = LOOPREPEAT;
2330    if (both) timeit = timeitm;
2331    }
2332  else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2333      ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2334        *endptr == 0))
2335    {
2336#if defined(_WIN32) || defined(WIN32) || defined(__minix)
2337    printf("PCRE: -S not supported on this OS\n");
2338    exit(1);
2339#else
2340    int rc;
2341    struct rlimit rlim;
2342    getrlimit(RLIMIT_STACK, &rlim);
2343    rlim.rlim_cur = stack_size * 1024 * 1024;
2344    rc = setrlimit(RLIMIT_STACK, &rlim);
2345    if (rc != 0)
2346      {
2347    printf("PCRE: setrlimit() failed with error %d\n", rc);
2348    exit(1);
2349      }
2350    op++;
2351    argc--;
2352#endif
2353    }
2354#if !defined NOPOSIX
2355  else if (strcmp(arg, "-p") == 0) posix = 1;
2356#endif
2357  else if (strcmp(arg, "-C") == 0)
2358    {
2359    int rc;
2360    unsigned long int lrc;
2361
2362    if (argc > 2)
2363      {
2364      if (strcmp(argv[op + 1], "linksize") == 0)
2365        {
2366        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2367        printf("%d\n", rc);
2368        yield = rc;
2369        goto EXIT;
2370        }
2371      if (strcmp(argv[op + 1], "pcre8") == 0)
2372        {
2373#ifdef SUPPORT_PCRE8
2374        printf("1\n");
2375        yield = 1;
2376#else
2377        printf("0\n");
2378        yield = 0;
2379#endif
2380        goto EXIT;
2381        }
2382      if (strcmp(argv[op + 1], "pcre16") == 0)
2383        {
2384#ifdef SUPPORT_PCRE16
2385        printf("1\n");
2386        yield = 1;
2387#else
2388        printf("0\n");
2389        yield = 0;
2390#endif
2391        goto EXIT;
2392        }
2393      if (strcmp(argv[op + 1], "utf") == 0)
2394        {
2395#ifdef SUPPORT_PCRE8
2396        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2397        printf("%d\n", rc);
2398        yield = rc;
2399#else
2400        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2401        printf("%d\n", rc);
2402        yield = rc;
2403#endif
2404        goto EXIT;
2405        }
2406      if (strcmp(argv[op + 1], "ucp") == 0)
2407        {
2408        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2409        printf("%d\n", rc);
2410        yield = rc;
2411        goto EXIT;
2412        }
2413      if (strcmp(argv[op + 1], "jit") == 0)
2414        {
2415        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2416        printf("%d\n", rc);
2417        yield = rc;
2418        goto EXIT;
2419        }
2420      if (strcmp(argv[op + 1], "newline") == 0)
2421        {
2422        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2423        /* Note that these values are always the ASCII values, even
2424        in EBCDIC environments. CR is 13 and NL is 10. */
2425        printf("%s\n", (rc == 13)? "CR" :
2426          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2427          (rc == -2)? "ANYCRLF" :
2428          (rc == -1)? "ANY" : "???");
2429        goto EXIT;
2430        }
2431      printf("Unknown -C option: %s\n", argv[op + 1]);
2432      goto EXIT;
2433      }
2434
2435    printf("PCRE version %s\n", version);
2436    printf("Compiled with\n");
2437
2438/* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2439are set, either both UTFs are supported or both are not supported. */
2440
2441#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2442    printf("  8-bit and 16-bit support\n");
2443    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2444    if (rc)
2445      printf("  UTF-8 and UTF-16 support\n");
2446    else
2447      printf("  No UTF-8 or UTF-16 support\n");
2448#elif defined SUPPORT_PCRE8
2449    printf("  8-bit support only\n");
2450    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2451    printf("  %sUTF-8 support\n", rc? "" : "No ");
2452#else
2453    printf("  16-bit support only\n");
2454    (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2455    printf("  %sUTF-16 support\n", rc? "" : "No ");
2456#endif
2457
2458    (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2459    printf("  %sUnicode properties support\n", rc? "" : "No ");
2460    (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2461    if (rc)
2462      {
2463      const char *arch;
2464      (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2465      printf("  Just-in-time compiler support: %s\n", arch);
2466      }
2467    else
2468      printf("  No just-in-time compiler support\n");
2469    (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2470    /* Note that these values are always the ASCII values, even
2471    in EBCDIC environments. CR is 13 and NL is 10. */
2472    printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2473      (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2474      (rc == -2)? "ANYCRLF" :
2475      (rc == -1)? "ANY" : "???");
2476    (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2477    printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2478                                     "all Unicode newlines");
2479    (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2480    printf("  Internal link size = %d\n", rc);
2481    (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2482    printf("  POSIX malloc threshold = %d\n", rc);
2483    (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2484    printf("  Default match limit = %ld\n", lrc);
2485    (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2486    printf("  Default recursion depth limit = %ld\n", lrc);
2487    (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2488    printf("  Match recursion uses %s", rc? "stack" : "heap");
2489    if (showstore)
2490      {
2491      PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2492      printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2493      }
2494    printf("\n");
2495    goto EXIT;
2496    }
2497  else if (strcmp(arg, "-help") == 0 ||
2498           strcmp(arg, "--help") == 0)
2499    {
2500    usage();
2501    goto EXIT;
2502    }
2503  else
2504    {
2505    BAD_ARG:
2506    printf("** Unknown or malformed option %s\n", arg);
2507    usage();
2508    yield = 1;
2509    goto EXIT;
2510    }
2511  op++;
2512  argc--;
2513  }
2514
2515/* Get the store for the offsets vector, and remember what it was */
2516
2517size_offsets_max = size_offsets;
2518offsets = (int *)malloc(size_offsets_max * sizeof(int));
2519if (offsets == NULL)
2520  {
2521  printf("** Failed to get %d bytes of memory for offsets vector\n",
2522    (int)(size_offsets_max * sizeof(int)));
2523  yield = 1;
2524  goto EXIT;
2525  }
2526
2527/* Sort out the input and output files */
2528
2529if (argc > 1)
2530  {
2531  infile = fopen(argv[op], INPUT_MODE);
2532  if (infile == NULL)
2533    {
2534    printf("** Failed to open %s\n", argv[op]);
2535    yield = 1;
2536    goto EXIT;
2537    }
2538  }
2539
2540if (argc > 2)
2541  {
2542  outfile = fopen(argv[op+1], OUTPUT_MODE);
2543  if (outfile == NULL)
2544    {
2545    printf("** Failed to open %s\n", argv[op+1]);
2546    yield = 1;
2547    goto EXIT;
2548    }
2549  }
2550
2551/* Set alternative malloc function */
2552
2553#ifdef SUPPORT_PCRE8
2554pcre_malloc = new_malloc;
2555pcre_free = new_free;
2556pcre_stack_malloc = stack_malloc;
2557pcre_stack_free = stack_free;
2558#endif
2559
2560#ifdef SUPPORT_PCRE16
2561pcre16_malloc = new_malloc;
2562pcre16_free = new_free;
2563pcre16_stack_malloc = stack_malloc;
2564pcre16_stack_free = stack_free;
2565#endif
2566
2567/* Heading line unless quiet, then prompt for first regex if stdin */
2568
2569if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2570
2571/* Main loop */
2572
2573while (!done)
2574  {
2575  pcre *re = NULL;
2576  pcre_extra *extra = NULL;
2577
2578#if !defined NOPOSIX  /* There are still compilers that require no indent */
2579  regex_t preg;
2580  int do_posix = 0;
2581#endif
2582
2583  const char *error;
2584  pcre_uint8 *markptr;
2585  pcre_uint8 *p, *pp, *ppp;
2586  pcre_uint8 *to_file = NULL;
2587  const pcre_uint8 *tables = NULL;
2588  unsigned long int get_options;
2589  unsigned long int true_size, true_study_size = 0;
2590  size_t size, regex_gotten_store;
2591  int do_allcaps = 0;
2592  int do_mark = 0;
2593  int do_study = 0;
2594  int no_force_study = 0;
2595  int do_debug = debug;
2596  int do_G = 0;
2597  int do_g = 0;
2598  int do_showinfo = showinfo;
2599  int do_showrest = 0;
2600  int do_showcaprest = 0;
2601  int do_flip = 0;
2602  int erroroffset, len, delimiter, poffset;
2603
2604#if !defined NODFA
2605  int dfa_matched = 0;
2606#endif
2607
2608  use_utf = 0;
2609  debug_lengths = 1;
2610
2611  if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
2612  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2613  fflush(outfile);
2614
2615  p = buffer;
2616  while (isspace(*p)) p++;
2617  if (*p == 0) continue;
2618
2619  /* See if the pattern is to be loaded pre-compiled from a file. */
2620
2621  if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2622    {
2623    pcre_uint32 magic;
2624    pcre_uint8 sbuf[8];
2625    FILE *f;
2626
2627    p++;
2628    if (*p == '!')
2629      {
2630      do_debug = TRUE;
2631      do_showinfo = TRUE;
2632      p++;
2633      }
2634
2635    pp = p + (int)strlen((char *)p);
2636    while (isspace(pp[-1])) pp--;
2637    *pp = 0;
2638
2639    f = fopen((char *)p, "rb");
2640    if (f == NULL)
2641      {
2642      fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2643      continue;
2644      }
2645
2646    first_gotten_store = 0;
2647    if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2648
2649    true_size =
2650      (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2651    true_study_size =
2652      (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2653
2654    re = (pcre *)new_malloc(true_size);
2655    regex_gotten_store = first_gotten_store;
2656
2657    if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2658
2659    magic = ((REAL_PCRE *)re)->magic_number;
2660    if (magic != MAGIC_NUMBER)
2661      {
2662      if (swap_uint32(magic) == MAGIC_NUMBER)
2663        {
2664        do_flip = 1;
2665        }
2666      else
2667        {
2668        fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2669        fclose(f);
2670        continue;
2671        }
2672      }
2673
2674    /* We hide the byte-invert info for little and big endian tests. */
2675    fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2676      do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2677
2678    /* Now see if there is any following study data. */
2679
2680    if (true_study_size != 0)
2681      {
2682      pcre_study_data *psd;
2683
2684      extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2685      extra->flags = PCRE_EXTRA_STUDY_DATA;
2686
2687      psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2688      extra->study_data = psd;
2689
2690      if (fread(psd, 1, true_study_size, f) != true_study_size)
2691        {
2692        FAIL_READ:
2693        fprintf(outfile, "Failed to read data from %s\n", p);
2694        if (extra != NULL)
2695          {
2696          PCRE_FREE_STUDY(extra);
2697          }
2698        if (re != NULL) new_free(re);
2699        fclose(f);
2700        continue;
2701        }
2702      fprintf(outfile, "Study data loaded from %s\n", p);
2703      do_study = 1;     /* To get the data output if requested */
2704      }
2705    else fprintf(outfile, "No study data\n");
2706
2707    /* Flip the necessary bytes. */
2708    if (do_flip)
2709      {
2710      int rc;
2711      PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2712      if (rc == PCRE_ERROR_BADMODE)
2713        {
2714        /* Simulate the result of the function call below. */
2715        fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2716          use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2717        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2718          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2719        continue;
2720        }
2721      }
2722
2723    /* Need to know if UTF-8 for printing data strings. */
2724
2725    if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2726    use_utf = (get_options & PCRE_UTF8) != 0;
2727
2728    fclose(f);
2729    goto SHOW_INFO;
2730    }
2731
2732  /* In-line pattern (the usual case). Get the delimiter and seek the end of
2733  the pattern; if it isn't complete, read more. */
2734
2735  delimiter = *p++;
2736
2737  if (isalnum(delimiter) || delimiter == '\\')
2738    {
2739    fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2740    goto SKIP_DATA;
2741    }
2742
2743  pp = p;
2744  poffset = (int)(p - buffer);
2745
2746  for(;;)
2747    {
2748    while (*pp != 0)
2749      {
2750      if (*pp == '\\' && pp[1] != 0) pp++;
2751        else if (*pp == delimiter) break;
2752      pp++;
2753      }
2754    if (*pp != 0) break;
2755    if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
2756      {
2757      fprintf(outfile, "** Unexpected EOF\n");
2758      done = 1;
2759      goto CONTINUE;
2760      }
2761    if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2762    }
2763
2764  /* The buffer may have moved while being extended; reset the start of data
2765  pointer to the correct relative point in the buffer. */
2766
2767  p = buffer + poffset;
2768
2769  /* If the first character after the delimiter is backslash, make
2770  the pattern end with backslash. This is purely to provide a way
2771  of testing for the error message when a pattern ends with backslash. */
2772
2773  if (pp[1] == '\\') *pp++ = '\\';
2774
2775  /* Terminate the pattern at the delimiter, and save a copy of the pattern
2776  for callouts. */
2777
2778  *pp++ = 0;
2779  strcpy((char *)pbuffer, (char *)p);
2780
2781  /* Look for options after final delimiter */
2782
2783  options = 0;
2784  study_options = 0;
2785  log_store = showstore;  /* default from command line */
2786
2787  while (*pp != 0)
2788    {
2789    switch (*pp++)
2790      {
2791      case 'f': options |= PCRE_FIRSTLINE; break;
2792      case 'g': do_g = 1; break;
2793      case 'i': options |= PCRE_CASELESS; break;
2794      case 'm': options |= PCRE_MULTILINE; break;
2795      case 's': options |= PCRE_DOTALL; break;
2796      case 'x': options |= PCRE_EXTENDED; break;
2797
2798      case '+':
2799      if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2800      break;
2801
2802      case '=': do_allcaps = 1; break;
2803      case 'A': options |= PCRE_ANCHORED; break;
2804      case 'B': do_debug = 1; break;
2805      case 'C': options |= PCRE_AUTO_CALLOUT; break;
2806      case 'D': do_debug = do_showinfo = 1; break;
2807      case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2808      case 'F': do_flip = 1; break;
2809      case 'G': do_G = 1; break;
2810      case 'I': do_showinfo = 1; break;
2811      case 'J': options |= PCRE_DUPNAMES; break;
2812      case 'K': do_mark = 1; break;
2813      case 'M': log_store = 1; break;
2814      case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2815
2816#if !defined NOPOSIX
2817      case 'P': do_posix = 1; break;
2818#endif
2819
2820      case 'S':
2821      if (do_study == 0)
2822        {
2823        do_study = 1;
2824        if (*pp == '+')
2825          {
2826          if (*(++pp) == '+')
2827            {
2828            verify_jit = TRUE;
2829            pp++;
2830            }
2831          if (*pp >= '1' && *pp <= '7')
2832            study_options |= jit_study_bits[*pp++ - '1'];
2833          else
2834            study_options |= jit_study_bits[6];
2835          }
2836        }
2837      else
2838        {
2839        do_study = 0;
2840        no_force_study = 1;
2841        }
2842      break;
2843
2844      case 'U': options |= PCRE_UNGREEDY; break;
2845      case 'W': options |= PCRE_UCP; break;
2846      case 'X': options |= PCRE_EXTRA; break;
2847      case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2848      case 'Z': debug_lengths = 0; break;
2849      case '8': options |= PCRE_UTF8; use_utf = 1; break;
2850      case '?': options |= PCRE_NO_UTF8_CHECK; break;
2851
2852      case 'T':
2853      switch (*pp++)
2854        {
2855        case '0': tables = tables0; break;
2856        case '1': tables = tables1; break;
2857
2858        case '\r':
2859        case '\n':
2860        case ' ':
2861        case 0:
2862        fprintf(outfile, "** Missing table number after /T\n");
2863        goto SKIP_DATA;
2864
2865        default:
2866        fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2867        goto SKIP_DATA;
2868        }
2869      break;
2870
2871      case 'L':
2872      ppp = pp;
2873      /* The '\r' test here is so that it works on Windows. */
2874      /* The '0' test is just in case this is an unterminated line. */
2875      while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2876      *ppp = 0;
2877      if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2878        {
2879        fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2880        goto SKIP_DATA;
2881        }
2882      locale_set = 1;
2883      tables = PCRE_MAKETABLES;
2884      pp = ppp;
2885      break;
2886
2887      case '>':
2888      to_file = pp;
2889      while (*pp != 0) pp++;
2890      while (isspace(pp[-1])) pp--;
2891      *pp = 0;
2892      break;
2893
2894      case '<':
2895        {
2896        if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2897          {
2898          options |= PCRE_JAVASCRIPT_COMPAT;
2899          pp += 3;
2900          }
2901        else
2902          {
2903          int x = check_newline(pp, outfile);
2904          if (x == 0) goto SKIP_DATA;
2905          options |= x;
2906          while (*pp++ != '>');
2907          }
2908        }
2909      break;
2910
2911      case '\r':                      /* So that it works in Windows */
2912      case '\n':
2913      case ' ':
2914      break;
2915
2916      default:
2917      fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2918      goto SKIP_DATA;
2919      }
2920    }
2921
2922  /* Handle compiling via the POSIX interface, which doesn't support the
2923  timing, showing, or debugging options, nor the ability to pass over
2924  local character tables. Neither does it have 16-bit support. */
2925
2926#if !defined NOPOSIX
2927  if (posix || do_posix)
2928    {
2929    int rc;
2930    int cflags = 0;
2931
2932    if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2933    if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2934    if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2935    if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2936    if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2937    if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2938    if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2939
2940    first_gotten_store = 0;
2941    rc = regcomp(&preg, (char *)p, cflags);
2942
2943    /* Compilation failed; go back for another re, skipping to blank line
2944    if non-interactive. */
2945
2946    if (rc != 0)
2947      {
2948      (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2949      fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2950      goto SKIP_DATA;
2951      }
2952    }
2953
2954  /* Handle compiling via the native interface */
2955
2956  else
2957#endif  /* !defined NOPOSIX */
2958
2959    {
2960    /* In 16-bit mode, convert the input. */
2961
2962#ifdef SUPPORT_PCRE16
2963    if (use_pcre16)
2964      {
2965      switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2966        {
2967        case -1:
2968        fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2969          "converted to UTF-16\n");
2970        goto SKIP_DATA;
2971
2972        case -2:
2973        fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2974          "cannot be converted to UTF-16\n");
2975        goto SKIP_DATA;
2976
2977        case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2978        fprintf(outfile, "**Failed: character value greater than 0xffff "
2979          "cannot be converted to 16-bit in non-UTF mode\n");
2980        goto SKIP_DATA;
2981
2982        default:
2983        break;
2984        }
2985      p = (pcre_uint8 *)buffer16;
2986      }
2987#endif
2988
2989    /* Compile many times when timing */
2990
2991    if (timeit > 0)
2992      {
2993      register int i;
2994      clock_t time_taken;
2995      clock_t start_time = clock();
2996      for (i = 0; i < timeit; i++)
2997        {
2998        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2999        if (re != NULL) free(re);
3000        }
3001      time_taken = clock() - start_time;
3002      fprintf(outfile, "Compile time %.4f milliseconds\n",
3003        (((double)time_taken * 1000.0) / (double)timeit) /
3004          (double)CLOCKS_PER_SEC);
3005      }
3006
3007    first_gotten_store = 0;
3008    PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3009
3010    /* Compilation failed; go back for another re, skipping to blank line
3011    if non-interactive. */
3012
3013    if (re == NULL)
3014      {
3015      fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
3016      SKIP_DATA:
3017      if (infile != stdin)
3018        {
3019        for (;;)
3020          {
3021          if (extend_inputline(infile, buffer, NULL) == NULL)
3022            {
3023            done = 1;
3024            goto CONTINUE;
3025            }
3026          len = (int)strlen((char *)buffer);
3027          while (len > 0 && isspace(buffer[len-1])) len--;
3028          if (len == 0) break;
3029          }
3030        fprintf(outfile, "\n");
3031        }
3032      goto CONTINUE;
3033      }
3034
3035    /* Compilation succeeded. It is now possible to set the UTF-8 option from
3036    within the regex; check for this so that we know how to process the data
3037    lines. */
3038
3039    if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3040      goto SKIP_DATA;
3041    if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3042
3043    /* Extract the size for possible writing before possibly flipping it,
3044    and remember the store that was got. */
3045
3046    true_size = ((REAL_PCRE *)re)->size;
3047    regex_gotten_store = first_gotten_store;
3048
3049    /* Output code size information if requested */
3050
3051    if (log_store)
3052      fprintf(outfile, "Memory allocation (code space): %d\n",
3053        (int)(first_gotten_store -
3054              sizeof(REAL_PCRE) -
3055              ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3056
3057    /* If -s or /S was present, study the regex to generate additional info to
3058    help with the matching, unless the pattern has the SS option, which
3059    suppresses the effect of /S (used for a few test patterns where studying is
3060    never sensible). */
3061
3062    if (do_study || (force_study >= 0 && !no_force_study))
3063      {
3064      if (timeit > 0)
3065        {
3066        register int i;
3067        clock_t time_taken;
3068        clock_t start_time = clock();
3069        for (i = 0; i < timeit; i++)
3070          {
3071          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3072          }
3073        time_taken = clock() - start_time;
3074        if (extra != NULL)
3075          {
3076          PCRE_FREE_STUDY(extra);
3077          }
3078        fprintf(outfile, "  Study time %.4f milliseconds\n",
3079          (((double)time_taken * 1000.0) / (double)timeit) /
3080            (double)CLOCKS_PER_SEC);
3081        }
3082      PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3083      if (error != NULL)
3084        fprintf(outfile, "Failed to study: %s\n", error);
3085      else if (extra != NULL)
3086        {
3087        true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3088        if (log_store)
3089          {
3090          size_t jitsize;
3091          if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3092              jitsize != 0)
3093            fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3094          }
3095        }
3096      }
3097
3098    /* If /K was present, we set up for handling MARK data. */
3099
3100    if (do_mark)
3101      {
3102      if (extra == NULL)
3103        {
3104        extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3105        extra->flags = 0;
3106        }
3107      extra->mark = &markptr;
3108      extra->flags |= PCRE_EXTRA_MARK;
3109      }
3110
3111    /* Extract and display information from the compiled data if required. */
3112
3113    SHOW_INFO:
3114
3115    if (do_debug)
3116      {
3117      fprintf(outfile, "------------------------------------------------------------------\n");
3118      PCRE_PRINTINT(re, outfile, debug_lengths);
3119      }
3120
3121    /* We already have the options in get_options (see above) */
3122
3123    if (do_showinfo)
3124      {
3125      unsigned long int all_options;
3126      int count, backrefmax, first_char, need_char, okpartial, jchanged,
3127        hascrorlf, maxlookbehind;
3128      int nameentrysize, namecount;
3129      const pcre_uint8 *nametable;
3130
3131      if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3132          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3133          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3134          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3135          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3136          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3137          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3138          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3139          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3140          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3141          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3142          new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3143          != 0)
3144        goto SKIP_DATA;
3145
3146      if (size != regex_gotten_store) fprintf(outfile,
3147        "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3148        (int)size, (int)regex_gotten_store);
3149
3150      fprintf(outfile, "Capturing subpattern count = %d\n", count);
3151      if (backrefmax > 0)
3152        fprintf(outfile, "Max back reference = %d\n", backrefmax);
3153
3154      if (namecount > 0)
3155        {
3156        fprintf(outfile, "Named capturing subpatterns:\n");
3157        while (namecount-- > 0)
3158          {
3159#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3160          int imm2_size = use_pcre16 ? 1 : 2;
3161#else
3162          int imm2_size = IMM2_SIZE;
3163#endif
3164          int length = (int)STRLEN(nametable + imm2_size);
3165          fprintf(outfile, "  ");
3166          PCHARSV(nametable, imm2_size, length, outfile);
3167          while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3168#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3169          fprintf(outfile, "%3d\n", use_pcre16?
3170             (int)(((PCRE_SPTR16)nametable)[0])
3171            :((int)nametable[0] << 8) | (int)nametable[1]);
3172          nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3173#else
3174          fprintf(outfile, "%3d\n", GET2(nametable, 0));
3175#ifdef SUPPORT_PCRE8
3176          nametable += nameentrysize;
3177#else
3178          nametable += nameentrysize * 2;
3179#endif
3180#endif
3181          }
3182        }
3183
3184      if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3185      if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3186
3187      all_options = ((REAL_PCRE *)re)->options;
3188      if (do_flip) all_options = swap_uint32(all_options);
3189
3190      if (get_options == 0) fprintf(outfile, "No options\n");
3191        else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3192          ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3193          ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3194          ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3195          ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3196          ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3197          ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3198          ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3199          ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3200          ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3201          ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3202          ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3203          ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3204          ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3205          ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3206          ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3207          ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3208          ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3209
3210      if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3211
3212      switch (get_options & PCRE_NEWLINE_BITS)
3213        {
3214        case PCRE_NEWLINE_CR:
3215        fprintf(outfile, "Forced newline sequence: CR\n");
3216        break;
3217
3218        case PCRE_NEWLINE_LF:
3219        fprintf(outfile, "Forced newline sequence: LF\n");
3220        break;
3221
3222        case PCRE_NEWLINE_CRLF:
3223        fprintf(outfile, "Forced newline sequence: CRLF\n");
3224        break;
3225
3226        case PCRE_NEWLINE_ANYCRLF:
3227        fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3228        break;
3229
3230        case PCRE_NEWLINE_ANY:
3231        fprintf(outfile, "Forced newline sequence: ANY\n");
3232        break;
3233
3234        default:
3235        break;
3236        }
3237
3238      if (first_char == -1)
3239        {
3240        fprintf(outfile, "First char at start or follows newline\n");
3241        }
3242      else if (first_char < 0)
3243        {
3244        fprintf(outfile, "No first char\n");
3245        }
3246      else
3247        {
3248        const char *caseless =
3249          ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3250          "" : " (caseless)";
3251
3252        if (PRINTOK(first_char))
3253          fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3254        else
3255          {
3256          fprintf(outfile, "First char = ");
3257          pchar(first_char, outfile);
3258          fprintf(outfile, "%s\n", caseless);
3259          }
3260        }
3261
3262      if (need_char < 0)
3263        {
3264        fprintf(outfile, "No need char\n");
3265        }
3266      else
3267        {
3268        const char *caseless =
3269          ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3270          "" : " (caseless)";
3271
3272        if (PRINTOK(need_char))
3273          fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3274        else
3275          {
3276          fprintf(outfile, "Need char = ");
3277          pchar(need_char, outfile);
3278          fprintf(outfile, "%s\n", caseless);
3279          }
3280        }
3281
3282      if (maxlookbehind > 0)
3283        fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3284
3285      /* Don't output study size; at present it is in any case a fixed
3286      value, but it varies, depending on the computer architecture, and
3287      so messes up the test suite. (And with the /F option, it might be
3288      flipped.) If study was forced by an external -s, don't show this
3289      information unless -i or -d was also present. This means that, except
3290      when auto-callouts are involved, the output from runs with and without
3291      -s should be identical. */
3292
3293      if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3294        {
3295        if (extra == NULL)
3296          fprintf(outfile, "Study returned NULL\n");
3297        else
3298          {
3299          pcre_uint8 *start_bits = NULL;
3300          int minlength;
3301
3302          if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3303            fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3304
3305          if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3306            {
3307            if (start_bits == NULL)
3308              fprintf(outfile, "No set of starting bytes\n");
3309            else
3310              {
3311              int i;
3312              int c = 24;
3313              fprintf(outfile, "Starting byte set: ");
3314              for (i = 0; i < 256; i++)
3315                {
3316                if ((start_bits[i/8] & (1<<(i&7))) != 0)
3317                  {
3318                  if (c > 75)
3319                    {
3320                    fprintf(outfile, "\n  ");
3321                    c = 2;
3322                    }
3323                  if (PRINTOK(i) && i != ' ')
3324                    {
3325                    fprintf(outfile, "%c ", i);
3326                    c += 2;
3327                    }
3328                  else
3329                    {
3330                    fprintf(outfile, "\\x%02x ", i);
3331                    c += 5;
3332                    }
3333                  }
3334                }
3335              fprintf(outfile, "\n");
3336              }
3337            }
3338          }
3339
3340        /* Show this only if the JIT was set by /S, not by -s. */
3341
3342        if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3343          {
3344          int jit;
3345          if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3346            {
3347            if (jit)
3348              fprintf(outfile, "JIT study was successful\n");
3349            else
3350#ifdef SUPPORT_JIT
3351              fprintf(outfile, "JIT study was not successful\n");
3352#else
3353              fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3354#endif
3355            }
3356          }
3357        }
3358      }
3359
3360    /* If the '>' option was present, we write out the regex to a file, and
3361    that is all. The first 8 bytes of the file are the regex length and then
3362    the study length, in big-endian order. */
3363
3364    if (to_file != NULL)
3365      {
3366      FILE *f = fopen((char *)to_file, "wb");
3367      if (f == NULL)
3368        {
3369        fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3370        }
3371      else
3372        {
3373        pcre_uint8 sbuf[8];
3374
3375        if (do_flip) regexflip(re, extra);
3376        sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3377        sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3378        sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3379        sbuf[3] = (pcre_uint8)((true_size) & 255);
3380        sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3381        sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3382        sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3383        sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3384
3385        if (fwrite(sbuf, 1, 8, f) < 8 ||
3386            fwrite(re, 1, true_size, f) < true_size)
3387          {
3388          fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3389          }
3390        else
3391          {
3392          fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3393
3394          /* If there is study data, write it. */
3395
3396          if (extra != NULL)
3397            {
3398            if (fwrite(extra->study_data, 1, true_study_size, f) <
3399                true_study_size)
3400              {
3401              fprintf(outfile, "Write error on %s: %s\n", to_file,
3402                strerror(errno));
3403              }
3404            else fprintf(outfile, "Study data written to %s\n", to_file);
3405            }
3406          }
3407        fclose(f);
3408        }
3409
3410      new_free(re);
3411      if (extra != NULL)
3412        {
3413        PCRE_FREE_STUDY(extra);
3414        }
3415      if (locale_set)
3416        {
3417        new_free((void *)tables);
3418        setlocale(LC_CTYPE, "C");
3419        locale_set = 0;
3420        }
3421      continue;  /* With next regex */
3422      }
3423    }        /* End of non-POSIX compile */
3424
3425  /* Read data lines and test them */
3426
3427  for (;;)
3428    {
3429    pcre_uint8 *q;
3430    pcre_uint8 *bptr;
3431    int *use_offsets = offsets;
3432    int use_size_offsets = size_offsets;
3433    int callout_data = 0;
3434    int callout_data_set = 0;
3435    int count, c;
3436    int copystrings = 0;
3437    int find_match_limit = default_find_match_limit;
3438    int getstrings = 0;
3439    int getlist = 0;
3440    int gmatched = 0;
3441    int start_offset = 0;
3442    int start_offset_sign = 1;
3443    int g_notempty = 0;
3444    int use_dfa = 0;
3445
3446    *copynames = 0;
3447    *getnames = 0;
3448
3449#ifdef SUPPORT_PCRE16
3450    cn16ptr = copynames;
3451    gn16ptr = getnames;
3452#endif
3453#ifdef SUPPORT_PCRE8
3454    cn8ptr = copynames8;
3455    gn8ptr = getnames8;
3456#endif
3457
3458    SET_PCRE_CALLOUT(callout);
3459    first_callout = 1;
3460    last_callout_mark = NULL;
3461    callout_extra = 0;
3462    callout_count = 0;
3463    callout_fail_count = 999999;
3464    callout_fail_id = -1;
3465    show_malloc = 0;
3466    options = 0;
3467
3468    if (extra != NULL) extra->flags &=
3469      ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3470
3471    len = 0;
3472    for (;;)
3473      {
3474      if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3475        {
3476        if (len > 0)    /* Reached EOF without hitting a newline */
3477          {
3478          fprintf(outfile, "\n");
3479          break;
3480          }
3481        done = 1;
3482        goto CONTINUE;
3483        }
3484      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3485      len = (int)strlen((char *)buffer);
3486      if (buffer[len-1] == '\n') break;
3487      }
3488
3489    while (len > 0 && isspace(buffer[len-1])) len--;
3490    buffer[len] = 0;
3491    if (len == 0) break;
3492
3493    p = buffer;
3494    while (isspace(*p)) p++;
3495
3496    bptr = q = dbuffer;
3497    while ((c = *p++) != 0)
3498      {
3499      int i = 0;
3500      int n = 0;
3501
3502      /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3503      In non-UTF mode, allow the value of the byte to fall through to later,
3504      where values greater than 127 are turned into UTF-8 when running in
3505      16-bit mode. */
3506
3507      if (c != '\\')
3508        {
3509        if (use_utf)
3510          {
3511          *q++ = c;
3512          continue;
3513          }
3514        }
3515
3516      /* Handle backslash escapes */
3517
3518      else switch ((c = *p++))
3519        {
3520        case 'a': c =    7; break;
3521        case 'b': c = '\b'; break;
3522        case 'e': c =   27; break;
3523        case 'f': c = '\f'; break;
3524        case 'n': c = '\n'; break;
3525        case 'r': c = '\r'; break;
3526        case 't': c = '\t'; break;
3527        case 'v': c = '\v'; break;
3528
3529        case '0': case '1': case '2': case '3':
3530        case '4': case '5': case '6': case '7':
3531        c -= '0';
3532        while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3533          c = c * 8 + *p++ - '0';
3534        break;
3535
3536        case 'x':
3537        if (*p == '{')
3538          {
3539          pcre_uint8 *pt = p;
3540          c = 0;
3541
3542          /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3543          when isxdigit() is a macro that refers to its argument more than
3544          once. This is banned by the C Standard, but apparently happens in at
3545          least one MacOS environment. */
3546
3547          for (pt++; isxdigit(*pt); pt++)
3548            {
3549            if (++i == 9)
3550              fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3551                               "using only the first eight.\n");
3552            else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3553            }
3554          if (*pt == '}')
3555            {
3556            p = pt + 1;
3557            break;
3558            }
3559          /* Not correct form for \x{...}; fall through */
3560          }
3561
3562        /* \x without {} always defines just one byte in 8-bit mode. This
3563        allows UTF-8 characters to be constructed byte by byte, and also allows
3564        invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3565        Otherwise, pass it down to later code so that it can be turned into
3566        UTF-8 when running in 16-bit mode. */
3567
3568        c = 0;
3569        while (i++ < 2 && isxdigit(*p))
3570          {
3571          c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3572          p++;
3573          }
3574        if (use_utf)
3575          {
3576          *q++ = c;
3577          continue;
3578          }
3579        break;
3580
3581        case 0:   /* \ followed by EOF allows for an empty line */
3582        p--;
3583        continue;
3584
3585        case '>':
3586        if (*p == '-')
3587          {
3588          start_offset_sign = -1;
3589          p++;
3590          }
3591        while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3592        start_offset *= start_offset_sign;
3593        continue;
3594
3595        case 'A':  /* Option setting */
3596        options |= PCRE_ANCHORED;
3597        continue;
3598
3599        case 'B':
3600        options |= PCRE_NOTBOL;
3601        continue;
3602
3603        case 'C':
3604        if (isdigit(*p))    /* Set copy string */
3605          {
3606          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3607          copystrings |= 1 << n;
3608          }
3609        else if (isalnum(*p))
3610          {
3611          READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3612          }
3613        else if (*p == '+')
3614          {
3615          callout_extra = 1;
3616          p++;
3617          }
3618        else if (*p == '-')
3619          {
3620          SET_PCRE_CALLOUT(NULL);
3621          p++;
3622          }
3623        else if (*p == '!')
3624          {
3625          callout_fail_id = 0;
3626          p++;
3627          while(isdigit(*p))
3628            callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3629          callout_fail_count = 0;
3630          if (*p == '!')
3631            {
3632            p++;
3633            while(isdigit(*p))
3634              callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3635            }
3636          }
3637        else if (*p == '*')
3638          {
3639          int sign = 1;
3640          callout_data = 0;
3641          if (*(++p) == '-') { sign = -1; p++; }
3642          while(isdigit(*p))
3643            callout_data = callout_data * 10 + *p++ - '0';
3644          callout_data *= sign;
3645          callout_data_set = 1;
3646          }
3647        continue;
3648
3649#if !defined NODFA
3650        case 'D':
3651#if !defined NOPOSIX
3652        if (posix || do_posix)
3653          printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3654        else
3655#endif
3656          use_dfa = 1;
3657        continue;
3658#endif
3659
3660#if !defined NODFA
3661        case 'F':
3662        options |= PCRE_DFA_SHORTEST;
3663        continue;
3664#endif
3665
3666        case 'G':
3667        if (isdigit(*p))
3668          {
3669          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3670          getstrings |= 1 << n;
3671          }
3672        else if (isalnum(*p))
3673          {
3674          READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3675          }
3676        continue;
3677
3678        case 'J':
3679        while(isdigit(*p)) n = n * 10 + *p++ - '0';
3680        if (extra != NULL
3681            && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3682            && extra->executable_jit != NULL)
3683          {
3684          if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3685          jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3686          PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3687          }
3688        continue;
3689
3690        case 'L':
3691        getlist = 1;
3692        continue;
3693
3694        case 'M':
3695        find_match_limit = 1;
3696        continue;
3697
3698        case 'N':
3699        if ((options & PCRE_NOTEMPTY) != 0)
3700          options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3701        else
3702          options |= PCRE_NOTEMPTY;
3703        continue;
3704
3705        case 'O':
3706        while(isdigit(*p)) n = n * 10 + *p++ - '0';
3707        if (n > size_offsets_max)
3708          {
3709          size_offsets_max = n;
3710          free(offsets);
3711          use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3712          if (offsets == NULL)
3713            {
3714            printf("** Failed to get %d bytes of memory for offsets vector\n",
3715              (int)(size_offsets_max * sizeof(int)));
3716            yield = 1;
3717            goto EXIT;
3718            }
3719          }
3720        use_size_offsets = n;
3721        if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
3722          else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
3723        continue;
3724
3725        case 'P':
3726        options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3727          PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3728        continue;
3729
3730        case 'Q':
3731        while(isdigit(*p)) n = n * 10 + *p++ - '0';
3732        if (extra == NULL)
3733          {
3734          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3735          extra->flags = 0;
3736          }
3737        extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3738        extra->match_limit_recursion = n;
3739        continue;
3740
3741        case 'q':
3742        while(isdigit(*p)) n = n * 10 + *p++ - '0';
3743        if (extra == NULL)
3744          {
3745          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3746          extra->flags = 0;
3747          }
3748        extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3749        extra->match_limit = n;
3750        continue;
3751
3752#if !defined NODFA
3753        case 'R':
3754        options |= PCRE_DFA_RESTART;
3755        continue;
3756#endif
3757
3758        case 'S':
3759        show_malloc = 1;
3760        continue;
3761
3762        case 'Y':
3763        options |= PCRE_NO_START_OPTIMIZE;
3764        continue;
3765
3766        case 'Z':
3767        options |= PCRE_NOTEOL;
3768        continue;
3769
3770        case '?':
3771        options |= PCRE_NO_UTF8_CHECK;
3772        continue;
3773
3774        case '<':
3775          {
3776          int x = check_newline(p, outfile);
3777          if (x == 0) goto NEXT_DATA;
3778          options |= x;
3779          while (*p++ != '>');
3780          }
3781        continue;
3782        }
3783
3784      /* We now have a character value in c that may be greater than 255. In
3785      16-bit mode, we always convert characters to UTF-8 so that values greater
3786      than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3787      convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3788      mode must have come from \x{...} or octal constructs because values from
3789      \x.. get this far only in non-UTF mode. */
3790
3791#if !defined NOUTF || defined SUPPORT_PCRE16
3792      if (use_pcre16 || use_utf)
3793        {
3794        pcre_uint8 buff8[8];
3795        int ii, utn;
3796        utn = ord2utf8(c, buff8);
3797        for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3798        }
3799      else
3800#endif
3801        {
3802        if (c > 255)
3803          {
3804          fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3805            "and UTF-8 mode is not enabled.\n", c);
3806          fprintf(outfile, "** Truncation will probably give the wrong "
3807            "result.\n");
3808          }
3809        *q++ = c;
3810        }
3811      }
3812
3813    /* Reached end of subject string */
3814
3815    *q = 0;
3816    len = (int)(q - dbuffer);
3817
3818    /* Move the data to the end of the buffer so that a read over the end of
3819    the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3820    we are using the POSIX interface, we must include the terminating zero. */
3821
3822#if !defined NOPOSIX
3823    if (posix || do_posix)
3824      {
3825      memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3826      bptr += buffer_size - len - 1;
3827      }
3828    else
3829#endif
3830      {
3831      memmove(bptr + buffer_size - len, bptr, len);
3832      bptr += buffer_size - len;
3833      }
3834
3835    if ((all_use_dfa || use_dfa) && find_match_limit)
3836      {
3837      printf("**Match limit not relevant for DFA matching: ignored\n");
3838      find_match_limit = 0;
3839      }
3840
3841    /* Handle matching via the POSIX interface, which does not
3842    support timing or playing with the match limit or callout data. */
3843
3844#if !defined NOPOSIX
3845    if (posix || do_posix)
3846      {
3847      int rc;
3848      int eflags = 0;
3849      regmatch_t *pmatch = NULL;
3850      if (use_size_offsets > 0)
3851        pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3852      if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3853      if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3854      if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3855
3856      rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3857
3858      if (rc != 0)
3859        {
3860        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3861        fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3862        }
3863      else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3864              != 0)
3865        {
3866        fprintf(outfile, "Matched with REG_NOSUB\n");
3867        }
3868      else
3869        {
3870        size_t i;
3871        for (i = 0; i < (size_t)use_size_offsets; i++)
3872          {
3873          if (pmatch[i].rm_so >= 0)
3874            {
3875            fprintf(outfile, "%2d: ", (int)i);
3876            PCHARSV(dbuffer, pmatch[i].rm_so,
3877              pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3878            fprintf(outfile, "\n");
3879            if (do_showcaprest || (i == 0 && do_showrest))
3880              {
3881              fprintf(outfile, "%2d+ ", (int)i);
3882              PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3883                outfile);
3884              fprintf(outfile, "\n");
3885              }
3886            }
3887          }
3888        }
3889      free(pmatch);
3890      goto NEXT_DATA;
3891      }
3892
3893#endif  /* !defined NOPOSIX */
3894
3895    /* Handle matching via the native interface - repeats for /g and /G */
3896
3897#ifdef SUPPORT_PCRE16
3898    if (use_pcre16)
3899      {
3900      len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3901      switch(len)
3902        {
3903        case -1:
3904        fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3905          "converted to UTF-16\n");
3906        goto NEXT_DATA;
3907
3908        case -2:
3909        fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3910          "cannot be converted to UTF-16\n");
3911        goto NEXT_DATA;
3912
3913        case -3:
3914        fprintf(outfile, "**Failed: character value greater than 0xffff "
3915          "cannot be converted to 16-bit in non-UTF mode\n");
3916        goto NEXT_DATA;
3917
3918        default:
3919        break;
3920        }
3921      bptr = (pcre_uint8 *)buffer16;
3922      }
3923#endif
3924
3925    /* Ensure that there is a JIT callback if we want to verify that JIT was
3926    actually used. If jit_stack == NULL, no stack has yet been assigned. */
3927
3928    if (verify_jit && jit_stack == NULL && extra != NULL)
3929       { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
3930
3931    for (;; gmatched++)    /* Loop for /g or /G */
3932      {
3933      markptr = NULL;
3934      jit_was_used = FALSE;
3935
3936      if (timeitm > 0)
3937        {
3938        register int i;
3939        clock_t time_taken;
3940        clock_t start_time = clock();
3941
3942#if !defined NODFA
3943        if (all_use_dfa || use_dfa)
3944          {
3945          if ((options & PCRE_DFA_RESTART) != 0)
3946            {
3947            fprintf(outfile, "Timing DFA restarts is not supported\n");
3948            break;
3949            }
3950          if (dfa_workspace == NULL)
3951            dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
3952          for (i = 0; i < timeitm; i++)
3953            {
3954            PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3955              (options | g_notempty), use_offsets, use_size_offsets,
3956              dfa_workspace, DFA_WS_DIMENSION);
3957            }
3958          }
3959        else
3960#endif
3961
3962        for (i = 0; i < timeitm; i++)
3963          {
3964          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3965            (options | g_notempty), use_offsets, use_size_offsets);
3966          }
3967        time_taken = clock() - start_time;
3968        fprintf(outfile, "Execute time %.4f milliseconds\n",
3969          (((double)time_taken * 1000.0) / (double)timeitm) /
3970            (double)CLOCKS_PER_SEC);
3971        }
3972
3973      /* If find_match_limit is set, we want to do repeated matches with
3974      varying limits in order to find the minimum value for the match limit and
3975      for the recursion limit. The match limits are relevant only to the normal
3976      running of pcre_exec(), so disable the JIT optimization. This makes it
3977      possible to run the same set of tests with and without JIT externally
3978      requested. */
3979
3980      if (find_match_limit)
3981        {
3982        if (extra == NULL)
3983          {
3984          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3985          extra->flags = 0;
3986          }
3987        else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3988
3989        (void)check_match_limit(re, extra, bptr, len, start_offset,
3990          options|g_notempty, use_offsets, use_size_offsets,
3991          PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3992          PCRE_ERROR_MATCHLIMIT, "match()");
3993
3994        count = check_match_limit(re, extra, bptr, len, start_offset,
3995          options|g_notempty, use_offsets, use_size_offsets,
3996          PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3997          PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3998        }
3999
4000      /* If callout_data is set, use the interface with additional data */
4001
4002      else if (callout_data_set)
4003        {
4004        if (extra == NULL)
4005          {
4006          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4007          extra->flags = 0;
4008          }
4009        extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
4010        extra->callout_data = &callout_data;
4011        PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4012          options | g_notempty, use_offsets, use_size_offsets);
4013        extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
4014        }
4015
4016      /* The normal case is just to do the match once, with the default
4017      value of match_limit. */
4018
4019#if !defined NODFA
4020      else if (all_use_dfa || use_dfa)
4021        {
4022        if (dfa_workspace == NULL)
4023          dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4024        if (dfa_matched++ == 0)
4025          dfa_workspace[0] = -1;  /* To catch bad restart */
4026        PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4027          (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4028          DFA_WS_DIMENSION);
4029        if (count == 0)
4030          {
4031          fprintf(outfile, "Matched, but too many subsidiary matches\n");
4032          count = use_size_offsets/2;
4033          }
4034        }
4035#endif
4036
4037      else
4038        {
4039        PCRE_EXEC(count, re, extra, bptr, len, start_offset,
4040          options | g_notempty, use_offsets, use_size_offsets);
4041        if (count == 0)
4042          {
4043          fprintf(outfile, "Matched, but too many substrings\n");
4044          count = use_size_offsets/3;
4045          }
4046        }
4047
4048      /* Matched */
4049
4050      if (count >= 0)
4051        {
4052        int i, maxcount;
4053        void *cnptr, *gnptr;
4054
4055#if !defined NODFA
4056        if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
4057#endif
4058          maxcount = use_size_offsets/3;
4059
4060        /* This is a check against a lunatic return value. */
4061
4062        if (count > maxcount)
4063          {
4064          fprintf(outfile,
4065            "** PCRE error: returned count %d is too big for offset size %d\n",
4066            count, use_size_offsets);
4067          count = use_size_offsets/3;
4068          if (do_g || do_G)
4069            {
4070            fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
4071            do_g = do_G = FALSE;        /* Break g/G loop */
4072            }
4073          }
4074
4075        /* do_allcaps requests showing of all captures in the pattern, to check
4076        unset ones at the end. */
4077
4078        if (do_allcaps)
4079          {
4080          if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4081            goto SKIP_DATA;
4082          count++;   /* Allow for full match */
4083          if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4084          }
4085
4086        /* Output the captured substrings */
4087
4088        for (i = 0; i < count * 2; i += 2)
4089          {
4090          if (use_offsets[i] < 0)
4091            {
4092            if (use_offsets[i] != -1)
4093              fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4094                use_offsets[i], i);
4095            if (use_offsets[i+1] != -1)
4096              fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4097                use_offsets[i+1], i+1);
4098            fprintf(outfile, "%2d: <unset>\n", i/2);
4099            }
4100          else
4101            {
4102            fprintf(outfile, "%2d: ", i/2);
4103            PCHARSV(bptr, use_offsets[i],
4104              use_offsets[i+1] - use_offsets[i], outfile);
4105            if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4106            fprintf(outfile, "\n");
4107            if (do_showcaprest || (i == 0 && do_showrest))
4108              {
4109              fprintf(outfile, "%2d+ ", i/2);
4110              PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4111                outfile);
4112              fprintf(outfile, "\n");
4113              }
4114            }
4115          }
4116
4117        if (markptr != NULL)
4118          {
4119          fprintf(outfile, "MK: ");
4120          PCHARSV(markptr, 0, -1, outfile);
4121          fprintf(outfile, "\n");
4122          }
4123
4124        for (i = 0; i < 32; i++)
4125          {
4126          if ((copystrings & (1 << i)) != 0)
4127            {
4128            int rc;
4129            char copybuffer[256];
4130            PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4131              copybuffer, sizeof(copybuffer));
4132            if (rc < 0)
4133              fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4134            else
4135              {
4136              fprintf(outfile, "%2dC ", i);
4137              PCHARSV(copybuffer, 0, rc, outfile);
4138              fprintf(outfile, " (%d)\n", rc);
4139              }
4140            }
4141          }
4142
4143        cnptr = copynames;
4144        for (;;)
4145          {
4146          int rc;
4147          char copybuffer[256];
4148
4149          if (use_pcre16)
4150            {
4151            if (*(pcre_uint16 *)cnptr == 0) break;
4152            }
4153          else
4154            {
4155            if (*(pcre_uint8 *)cnptr == 0) break;
4156            }
4157
4158          PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4159            cnptr, copybuffer, sizeof(copybuffer));
4160
4161          if (rc < 0)
4162            {
4163            fprintf(outfile, "copy substring ");
4164            PCHARSV(cnptr, 0, -1, outfile);
4165            fprintf(outfile, " failed %d\n", rc);
4166            }
4167          else
4168            {
4169            fprintf(outfile, "  C ");
4170            PCHARSV(copybuffer, 0, rc, outfile);
4171            fprintf(outfile, " (%d) ", rc);
4172            PCHARSV(cnptr, 0, -1, outfile);
4173            putc('\n', outfile);
4174            }
4175
4176          cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4177          }
4178
4179        for (i = 0; i < 32; i++)
4180          {
4181          if ((getstrings & (1 << i)) != 0)
4182            {
4183            int rc;
4184            const char *substring;
4185            PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4186            if (rc < 0)
4187              fprintf(outfile, "get substring %d failed %d\n", i, rc);
4188            else
4189              {
4190              fprintf(outfile, "%2dG ", i);
4191              PCHARSV(substring, 0, rc, outfile);
4192              fprintf(outfile, " (%d)\n", rc);
4193              PCRE_FREE_SUBSTRING(substring);
4194              }
4195            }
4196          }
4197
4198        gnptr = getnames;
4199        for (;;)
4200          {
4201          int rc;
4202          const char *substring;
4203
4204          if (use_pcre16)
4205            {
4206            if (*(pcre_uint16 *)gnptr == 0) break;
4207            }
4208          else
4209            {
4210            if (*(pcre_uint8 *)gnptr == 0) break;
4211            }
4212
4213          PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4214            gnptr, &substring);
4215          if (rc < 0)
4216            {
4217            fprintf(outfile, "get substring ");
4218            PCHARSV(gnptr, 0, -1, outfile);
4219            fprintf(outfile, " failed %d\n", rc);
4220            }
4221          else
4222            {
4223            fprintf(outfile, "  G ");
4224            PCHARSV(substring, 0, rc, outfile);
4225            fprintf(outfile, " (%d) ", rc);
4226            PCHARSV(gnptr, 0, -1, outfile);
4227            PCRE_FREE_SUBSTRING(substring);
4228            putc('\n', outfile);
4229            }
4230
4231          gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4232          }
4233
4234        if (getlist)
4235          {
4236          int rc;
4237          const char **stringlist;
4238          PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4239          if (rc < 0)
4240            fprintf(outfile, "get substring list failed %d\n", rc);
4241          else
4242            {
4243            for (i = 0; i < count; i++)
4244              {
4245              fprintf(outfile, "%2dL ", i);
4246              PCHARSV(stringlist[i], 0, -1, outfile);
4247              putc('\n', outfile);
4248              }
4249            if (stringlist[i] != NULL)
4250              fprintf(outfile, "string list not terminated by NULL\n");
4251            PCRE_FREE_SUBSTRING_LIST(stringlist);
4252            }
4253          }
4254        }
4255
4256      /* There was a partial match */
4257
4258      else if (count == PCRE_ERROR_PARTIAL)
4259        {
4260        if (markptr == NULL) fprintf(outfile, "Partial match");
4261        else
4262          {
4263          fprintf(outfile, "Partial match, mark=");
4264          PCHARSV(markptr, 0, -1, outfile);
4265          }
4266        if (use_size_offsets > 1)
4267          {
4268          fprintf(outfile, ": ");
4269          PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4270            outfile);
4271          }
4272        if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4273        fprintf(outfile, "\n");
4274        break;  /* Out of the /g loop */
4275        }
4276
4277      /* Failed to match. If this is a /g or /G loop and we previously set
4278      g_notempty after a null match, this is not necessarily the end. We want
4279      to advance the start offset, and continue. We won't be at the end of the
4280      string - that was checked before setting g_notempty.
4281
4282      Complication arises in the case when the newline convention is "any",
4283      "crlf", or "anycrlf". If the previous match was at the end of a line
4284      terminated by CRLF, an advance of one character just passes the \r,
4285      whereas we should prefer the longer newline sequence, as does the code in
4286      pcre_exec(). Fudge the offset value to achieve this. We check for a
4287      newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4288      find the default.
4289
4290      Otherwise, in the case of UTF-8 matching, the advance must be one
4291      character, not one byte. */
4292
4293      else
4294        {
4295        if (g_notempty != 0)
4296          {
4297          int onechar = 1;
4298          unsigned int obits = ((REAL_PCRE *)re)->options;
4299          use_offsets[0] = start_offset;
4300          if ((obits & PCRE_NEWLINE_BITS) == 0)
4301            {
4302            int d;
4303            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4304            /* Note that these values are always the ASCII ones, even in
4305            EBCDIC environments. CR = 13, NL = 10. */
4306            obits = (d == 13)? PCRE_NEWLINE_CR :
4307                    (d == 10)? PCRE_NEWLINE_LF :
4308                    (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4309                    (d == -2)? PCRE_NEWLINE_ANYCRLF :
4310                    (d == -1)? PCRE_NEWLINE_ANY : 0;
4311            }
4312          if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4313               (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4314               (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4315              &&
4316              start_offset < len - 1 &&
4317#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4318              (use_pcre16?
4319                   ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4320                && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4321              :
4322                   bptr[start_offset] == '\r'
4323                && bptr[start_offset + 1] == '\n')
4324#elif defined SUPPORT_PCRE16
4325                 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4326              && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4327#else
4328                 bptr[start_offset] == '\r'
4329              && bptr[start_offset + 1] == '\n'
4330#endif
4331              )
4332            onechar++;
4333          else if (use_utf)
4334            {
4335            while (start_offset + onechar < len)
4336              {
4337              if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4338              onechar++;
4339              }
4340            }
4341          use_offsets[1] = start_offset + onechar;
4342          }
4343        else
4344          {
4345          switch(count)
4346            {
4347            case PCRE_ERROR_NOMATCH:
4348            if (gmatched == 0)
4349              {
4350              if (markptr == NULL)
4351                {
4352                fprintf(outfile, "No match");
4353                }
4354              else
4355                {
4356                fprintf(outfile, "No match, mark = ");
4357                PCHARSV(markptr, 0, -1, outfile);
4358                }
4359              if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4360              putc('\n', outfile);
4361              }
4362            break;
4363
4364            case PCRE_ERROR_BADUTF8:
4365            case PCRE_ERROR_SHORTUTF8:
4366            fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4367              (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4368              use_pcre16? "16" : "8");
4369            if (use_size_offsets >= 2)
4370              fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4371                use_offsets[1]);
4372            fprintf(outfile, "\n");
4373            break;
4374
4375            case PCRE_ERROR_BADUTF8_OFFSET:
4376            fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4377              use_pcre16? "16" : "8");
4378            break;
4379
4380            default:
4381            if (count < 0 &&
4382                (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4383              fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4384            else
4385              fprintf(outfile, "Error %d (Unexpected value)\n", count);
4386            break;
4387            }
4388
4389          break;  /* Out of the /g loop */
4390          }
4391        }
4392
4393      /* If not /g or /G we are done */
4394
4395      if (!do_g && !do_G) break;
4396
4397      /* If we have matched an empty string, first check to see if we are at
4398      the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4399      Perl's /g option does. This turns out to be rather cunning. First we set
4400      PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4401      same point. If this fails (picked up above) we advance to the next
4402      character. */
4403
4404      g_notempty = 0;
4405
4406      if (use_offsets[0] == use_offsets[1])
4407        {
4408        if (use_offsets[0] == len) break;
4409        g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4410        }
4411
4412      /* For /g, update the start offset, leaving the rest alone */
4413
4414      if (do_g) start_offset = use_offsets[1];
4415
4416      /* For /G, update the pointer and length */
4417
4418      else
4419        {
4420        bptr += use_offsets[1] * CHAR_SIZE;
4421        len -= use_offsets[1];
4422        }
4423      }  /* End of loop for /g and /G */
4424
4425    NEXT_DATA: continue;
4426    }    /* End of loop for data lines */
4427
4428  CONTINUE:
4429
4430#if !defined NOPOSIX
4431  if (posix || do_posix) regfree(&preg);
4432#endif
4433
4434  if (re != NULL) new_free(re);
4435  if (extra != NULL)
4436    {
4437    PCRE_FREE_STUDY(extra);
4438    }
4439  if (locale_set)
4440    {
4441    new_free((void *)tables);
4442    setlocale(LC_CTYPE, "C");
4443    locale_set = 0;
4444    }
4445  if (jit_stack != NULL)
4446    {
4447    PCRE_JIT_STACK_FREE(jit_stack);
4448    jit_stack = NULL;
4449    }
4450  }
4451
4452if (infile == stdin) fprintf(outfile, "\n");
4453
4454EXIT:
4455
4456if (infile != NULL && infile != stdin) fclose(infile);
4457if (outfile != NULL && outfile != stdout) fclose(outfile);
4458
4459free(buffer);
4460free(dbuffer);
4461free(pbuffer);
4462free(offsets);
4463
4464#ifdef SUPPORT_PCRE16
4465if (buffer16 != NULL) free(buffer16);
4466#endif
4467
4468return yield;
4469}
4470
4471/* End of pcretest.c */
4472