1/*************************************************
2*             PCRE testing program               *
3*************************************************/
4
5/* This program was hacked up as a tester for PCRE. I really should have
6written it more tidily in the first place. Will I ever learn? It has grown and
7been extended and consequently is now rather, er, *very* untidy in places.
8
9-----------------------------------------------------------------------------
10Redistribution and use in source and binary forms, with or without
11modification, are permitted provided that the following conditions are met:
12
13    * Redistributions of source code must retain the above copyright notice,
14      this list of conditions and the following disclaimer.
15
16    * Redistributions in binary form must reproduce the above copyright
17      notice, this list of conditions and the following disclaimer in the
18      documentation and/or other materials provided with the distribution.
19
20    * Neither the name of the University of Cambridge nor the names of its
21      contributors may be used to endorse or promote products derived from
22      this software without specific prior written permission.
23
24THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34POSSIBILITY OF SUCH DAMAGE.
35-----------------------------------------------------------------------------
36*/
37
38
39#ifdef HAVE_CONFIG_H
40#include "config.h"
41#endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51#ifdef SUPPORT_LIBREADLINE
52#ifdef HAVE_UNISTD_H
53#include <unistd.h>
54#endif
55#include <readline/readline.h>
56#include <readline/history.h>
57#endif
58
59
60/* A number of things vary for Windows builds. Originally, pcretest opened its
61input and output without "b"; then I was told that "b" was needed in some
62environments, so it was added for release 5.0 to both the input and output. (It
63makes no difference on Unix-like systems.) Later I was told that it is wrong
64for the input on Windows. I've now abstracted the modes into two macros that
65are set here, to make it easier to fiddle with them, and removed "b" from the
66input mode under Windows. */
67
68#if defined(_WIN32) || defined(WIN32)
69#include <io.h>                /* For _setmode() */
70#include <fcntl.h>             /* For _O_BINARY */
71#define INPUT_MODE   "r"
72#define OUTPUT_MODE  "wb"
73
74#ifndef isatty
75#define isatty _isatty         /* This is what Windows calls them, I'm told, */
76#endif                         /* though in some environments they seem to   */
77                               /* be already defined, hence the #ifndefs.    */
78#ifndef fileno
79#define fileno _fileno
80#endif
81
82#else
83#include <sys/time.h>          /* These two includes are needed */
84#include <sys/resource.h>      /* for setrlimit(). */
85#define INPUT_MODE   "rb"
86#define OUTPUT_MODE  "wb"
87#endif
88
89
90/* We have to include pcre_internal.h because we need the internal info for
91displaying the results of pcre_study() and we also need to know about the
92internal macros, structures, and other internal data values; pcretest has
93"inside information" compared to a program that strictly follows the PCRE API.
94
95Although pcre_internal.h does itself include pcre.h, we explicitly include it
96here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97appropriately for an application, not for building PCRE. */
98
99#include "pcre.h"
100#include "pcre_internal.h"
101
102/* We need access to some of the data tables that PCRE uses. So as not to have
103to keep two copies, we include the source file here, changing the names of the
104external symbols to prevent clashes. */
105
106#define _pcre_ucp_gentype      ucp_gentype
107#define _pcre_utf8_table1      utf8_table1
108#define _pcre_utf8_table1_size utf8_table1_size
109#define _pcre_utf8_table2      utf8_table2
110#define _pcre_utf8_table3      utf8_table3
111#define _pcre_utf8_table4      utf8_table4
112#define _pcre_utt              utt
113#define _pcre_utt_size         utt_size
114#define _pcre_utt_names        utt_names
115#define _pcre_OP_lengths       OP_lengths
116
117#include "pcre_tables.c"
118
119/* We also need the pcre_printint() function for printing out compiled
120patterns. This function is in a separate file so that it can be included in
121pcre_compile.c when that module is compiled with debugging enabled. It needs to
122know which case is being compiled. */
123
124#define COMPILING_PCRETEST
125#include "pcre_printint.src"
126
127/* The definition of the macro PRINTABLE, which determines whether to print an
128output character as-is or as a hex value when showing compiled patterns, is
contained in the printint.src file. We use it here also, in cases when the
130locale has not been explicitly changed, so as to get consistent output from
131systems that differ in their output from isprint() even in the "C" locale. */
132
133#define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134
135/* It is possible to compile this test program without including support for
136testing the POSIX interface, though this is not available via the standard
137Makefile. */
138
139#if !defined NOPOSIX
140#include "pcreposix.h"
141#endif
142
143/* It is also possible, for the benefit of the version currently imported into
144Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145interface to the DFA matcher (NODFA), and without the doublecheck of the old
146"info" function (define NOINFOCHECK). In fact, we automatically cut out the
147UTF8 support if PCRE is built without it. */
148
149#ifndef SUPPORT_UTF8
150#ifndef NOUTF8
151#define NOUTF8
152#endif
153#endif
154
155
156/* Other parameters */
157
158#ifndef CLOCKS_PER_SEC
159#ifdef CLK_TCK
160#define CLOCKS_PER_SEC CLK_TCK
161#else
162#define CLOCKS_PER_SEC 100
163#endif
164#endif
165
166/* This is the default loop count for timing. */
167
168#define LOOPREPEAT 500000
169
170/* Static variables */
171
172static FILE *outfile;
173static int log_store = 0;
174static int callout_count;
175static int callout_extra;
176static int callout_fail_count;
177static int callout_fail_id;
178static int debug_lengths;
179static int first_callout;
180static int locale_set = 0;
181static int show_malloc;
182static int use_utf8;
183static size_t gotten_store;
184
185/* The buffers grow automatically if very long input lines are encountered. */
186
187static int buffer_size = 50000;
188static uschar *buffer = NULL;
189static uschar *dbuffer = NULL;
190static uschar *pbuffer = NULL;
191
192
193
194/*************************************************
195*        Read or extend an input line            *
196*************************************************/
197
198/* Input lines are read into buffer, but both patterns and data lines can be
199continued over multiple input lines. In addition, if the buffer fills up, we
200want to automatically expand it so as to be able to handle extremely large
201lines that are needed for certain stress tests. When the input buffer is
202expanded, the other two buffers must also be expanded likewise, and the
203contents of pbuffer, which are a copy of the input for callouts, must be
204preserved (for when expansion happens for a data line). This is not the most
205optimal way of handling this, but hey, this is just a test program!
206
207Arguments:
208  f            the file to read
209  start        where in buffer to start (this *must* be within buffer)
210  prompt       for stdin or readline()
211
212Returns:       pointer to the start of new data
213               could be a copy of start, or could be moved
214               NULL if no data read and EOF reached
215*/
216
217static uschar *
218extend_inputline(FILE *f, uschar *start, const char *prompt)
219{
220uschar *here = start;
221
222for (;;)
223  {
224  int rlen = buffer_size - (here - buffer);
225
226  if (rlen > 1000)
227    {
228    int dlen;
229
230    /* If libreadline support is required, use readline() to read a line if the
231    input is a terminal. Note that readline() removes the trailing newline, so
232    we must put it back again, to be compatible with fgets(). */
233
234#ifdef SUPPORT_LIBREADLINE
235    if (isatty(fileno(f)))
236      {
237      size_t len;
238      char *s = readline(prompt);
239      if (s == NULL) return (here == start)? NULL : start;
240      len = strlen(s);
241      if (len > 0) add_history(s);
242      if (len > rlen - 1) len = rlen - 1;
243      memcpy(here, s, len);
244      here[len] = '\n';
245      here[len+1] = 0;
246      free(s);
247      }
248    else
249#endif
250
251    /* Read the next line by normal means, prompting if the file is stdin. */
252
253      {
254      if (f == stdin) printf(prompt);
255      if (fgets((char *)here, rlen,  f) == NULL)
256        return (here == start)? NULL : start;
257      }
258
259    dlen = (int)strlen((char *)here);
260    if (dlen > 0 && here[dlen - 1] == '\n') return start;
261    here += dlen;
262    }
263
264  else
265    {
266    int new_buffer_size = 2*buffer_size;
267    uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
268    uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
269    uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
270
271    if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
272      {
273      fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
274      exit(1);
275      }
276
277    memcpy(new_buffer, buffer, buffer_size);
278    memcpy(new_pbuffer, pbuffer, buffer_size);
279
280    buffer_size = new_buffer_size;
281
282    start = new_buffer + (start - buffer);
283    here = new_buffer + (here - buffer);
284
285    free(buffer);
286    free(dbuffer);
287    free(pbuffer);
288
289    buffer = new_buffer;
290    dbuffer = new_dbuffer;
291    pbuffer = new_pbuffer;
292    }
293  }
294
295return NULL;  /* Control never gets here */
296}
297
298
299
300
301
302
303
304/*************************************************
305*          Read number from string               *
306*************************************************/
307
308/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
309around with conditional compilation, just do the job by hand. It is only used
310for unpicking arguments, so just keep it simple.
311
312Arguments:
313  str           string to be converted
314  endptr        where to put the end pointer
315
Returns:        the converted value, as an int
317*/
318
/* Convert a run of decimal digits, optionally preceded by white space, into an
int. No sign is recognized. A value too large for an int is clamped to INT_MAX
instead of overflowing (signed overflow is undefined behaviour); all the digits
are consumed nevertheless, so the end pointer is the same either way.

Arguments:
  str           string to be converted
  endptr        receives a pointer to the first character after the digits
                  (or after the leading white space if there are no digits)

Returns:        the value, 0 if there are no digits, INT_MAX if too large
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* True exactly when result * 10 + digit would exceed INT_MAX; it also stays
  true once result has been clamped, so later digits leave the value alone. */

  if (result > (INT_MAX - digit) / 10)
    result = INT_MAX;
  else
    result = result * 10 + digit;
  }

*endptr = str;
return result;
}
328
329
330
331
332/*************************************************
333*            Convert UTF-8 string to value       *
334*************************************************/
335
336/* This function takes one or more bytes that represents a UTF-8 character,
337and returns the value of the character.
338
339Argument:
340  utf8bytes   a pointer to the byte vector
341  vptr        a pointer to an int to receive the value
342
343Returns:      >  0 => the number of bytes consumed
344              -6 to 0 => malformed UTF-8 character at offset = (-return)
345*/
346
347#if !defined NOUTF8
348
349static int
350utf82ord(unsigned char *utf8bytes, int *vptr)
351{
352int c = *utf8bytes++;
353int d = c;
354int i, j, s;
355
356for (i = -1; i < 6; i++)               /* i is number of additional bytes */
357  {
358  if ((d & 0x80) == 0) break;
359  d <<= 1;
360  }
361
362if (i == -1) { *vptr = c; return 1; }  /* ascii character */
363if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
364
365/* i now has a value in the range 1-5 */
366
367s = 6*i;
368d = (c & utf8_table3[i]) << s;
369
370for (j = 0; j < i; j++)
371  {
372  c = *utf8bytes++;
373  if ((c & 0xc0) != 0x80) return -(j+1);
374  s -= 6;
375  d |= (c & 0x3f) << s;
376  }
377
378/* Check that encoding was the correct unique one */
379
380for (j = 0; j < utf8_table1_size; j++)
381  if (d <= utf8_table1[j]) break;
382if (j != i) return -(i+1);
383
384/* Valid value */
385
386*vptr = d;
387return i+1;
388}
389
390#endif
391
392
393
394/*************************************************
395*       Convert character value to UTF-8         *
396*************************************************/
397
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes.
400
401Arguments:
402  cvalue     the character value
403  utf8bytes  pointer to buffer for result - at least 6 bytes long
404
Returns:     number of bytes placed in the buffer
406*/
407
408#if !defined NOUTF8
409
410static int
411ord2utf8(int cvalue, uschar *utf8bytes)
412{
413register int i, j;
414for (i = 0; i < utf8_table1_size; i++)
415  if (cvalue <= utf8_table1[i]) break;
416utf8bytes += i;
417for (j = i; j > 0; j--)
418 {
419 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
420 cvalue >>= 6;
421 }
422*utf8bytes = utf8_table2[i] | cvalue;
423return i + 1;
424}
425
426#endif
427
428
429
430/*************************************************
431*             Print character string             *
432*************************************************/
433
434/* Character string printing function. Must handle UTF-8 strings in utf8
435mode. Yields number of characters printed. If handed a NULL file, just counts
436chars without printing. */
437
438static int pchars(unsigned char *p, int length, FILE *f)
439{
440int c = 0;
441int yield = 0;
442
443while (length-- > 0)
444  {
445#if !defined NOUTF8
446  if (use_utf8)
447    {
448    int rc = utf82ord(p, &c);
449
450    if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
451      {
452      length -= rc - 1;
453      p += rc;
454      if (PRINTHEX(c))
455        {
456        if (f != NULL) fprintf(f, "%c", c);
457        yield++;
458        }
459      else
460        {
461        int n = 4;
462        if (f != NULL) fprintf(f, "\\x{%02x}", c);
463        yield += (n <= 0x000000ff)? 2 :
464                 (n <= 0x00000fff)? 3 :
465                 (n <= 0x0000ffff)? 4 :
466                 (n <= 0x000fffff)? 5 : 6;
467        }
468      continue;
469      }
470    }
471#endif
472
473   /* Not UTF-8, or malformed UTF-8  */
474
475  c = *p++;
476  if (PRINTHEX(c))
477    {
478    if (f != NULL) fprintf(f, "%c", c);
479    yield++;
480    }
481  else
482    {
483    if (f != NULL) fprintf(f, "\\x%02x", c);
484    yield += 4;
485    }
486  }
487
488return yield;
489}
490
491
492
493/*************************************************
494*              Callout function                  *
495*************************************************/
496
497/* Called from PCRE as a result of the (?C) item. We print out where we are in
498the match. Yield zero unless more callouts than the fail count, or the callout
499data is not zero. */
500
501static int callout(pcre_callout_block *cb)
502{
503FILE *f = (first_callout | callout_extra)? outfile : NULL;
504int i, pre_start, post_start, subject_length;
505
506if (callout_extra)
507  {
508  fprintf(f, "Callout %d: last capture = %d\n",
509    cb->callout_number, cb->capture_last);
510
511  for (i = 0; i < cb->capture_top * 2; i += 2)
512    {
513    if (cb->offset_vector[i] < 0)
514      fprintf(f, "%2d: <unset>\n", i/2);
515    else
516      {
517      fprintf(f, "%2d: ", i/2);
518      (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
519        cb->offset_vector[i+1] - cb->offset_vector[i], f);
520      fprintf(f, "\n");
521      }
522    }
523  }
524
525/* Re-print the subject in canonical form, the first time or if giving full
526datails. On subsequent calls in the same match, we use pchars just to find the
527printed lengths of the substrings. */
528
529if (f != NULL) fprintf(f, "--->");
530
531pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
532post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
533  cb->current_position - cb->start_match, f);
534
535subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
536
537(void)pchars((unsigned char *)(cb->subject + cb->current_position),
538  cb->subject_length - cb->current_position, f);
539
540if (f != NULL) fprintf(f, "\n");
541
542/* Always print appropriate indicators, with callout number if not already
543shown. For automatic callouts, show the pattern offset. */
544
545if (cb->callout_number == 255)
546  {
547  fprintf(outfile, "%+3d ", cb->pattern_position);
548  if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
549  }
550else
551  {
552  if (callout_extra) fprintf(outfile, "    ");
553    else fprintf(outfile, "%3d ", cb->callout_number);
554  }
555
556for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
557fprintf(outfile, "^");
558
559if (post_start > 0)
560  {
561  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
562  fprintf(outfile, "^");
563  }
564
565for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
566  fprintf(outfile, " ");
567
568fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
569  pbuffer + cb->pattern_position);
570
571fprintf(outfile, "\n");
572first_callout = 0;
573
574if (cb->callout_data != NULL)
575  {
576  int callout_data = *((int *)(cb->callout_data));
577  if (callout_data != 0)
578    {
579    fprintf(outfile, "Callout data = %d\n", callout_data);
580    return callout_data;
581    }
582  }
583
584return (cb->callout_number != callout_fail_id)? 0 :
585       (++callout_count >= callout_fail_count)? 1 : 0;
586}
587
588
589/*************************************************
590*            Local malloc functions              *
591*************************************************/
592
593/* Alternative malloc function, to test functionality and show the size of the
594compiled re. */
595
596static void *new_malloc(size_t size)
597{
598void *block = malloc(size);
599gotten_store = size;
600if (show_malloc)
601  fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
602return block;
603}
604
605static void new_free(void *block)
606{
607if (show_malloc)
608  fprintf(outfile, "free             %p\n", block);
609free(block);
610}
611
612
613/* For recursion malloc/free, to test stacking calls */
614
615static void *stack_malloc(size_t size)
616{
617void *block = malloc(size);
618if (show_malloc)
619  fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
620return block;
621}
622
623static void stack_free(void *block)
624{
625if (show_malloc)
626  fprintf(outfile, "stack_free       %p\n", block);
627free(block);
628}
629
630
631/*************************************************
632*          Call pcre_fullinfo()                  *
633*************************************************/
634
635/* Get one piece of information from the pcre_fullinfo() function */
636
637static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
638{
639int rc;
640if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
641  fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
642}
643
644
645
646/*************************************************
647*         Byte flipping function                 *
648*************************************************/
649
/* Reverse the byte order of a value. When n is 2 the two low-order bytes are
exchanged; for any other n the four low-order bytes are reversed. Bits above
those taken into account do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
659
660
661
662
663/*************************************************
664*        Check match or recursion limit          *
665*************************************************/
666
667static int
668check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
669  int start_offset, int options, int *use_offsets, int use_size_offsets,
670  int flag, unsigned long int *limit, int errnumber, const char *msg)
671{
672int count;
673int min = 0;
674int mid = 64;
675int max = -1;
676
677extra->flags |= flag;
678
679for (;;)
680  {
681  *limit = mid;
682
683  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
684    use_offsets, use_size_offsets);
685
686  if (count == errnumber)
687    {
688    /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
689    min = mid;
690    mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
691    }
692
693  else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
694                         count == PCRE_ERROR_PARTIAL)
695    {
696    if (mid == min + 1)
697      {
698      fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
699      break;
700      }
701    /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
702    max = mid;
703    mid = (min + mid)/2;
704    }
705  else break;    /* Some other error */
706  }
707
708extra->flags &= ~flag;
709return count;
710}
711
712
713
714/*************************************************
715*         Case-independent strncmp() function    *
716*************************************************/
717
718/*
719Arguments:
720  s         first string
721  t         second string
722  n         number of characters to compare
723
724Returns:    < 0, = 0, or > 0, according to the comparison
725*/
726
727static int
728strncmpic(uschar *s, uschar *t, int n)
729{
730while (n--)
731  {
732  int c = tolower(*s++) - tolower(*t++);
733  if (c) return c;
734  }
735return 0;
736}
737
738
739
740/*************************************************
741*         Check newline indicator                *
742*************************************************/
743
744/* This is used both at compile and run-time to check for <xxx> escapes, where
745xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
746no match.
747
748Arguments:
749  p           points after the leading '<'
750  f           file for error message
751
752Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
753*/
754
755static int
756check_newline(uschar *p, FILE *f)
757{
758if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
759if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
760if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
761if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
762if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
763if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
764if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
765fprintf(f, "Unknown newline type at: <%s\n", p);
766return 0;
767}
768
769
770
771/*************************************************
772*             Usage function                     *
773*************************************************/
774
/* Write the command line summary to stdout. Which lines appear depends on the
features that were compiled in (readline, DFA matching, the POSIX interface). */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
fputs("  -b       show compiled code (bytecode)\n", stdout);
fputs("  -C       show PCRE compile-time options and exit\n", stdout);
fputs("  -d       debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n", stdout);
fputs("  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n", stdout);
fputs("  -S <n>   set stack size to <n> megabytes\n", stdout);
fputs("  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n", stdout);
fputs("  -t <n>   time compilation and execution, repeating <n> times\n", stdout);
fputs("  -tm      time execution (matching) only\n", stdout);
fputs("  -tm <n>  time execution (matching) only, repeating <n> times\n", stdout);
}
808
809
810
811/*************************************************
812*                Main Program                    *
813*************************************************/
814
815/* Read lines from named file or stdin and write to named file or stdout; lines
816consist of a regular expression, in delimiters and optionally followed by
817options, followed by a set of test data, terminated by an empty line. */
818
819int main(int argc, char **argv)
820{
821FILE *infile = stdin;
822int options = 0;
823int study_options = 0;
824int default_find_match_limit = FALSE;
825int op = 1;
826int timeit = 0;
827int timeitm = 0;
828int showinfo = 0;
829int showstore = 0;
830int quiet = 0;
831int size_offsets = 45;
832int size_offsets_max;
833int *offsets = NULL;
834#if !defined NOPOSIX
835int posix = 0;
836#endif
837int debug = 0;
838int done = 0;
839int all_use_dfa = 0;
840int yield = 0;
841int stack_size;
842
843/* These vectors store, end-to-end, a list of captured substring names. Assume
844that 1024 is plenty long enough for the few names we'll be testing. */
845
846uschar copynames[1024];
847uschar getnames[1024];
848
849uschar *copynamesptr;
850uschar *getnamesptr;
851
852/* Get buffers from malloc() so that Electric Fence will check their misuse
853when I am debugging. They grow automatically when very long lines are read. */
854
855buffer = (unsigned char *)malloc(buffer_size);
856dbuffer = (unsigned char *)malloc(buffer_size);
857pbuffer = (unsigned char *)malloc(buffer_size);
858
859/* The outfile variable is static so that new_malloc can use it. */
860
861outfile = stdout;
862
863/* The following  _setmode() stuff is some Windows magic that tells its runtime
864library to translate CRLF into a single LF character. At least, that's what
865I've been told: never having used Windows I take this all on trust. Originally
866it set 0x8000, but then I was advised that _O_BINARY was better. */
867
868#if defined(_WIN32) || defined(WIN32)
869_setmode( _fileno( stdout ), _O_BINARY );
870#endif
871
872/* Scan options */
873
874while (argc > 1 && argv[op][0] == '-')
875  {
876  unsigned char *endptr;
877
878  if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
879    showstore = 1;
880  else if (strcmp(argv[op], "-q") == 0) quiet = 1;
881  else if (strcmp(argv[op], "-b") == 0) debug = 1;
882  else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
883  else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
884  else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
885#if !defined NODFA
886  else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
887#endif
888  else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
889      ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
890        *endptr == 0))
891    {
892    op++;
893    argc--;
894    }
895  else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
896    {
897    int both = argv[op][2] == 0;
898    int temp;
899    if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
900                     *endptr == 0))
901      {
902      timeitm = temp;
903      op++;
904      argc--;
905      }
906    else timeitm = LOOPREPEAT;
907    if (both) timeit = timeitm;
908    }
909  else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
910      ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
911        *endptr == 0))
912    {
913#if defined(_WIN32) || defined(WIN32)
914    printf("PCRE: -S not supported on this OS\n");
915    exit(1);
916#else
917    int rc;
918    struct rlimit rlim;
919    getrlimit(RLIMIT_STACK, &rlim);
920    rlim.rlim_cur = stack_size * 1024 * 1024;
921    rc = setrlimit(RLIMIT_STACK, &rlim);
922    if (rc != 0)
923      {
924    printf("PCRE: setrlimit() failed with error %d\n", rc);
925    exit(1);
926      }
927    op++;
928    argc--;
929#endif
930    }
931#if !defined NOPOSIX
932  else if (strcmp(argv[op], "-p") == 0) posix = 1;
933#endif
934  else if (strcmp(argv[op], "-C") == 0)
935    {
936    int rc;
937    unsigned long int lrc;
938    printf("PCRE version %s\n", pcre_version());
939    printf("Compiled with\n");
940    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
941    printf("  %sUTF-8 support\n", rc? "" : "No ");
942    (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
943    printf("  %sUnicode properties support\n", rc? "" : "No ");
944    (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
945    /* Note that these values are always the ASCII values, even
946    in EBCDIC environments. CR is 13 and NL is 10. */
947    printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
948      (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
949      (rc == -2)? "ANYCRLF" :
950      (rc == -1)? "ANY" : "???");
951    (void)pcre_config(PCRE_CONFIG_BSR, &rc);
952    printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
953                                     "all Unicode newlines");
954    (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
955    printf("  Internal link size = %d\n", rc);
956    (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
957    printf("  POSIX malloc threshold = %d\n", rc);
958    (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
959    printf("  Default match limit = %ld\n", lrc);
960    (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
961    printf("  Default recursion depth limit = %ld\n", lrc);
962    (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
963    printf("  Match recursion uses %s\n", rc? "stack" : "heap");
964    goto EXIT;
965    }
966  else if (strcmp(argv[op], "-help") == 0 ||
967           strcmp(argv[op], "--help") == 0)
968    {
969    usage();
970    goto EXIT;
971    }
972  else
973    {
974    printf("** Unknown or malformed option %s\n", argv[op]);
975    usage();
976    yield = 1;
977    goto EXIT;
978    }
979  op++;
980  argc--;
981  }
982
983/* Get the store for the offsets vector, and remember what it was */
984
985size_offsets_max = size_offsets;
986offsets = (int *)malloc(size_offsets_max * sizeof(int));
987if (offsets == NULL)
988  {
989  printf("** Failed to get %d bytes of memory for offsets vector\n",
990    (int)(size_offsets_max * sizeof(int)));
991  yield = 1;
992  goto EXIT;
993  }
994
995/* Sort out the input and output files */
996
997if (argc > 1)
998  {
999  infile = fopen(argv[op], INPUT_MODE);
1000  if (infile == NULL)
1001    {
1002    printf("** Failed to open %s\n", argv[op]);
1003    yield = 1;
1004    goto EXIT;
1005    }
1006  }
1007
1008if (argc > 2)
1009  {
1010  outfile = fopen(argv[op+1], OUTPUT_MODE);
1011  if (outfile == NULL)
1012    {
1013    printf("** Failed to open %s\n", argv[op+1]);
1014    yield = 1;
1015    goto EXIT;
1016    }
1017  }
1018
1019/* Set alternative malloc function */
1020
1021pcre_malloc = new_malloc;
1022pcre_free = new_free;
1023pcre_stack_malloc = stack_malloc;
1024pcre_stack_free = stack_free;
1025
1026/* Heading line unless quiet, then prompt for first regex if stdin */
1027
1028if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1029
1030/* Main loop */
1031
1032while (!done)
1033  {
1034  pcre *re = NULL;
1035  pcre_extra *extra = NULL;
1036
1037#if !defined NOPOSIX  /* There are still compilers that require no indent */
1038  regex_t preg;
1039  int do_posix = 0;
1040#endif
1041
1042  const char *error;
1043  unsigned char *p, *pp, *ppp;
1044  unsigned char *to_file = NULL;
1045  const unsigned char *tables = NULL;
1046  unsigned long int true_size, true_study_size = 0;
1047  size_t size, regex_gotten_store;
1048  int do_study = 0;
1049  int do_debug = debug;
1050  int do_G = 0;
1051  int do_g = 0;
1052  int do_showinfo = showinfo;
1053  int do_showrest = 0;
1054  int do_flip = 0;
1055  int erroroffset, len, delimiter, poffset;
1056
1057  use_utf8 = 0;
1058  debug_lengths = 1;
1059
1060  if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
1061  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1062  fflush(outfile);
1063
1064  p = buffer;
1065  while (isspace(*p)) p++;
1066  if (*p == 0) continue;
1067
1068  /* See if the pattern is to be loaded pre-compiled from a file. */
1069
1070  if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1071    {
1072    unsigned long int magic, get_options;
1073    uschar sbuf[8];
1074    FILE *f;
1075
1076    p++;
1077    pp = p + (int)strlen((char *)p);
1078    while (isspace(pp[-1])) pp--;
1079    *pp = 0;
1080
1081    f = fopen((char *)p, "rb");
1082    if (f == NULL)
1083      {
1084      fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1085      continue;
1086      }
1087
1088    if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1089
1090    true_size =
1091      (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1092    true_study_size =
1093      (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1094
1095    re = (real_pcre *)new_malloc(true_size);
1096    regex_gotten_store = gotten_store;
1097
1098    if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1099
1100    magic = ((real_pcre *)re)->magic_number;
1101    if (magic != MAGIC_NUMBER)
1102      {
1103      if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1104        {
1105        do_flip = 1;
1106        }
1107      else
1108        {
1109        fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1110        fclose(f);
1111        continue;
1112        }
1113      }
1114
1115    fprintf(outfile, "Compiled regex%s loaded from %s\n",
1116      do_flip? " (byte-inverted)" : "", p);
1117
1118    /* Need to know if UTF-8 for printing data strings */
1119
1120    new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1121    use_utf8 = (get_options & PCRE_UTF8) != 0;
1122
1123    /* Now see if there is any following study data */
1124
1125    if (true_study_size != 0)
1126      {
1127      pcre_study_data *psd;
1128
1129      extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1130      extra->flags = PCRE_EXTRA_STUDY_DATA;
1131
1132      psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1133      extra->study_data = psd;
1134
1135      if (fread(psd, 1, true_study_size, f) != true_study_size)
1136        {
1137        FAIL_READ:
1138        fprintf(outfile, "Failed to read data from %s\n", p);
1139        if (extra != NULL) new_free(extra);
1140        if (re != NULL) new_free(re);
1141        fclose(f);
1142        continue;
1143        }
1144      fprintf(outfile, "Study data loaded from %s\n", p);
1145      do_study = 1;     /* To get the data output if requested */
1146      }
1147    else fprintf(outfile, "No study data\n");
1148
1149    fclose(f);
1150    goto SHOW_INFO;
1151    }
1152
1153  /* In-line pattern (the usual case). Get the delimiter and seek the end of
1154  the pattern; if it isn't complete, read more. */
1155
1156  delimiter = *p++;
1157
1158  if (isalnum(delimiter) || delimiter == '\\')
1159    {
1160    fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1161    goto SKIP_DATA;
1162    }
1163
1164  pp = p;
1165  poffset = p - buffer;
1166
1167  for(;;)
1168    {
1169    while (*pp != 0)
1170      {
1171      if (*pp == '\\' && pp[1] != 0) pp++;
1172        else if (*pp == delimiter) break;
1173      pp++;
1174      }
1175    if (*pp != 0) break;
1176    if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
1177      {
1178      fprintf(outfile, "** Unexpected EOF\n");
1179      done = 1;
1180      goto CONTINUE;
1181      }
1182    if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1183    }
1184
1185  /* The buffer may have moved while being extended; reset the start of data
1186  pointer to the correct relative point in the buffer. */
1187
1188  p = buffer + poffset;
1189
1190  /* If the first character after the delimiter is backslash, make
1191  the pattern end with backslash. This is purely to provide a way
1192  of testing for the error message when a pattern ends with backslash. */
1193
1194  if (pp[1] == '\\') *pp++ = '\\';
1195
1196  /* Terminate the pattern at the delimiter, and save a copy of the pattern
1197  for callouts. */
1198
1199  *pp++ = 0;
1200  strcpy((char *)pbuffer, (char *)p);
1201
1202  /* Look for options after final delimiter */
1203
1204  options = 0;
1205  study_options = 0;
1206  log_store = showstore;  /* default from command line */
1207
1208  while (*pp != 0)
1209    {
1210    switch (*pp++)
1211      {
1212      case 'f': options |= PCRE_FIRSTLINE; break;
1213      case 'g': do_g = 1; break;
1214      case 'i': options |= PCRE_CASELESS; break;
1215      case 'm': options |= PCRE_MULTILINE; break;
1216      case 's': options |= PCRE_DOTALL; break;
1217      case 'x': options |= PCRE_EXTENDED; break;
1218
1219      case '+': do_showrest = 1; break;
1220      case 'A': options |= PCRE_ANCHORED; break;
1221      case 'B': do_debug = 1; break;
1222      case 'C': options |= PCRE_AUTO_CALLOUT; break;
1223      case 'D': do_debug = do_showinfo = 1; break;
1224      case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1225      case 'F': do_flip = 1; break;
1226      case 'G': do_G = 1; break;
1227      case 'I': do_showinfo = 1; break;
1228      case 'J': options |= PCRE_DUPNAMES; break;
1229      case 'M': log_store = 1; break;
1230      case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1231
1232#if !defined NOPOSIX
1233      case 'P': do_posix = 1; break;
1234#endif
1235
1236      case 'S': do_study = 1; break;
1237      case 'U': options |= PCRE_UNGREEDY; break;
1238      case 'X': options |= PCRE_EXTRA; break;
1239      case 'Z': debug_lengths = 0; break;
1240      case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1241      case '?': options |= PCRE_NO_UTF8_CHECK; break;
1242
1243      case 'L':
1244      ppp = pp;
1245      /* The '\r' test here is so that it works on Windows. */
1246      /* The 0 (NUL) test is just in case this is an unterminated line. */
1247      while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1248      *ppp = 0;
1249      if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1250        {
1251        fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1252        goto SKIP_DATA;
1253        }
1254      locale_set = 1;
1255      tables = pcre_maketables();
1256      pp = ppp;
1257      break;
1258
1259      case '>':
1260      to_file = pp;
1261      while (*pp != 0) pp++;
1262      while (isspace(pp[-1])) pp--;
1263      *pp = 0;
1264      break;
1265
1266      case '<':
1267        {
1268        if (strncmp((char *)pp, "JS>", 3) == 0)
1269          {
1270          options |= PCRE_JAVASCRIPT_COMPAT;
1271          pp += 3;
1272          }
1273        else
1274          {
1275          int x = check_newline(pp, outfile);
1276          if (x == 0) goto SKIP_DATA;
1277          options |= x;
1278          while (*pp++ != '>');
1279          }
1280        }
1281      break;
1282
1283      case '\r':                      /* So that it works in Windows */
1284      case '\n':
1285      case ' ':
1286      break;
1287
1288      default:
1289      fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1290      goto SKIP_DATA;
1291      }
1292    }
1293
1294  /* Handle compiling via the POSIX interface, which doesn't support the
1295  timing, showing, or debugging options, nor the ability to pass over
1296  local character tables. */
1297
1298#if !defined NOPOSIX
1299  if (posix || do_posix)
1300    {
1301    int rc;
1302    int cflags = 0;
1303
1304    if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1305    if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1306    if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1307    if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1308    if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1309    if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1310
1311    rc = regcomp(&preg, (char *)p, cflags);
1312
1313    /* Compilation failed; go back for another re, skipping to blank line
1314    if non-interactive. */
1315
1316    if (rc != 0)
1317      {
1318      (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1319      fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1320      goto SKIP_DATA;
1321      }
1322    }
1323
1324  /* Handle compiling via the native interface */
1325
1326  else
1327#endif  /* !defined NOPOSIX */
1328
1329    {
1330    unsigned long int get_options;
1331
1332    if (timeit > 0)
1333      {
1334      register int i;
1335      clock_t time_taken;
1336      clock_t start_time = clock();
1337      for (i = 0; i < timeit; i++)
1338        {
1339        re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1340        if (re != NULL) free(re);
1341        }
1342      time_taken = clock() - start_time;
1343      fprintf(outfile, "Compile time %.4f milliseconds\n",
1344        (((double)time_taken * 1000.0) / (double)timeit) /
1345          (double)CLOCKS_PER_SEC);
1346      }
1347
1348    re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1349
1350    /* Compilation failed; go back for another re, skipping to blank line
1351    if non-interactive. */
1352
1353    if (re == NULL)
1354      {
1355      fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1356      SKIP_DATA:
1357      if (infile != stdin)
1358        {
1359        for (;;)
1360          {
1361          if (extend_inputline(infile, buffer, NULL) == NULL)
1362            {
1363            done = 1;
1364            goto CONTINUE;
1365            }
1366          len = (int)strlen((char *)buffer);
1367          while (len > 0 && isspace(buffer[len-1])) len--;
1368          if (len == 0) break;
1369          }
1370        fprintf(outfile, "\n");
1371        }
1372      goto CONTINUE;
1373      }
1374
1375    /* Compilation succeeded. It is now possible to set the UTF-8 option from
1376    within the regex; check for this so that we know how to process the data
1377    lines. */
1378
1379    new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1380    if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1381
1382    /* Print information if required. There are now two info-returning
1383    functions. The old one has a limited interface and returns only limited
1384    data. Check that it agrees with the newer one. */
1385
1386    if (log_store)
1387      fprintf(outfile, "Memory allocation (code space): %d\n",
1388        (int)(gotten_store -
1389              sizeof(real_pcre) -
1390              ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1391
1392    /* Extract the size for possible writing before possibly flipping it,
1393    and remember the store that was got. */
1394
1395    true_size = ((real_pcre *)re)->size;
1396    regex_gotten_store = gotten_store;
1397
1398    /* If /S was present, study the regexp to generate additional info to
1399    help with the matching. */
1400
1401    if (do_study)
1402      {
1403      if (timeit > 0)
1404        {
1405        register int i;
1406        clock_t time_taken;
1407        clock_t start_time = clock();
1408        for (i = 0; i < timeit; i++)
1409          extra = pcre_study(re, study_options, &error);
1410        time_taken = clock() - start_time;
1411        if (extra != NULL) free(extra);
1412        fprintf(outfile, "  Study time %.4f milliseconds\n",
1413          (((double)time_taken * 1000.0) / (double)timeit) /
1414            (double)CLOCKS_PER_SEC);
1415        }
1416      extra = pcre_study(re, study_options, &error);
1417      if (error != NULL)
1418        fprintf(outfile, "Failed to study: %s\n", error);
1419      else if (extra != NULL)
1420        true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1421      }
1422
1423    /* If the 'F' option was present, we flip the bytes of all the integer
1424    fields in the regex data block and the study block. This is to make it
1425    possible to test PCRE's handling of byte-flipped patterns, e.g. those
1426    compiled on a different architecture. */
1427
1428    if (do_flip)
1429      {
1430      real_pcre *rre = (real_pcre *)re;
1431      rre->magic_number =
1432        byteflip(rre->magic_number, sizeof(rre->magic_number));
1433      rre->size = byteflip(rre->size, sizeof(rre->size));
1434      rre->options = byteflip(rre->options, sizeof(rre->options));
1435      rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1436      rre->top_bracket =
1437        (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1438      rre->top_backref =
1439        (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1440      rre->first_byte =
1441        (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1442      rre->req_byte =
1443        (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1444      rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1445        sizeof(rre->name_table_offset));
1446      rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1447        sizeof(rre->name_entry_size));
1448      rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1449        sizeof(rre->name_count));
1450
1451      if (extra != NULL)
1452        {
1453        pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1454        rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1455        rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1456        rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1457        }
1458      }
1459
1460    /* Extract information from the compiled data if required */
1461
1462    SHOW_INFO:
1463
1464    if (do_debug)
1465      {
1466      fprintf(outfile, "------------------------------------------------------------------\n");
1467      pcre_printint(re, outfile, debug_lengths);
1468      }
1469
1470    /* We already have the options in get_options (see above) */
1471
1472    if (do_showinfo)
1473      {
1474      unsigned long int all_options;
1475#if !defined NOINFOCHECK
1476      int old_first_char, old_options, old_count;
1477#endif
1478      int count, backrefmax, first_char, need_char, okpartial, jchanged,
1479        hascrorlf;
1480      int nameentrysize, namecount;
1481      const uschar *nametable;
1482
1483      new_info(re, NULL, PCRE_INFO_SIZE, &size);
1484      new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1485      new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1486      new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1487      new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1488      new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1489      new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1490      new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1491      new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1492      new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1493      new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1494
1495#if !defined NOINFOCHECK
1496      old_count = pcre_info(re, &old_options, &old_first_char);
1497      if (count < 0) fprintf(outfile,
1498        "Error %d from pcre_info()\n", count);
1499      else
1500        {
1501        if (old_count != count) fprintf(outfile,
1502          "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1503            old_count);
1504
1505        if (old_first_char != first_char) fprintf(outfile,
1506          "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1507            first_char, old_first_char);
1508
1509        if (old_options != (int)get_options) fprintf(outfile,
1510          "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1511            get_options, old_options);
1512        }
1513#endif
1514
1515      if (size != regex_gotten_store) fprintf(outfile,
1516        "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1517        (int)size, (int)regex_gotten_store);
1518
1519      fprintf(outfile, "Capturing subpattern count = %d\n", count);
1520      if (backrefmax > 0)
1521        fprintf(outfile, "Max back reference = %d\n", backrefmax);
1522
1523      if (namecount > 0)
1524        {
1525        fprintf(outfile, "Named capturing subpatterns:\n");
1526        while (namecount-- > 0)
1527          {
1528          fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1529            nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1530            GET2(nametable, 0));
1531          nametable += nameentrysize;
1532          }
1533        }
1534
1535      if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1536      if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1537
1538      all_options = ((real_pcre *)re)->options;
1539      if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1540
1541      if (get_options == 0) fprintf(outfile, "No options\n");
1542        else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1543          ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1544          ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1545          ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1546          ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1547          ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1548          ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1549          ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1550          ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1551          ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1552          ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1553          ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1554          ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1555          ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1556          ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1557          ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1558
1559      if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1560
1561      switch (get_options & PCRE_NEWLINE_BITS)
1562        {
1563        case PCRE_NEWLINE_CR:
1564        fprintf(outfile, "Forced newline sequence: CR\n");
1565        break;
1566
1567        case PCRE_NEWLINE_LF:
1568        fprintf(outfile, "Forced newline sequence: LF\n");
1569        break;
1570
1571        case PCRE_NEWLINE_CRLF:
1572        fprintf(outfile, "Forced newline sequence: CRLF\n");
1573        break;
1574
1575        case PCRE_NEWLINE_ANYCRLF:
1576        fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1577        break;
1578
1579        case PCRE_NEWLINE_ANY:
1580        fprintf(outfile, "Forced newline sequence: ANY\n");
1581        break;
1582
1583        default:
1584        break;
1585        }
1586
1587      if (first_char == -1)
1588        {
1589        fprintf(outfile, "First char at start or follows newline\n");
1590        }
1591      else if (first_char < 0)
1592        {
1593        fprintf(outfile, "No first char\n");
1594        }
1595      else
1596        {
1597        int ch = first_char & 255;
1598        const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1599          "" : " (caseless)";
1600        if (PRINTHEX(ch))
1601          fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1602        else
1603          fprintf(outfile, "First char = %d%s\n", ch, caseless);
1604        }
1605
1606      if (need_char < 0)
1607        {
1608        fprintf(outfile, "No need char\n");
1609        }
1610      else
1611        {
1612        int ch = need_char & 255;
1613        const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1614          "" : " (caseless)";
1615        if (PRINTHEX(ch))
1616          fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1617        else
1618          fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1619        }
1620
1621      /* Don't output study size; at present it is in any case a fixed
1622      value, but it varies, depending on the computer architecture, and
1623      so messes up the test suite. (And with the /F option, it might be
1624      flipped.) */
1625
1626      if (do_study)
1627        {
1628        if (extra == NULL)
1629          fprintf(outfile, "Study returned NULL\n");
1630        else
1631          {
1632          uschar *start_bits = NULL;
1633          int minlength;
1634
1635          new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1636          fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1637
1638          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1639          if (start_bits == NULL)
1640            fprintf(outfile, "No set of starting bytes\n");
1641          else
1642            {
1643            int i;
1644            int c = 24;
1645            fprintf(outfile, "Starting byte set: ");
1646            for (i = 0; i < 256; i++)
1647              {
1648              if ((start_bits[i/8] & (1<<(i&7))) != 0)
1649                {
1650                if (c > 75)
1651                  {
1652                  fprintf(outfile, "\n  ");
1653                  c = 2;
1654                  }
1655                if (PRINTHEX(i) && i != ' ')
1656                  {
1657                  fprintf(outfile, "%c ", i);
1658                  c += 2;
1659                  }
1660                else
1661                  {
1662                  fprintf(outfile, "\\x%02x ", i);
1663                  c += 5;
1664                  }
1665                }
1666              }
1667            fprintf(outfile, "\n");
1668            }
1669          }
1670        }
1671      }
1672
1673    /* If the '>' option was present, we write out the regex to a file, and
1674    that is all. The first 8 bytes of the file are the regex length and then
1675    the study length, in big-endian order. */
1676
1677    if (to_file != NULL)
1678      {
1679      FILE *f = fopen((char *)to_file, "wb");
1680      if (f == NULL)
1681        {
1682        fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1683        }
1684      else
1685        {
1686        uschar sbuf[8];
1687        sbuf[0] = (uschar)((true_size >> 24) & 255);
1688        sbuf[1] = (uschar)((true_size >> 16) & 255);
1689        sbuf[2] = (uschar)((true_size >>  8) & 255);
1690        sbuf[3] = (uschar)((true_size) & 255);
1691
1692        sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1693        sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1694        sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1695        sbuf[7] = (uschar)((true_study_size) & 255);
1696
1697        if (fwrite(sbuf, 1, 8, f) < 8 ||
1698            fwrite(re, 1, true_size, f) < true_size)
1699          {
1700          fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1701          }
1702        else
1703          {
1704          fprintf(outfile, "Compiled regex written to %s\n", to_file);
1705          if (extra != NULL)
1706            {
1707            if (fwrite(extra->study_data, 1, true_study_size, f) <
1708                true_study_size)
1709              {
1710              fprintf(outfile, "Write error on %s: %s\n", to_file,
1711                strerror(errno));
1712              }
1713            else fprintf(outfile, "Study data written to %s\n", to_file);
1714
1715            }
1716          }
1717        fclose(f);
1718        }
1719
1720      new_free(re);
1721      if (extra != NULL) new_free(extra);
1722      if (tables != NULL) new_free((void *)tables);
1723      continue;  /* With next regex */
1724      }
1725    }        /* End of non-POSIX compile */
1726
1727  /* Read data lines and test them */
1728
1729  for (;;)
1730    {
1731    uschar *q;
1732    uschar *bptr;
1733    int *use_offsets = offsets;
1734    int use_size_offsets = size_offsets;
1735    int callout_data = 0;
1736    int callout_data_set = 0;
1737    int count, c;
1738    int copystrings = 0;
1739    int find_match_limit = default_find_match_limit;
1740    int getstrings = 0;
1741    int getlist = 0;
1742    int gmatched = 0;
1743    int start_offset = 0;
1744    int g_notempty = 0;
1745    int use_dfa = 0;
1746
1747    options = 0;
1748
1749    *copynames = 0;
1750    *getnames = 0;
1751
1752    copynamesptr = copynames;
1753    getnamesptr = getnames;
1754
1755    pcre_callout = callout;
1756    first_callout = 1;
1757    callout_extra = 0;
1758    callout_count = 0;
1759    callout_fail_count = 999999;
1760    callout_fail_id = -1;
1761    show_malloc = 0;
1762
1763    if (extra != NULL) extra->flags &=
1764      ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1765
1766    len = 0;
1767    for (;;)
1768      {
1769      if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1770        {
1771        if (len > 0) break;
1772        done = 1;
1773        goto CONTINUE;
1774        }
1775      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1776      len = (int)strlen((char *)buffer);
1777      if (buffer[len-1] == '\n') break;
1778      }
1779
1780    while (len > 0 && isspace(buffer[len-1])) len--;
1781    buffer[len] = 0;
1782    if (len == 0) break;
1783
1784    p = buffer;
1785    while (isspace(*p)) p++;
1786
1787    bptr = q = dbuffer;
1788    while ((c = *p++) != 0)
1789      {
1790      int i = 0;
1791      int n = 0;
1792
1793      if (c == '\\') switch ((c = *p++))
1794        {
1795        case 'a': c =    7; break;
1796        case 'b': c = '\b'; break;
1797        case 'e': c =   27; break;
1798        case 'f': c = '\f'; break;
1799        case 'n': c = '\n'; break;
1800        case 'r': c = '\r'; break;
1801        case 't': c = '\t'; break;
1802        case 'v': c = '\v'; break;
1803
1804        case '0': case '1': case '2': case '3':
1805        case '4': case '5': case '6': case '7':
1806        c -= '0';
1807        while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1808          c = c * 8 + *p++ - '0';
1809
1810#if !defined NOUTF8
1811        if (use_utf8 && c > 255)
1812          {
1813          unsigned char buff8[8];
1814          int ii, utn;
1815          utn = ord2utf8(c, buff8);
1816          for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1817          c = buff8[ii];   /* Last byte */
1818          }
1819#endif
1820        break;
1821
1822        case 'x':
1823
1824        /* Handle \x{..} specially - new Perl thing for utf8 */
1825
1826#if !defined NOUTF8
1827        if (*p == '{')
1828          {
1829          unsigned char *pt = p;
1830          c = 0;
1831          while (isxdigit(*(++pt)))
1832            c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1833          if (*pt == '}')
1834            {
1835            unsigned char buff8[8];
1836            int ii, utn;
1837            if (use_utf8)
1838              {
1839              utn = ord2utf8(c, buff8);
1840              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1841              c = buff8[ii];   /* Last byte */
1842              }
1843            else
1844             {
1845             if (c > 255)
1846               fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1847                 "UTF-8 mode is not enabled.\n"
1848                 "** Truncation will probably give the wrong result.\n", c);
1849             }
1850            p = pt + 1;
1851            break;
1852            }
1853          /* Not correct form; fall through */
1854          }
1855#endif
1856
1857        /* Ordinary \x */
1858
1859        c = 0;
1860        while (i++ < 2 && isxdigit(*p))
1861          {
1862          c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1863          p++;
1864          }
1865        break;
1866
1867        case 0:   /* \ followed by EOF allows for an empty line */
1868        p--;
1869        continue;
1870
1871        case '>':
1872        while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1873        continue;
1874
1875        case 'A':  /* Option setting */
1876        options |= PCRE_ANCHORED;
1877        continue;
1878
1879        case 'B':
1880        options |= PCRE_NOTBOL;
1881        continue;
1882
1883        case 'C':
1884        if (isdigit(*p))    /* Set copy string */
1885          {
1886          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1887          copystrings |= 1 << n;
1888          }
1889        else if (isalnum(*p))
1890          {
1891          uschar *npp = copynamesptr;
1892          while (isalnum(*p)) *npp++ = *p++;
1893          *npp++ = 0;
1894          *npp = 0;
1895          n = pcre_get_stringnumber(re, (char *)copynamesptr);
1896          if (n < 0)
1897            fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1898          copynamesptr = npp;
1899          }
1900        else if (*p == '+')
1901          {
1902          callout_extra = 1;
1903          p++;
1904          }
1905        else if (*p == '-')
1906          {
1907          pcre_callout = NULL;
1908          p++;
1909          }
1910        else if (*p == '!')
1911          {
1912          callout_fail_id = 0;
1913          p++;
1914          while(isdigit(*p))
1915            callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1916          callout_fail_count = 0;
1917          if (*p == '!')
1918            {
1919            p++;
1920            while(isdigit(*p))
1921              callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1922            }
1923          }
1924        else if (*p == '*')
1925          {
1926          int sign = 1;
1927          callout_data = 0;
1928          if (*(++p) == '-') { sign = -1; p++; }
1929          while(isdigit(*p))
1930            callout_data = callout_data * 10 + *p++ - '0';
1931          callout_data *= sign;
1932          callout_data_set = 1;
1933          }
1934        continue;
1935
1936#if !defined NODFA
1937        case 'D':
1938#if !defined NOPOSIX
1939        if (posix || do_posix)
1940          printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1941        else
1942#endif
1943          use_dfa = 1;
1944        continue;
1945
1946        case 'F':
1947        options |= PCRE_DFA_SHORTEST;
1948        continue;
1949#endif
1950
1951        case 'G':
1952        if (isdigit(*p))
1953          {
1954          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1955          getstrings |= 1 << n;
1956          }
1957        else if (isalnum(*p))
1958          {
1959          uschar *npp = getnamesptr;
1960          while (isalnum(*p)) *npp++ = *p++;
1961          *npp++ = 0;
1962          *npp = 0;
1963          n = pcre_get_stringnumber(re, (char *)getnamesptr);
1964          if (n < 0)
1965            fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1966          getnamesptr = npp;
1967          }
1968        continue;
1969
1970        case 'L':
1971        getlist = 1;
1972        continue;
1973
1974        case 'M':
1975        find_match_limit = 1;
1976        continue;
1977
1978        case 'N':
1979        if ((options & PCRE_NOTEMPTY) != 0)
1980          options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
1981        else
1982          options |= PCRE_NOTEMPTY;
1983        continue;
1984
1985        case 'O':
1986        while(isdigit(*p)) n = n * 10 + *p++ - '0';
1987        if (n > size_offsets_max)
1988          {
1989          size_offsets_max = n;
1990          free(offsets);
1991          use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1992          if (offsets == NULL)
1993            {
1994            printf("** Failed to get %d bytes of memory for offsets vector\n",
1995              (int)(size_offsets_max * sizeof(int)));
1996            yield = 1;
1997            goto EXIT;
1998            }
1999          }
2000        use_size_offsets = n;
2001        if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
2002        continue;
2003
2004        case 'P':
2005        options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2006          PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2007        continue;
2008
2009        case 'Q':
2010        while(isdigit(*p)) n = n * 10 + *p++ - '0';
2011        if (extra == NULL)
2012          {
2013          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2014          extra->flags = 0;
2015          }
2016        extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2017        extra->match_limit_recursion = n;
2018        continue;
2019
2020        case 'q':
2021        while(isdigit(*p)) n = n * 10 + *p++ - '0';
2022        if (extra == NULL)
2023          {
2024          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2025          extra->flags = 0;
2026          }
2027        extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2028        extra->match_limit = n;
2029        continue;
2030
2031#if !defined NODFA
2032        case 'R':
2033        options |= PCRE_DFA_RESTART;
2034        continue;
2035#endif
2036
2037        case 'S':
2038        show_malloc = 1;
2039        continue;
2040
2041        case 'Y':
2042        options |= PCRE_NO_START_OPTIMIZE;
2043        continue;
2044
2045        case 'Z':
2046        options |= PCRE_NOTEOL;
2047        continue;
2048
2049        case '?':
2050        options |= PCRE_NO_UTF8_CHECK;
2051        continue;
2052
2053        case '<':
2054          {
2055          int x = check_newline(p, outfile);
2056          if (x == 0) goto NEXT_DATA;
2057          options |= x;
2058          while (*p++ != '>');
2059          }
2060        continue;
2061        }
2062      *q++ = c;
2063      }
2064    *q = 0;
2065    len = q - dbuffer;
2066
2067    /* Move the data to the end of the buffer so that a read over the end of
2068    the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2069    we are using the POSIX interface, we must include the terminating zero. */
2070
2071#if !defined NOPOSIX
2072    if (posix || do_posix)
2073      {
2074      memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2075      bptr += buffer_size - len - 1;
2076      }
2077    else
2078#endif
2079      {
2080      memmove(bptr + buffer_size - len, bptr, len);
2081      bptr += buffer_size - len;
2082      }
2083
2084    if ((all_use_dfa || use_dfa) && find_match_limit)
2085      {
2086      printf("**Match limit not relevant for DFA matching: ignored\n");
2087      find_match_limit = 0;
2088      }
2089
2090    /* Handle matching via the POSIX interface, which does not
2091    support timing or playing with the match limit or callout data. */
2092
2093#if !defined NOPOSIX
2094    if (posix || do_posix)
2095      {
2096      int rc;
2097      int eflags = 0;
2098      regmatch_t *pmatch = NULL;
2099      if (use_size_offsets > 0)
2100        pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2101      if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2102      if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2103      if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2104
2105      rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2106
2107      if (rc != 0)
2108        {
2109        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2110        fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2111        }
2112      else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2113              != 0)
2114        {
2115        fprintf(outfile, "Matched with REG_NOSUB\n");
2116        }
2117      else
2118        {
2119        size_t i;
2120        for (i = 0; i < (size_t)use_size_offsets; i++)
2121          {
2122          if (pmatch[i].rm_so >= 0)
2123            {
2124            fprintf(outfile, "%2d: ", (int)i);
2125            (void)pchars(dbuffer + pmatch[i].rm_so,
2126              pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2127            fprintf(outfile, "\n");
2128            if (i == 0 && do_showrest)
2129              {
2130              fprintf(outfile, " 0+ ");
2131              (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2132                outfile);
2133              fprintf(outfile, "\n");
2134              }
2135            }
2136          }
2137        }
2138      free(pmatch);
2139      }
2140
2141    /* Handle matching via the native interface - repeats for /g and /G */
2142
2143    else
2144#endif  /* !defined NOPOSIX */
2145
2146    for (;; gmatched++)    /* Loop for /g or /G */
2147      {
2148      if (timeitm > 0)
2149        {
2150        register int i;
2151        clock_t time_taken;
2152        clock_t start_time = clock();
2153
2154#if !defined NODFA
2155        if (all_use_dfa || use_dfa)
2156          {
2157          int workspace[1000];
2158          for (i = 0; i < timeitm; i++)
2159            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2160              options | g_notempty, use_offsets, use_size_offsets, workspace,
2161              sizeof(workspace)/sizeof(int));
2162          }
2163        else
2164#endif
2165
2166        for (i = 0; i < timeitm; i++)
2167          count = pcre_exec(re, extra, (char *)bptr, len,
2168            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2169
2170        time_taken = clock() - start_time;
2171        fprintf(outfile, "Execute time %.4f milliseconds\n",
2172          (((double)time_taken * 1000.0) / (double)timeitm) /
2173            (double)CLOCKS_PER_SEC);
2174        }
2175
2176      /* If find_match_limit is set, we want to do repeated matches with
2177      varying limits in order to find the minimum value for the match limit and
2178      for the recursion limit. */
2179
2180      if (find_match_limit)
2181        {
2182        if (extra == NULL)
2183          {
2184          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2185          extra->flags = 0;
2186          }
2187
2188        (void)check_match_limit(re, extra, bptr, len, start_offset,
2189          options|g_notempty, use_offsets, use_size_offsets,
2190          PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2191          PCRE_ERROR_MATCHLIMIT, "match()");
2192
2193        count = check_match_limit(re, extra, bptr, len, start_offset,
2194          options|g_notempty, use_offsets, use_size_offsets,
2195          PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2196          PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2197        }
2198
2199      /* If callout_data is set, use the interface with additional data */
2200
2201      else if (callout_data_set)
2202        {
2203        if (extra == NULL)
2204          {
2205          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2206          extra->flags = 0;
2207          }
2208        extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2209        extra->callout_data = &callout_data;
2210        count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2211          options | g_notempty, use_offsets, use_size_offsets);
2212        extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2213        }
2214
2215      /* The normal case is just to do the match once, with the default
2216      value of match_limit. */
2217
2218#if !defined NODFA
2219      else if (all_use_dfa || use_dfa)
2220        {
2221        int workspace[1000];
2222        count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2223          options | g_notempty, use_offsets, use_size_offsets, workspace,
2224          sizeof(workspace)/sizeof(int));
2225        if (count == 0)
2226          {
2227          fprintf(outfile, "Matched, but too many subsidiary matches\n");
2228          count = use_size_offsets/2;
2229          }
2230        }
2231#endif
2232
2233      else
2234        {
2235        count = pcre_exec(re, extra, (char *)bptr, len,
2236          start_offset, options | g_notempty, use_offsets, use_size_offsets);
2237        if (count == 0)
2238          {
2239          fprintf(outfile, "Matched, but too many substrings\n");
2240          count = use_size_offsets/3;
2241          }
2242        }
2243
2244      /* Matched */
2245
2246      if (count >= 0)
2247        {
2248        int i, maxcount;
2249
2250#if !defined NODFA
2251        if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2252#endif
2253          maxcount = use_size_offsets/3;
2254
2255        /* This is a check against a lunatic return value. */
2256
2257        if (count > maxcount)
2258          {
2259          fprintf(outfile,
2260            "** PCRE error: returned count %d is too big for offset size %d\n",
2261            count, use_size_offsets);
2262          count = use_size_offsets/3;
2263          if (do_g || do_G)
2264            {
2265            fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2266            do_g = do_G = FALSE;        /* Break g/G loop */
2267            }
2268          }
2269
2270        for (i = 0; i < count * 2; i += 2)
2271          {
2272          if (use_offsets[i] < 0)
2273            fprintf(outfile, "%2d: <unset>\n", i/2);
2274          else
2275            {
2276            fprintf(outfile, "%2d: ", i/2);
2277            (void)pchars(bptr + use_offsets[i],
2278              use_offsets[i+1] - use_offsets[i], outfile);
2279            fprintf(outfile, "\n");
2280            if (i == 0)
2281              {
2282              if (do_showrest)
2283                {
2284                fprintf(outfile, " 0+ ");
2285                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2286                  outfile);
2287                fprintf(outfile, "\n");
2288                }
2289              }
2290            }
2291          }
2292
2293        for (i = 0; i < 32; i++)
2294          {
2295          if ((copystrings & (1 << i)) != 0)
2296            {
2297            char copybuffer[256];
2298            int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2299              i, copybuffer, sizeof(copybuffer));
2300            if (rc < 0)
2301              fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2302            else
2303              fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2304            }
2305          }
2306
2307        for (copynamesptr = copynames;
2308             *copynamesptr != 0;
2309             copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2310          {
2311          char copybuffer[256];
2312          int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2313            count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2314          if (rc < 0)
2315            fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2316          else
2317            fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2318          }
2319
2320        for (i = 0; i < 32; i++)
2321          {
2322          if ((getstrings & (1 << i)) != 0)
2323            {
2324            const char *substring;
2325            int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2326              i, &substring);
2327            if (rc < 0)
2328              fprintf(outfile, "get substring %d failed %d\n", i, rc);
2329            else
2330              {
2331              fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2332              pcre_free_substring(substring);
2333              }
2334            }
2335          }
2336
2337        for (getnamesptr = getnames;
2338             *getnamesptr != 0;
2339             getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2340          {
2341          const char *substring;
2342          int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2343            count, (char *)getnamesptr, &substring);
2344          if (rc < 0)
2345            fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2346          else
2347            {
2348            fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2349            pcre_free_substring(substring);
2350            }
2351          }
2352
2353        if (getlist)
2354          {
2355          const char **stringlist;
2356          int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2357            &stringlist);
2358          if (rc < 0)
2359            fprintf(outfile, "get substring list failed %d\n", rc);
2360          else
2361            {
2362            for (i = 0; i < count; i++)
2363              fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2364            if (stringlist[i] != NULL)
2365              fprintf(outfile, "string list not terminated by NULL\n");
2366            /* free((void *)stringlist); */
2367            pcre_free_substring_list(stringlist);
2368            }
2369          }
2370        }
2371
2372      /* There was a partial match */
2373
2374      else if (count == PCRE_ERROR_PARTIAL)
2375        {
2376        fprintf(outfile, "Partial match");
2377        if (use_size_offsets > 1)
2378          {
2379          fprintf(outfile, ": ");
2380          pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2381            outfile);
2382          }
2383        fprintf(outfile, "\n");
2384        break;  /* Out of the /g loop */
2385        }
2386
2387      /* Failed to match. If this is a /g or /G loop and we previously set
2388      g_notempty after a null match, this is not necessarily the end. We want
2389      to advance the start offset, and continue. We won't be at the end of the
2390      string - that was checked before setting g_notempty.
2391
2392      Complication arises in the case when the newline option is "any" or
2393      "anycrlf". If the previous match was at the end of a line terminated by
2394      CRLF, an advance of one character just passes the \r, whereas we should
2395      prefer the longer newline sequence, as does the code in pcre_exec().
2396      Fudge the offset value to achieve this.
2397
2398      Otherwise, in the case of UTF-8 matching, the advance must be one
2399      character, not one byte. */
2400
2401      else
2402        {
2403        if (g_notempty != 0)
2404          {
2405          int onechar = 1;
2406          unsigned int obits = ((real_pcre *)re)->options;
2407          use_offsets[0] = start_offset;
2408          if ((obits & PCRE_NEWLINE_BITS) == 0)
2409            {
2410            int d;
2411            (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2412            /* Note that these values are always the ASCII ones, even in
2413            EBCDIC environments. CR = 13, NL = 10. */
2414            obits = (d == 13)? PCRE_NEWLINE_CR :
2415                    (d == 10)? PCRE_NEWLINE_LF :
2416                    (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2417                    (d == -2)? PCRE_NEWLINE_ANYCRLF :
2418                    (d == -1)? PCRE_NEWLINE_ANY : 0;
2419            }
2420          if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2421               (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2422              &&
2423              start_offset < len - 1 &&
2424              bptr[start_offset] == '\r' &&
2425              bptr[start_offset+1] == '\n')
2426            onechar++;
2427          else if (use_utf8)
2428            {
2429            while (start_offset + onechar < len)
2430              {
2431              int tb = bptr[start_offset+onechar];
2432              if (tb <= 127) break;
2433              tb &= 0xc0;
2434              if (tb != 0 && tb != 0xc0) onechar++;
2435              }
2436            }
2437          use_offsets[1] = start_offset + onechar;
2438          }
2439        else
2440          {
2441          if (count == PCRE_ERROR_NOMATCH)
2442            {
2443            if (gmatched == 0) fprintf(outfile, "No match\n");
2444            }
2445          else fprintf(outfile, "Error %d\n", count);
2446          break;  /* Out of the /g loop */
2447          }
2448        }
2449
2450      /* If not /g or /G we are done */
2451
2452      if (!do_g && !do_G) break;
2453
2454      /* If we have matched an empty string, first check to see if we are at
2455      the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2456      Perl's /g option does. This turns out to be rather cunning. First we set
2457      PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2458      same point. If this fails (picked up above) we advance to the next
2459      character. */
2460
2461      g_notempty = 0;
2462
2463      if (use_offsets[0] == use_offsets[1])
2464        {
2465        if (use_offsets[0] == len) break;
2466        g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2467        }
2468
2469      /* For /g, update the start offset, leaving the rest alone */
2470
2471      if (do_g) start_offset = use_offsets[1];
2472
2473      /* For /G, update the pointer and length */
2474
2475      else
2476        {
2477        bptr += use_offsets[1];
2478        len -= use_offsets[1];
2479        }
2480      }  /* End of loop for /g and /G */
2481
2482    NEXT_DATA: continue;
2483    }    /* End of loop for data lines */
2484
2485  CONTINUE:
2486
2487#if !defined NOPOSIX
2488  if (posix || do_posix) regfree(&preg);
2489#endif
2490
2491  if (re != NULL) new_free(re);
2492  if (extra != NULL) new_free(extra);
2493  if (tables != NULL)
2494    {
2495    new_free((void *)tables);
2496    setlocale(LC_CTYPE, "C");
2497    locale_set = 0;
2498    }
2499  }
2500
2501if (infile == stdin) fprintf(outfile, "\n");
2502
2503EXIT:
2504
2505if (infile != NULL && infile != stdin) fclose(infile);
2506if (outfile != NULL && outfile != stdout) fclose(outfile);
2507
2508free(buffer);
2509free(dbuffer);
2510free(pbuffer);
2511free(offsets);
2512
2513return yield;
2514}
2515
2516/* End of pcretest.c */
2517