1/*************************************************
2*             PCRE testing program               *
3*************************************************/
4
5/* This program was hacked up as a tester for PCRE. I really should have
6written it more tidily in the first place. Will I ever learn? It has grown and
7been extended and consequently is now rather untidy in places.
8
9-----------------------------------------------------------------------------
10Redistribution and use in source and binary forms, with or without
11modification, are permitted provided that the following conditions are met:
12
13    * Redistributions of source code must retain the above copyright notice,
14      this list of conditions and the following disclaimer.
15
16    * Redistributions in binary form must reproduce the above copyright
17      notice, this list of conditions and the following disclaimer in the
18      documentation and/or other materials provided with the distribution.
19
20    * Neither the name of the University of Cambridge nor the names of its
21      contributors may be used to endorse or promote products derived from
22      this software without specific prior written permission.
23
24THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34POSSIBILITY OF SUCH DAMAGE.
35-----------------------------------------------------------------------------
36*/
37
38
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46
47/* We need the internal info for displaying the results of pcre_study(). Also
48for getting the opcodes for showing compiled code. */
49
50#define PCRE_SPY        /* For Win32 build, import data, not export */
51#include "internal.h"
52
53/* It is possible to compile this test program without including support for
54testing the POSIX interface, though this is not available via the standard
55Makefile. */
56
57#if !defined NOPOSIX
58#include "pcreposix.h"
59#endif
60
61#ifndef CLOCKS_PER_SEC
62#ifdef CLK_TCK
63#define CLOCKS_PER_SEC CLK_TCK
64#else
65#define CLOCKS_PER_SEC 100
66#endif
67#endif
68
69#define LOOPREPEAT 500000
70
71#define BUFFER_SIZE 30000
72#define PBUFFER_SIZE BUFFER_SIZE
73#define DBUFFER_SIZE BUFFER_SIZE
74
75
/* File-scope state shared between main() and the helper and callback
functions below. */

static FILE *outfile;            /* Where test output goes: stdout, or the
                                    output file opened by main() */
static int log_store = 0;        /* Non-zero: report the memory used by the
                                    compiled pattern (command line default,
                                    or the M pattern modifier) */
static int callout_count;        /* Incremented by callout() for each callout
                                    whose number equals callout_fail_id */
static int callout_extra;        /* Non-zero: callout() also prints the
                                    captured substrings */
static int callout_fail_count;   /* callout() returns 1 once callout_count
                                    has reached this value */
static int callout_fail_id;      /* Callout number that the fail count
                                    applies to */
static int first_callout;        /* Non-zero: callout() re-prints the subject;
                                    callout() clears it before returning */
static int show_malloc;          /* Non-zero: the replacement malloc/free
                                    functions trace their calls */
static int use_utf8;             /* Non-zero: pchars() decodes strings as
                                    UTF-8 */
static size_t gotten_store;      /* Size requested by the most recent call of
                                    new_malloc() */

static uschar *pbuffer = NULL;   /* Copy of the current pattern, used by
                                    callout() to show the next pattern item */
88
89
/* Largest character value that can be encoded in 1, 2, ... 6 bytes; the index
is the number of additional (continuation) bytes. */

static const int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are ORed into the first byte of an encoding, indexed by the
number of additional bytes. */

static const int utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask that extracts the data bits from the first byte of an encoding, indexed
by the number of additional bytes. */

static const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
98
99
100
101/*************************************************
102*         Print compiled regex                   *
103*************************************************/
104
/* The code for doing this is held in a separate file that is also included in
pcre.c when it is compiled with the debug switch. It defines a function called
print_internals(), which uses a table of opcode lengths. That table is
initialized from the macro OP_LENGTHS, and it must be called OP_lengths because
that is the name the included code refers to. The included code also uses a
table that translates Unicode property names to numbers; this is kept in a
separate file. */

static uschar OP_lengths[] = { OP_LENGTHS };
112
113#include "ucp.h"
114#include "ucptypetable.c"
115#include "printint.c"
116
117
118
119/*************************************************
120*          Read number from string               *
121*************************************************/
122
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking the -o argument, so just keep it simple.

Leading white space is skipped, then decimal digits are accumulated. A number
that is too big for an int does not overflow (which would be undefined
behaviour); the result sticks at INT_MAX, and the remaining digits are still
consumed so that the caller's end-of-string check keeps working.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (first character not consumed)

Returns:        the value as a non-negative int, at most INT_MAX;
                zero if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  /* result * 10 + digit fits in an int only if this test is false */
  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
143
144
145
146/*************************************************
147*       Convert character value to UTF-8         *
148*************************************************/
149
150/* This function takes an integer value in the range 0 - 0x7fffffff
151and encodes it as a UTF-8 character in 0 to 6 bytes.
152
153Arguments:
154  cvalue     the character value
155  buffer     pointer to buffer for result - at least 6 bytes long
156
157Returns:     number of characters placed in the buffer
158             -1 if input character is negative
159             0 if input character is positive but too big (only when
160             int is longer than 32 bits)
161*/
162
163static int
164ord2utf8(int cvalue, unsigned char *buffer)
165{
166register int i, j;
167for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
168  if (cvalue <= utf8_table1[i]) break;
169if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
170if (cvalue < 0) return -1;
171
172buffer += i;
173for (j = i; j > 0; j--)
174 {
175 *buffer-- = 0x80 | (cvalue & 0x3f);
176 cvalue >>= 6;
177 }
178*buffer = utf8_table2[i] | cvalue;
179return i + 1;
180}
181
182
183/*************************************************
184*            Convert UTF-8 string to value       *
185*************************************************/
186
187/* This function takes one or more bytes that represents a UTF-8 character,
188and returns the value of the character.
189
190Argument:
191  buffer   a pointer to the byte vector
192  vptr     a pointer to an int to receive the value
193
194Returns:   >  0 => the number of bytes consumed
195           -6 to 0 => malformed UTF-8 character at offset = (-return)
196*/
197
198static int
199utf82ord(unsigned char *buffer, int *vptr)
200{
201int c = *buffer++;
202int d = c;
203int i, j, s;
204
205for (i = -1; i < 6; i++)               /* i is number of additional bytes */
206  {
207  if ((d & 0x80) == 0) break;
208  d <<= 1;
209  }
210
211if (i == -1) { *vptr = c; return 1; }  /* ascii character */
212if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
213
214/* i now has a value in the range 1-5 */
215
216s = 6*i;
217d = (c & utf8_table3[i]) << s;
218
219for (j = 0; j < i; j++)
220  {
221  c = *buffer++;
222  if ((c & 0xc0) != 0x80) return -(j+1);
223  s -= 6;
224  d |= (c & 0x3f) << s;
225  }
226
227/* Check that encoding was the correct unique one */
228
229for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
230  if (d <= utf8_table1[j]) break;
231if (j != i) return -(i+1);
232
233/* Valid value */
234
235*vptr = d;
236return i+1;
237}
238
239
240
241/*************************************************
242*             Print character string             *
243*************************************************/
244
245/* Character string printing function. Must handle UTF-8 strings in utf8
246mode. Yields number of characters printed. If handed a NULL file, just counts
247chars without printing. */
248
249static int pchars(unsigned char *p, int length, FILE *f)
250{
251int c;
252int yield = 0;
253
254while (length-- > 0)
255  {
256  if (use_utf8)
257    {
258    int rc = utf82ord(p, &c);
259
260    if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
261      {
262      length -= rc - 1;
263      p += rc;
264      if (c < 256 && isprint(c))
265        {
266        if (f != NULL) fprintf(f, "%c", c);
267        yield++;
268        }
269      else
270        {
271        int n;
272        if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
273        yield += n;
274        }
275      continue;
276      }
277    }
278
279   /* Not UTF-8, or malformed UTF-8  */
280
281  if (isprint(c = *(p++)))
282    {
283    if (f != NULL) fprintf(f, "%c", c);
284    yield++;
285    }
286  else
287    {
288    if (f != NULL) fprintf(f, "\\x%02x", c);
289    yield += 4;
290    }
291  }
292
293return yield;
294}
295
296
297
298/*************************************************
299*              Callout function                  *
300*************************************************/
301
302/* Called from PCRE as a result of the (?C) item. We print out where we are in
303the match. Yield zero unless more callouts than the fail count, or the callout
304data is not zero. */
305
306static int callout(pcre_callout_block *cb)
307{
308FILE *f = (first_callout | callout_extra)? outfile : NULL;
309int i, pre_start, post_start, subject_length;
310
311if (callout_extra)
312  {
313  fprintf(f, "Callout %d: last capture = %d\n",
314    cb->callout_number, cb->capture_last);
315
316  for (i = 0; i < cb->capture_top * 2; i += 2)
317    {
318    if (cb->offset_vector[i] < 0)
319      fprintf(f, "%2d: <unset>\n", i/2);
320    else
321      {
322      fprintf(f, "%2d: ", i/2);
323      (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
324        cb->offset_vector[i+1] - cb->offset_vector[i], f);
325      fprintf(f, "\n");
326      }
327    }
328  }
329
330/* Re-print the subject in canonical form, the first time or if giving full
331datails. On subsequent calls in the same match, we use pchars just to find the
332printed lengths of the substrings. */
333
334if (f != NULL) fprintf(f, "--->");
335
336pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
337post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
338  cb->current_position - cb->start_match, f);
339
340subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
341
342(void)pchars((unsigned char *)(cb->subject + cb->current_position),
343  cb->subject_length - cb->current_position, f);
344
345if (f != NULL) fprintf(f, "\n");
346
347/* Always print appropriate indicators, with callout number if not already
348shown. For automatic callouts, show the pattern offset. */
349
350if (cb->callout_number == 255)
351  {
352  fprintf(outfile, "%+3d ", cb->pattern_position);
353  if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
354  }
355else
356  {
357  if (callout_extra) fprintf(outfile, "    ");
358    else fprintf(outfile, "%3d ", cb->callout_number);
359  }
360
361for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
362fprintf(outfile, "^");
363
364if (post_start > 0)
365  {
366  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
367  fprintf(outfile, "^");
368  }
369
370for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
371  fprintf(outfile, " ");
372
373fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
374  pbuffer + cb->pattern_position);
375
376fprintf(outfile, "\n");
377first_callout = 0;
378
379if (cb->callout_data != NULL)
380  {
381  int callout_data = *((int *)(cb->callout_data));
382  if (callout_data != 0)
383    {
384    fprintf(outfile, "Callout data = %d\n", callout_data);
385    return callout_data;
386    }
387  }
388
389return (cb->callout_number != callout_fail_id)? 0 :
390       (++callout_count >= callout_fail_count)? 1 : 0;
391}
392
393
394/*************************************************
395*            Local malloc functions              *
396*************************************************/
397
398/* Alternative malloc function, to test functionality and show the size of the
399compiled re. */
400
401static void *new_malloc(size_t size)
402{
403void *block = malloc(size);
404gotten_store = size;
405if (show_malloc)
406  fprintf(outfile, "malloc       %3d %p\n", size, block);
407return block;
408}
409
410static void new_free(void *block)
411{
412if (show_malloc)
413  fprintf(outfile, "free             %p\n", block);
414free(block);
415}
416
417
418/* For recursion malloc/free, to test stacking calls */
419
420static void *stack_malloc(size_t size)
421{
422void *block = malloc(size);
423if (show_malloc)
424  fprintf(outfile, "stack_malloc %3d %p\n", size, block);
425return block;
426}
427
428static void stack_free(void *block)
429{
430if (show_malloc)
431  fprintf(outfile, "stack_free       %p\n", block);
432free(block);
433}
434
435
436/*************************************************
437*          Call pcre_fullinfo()                  *
438*************************************************/
439
440/* Get one piece of information from the pcre_fullinfo() function */
441
442static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
443{
444int rc;
445if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
446  fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
447}
448
449
450
451/*************************************************
452*         Byte flipping function                 *
453*************************************************/
454
/* Reverse the order of the bytes in a 2-byte or a 4-byte quantity.

The work is done on an unsigned copy of the value. Shifting a byte up into the
top of a signed long is undefined behaviour when long is only 32 bits wide.

Arguments:
  value      the value to be flipped
  n          2 to swap the two least significant bytes; any other value
               causes the four least significant bytes to be reversed

Returns:     the flipped value; bytes above those that are flipped are zero
*/

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
464
465
466
467
468/*************************************************
469*                Main Program                    *
470*************************************************/
471
472/* Read lines from named file or stdin and write to named file or stdout; lines
473consist of a regular expression, in delimiters and optionally followed by
474options, followed by a set of test data, terminated by an empty line. */
475
476int main(int argc, char **argv)
477{
478FILE *infile = stdin;
479int options = 0;
480int study_options = 0;
481int op = 1;
482int timeit = 0;
483int showinfo = 0;
484int showstore = 0;
485int size_offsets = 45;
486int size_offsets_max;
487int *offsets;
488#if !defined NOPOSIX
489int posix = 0;
490#endif
491int debug = 0;
492int done = 0;
493
494unsigned char *buffer;
495unsigned char *dbuffer;
496
497/* Get buffers from malloc() so that Electric Fence will check their misuse
498when I am debugging. */
499
500buffer = (unsigned char *)malloc(BUFFER_SIZE);
501dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
502pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
503
504/* The outfile variable is static so that new_malloc can use it. The _setmode()
505stuff is some magic that I don't understand, but which apparently does good
506things in Windows. It's related to line terminations.  */
507
508#if defined(_WIN32) || defined(WIN32)
509_setmode( _fileno( stdout ), 0x8000 );
510#endif  /* defined(_WIN32) || defined(WIN32) */
511
512outfile = stdout;
513
514/* Scan options */
515
516while (argc > 1 && argv[op][0] == '-')
517  {
518  unsigned char *endptr;
519
520  if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
521    showstore = 1;
522  else if (strcmp(argv[op], "-t") == 0) timeit = 1;
523  else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
524  else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
525  else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
526      ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
527        *endptr == 0))
528    {
529    op++;
530    argc--;
531    }
532#if !defined NOPOSIX
533  else if (strcmp(argv[op], "-p") == 0) posix = 1;
534#endif
535  else if (strcmp(argv[op], "-C") == 0)
536    {
537    int rc;
538    printf("PCRE version %s\n", pcre_version());
539    printf("Compiled with\n");
540    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
541    printf("  %sUTF-8 support\n", rc? "" : "No ");
542    (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
543    printf("  %sUnicode properties support\n", rc? "" : "No ");
544    (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
545    printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
546    (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
547    printf("  Internal link size = %d\n", rc);
548    (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
549    printf("  POSIX malloc threshold = %d\n", rc);
550    (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
551    printf("  Default match limit = %d\n", rc);
552    (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
553    printf("  Match recursion uses %s\n", rc? "stack" : "heap");
554    exit(0);
555    }
556  else
557    {
558    printf("** Unknown or malformed option %s\n", argv[op]);
559    printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
560    printf("  -C     show PCRE compile-time options and exit\n");
561    printf("  -d     debug: show compiled code; implies -i\n"
562           "  -i     show information about compiled pattern\n"
563           "  -m     output memory used information\n"
564           "  -o <n> set size of offsets vector to <n>\n");
565#if !defined NOPOSIX
566    printf("  -p     use POSIX interface\n");
567#endif
568    printf("  -s     output store (memory) used information\n"
569           "  -t     time compilation and execution\n");
570    return 1;
571    }
572  op++;
573  argc--;
574  }
575
576/* Get the store for the offsets vector, and remember what it was */
577
578size_offsets_max = size_offsets;
579offsets = (int *)malloc(size_offsets_max * sizeof(int));
580if (offsets == NULL)
581  {
582  printf("** Failed to get %d bytes of memory for offsets vector\n",
583    size_offsets_max * sizeof(int));
584  return 1;
585  }
586
587/* Sort out the input and output files */
588
589if (argc > 1)
590  {
591  infile = fopen(argv[op], "rb");
592  if (infile == NULL)
593    {
594    printf("** Failed to open %s\n", argv[op]);
595    return 1;
596    }
597  }
598
599if (argc > 2)
600  {
601  outfile = fopen(argv[op+1], "wb");
602  if (outfile == NULL)
603    {
604    printf("** Failed to open %s\n", argv[op+1]);
605    return 1;
606    }
607  }
608
609/* Set alternative malloc function */
610
611pcre_malloc = new_malloc;
612pcre_free = new_free;
613pcre_stack_malloc = stack_malloc;
614pcre_stack_free = stack_free;
615
616/* Heading line, then prompt for first regex if stdin */
617
618fprintf(outfile, "PCRE version %s\n\n", pcre_version());
619
620/* Main loop */
621
622while (!done)
623  {
624  pcre *re = NULL;
625  pcre_extra *extra = NULL;
626
627#if !defined NOPOSIX  /* There are still compilers that require no indent */
628  regex_t preg;
629  int do_posix = 0;
630#endif
631
632  const char *error;
633  unsigned char *p, *pp, *ppp;
634  unsigned char *to_file = NULL;
635  const unsigned char *tables = NULL;
636  unsigned long int true_size, true_study_size = 0;
637  size_t size, regex_gotten_store;
638  int do_study = 0;
639  int do_debug = debug;
640  int do_G = 0;
641  int do_g = 0;
642  int do_showinfo = showinfo;
643  int do_showrest = 0;
644  int do_flip = 0;
645  int erroroffset, len, delimiter;
646
647  use_utf8 = 0;
648
649  if (infile == stdin) printf("  re> ");
650  if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
651  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
652  fflush(outfile);
653
654  p = buffer;
655  while (isspace(*p)) p++;
656  if (*p == 0) continue;
657
658  /* See if the pattern is to be loaded pre-compiled from a file. */
659
660  if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
661    {
662    unsigned long int magic;
663    uschar sbuf[8];
664    FILE *f;
665
666    p++;
667    pp = p + (int)strlen((char *)p);
668    while (isspace(pp[-1])) pp--;
669    *pp = 0;
670
671    f = fopen((char *)p, "rb");
672    if (f == NULL)
673      {
674      fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
675      continue;
676      }
677
678    if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
679
680    true_size =
681      (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
682    true_study_size =
683      (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
684
685    re = (real_pcre *)new_malloc(true_size);
686    regex_gotten_store = gotten_store;
687
688    if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
689
690    magic = ((real_pcre *)re)->magic_number;
691    if (magic != MAGIC_NUMBER)
692      {
693      if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
694        {
695        do_flip = 1;
696        }
697      else
698        {
699        fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
700        fclose(f);
701        continue;
702        }
703      }
704
705    fprintf(outfile, "Compiled regex%s loaded from %s\n",
706      do_flip? " (byte-inverted)" : "", p);
707
708    /* Need to know if UTF-8 for printing data strings */
709
710    new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
711    use_utf8 = (options & PCRE_UTF8) != 0;
712
713    /* Now see if there is any following study data */
714
715    if (true_study_size != 0)
716      {
717      pcre_study_data *psd;
718
719      extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
720      extra->flags = PCRE_EXTRA_STUDY_DATA;
721
722      psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
723      extra->study_data = psd;
724
725      if (fread(psd, 1, true_study_size, f) != true_study_size)
726        {
727        FAIL_READ:
728        fprintf(outfile, "Failed to read data from %s\n", p);
729        if (extra != NULL) new_free(extra);
730        if (re != NULL) new_free(re);
731        fclose(f);
732        continue;
733        }
734      fprintf(outfile, "Study data loaded from %s\n", p);
735      do_study = 1;     /* To get the data output if requested */
736      }
737    else fprintf(outfile, "No study data\n");
738
739    fclose(f);
740    goto SHOW_INFO;
741    }
742
  /* In-line pattern (the usual case). Get the delimiter and seek the end of
  the pattern; if it isn't complete, read more. */
745
746  delimiter = *p++;
747
748  if (isalnum(delimiter) || delimiter == '\\')
749    {
750    fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
751    goto SKIP_DATA;
752    }
753
754  pp = p;
755
756  for(;;)
757    {
758    while (*pp != 0)
759      {
760      if (*pp == '\\' && pp[1] != 0) pp++;
761        else if (*pp == delimiter) break;
762      pp++;
763      }
764    if (*pp != 0) break;
765
766    len = BUFFER_SIZE - (pp - buffer);
767    if (len < 256)
768      {
769      fprintf(outfile, "** Expression too long - missing delimiter?\n");
770      goto SKIP_DATA;
771      }
772
773    if (infile == stdin) printf("    > ");
774    if (fgets((char *)pp, len, infile) == NULL)
775      {
776      fprintf(outfile, "** Unexpected EOF\n");
777      done = 1;
778      goto CONTINUE;
779      }
780    if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
781    }
782
783  /* If the first character after the delimiter is backslash, make
784  the pattern end with backslash. This is purely to provide a way
785  of testing for the error message when a pattern ends with backslash. */
786
787  if (pp[1] == '\\') *pp++ = '\\';
788
789  /* Terminate the pattern at the delimiter, and save a copy of the pattern
790  for callouts. */
791
792  *pp++ = 0;
793  strcpy((char *)pbuffer, (char *)p);
794
795  /* Look for options after final delimiter */
796
797  options = 0;
798  study_options = 0;
799  log_store = showstore;  /* default from command line */
800
801  while (*pp != 0)
802    {
803    switch (*pp++)
804      {
805      case 'g': do_g = 1; break;
806      case 'i': options |= PCRE_CASELESS; break;
807      case 'm': options |= PCRE_MULTILINE; break;
808      case 's': options |= PCRE_DOTALL; break;
809      case 'x': options |= PCRE_EXTENDED; break;
810
811      case '+': do_showrest = 1; break;
812      case 'A': options |= PCRE_ANCHORED; break;
813      case 'C': options |= PCRE_AUTO_CALLOUT; break;
814      case 'D': do_debug = do_showinfo = 1; break;
815      case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
816      case 'F': do_flip = 1; break;
817      case 'G': do_G = 1; break;
818      case 'I': do_showinfo = 1; break;
819      case 'M': log_store = 1; break;
820      case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
821
822#if !defined NOPOSIX
823      case 'P': do_posix = 1; break;
824#endif
825
826      case 'S': do_study = 1; break;
827      case 'U': options |= PCRE_UNGREEDY; break;
828      case 'X': options |= PCRE_EXTRA; break;
829      case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
830      case '?': options |= PCRE_NO_UTF8_CHECK; break;
831
832      case 'L':
833      ppp = pp;
834      while (*ppp != '\n' && *ppp != ' ') ppp++;
835      *ppp = 0;
836      if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
837        {
838        fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
839        goto SKIP_DATA;
840        }
841      tables = pcre_maketables();
842      pp = ppp;
843      break;
844
845      case '>':
846      to_file = pp;
847      while (*pp != 0) pp++;
848      while (isspace(pp[-1])) pp--;
849      *pp = 0;
850      break;
851
852      case '\n': case ' ': break;
853
854      default:
855      fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
856      goto SKIP_DATA;
857      }
858    }
859
860  /* Handle compiling via the POSIX interface, which doesn't support the
861  timing, showing, or debugging options, nor the ability to pass over
862  local character tables. */
863
864#if !defined NOPOSIX
865  if (posix || do_posix)
866    {
867    int rc;
868    int cflags = 0;
869
870    if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
871    if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
872    rc = regcomp(&preg, (char *)p, cflags);
873
874    /* Compilation failed; go back for another re, skipping to blank line
875    if non-interactive. */
876
877    if (rc != 0)
878      {
879      (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
880      fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
881      goto SKIP_DATA;
882      }
883    }
884
885  /* Handle compiling via the native interface */
886
887  else
888#endif  /* !defined NOPOSIX */
889
890    {
891    if (timeit)
892      {
893      register int i;
894      clock_t time_taken;
895      clock_t start_time = clock();
896      for (i = 0; i < LOOPREPEAT; i++)
897        {
898        re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
899        if (re != NULL) free(re);
900        }
901      time_taken = clock() - start_time;
902      fprintf(outfile, "Compile time %.3f milliseconds\n",
903        (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
904          (double)CLOCKS_PER_SEC);
905      }
906
907    re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
908
909    /* Compilation failed; go back for another re, skipping to blank line
910    if non-interactive. */
911
912    if (re == NULL)
913      {
914      fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
915      SKIP_DATA:
916      if (infile != stdin)
917        {
918        for (;;)
919          {
920          if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
921            {
922            done = 1;
923            goto CONTINUE;
924            }
925          len = (int)strlen((char *)buffer);
926          while (len > 0 && isspace(buffer[len-1])) len--;
927          if (len == 0) break;
928          }
929        fprintf(outfile, "\n");
930        }
931      goto CONTINUE;
932      }
933
934    /* Compilation succeeded; print data if required. There are now two
935    info-returning functions. The old one has a limited interface and
936    returns only limited data. Check that it agrees with the newer one. */
937
938    if (log_store)
939      fprintf(outfile, "Memory allocation (code space): %d\n",
940        (int)(gotten_store -
941              sizeof(real_pcre) -
942              ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
943
944    /* Extract the size for possible writing before possibly flipping it,
945    and remember the store that was got. */
946
947    true_size = ((real_pcre *)re)->size;
948    regex_gotten_store = gotten_store;
949
950    /* If /S was present, study the regexp to generate additional info to
951    help with the matching. */
952
953    if (do_study)
954      {
955      if (timeit)
956        {
957        register int i;
958        clock_t time_taken;
959        clock_t start_time = clock();
960        for (i = 0; i < LOOPREPEAT; i++)
961          extra = pcre_study(re, study_options, &error);
962        time_taken = clock() - start_time;
963        if (extra != NULL) free(extra);
964        fprintf(outfile, "  Study time %.3f milliseconds\n",
965          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
966            (double)CLOCKS_PER_SEC);
967        }
968      extra = pcre_study(re, study_options, &error);
969      if (error != NULL)
970        fprintf(outfile, "Failed to study: %s\n", error);
971      else if (extra != NULL)
972        true_study_size = ((pcre_study_data *)(extra->study_data))->size;
973      }
974
975    /* If the 'F' option was present, we flip the bytes of all the integer
976    fields in the regex data block and the study block. This is to make it
977    possible to test PCRE's handling of byte-flipped patterns, e.g. those
978    compiled on a different architecture. */
979
980    if (do_flip)
981      {
982      real_pcre *rre = (real_pcre *)re;
983      rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
984      rre->size = byteflip(rre->size, sizeof(rre->size));
985      rre->options = byteflip(rre->options, sizeof(rre->options));
986      rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
987      rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
988      rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
989      rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
990      rre->name_table_offset = byteflip(rre->name_table_offset,
991        sizeof(rre->name_table_offset));
992      rre->name_entry_size = byteflip(rre->name_entry_size,
993        sizeof(rre->name_entry_size));
994      rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
995
996      if (extra != NULL)
997        {
998        pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
999        rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1000        rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1001        }
1002      }
1003
1004    /* Extract information from the compiled data if required */
1005
1006    SHOW_INFO:
1007
1008    if (do_showinfo)
1009      {
1010      unsigned long int get_options, all_options;
1011      int old_first_char, old_options, old_count;
1012      int count, backrefmax, first_char, need_char;
1013      int nameentrysize, namecount;
1014      const uschar *nametable;
1015
1016      if (do_debug)
1017        {
1018        fprintf(outfile, "------------------------------------------------------------------\n");
1019        print_internals(re, outfile);
1020        }
1021
1022      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1023      new_info(re, NULL, PCRE_INFO_SIZE, &size);
1024      new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1025      new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1026      new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1027      new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1028      new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1029      new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1030      new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1031
1032      old_count = pcre_info(re, &old_options, &old_first_char);
1033      if (count < 0) fprintf(outfile,
1034        "Error %d from pcre_info()\n", count);
1035      else
1036        {
1037        if (old_count != count) fprintf(outfile,
1038          "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1039            old_count);
1040
1041        if (old_first_char != first_char) fprintf(outfile,
1042          "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1043            first_char, old_first_char);
1044
1045        if (old_options != (int)get_options) fprintf(outfile,
1046          "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1047            get_options, old_options);
1048        }
1049
1050      if (size != regex_gotten_store) fprintf(outfile,
1051        "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1052        size, regex_gotten_store);
1053
1054      fprintf(outfile, "Capturing subpattern count = %d\n", count);
1055      if (backrefmax > 0)
1056        fprintf(outfile, "Max back reference = %d\n", backrefmax);
1057
1058      if (namecount > 0)
1059        {
1060        fprintf(outfile, "Named capturing subpatterns:\n");
1061        while (namecount-- > 0)
1062          {
1063          fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1064            nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1065            GET2(nametable, 0));
1066          nametable += nameentrysize;
1067          }
1068        }
1069
1070      /* The NOPARTIAL bit is a private bit in the options, so we have
1071      to fish it out via our back door */
1072
1073      all_options = ((real_pcre *)re)->options;
1074      if (do_flip)
1075        {
1076        all_options = byteflip(all_options, sizeof(all_options));
1077        }
1078
1079      if ((all_options & PCRE_NOPARTIAL) != 0)
1080        fprintf(outfile, "Partial matching not supported\n");
1081
1082      if (get_options == 0) fprintf(outfile, "No options\n");
1083        else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
1084          ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1085          ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1086          ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1087          ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1088          ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1089          ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1090          ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1091          ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1092          ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1093          ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1094
1095      if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1096        fprintf(outfile, "Case state changes\n");
1097
1098      if (first_char == -1)
1099        {
1100        fprintf(outfile, "First char at start or follows \\n\n");
1101        }
1102      else if (first_char < 0)
1103        {
1104        fprintf(outfile, "No first char\n");
1105        }
1106      else
1107        {
1108        int ch = first_char & 255;
1109        const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1110          "" : " (caseless)";
1111        if (isprint(ch))
1112          fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1113        else
1114          fprintf(outfile, "First char = %d%s\n", ch, caseless);
1115        }
1116
1117      if (need_char < 0)
1118        {
1119        fprintf(outfile, "No need char\n");
1120        }
1121      else
1122        {
1123        int ch = need_char & 255;
1124        const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1125          "" : " (caseless)";
1126        if (isprint(ch))
1127          fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1128        else
1129          fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1130        }
1131
1132      /* Don't output study size; at present it is in any case a fixed
1133      value, but it varies, depending on the computer architecture, and
1134      so messes up the test suite. (And with the /F option, it might be
1135      flipped.) */
1136
1137      if (do_study)
1138        {
1139        if (extra == NULL)
1140          fprintf(outfile, "Study returned NULL\n");
1141        else
1142          {
1143          uschar *start_bits = NULL;
1144          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1145
1146          if (start_bits == NULL)
1147            fprintf(outfile, "No starting byte set\n");
1148          else
1149            {
1150            int i;
1151            int c = 24;
1152            fprintf(outfile, "Starting byte set: ");
1153            for (i = 0; i < 256; i++)
1154              {
1155              if ((start_bits[i/8] & (1<<(i&7))) != 0)
1156                {
1157                if (c > 75)
1158                  {
1159                  fprintf(outfile, "\n  ");
1160                  c = 2;
1161                  }
1162                if (isprint(i) && i != ' ')
1163                  {
1164                  fprintf(outfile, "%c ", i);
1165                  c += 2;
1166                  }
1167                else
1168                  {
1169                  fprintf(outfile, "\\x%02x ", i);
1170                  c += 5;
1171                  }
1172                }
1173              }
1174            fprintf(outfile, "\n");
1175            }
1176          }
1177        }
1178      }
1179
1180    /* If the '>' option was present, we write out the regex to a file, and
1181    that is all. The first 8 bytes of the file are the regex length and then
1182    the study length, in big-endian order. */
1183
1184    if (to_file != NULL)
1185      {
1186      FILE *f = fopen((char *)to_file, "wb");
1187      if (f == NULL)
1188        {
1189        fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1190        }
1191      else
1192        {
1193        uschar sbuf[8];
1194        sbuf[0] = (true_size >> 24)  & 255;
1195        sbuf[1] = (true_size >> 16)  & 255;
1196        sbuf[2] = (true_size >>  8)  & 255;
1197        sbuf[3] = (true_size)  & 255;
1198
1199        sbuf[4] = (true_study_size >> 24)  & 255;
1200        sbuf[5] = (true_study_size >> 16)  & 255;
1201        sbuf[6] = (true_study_size >>  8)  & 255;
1202        sbuf[7] = (true_study_size)  & 255;
1203
1204        if (fwrite(sbuf, 1, 8, f) < 8 ||
1205            fwrite(re, 1, true_size, f) < true_size)
1206          {
1207          fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1208          }
1209        else
1210          {
1211          fprintf(outfile, "Compiled regex written to %s\n", to_file);
1212          if (extra != NULL)
1213            {
1214            if (fwrite(extra->study_data, 1, true_study_size, f) <
1215                true_study_size)
1216              {
1217              fprintf(outfile, "Write error on %s: %s\n", to_file,
1218                strerror(errno));
1219              }
1220            else fprintf(outfile, "Study data written to %s\n", to_file);
1221            }
1222          }
1223        fclose(f);
1224        }
1225      continue;  /* With next regex */
1226      }
1227    }        /* End of non-POSIX compile */
1228
1229  /* Read data lines and test them */
1230
1231  for (;;)
1232    {
1233    unsigned char *q;
1234    unsigned char *bptr = dbuffer;
1235    int *use_offsets = offsets;
1236    int use_size_offsets = size_offsets;
1237    int callout_data = 0;
1238    int callout_data_set = 0;
1239    int count, c;
1240    int copystrings = 0;
1241    int find_match_limit = 0;
1242    int getstrings = 0;
1243    int getlist = 0;
1244    int gmatched = 0;
1245    int start_offset = 0;
1246    int g_notempty = 0;
1247
1248    options = 0;
1249
1250    pcre_callout = callout;
1251    first_callout = 1;
1252    callout_extra = 0;
1253    callout_count = 0;
1254    callout_fail_count = 999999;
1255    callout_fail_id = -1;
1256    show_malloc = 0;
1257
1258    if (infile == stdin) printf("data> ");
1259    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1260      {
1261      done = 1;
1262      goto CONTINUE;
1263      }
1264    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1265
1266    len = (int)strlen((char *)buffer);
1267    while (len > 0 && isspace(buffer[len-1])) len--;
1268    buffer[len] = 0;
1269    if (len == 0) break;
1270
1271    p = buffer;
1272    while (isspace(*p)) p++;
1273
1274    q = dbuffer;
1275    while ((c = *p++) != 0)
1276      {
1277      int i = 0;
1278      int n = 0;
1279
1280      if (c == '\\') switch ((c = *p++))
1281        {
1282        case 'a': c =    7; break;
1283        case 'b': c = '\b'; break;
1284        case 'e': c =   27; break;
1285        case 'f': c = '\f'; break;
1286        case 'n': c = '\n'; break;
1287        case 'r': c = '\r'; break;
1288        case 't': c = '\t'; break;
1289        case 'v': c = '\v'; break;
1290
1291        case '0': case '1': case '2': case '3':
1292        case '4': case '5': case '6': case '7':
1293        c -= '0';
1294        while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1295          c = c * 8 + *p++ - '0';
1296        break;
1297
1298        case 'x':
1299
1300        /* Handle \x{..} specially - new Perl thing for utf8 */
1301
1302        if (*p == '{')
1303          {
1304          unsigned char *pt = p;
1305          c = 0;
1306          while (isxdigit(*(++pt)))
1307            c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1308          if (*pt == '}')
1309            {
1310            unsigned char buff8[8];
1311            int ii, utn;
1312            utn = ord2utf8(c, buff8);
1313            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1314            c = buff8[ii];   /* Last byte */
1315            p = pt + 1;
1316            break;
1317            }
1318          /* Not correct form; fall through */
1319          }
1320
1321        /* Ordinary \x */
1322
1323        c = 0;
1324        while (i++ < 2 && isxdigit(*p))
1325          {
1326          c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1327          p++;
1328          }
1329        break;
1330
1331        case 0:   /* \ followed by EOF allows for an empty line */
1332        p--;
1333        continue;
1334
1335        case '>':
1336        while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1337        continue;
1338
1339        case 'A':  /* Option setting */
1340        options |= PCRE_ANCHORED;
1341        continue;
1342
1343        case 'B':
1344        options |= PCRE_NOTBOL;
1345        continue;
1346
1347        case 'C':
1348        if (isdigit(*p))    /* Set copy string */
1349          {
1350          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1351          copystrings |= 1 << n;
1352          }
1353        else if (isalnum(*p))
1354          {
1355          uschar name[256];
1356          uschar *npp = name;
1357          while (isalnum(*p)) *npp++ = *p++;
1358          *npp = 0;
1359          n = pcre_get_stringnumber(re, (char *)name);
1360          if (n < 0)
1361            fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1362          else copystrings |= 1 << n;
1363          }
1364        else if (*p == '+')
1365          {
1366          callout_extra = 1;
1367          p++;
1368          }
1369        else if (*p == '-')
1370          {
1371          pcre_callout = NULL;
1372          p++;
1373          }
1374        else if (*p == '!')
1375          {
1376          callout_fail_id = 0;
1377          p++;
1378          while(isdigit(*p))
1379            callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1380          callout_fail_count = 0;
1381          if (*p == '!')
1382            {
1383            p++;
1384            while(isdigit(*p))
1385              callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1386            }
1387          }
1388        else if (*p == '*')
1389          {
1390          int sign = 1;
1391          callout_data = 0;
1392          if (*(++p) == '-') { sign = -1; p++; }
1393          while(isdigit(*p))
1394            callout_data = callout_data * 10 + *p++ - '0';
1395          callout_data *= sign;
1396          callout_data_set = 1;
1397          }
1398        continue;
1399
1400        case 'G':
1401        if (isdigit(*p))
1402          {
1403          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1404          getstrings |= 1 << n;
1405          }
1406        else if (isalnum(*p))
1407          {
1408          uschar name[256];
1409          uschar *npp = name;
1410          while (isalnum(*p)) *npp++ = *p++;
1411          *npp = 0;
1412          n = pcre_get_stringnumber(re, (char *)name);
1413          if (n < 0)
1414            fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1415          else getstrings |= 1 << n;
1416          }
1417        continue;
1418
1419        case 'L':
1420        getlist = 1;
1421        continue;
1422
1423        case 'M':
1424        find_match_limit = 1;
1425        continue;
1426
1427        case 'N':
1428        options |= PCRE_NOTEMPTY;
1429        continue;
1430
1431        case 'O':
1432        while(isdigit(*p)) n = n * 10 + *p++ - '0';
1433        if (n > size_offsets_max)
1434          {
1435          size_offsets_max = n;
1436          free(offsets);
1437          use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1438          if (offsets == NULL)
1439            {
1440            printf("** Failed to get %d bytes of memory for offsets vector\n",
1441              size_offsets_max * sizeof(int));
1442            return 1;
1443            }
1444          }
1445        use_size_offsets = n;
1446        if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1447        continue;
1448
1449        case 'P':
1450        options |= PCRE_PARTIAL;
1451        continue;
1452
1453        case 'S':
1454        show_malloc = 1;
1455        continue;
1456
1457        case 'Z':
1458        options |= PCRE_NOTEOL;
1459        continue;
1460
1461        case '?':
1462        options |= PCRE_NO_UTF8_CHECK;
1463        continue;
1464        }
1465      *q++ = c;
1466      }
1467    *q = 0;
1468    len = q - dbuffer;
1469
1470    /* Handle matching via the POSIX interface, which does not
1471    support timing or playing with the match limit or callout data. */
1472
1473#if !defined NOPOSIX
1474    if (posix || do_posix)
1475      {
1476      int rc;
1477      int eflags = 0;
1478      regmatch_t *pmatch = NULL;
1479      if (use_size_offsets > 0)
1480        pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1481      if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1482      if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1483
1484      rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1485
1486      if (rc != 0)
1487        {
1488        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1489        fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1490        }
1491      else
1492        {
1493        size_t i;
1494        for (i = 0; i < (size_t)use_size_offsets; i++)
1495          {
1496          if (pmatch[i].rm_so >= 0)
1497            {
1498            fprintf(outfile, "%2d: ", (int)i);
1499            (void)pchars(dbuffer + pmatch[i].rm_so,
1500              pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1501            fprintf(outfile, "\n");
1502            if (i == 0 && do_showrest)
1503              {
1504              fprintf(outfile, " 0+ ");
1505              (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1506                outfile);
1507              fprintf(outfile, "\n");
1508              }
1509            }
1510          }
1511        }
1512      free(pmatch);
1513      }
1514
1515    /* Handle matching via the native interface - repeats for /g and /G */
1516
1517    else
1518#endif  /* !defined NOPOSIX */
1519
1520    for (;; gmatched++)    /* Loop for /g or /G */
1521      {
1522      if (timeit)
1523        {
1524        register int i;
1525        clock_t time_taken;
1526        clock_t start_time = clock();
1527        for (i = 0; i < LOOPREPEAT; i++)
1528          count = pcre_exec(re, extra, (char *)bptr, len,
1529            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1530        time_taken = clock() - start_time;
1531        fprintf(outfile, "Execute time %.3f milliseconds\n",
1532          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1533            (double)CLOCKS_PER_SEC);
1534        }
1535
1536      /* If find_match_limit is set, we want to do repeated matches with
1537      varying limits in order to find the minimum value. */
1538
1539      if (find_match_limit)
1540        {
1541        int min = 0;
1542        int mid = 64;
1543        int max = -1;
1544
1545        if (extra == NULL)
1546          {
1547          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1548          extra->flags = 0;
1549          }
1550        extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1551
1552        for (;;)
1553          {
1554          extra->match_limit = mid;
1555          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1556            options | g_notempty, use_offsets, use_size_offsets);
1557          if (count == PCRE_ERROR_MATCHLIMIT)
1558            {
1559            /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1560            min = mid;
1561            mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1562            }
1563          else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1564                                 count == PCRE_ERROR_PARTIAL)
1565            {
1566            if (mid == min + 1)
1567              {
1568              fprintf(outfile, "Minimum match limit = %d\n", mid);
1569              break;
1570              }
1571            /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1572            max = mid;
1573            mid = (min + mid)/2;
1574            }
1575          else break;    /* Some other error */
1576          }
1577
1578        extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1579        }
1580
1581      /* If callout_data is set, use the interface with additional data */
1582
1583      else if (callout_data_set)
1584        {
1585        if (extra == NULL)
1586          {
1587          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1588          extra->flags = 0;
1589          }
1590        extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1591        extra->callout_data = &callout_data;
1592        count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1593          options | g_notempty, use_offsets, use_size_offsets);
1594        extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1595        }
1596
1597      /* The normal case is just to do the match once, with the default
1598      value of match_limit. */
1599
1600      else
1601        {
1602        count = pcre_exec(re, extra, (char *)bptr, len,
1603          start_offset, options | g_notempty, use_offsets, use_size_offsets);
1604        }
1605
1606      if (count == 0)
1607        {
1608        fprintf(outfile, "Matched, but too many substrings\n");
1609        count = use_size_offsets/3;
1610        }
1611
1612      /* Matched */
1613
1614      if (count >= 0)
1615        {
1616        int i;
1617        for (i = 0; i < count * 2; i += 2)
1618          {
1619          if (use_offsets[i] < 0)
1620            fprintf(outfile, "%2d: <unset>\n", i/2);
1621          else
1622            {
1623            fprintf(outfile, "%2d: ", i/2);
1624            (void)pchars(bptr + use_offsets[i],
1625              use_offsets[i+1] - use_offsets[i], outfile);
1626            fprintf(outfile, "\n");
1627            if (i == 0)
1628              {
1629              if (do_showrest)
1630                {
1631                fprintf(outfile, " 0+ ");
1632                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1633                  outfile);
1634                fprintf(outfile, "\n");
1635                }
1636              }
1637            }
1638          }
1639
1640        for (i = 0; i < 32; i++)
1641          {
1642          if ((copystrings & (1 << i)) != 0)
1643            {
1644            char copybuffer[16];
1645            int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1646              i, copybuffer, sizeof(copybuffer));
1647            if (rc < 0)
1648              fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1649            else
1650              fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1651            }
1652          }
1653
1654        for (i = 0; i < 32; i++)
1655          {
1656          if ((getstrings & (1 << i)) != 0)
1657            {
1658            const char *substring;
1659            int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1660              i, &substring);
1661            if (rc < 0)
1662              fprintf(outfile, "get substring %d failed %d\n", i, rc);
1663            else
1664              {
1665              fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1666              /* free((void *)substring); */
1667              pcre_free_substring(substring);
1668              }
1669            }
1670          }
1671
1672        if (getlist)
1673          {
1674          const char **stringlist;
1675          int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1676            &stringlist);
1677          if (rc < 0)
1678            fprintf(outfile, "get substring list failed %d\n", rc);
1679          else
1680            {
1681            for (i = 0; i < count; i++)
1682              fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1683            if (stringlist[i] != NULL)
1684              fprintf(outfile, "string list not terminated by NULL\n");
1685            /* free((void *)stringlist); */
1686            pcre_free_substring_list(stringlist);
1687            }
1688          }
1689        }
1690
1691      /* There was a partial match */
1692
1693      else if (count == PCRE_ERROR_PARTIAL)
1694        {
1695        fprintf(outfile, "Partial match\n");
1696        break;  /* Out of the /g loop */
1697        }
1698
1699      /* Failed to match. If this is a /g or /G loop and we previously set
1700      g_notempty after a null match, this is not necessarily the end.
1701      We want to advance the start offset, and continue. In the case of UTF-8
1702      matching, the advance must be one character, not one byte. Fudge the
1703      offset values to achieve this. We won't be at the end of the string -
1704      that was checked before setting g_notempty. */
1705
1706      else
1707        {
1708        if (g_notempty != 0)
1709          {
1710          int onechar = 1;
1711          use_offsets[0] = start_offset;
1712          if (use_utf8)
1713            {
1714            while (start_offset + onechar < len)
1715              {
1716              int tb = bptr[start_offset+onechar];
1717              if (tb <= 127) break;
1718              tb &= 0xc0;
1719              if (tb != 0 && tb != 0xc0) onechar++;
1720              }
1721            }
1722          use_offsets[1] = start_offset + onechar;
1723          }
1724        else
1725          {
1726          if (count == PCRE_ERROR_NOMATCH)
1727            {
1728            if (gmatched == 0) fprintf(outfile, "No match\n");
1729            }
1730          else fprintf(outfile, "Error %d\n", count);
1731          break;  /* Out of the /g loop */
1732          }
1733        }
1734
1735      /* If not /g or /G we are done */
1736
1737      if (!do_g && !do_G) break;
1738
1739      /* If we have matched an empty string, first check to see if we are at
1740      the end of the subject. If so, the /g loop is over. Otherwise, mimic
1741      what Perl's /g option does. This turns out to be rather cunning. First
1742      we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1743      same point. If this fails (picked up above) we advance to the next
1744      character. */
1745
1746      g_notempty = 0;
1747      if (use_offsets[0] == use_offsets[1])
1748        {
1749        if (use_offsets[0] == len) break;
1750        g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1751        }
1752
1753      /* For /g, update the start offset, leaving the rest alone */
1754
1755      if (do_g) start_offset = use_offsets[1];
1756
1757      /* For /G, update the pointer and length */
1758
1759      else
1760        {
1761        bptr += use_offsets[1];
1762        len -= use_offsets[1];
1763        }
1764      }  /* End of loop for /g and /G */
1765    }    /* End of loop for data lines */
1766
1767  CONTINUE:
1768
1769#if !defined NOPOSIX
1770  if (posix || do_posix) regfree(&preg);
1771#endif
1772
1773  if (re != NULL) free(re);
1774  if (extra != NULL) free(extra);
1775  if (tables != NULL)
1776    {
1777    free((void *)tables);
1778    setlocale(LC_CTYPE, "C");
1779    }
1780  }
1781
1782if (infile == stdin) fprintf(outfile, "\n");
1783return 0;
1784}
1785
1786/* End */
1787