1#! /usr/bin/env perl
2
3# Program for testing regular expressions with perl to check that PCRE handles
4# them the same. This is the version that supports /8 for UTF-8 testing. As it
5# stands, it requires at least Perl 5.8 for UTF-8 support. However, it needs to
6# have "use utf8" at the start for running the UTF-8 tests, but *not* for the
7# other tests. The only way I've found for doing this is to cat this line in
8# explicitly in the RunPerlTest script.
9
10# use locale;  # With this included, \x0b matches \s!
11
12# Function for turning a string into a string of printing chars. There are
13# currently problems with UTF-8 strings; this fudges round them.
14
15sub pchars {
16my($t) = "";
17
18if ($utf8)
19  {
20  @p = unpack('U*', $_[0]);
21  foreach $c (@p)
22    {
23    if ($c >= 32 && $c < 127) { $t .= chr $c; }
24      else { $t .= sprintf("\\x{%02x}", $c); }
25    }
26  }
27
28else
29  {
30  foreach $c (split(//, $_[0]))
31    {
32    if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
33      else { $t .= sprintf("\\x%02x", ord $c); }
34    }
35  }
36
37$t;
38}
39
40
41# Read lines from named file or stdin and write to named file or stdout; lines
42# consist of a regular expression, in delimiters and optionally followed by
43# options, followed by a set of test data, terminated by an empty line.
44
45# Sort out the input and output files
46
47if (@ARGV > 0)
48  {
49  open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
50  $infile = "INFILE";
51  }
52else { $infile = "STDIN"; }
53
54if (@ARGV > 1)
55  {
56  open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
57  $outfile = "OUTFILE";
58  }
59else { $outfile = "STDOUT"; }
60
61printf($outfile "Perl $] Regular Expressions\n\n");
62
63# Main loop
64
65NEXT_RE:
66for (;;)
67  {
68  printf "  re> " if $infile eq "STDIN";
69  last if ! ($_ = <$infile>);
70  printf $outfile "$_" if $infile ne "STDIN";
71  next if ($_ eq "");
72
73  $pattern = $_;
74
75  while ($pattern !~ /^\s*(.).*\1/s)
76    {
77    printf "    > " if $infile eq "STDIN";
78    last if ! ($_ = <$infile>);
79    printf $outfile "$_" if $infile ne "STDIN";
80    $pattern .= $_;
81    }
82
83   chomp($pattern);
84   $pattern =~ s/\s+$//;
85
86  # The private /+ modifier means "print $' afterwards".
87
88  $showrest = ($pattern =~ s/\+(?=[a-z]*$)//);
89
90  # Remove /8 from a UTF-8 pattern.
91
92  $utf8 = $pattern =~ s/8(?=[a-z]*$)//;
93
94  # Remove /J from a pattern with duplicate names.
95
96  $pattern =~ s/J(?=[a-z]*$)//;
97
98  # Check that the pattern is valid
99
100  eval "\$_ =~ ${pattern}";
101  if ($@)
102    {
103    printf $outfile "Error: $@";
104    next NEXT_RE;
105    }
106
107  # If the /g modifier is present, we want to put a loop round the matching;
108  # otherwise just a single "if".
109
110  $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
111
112  # If the pattern is actually the null string, Perl uses the most recently
113  # executed (and successfully compiled) regex is used instead. This is a
114  # nasty trap for the unwary! The PCRE test suite does contain null strings
115  # in places - if they are allowed through here all sorts of weird and
116  # unexpected effects happen. To avoid this, we replace such patterns with
117  # a non-null pattern that has the same effect.
118
119  $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
120
121  # Read data lines and test them
122
123  for (;;)
124    {
125    printf "data> " if $infile eq "STDIN";
126    last NEXT_RE if ! ($_ = <$infile>);
127    chomp;
128    printf $outfile "$_\n" if $infile ne "STDIN";
129
130    s/\s+$//;
131    s/^\s+//;
132
133    last if ($_ eq "");
134    $x = eval "\"$_\"";   # To get escapes processed
135
136    # Empty array for holding results, ensure $REGERROR and $REGMARK are
137    # unset, then do the matching.
138
139    @subs = ();
140
141    $pushes = "push \@subs,\$&;" .
142         "push \@subs,\$1;" .
143         "push \@subs,\$2;" .
144         "push \@subs,\$3;" .
145         "push \@subs,\$4;" .
146         "push \@subs,\$5;" .
147         "push \@subs,\$6;" .
148         "push \@subs,\$7;" .
149         "push \@subs,\$8;" .
150         "push \@subs,\$9;" .
151         "push \@subs,\$10;" .
152         "push \@subs,\$11;" .
153         "push \@subs,\$12;" .
154         "push \@subs,\$13;" .
155         "push \@subs,\$14;" .
156         "push \@subs,\$15;" .
157         "push \@subs,\$16;" .
158         "push \@subs,\$'; }";
159
160    undef $REGERROR;
161    undef $REGMARK;
162
163    eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
164
165    if ($@)
166      {
167      printf $outfile "Error: $@\n";
168      next NEXT_RE;
169      }
170    elsif (scalar(@subs) == 0)
171      {
172      printf $outfile "No match";
173      if (defined $REGERROR && $REGERROR != 1)
174        { print $outfile (", mark = $REGERROR"); }
175      printf $outfile "\n";
176      }
177    else
178      {
179      while (scalar(@subs) != 0)
180        {
181        printf $outfile (" 0: %s\n", &pchars($subs[0]));
182        printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
183        $last_printed = 0;
184        for ($i = 1; $i <= 16; $i++)
185          {
186          if (defined $subs[$i])
187            {
188            while ($last_printed++ < $i-1)
189              { printf $outfile ("%2d: <unset>\n", $last_printed); }
190            printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
191            $last_printed = $i;
192            }
193          }
194        splice(@subs, 0, 18);
195        }
196      if (defined $REGMARK && $REGMARK != 1)
197        { print $outfile ("MK: $REGMARK\n"); }
198      }
199    }
200  }
201
202# printf $outfile "\n";
203
204# End
205