1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t.  If you want to add a test
5# that does fit that format, add it to re/re_tests, not here.
6
7sub run_tests;
8
9$| = 1;
10
11BEGIN {
12    chdir 't' if -d 't';
13    require './test.pl';
14    set_up_inc( '../lib', '.' );
15    skip_all_if_miniperl("miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-");
16}
17
18use strict;
19use warnings;
20use 5.010;
21use Config;
22
23plan tests => 2514;  # Update this when adding/deleting tests.
24
25run_tests() unless caller;
26
27#
28# Tests start here.
29#
30sub run_tests {
31
32    like("A \x{263a} B z C", qr/A . B (??{ "z" }) C/,
33	 "Match UTF-8 char in presence of (??{ }); Bug 20000731.001 (#3600)");
34
35    {
36        no warnings 'uninitialized';
37        ok(undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV; Bug 20001021.005 (#4492)");
38    }
39
40    {
41        my $message = 'bug id 20001008.001 (#4407)';
42
43        my @x = ("stra\337e 138", "stra\337e 138");
44        for (@x) {
45            ok(s/(\d+)\s*([\w\-]+)/$1 . uc $2/e, $message);
46            ok(my ($latin) = /^(.+)(?:\s+\d)/, $message);
47            is($latin, "stra\337e", $message);
48	    ok($latin =~ s/stra\337e/stra��e/, $message);
49            #
50            # Previous code follows, but outcommented - there were no tests.
51            #
52            # $latin =~ s/stra\337e/stra��e/; # \303\237 after the 2nd a
53            # use utf8; # needed for the raw UTF-8
54            # $latin =~ s!(s)tr(?:a��|s+e)!$1tr.!; # \303\237 after the a
55        }
56    }
57
58    {
59        # Fist half of the bug.
60        my $message = 'HEBREW ACCENT QADMA matched by .*; Bug 20001028.003 (#4536)';
61        my $X = chr (1448);
62        ok(my ($Y) = $X =~ /(.*)/, $message);
63        is($Y, v1448, $message);
64        is(length $Y, 1, $message);
65
66        # Second half of the bug.
67        $message = 'HEBREW ACCENT QADMA in replacement; Bug 20001028.003 (#4536)';
68        $X = '';
69        $X =~ s/^/chr(1488)/e;
70        is(length $X, 1, $message);
71        is(ord $X, 1488, $message);
72    }
73
74    {   
75        my $message = 'Repeated s///; Bug 20001108.001 (#4631)';
76        my $X = "Szab\x{f3},Bal\x{e1}zs";
77        my $Y = $X;
78        $Y =~ s/(B)/$1/ for 0 .. 3;
79        is($Y, $X, $message);
80        is($X, "Szab\x{f3},Bal\x{e1}zs", $message);
81    }
82
83    {
84        my $message = 's/// on UTF-8 string; Bug 20000517.001 (#3253)';
85        my $x = "\x{100}A";
86        $x =~ s/A/B/;
87        is($x, "\x{100}B", $message);
88        is(length $x, 2, $message);
89    }
90
91    {
92        # The original bug report had 'no utf8' here but that was irrelevant.
93
94        my $message = "Don't dump core; Bug 20010306.008 (#5982)";
95        my $a = "a\x{1234}";
96        like($a, qr/\w/, $message);  # used to core dump.
97    }
98
99    {
100        my $message = '/g in scalar context; Bug 20010410.006 (#6796)';
101        for my $rx ('/(.*?)\{(.*?)\}/csg',
102		    '/(.*?)\{(.*?)\}/cg',
103		    '/(.*?)\{(.*?)\}/sg',
104		    '/(.*?)\{(.*?)\}/g',
105		    '/(.+?)\{(.+?)\}/csg',) {
106            my $i = 0;
107            my $input = "a{b}c{d}";
108            eval <<"            --";
109                while (eval \$input =~ $rx) {
110                    \$i ++;
111                }
112            --
113            is($i, 2, $message);
114        }
115    }
116
117    {
118        # Amazingly vertical tabulator is the same in ASCII and EBCDIC.
119        for ("\n", "\t", "\014", "\r") {
120            unlike($_, qr/[[:print:]]/, sprintf "\\%03o not in [[:print:]]; Bug 20010619.003 (#7131)", ord $_);
121        }
122        for (" ") {
123            like($_, qr/[[:print:]]/, "'$_' in [[:print:]]; Bug 20010619.003 (#7131)");
124        }
125    }
126
127    {
128        # [ID 20010814.004 (#7526)] pos() doesn't work when using =~m// in list context
129
130        $_ = "ababacadaea";
131        my $a = join ":", /b./gc;
132        my $b = join ":", /a./gc;
133        my $c = pos;
134        is("$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//; Bug 20010814.004 (#7526)");
135    }
136
137    {
138        # [ID 20010407.006 (#6767)] matching utf8 return values from
139        # functions does not work
140
141        my $message = 'UTF-8 return values from functions; Bug 20010407.006 (#6767)';
142        package ID_20010407_006;
143        sub x {"a\x{1234}"}
144        my $x = x;
145        my $y;
146      ::ok($x =~ /(..)/, $message);
147        $y = $1;
148      ::ok(length ($y) == 2 && $y eq $x, $message);
149      ::ok(x =~ /(..)/, $message);
150        $y = $1;
151      ::ok(length ($y) == 2 && $y eq $x, $message);
152    }
153
154    {
155        # High bit bug -- japhy
156        my $x = "ab\200d";
157        ok $x =~ /.*?\200/, "High bit fine";
158    }
159
160    {
161        my $message = 'UTF-8 hash keys and /$/';
162        # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters
163        #                                         /2002-01/msg01327.html
164
165        my $u = "a\x{100}";
166        my $v = substr ($u, 0, 1);
167        my $w = substr ($u, 1, 1);
168        my %u = ($u => $u, $v => $v, $w => $w);
169        for (keys %u) {
170            my $m1 =            /^\w*$/ ? 1 : 0;
171            my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0;
172            is($m1, $m2, $message);
173        }
174    }
175
176    {
177        my $message = "s///eg [change 13f46d054db22cf4]; Bug 20020124.005 (#8335)";
178
179        for my $char ("a", "\x{df}", "\x{100}") {
180            my $x = "$char b $char";
181            $x =~ s{($char)}{
182                  "c" =~ /c/;
183                  "x";
184            }ge;
185            is(substr ($x, 0, 1), substr ($x, -1, 1), $message);
186        }
187    }
188
189    {
190        my $message = "Correct pmop flags checked when empty pattern; Bug 20020412.005 (#8935)";
191
192        # Requires reuse of last successful pattern.
193        my $num = 123;
194        $num =~ /\d/;
195        for (0 .. 1) {
196            my $match = m?? + 0;
197            ok($match != $_, $message)
198                or diag(sprintf "'match one' %s on %s iteration" =>
199			$match ? 'succeeded' : 'failed',
200			$_     ? 'second'    : 'first');
201        }
202        $num =~ /(\d)/;
203        my $result = join "" => $num =~ //g;
204        is($result, $num, $message);
205    }
206
207    {
208        my $message = 'UTF-8 regex matches above 32k; Bug 20020630.002 (#10013)';
209        for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) {
210            my ($type, $char) = @$_;
211            for my $len (32000, 32768, 33000) {
212                my  $s = $char . "f" x $len;
213                my  $r = $s =~ /$char([f]*)/gc;
214                ok($r, $message) or diag("<$type x $len>");
215                ok(!$r || pos ($s) == $len + 1, $message)
216		    or diag("<$type x $len>; pos = @{[pos $s]}");
217            }
218        }
219    }
220
221    {
222        my $s = "\x{100}" x 5;
223        my $ok = $s =~ /(\x{100}{4})/;
224        my ($ord, $len) = (ord $1, length $1);
225        ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift [change 0e933229fa758625]";
226    }
227
228    {
229        my $message = 'UTF-8 matching; Bug 15397';
230        like("\x{100}", qr/\x{100}/, $message);
231        like("\x{100}", qr/(\x{100})/, $message);
232        like("\x{100}", qr/(\x{100}){1}/, $message);
233        like("\x{100}\x{100}", qr/(\x{100}){2}/, $message);
234        like("\x{100}\x{100}", qr/(\x{100})(\x{100})/, $message);
235    }
236
237    {
238        my $message = 'Neither ()* nor ()*? sets $1 when matched 0 times; Bug 7471';
239        local $_       = 'CD';
240        ok(/(AB)*?CD/ && !defined $1, $message);
241        ok(/(AB)*CD/  && !defined $1, $message);
242    }
243
244    {
245        my $message = "Caching shouldn't prevent match; Bug 3547";
246        my $pattern = "^(b+?|a){1,2}c";
247        ok("bac"    =~ /$pattern/ && $1 eq 'a', $message);
248        ok("bbac"   =~ /$pattern/ && $1 eq 'a', $message);
249        ok("bbbac"  =~ /$pattern/ && $1 eq 'a', $message);
250        ok("bbbbac" =~ /$pattern/ && $1 eq 'a', $message);
251    }
252
253    {
254        ok("\x{100}" =~ /(.)/, '$1 should keep UTF-8 ness; Bug 18232');
255        is($1, "\x{100}",  '$1 is UTF-8; Bug 18232');
256        { 'a' =~ /./; }
257        is($1, "\x{100}",  '$1 is still UTF-8; Bug 18232');
258        isnt($1, "\xC4\x80", '$1 is not non-UTF-8; Bug 18232');
259    }
260
261    {
262        my $message = "Optimizer doesn't prematurely reject match; Bug 19767";
263        use utf8;
264
265        my $attr = 'Name-1';
266        my $NormalChar      = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
267        my $NormalWord      = qr /${NormalChar}+?/;
268        my $PredNameHyphen  = qr /^${NormalWord}(\-${NormalWord})*?$/;
269
270        $attr =~ /^$/;
271        like($attr, $PredNameHyphen, $message);  # Original test.
272
273        "a" =~ m/[b]/;
274        like("0", qr/\p{N}+\z/, $message);         # Variant.
275    }
276
277    {
278        my $message = "(??{ }) doesn't return stale values; Bug 20683";
279        our $p = 1;
280        foreach (1, 2, 3, 4) {
281            $p ++ if /(??{ $p })/
282        }
283        is($p, 5, $message);
284
285        {
286            package P;
287            $a = 1;
288            sub TIESCALAR {bless []}
289            sub FETCH     {$a ++}
290        }
291        tie $p, "P";
292        foreach (1, 2, 3, 4) {
293            /(??{ $p })/
294        }
295        is($p, 5, $message);
296    }
297
298    {
299        # Subject: Odd regexp behavior
300        # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
301        # Date: Wed, 26 Feb 2003 16:53:12 +0000
302        # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
303        # To: perl-unicode@perl.org
304
305        my $message = 'Markus Kuhn 2003-02-26';
306    
307        my $x = "\x{2019}\nk";
308        ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message);
309        is($x, "\x{2019} k", $message);
310
311        $x = "b\nk";
312        ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message);
313        is($x, "b k", $message);
314
315        like("\x{2019}", qr/\S/, $message);
316    }
317
318    {
319        my $message = "(??{ .. }) in split doesn't corrupt its stack; Bug 21411";
320        our $i;
321        is('-1-3-5-', join('', split /((??{$i++}))/, '-1-3-5-'), $message);
322        no warnings 'syntax';
323        @_ = split /(?{'WOW'})/, 'abc';
324        local $" = "|";
325        is("@_", "a|b|c", $message);
326    }
327
328    is(join('-', split /(?{ split "" })/, "abc"), 'a-b-c', 'nested split');
329
330    {
331        $_ = "code:   'x' { '...' }\n"; study;
332        my @x; push @x, $& while m/'[^\']*'/gx;
333        local $" = ":";
334        is("@x", "'x':'...'", "Parse::RecDescent triggered infinite loop; Bug 17757");
335    }
336
337    {
338        sub func ($) {
339            ok("a\nb" !~ /^b/,  "Propagated modifier; $_[0]; Bug 22354");
340            ok("a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m; Bug 22354");
341        }
342        func "standalone";
343        $_ = "x"; s/x/func "in subst"/e;
344        $_ = "x"; s/x/func "in multiline subst"/em;
345        $_ = "x"; /x(?{func "in regexp"})/;
346        $_ = "x"; /x(?{func "in multiline regexp"})/m;
347    }
348
349    {
350        $_    = "abcdef\n";
351        my @x = m/./g;
352        is("abcde", $`, 'Global match sets $`; Bug 19049');
353    }
354
355    {
356        # [perl #23769] Unicode regex broken on simple example
357        # regrepeat() didn't handle UTF-8 EXACT case right.
358
359        my $Mess       = 'regrepeat() handles UTF-8 EXACT case right';
360        my $message = "$Mess; Bug 23769";
361
362        my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
363
364        like($s, qr/\x{a0}/, $message);
365        like($s, qr/\x{a0}+/, $message);
366        like($s, qr/\x{a0}\x{a0}/, $message);
367
368        $message = "$Mess (easy variant); Bug 23769";
369        ok("aaa\x{100}" =~ /(a+)/, $message);
370        is($1, "aaa", $message);
371
372        $message = "$Mess (easy invariant); Bug 23769";
373        ok("aaa\x{100}     " =~ /(a+?)/, $message);
374        is($1, "a", $message);
375
376        $message = "$Mess (regrepeat variant); Bug 23769";
377        ok("\xa0\xa0\xa0\x{100}    " =~ /(\xa0+?)/, $message);
378        is($1, "\xa0", $message);
379
380        $message = "$Mess (regrepeat invariant); Bug 23769";
381        ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, $message);
382        is($1, "\xa0\xa0\xa0", $message);
383
384        $message = "$Mess (hard variant); Bug 23769";
385        ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, $message);
386        is($1, "\xa0\xa1", $message);
387
388        $message = "$Mess (hard invariant); Bug 23769";
389        ok("ababab\x{100}  " =~ /((?:ab)+)/, $message);
390        is($1, 'ababab', $message);
391
392        ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, $message);
393        is($1, "\xa0\xa1\xa0\xa1\xa0\xa1", $message);
394
395        ok("ababab\x{100}  " =~ /((?:ab)+?)/, $message);
396        is($1, "ab", $message);
397
398        $message = "Don't match first byte of UTF-8 representation; Bug 23769";
399        unlike("\xc4\xc4\xc4", qr/(\x{100}+)/, $message);
400        unlike("\xc4\xc4\xc4", qr/(\x{100}+?)/, $message);
401        unlike("\xc4\xc4\xc4", qr/(\x{100}++)/, $message);
402    }
403
404    {
405        # perl panic: pp_match start/end pointers
406
407        is(eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, "a-bc",
408	   'Captures can move backwards in string; Bug 25269');
409    }
410
411    {
412        # \cA not recognized in character classes
413        like("a\cAb", qr/\cA/, '\cA in pattern; Bug 27940');
414        like("a\cAb", qr/[\cA]/, '\cA in character class; Bug 27940');
415        like("a\cAb", qr/[\cA-\cB]/, '\cA in character class range; Bug 27940');
416        like("abc", qr/[^\cA-\cB]/, '\cA in negated character class range; Bug 27940');
417        like("a\cBb", qr/[\cA-\cC]/, '\cB in character class range; Bug 27940');
418        like("a\cCbc", qr/[^\cA-\cB]/, '\cC in negated character class range; Bug 27940');
419        like("a\cAb", qr/(??{"\cA"})/, '\cA in ??{} pattern; Bug 27940');
420        unlike("ab", qr/a\cIb/x, '\cI in pattern; Bug 27940');
421    }
422
423    {
424        # perl #28532: optional zero-width match at end of string is ignored
425
426        ok("abc" =~ /^abc(\z)?/ && defined($1),
427           'Optional zero-width match at end of string; Bug 28532');
428        ok("abc" =~ /^abc(\z)??/ && !defined($1),
429           'Optional zero-width match at end of string; Bug 28532');
430    }
431
432    {
433        my $utf8 = "\xe9\x{100}"; chop $utf8;
434        my $latin1 = "\xe9";
435
436        like($utf8, qr/\xe9/i, "utf8/latin; Bug 36207");
437        like($utf8, qr/$latin1/i, "utf8/latin runtime; Bug 36207");
438        like($utf8, qr/(abc|\xe9)/i, "utf8/latin trie; Bug 36207");
439        like($utf8, qr/(abc|$latin1)/i, "utf8/latin trie runtime; Bug 36207");
440
441        like("\xe9", qr/$utf8/i, "latin/utf8; Bug 36207");
442        like("\xe9", qr/(abc|$utf8)/i, "latin/utf8 trie; Bug 36207");
443        like($latin1, qr/$utf8/i, "latin/utf8 runtime; Bug 36207");
444        like($latin1, qr/(abc|$utf8)/i, "latin/utf8 trie runtime; Bug 36207");
445    }
446
447    {
448        my $s = "abcd";
449        $s =~ /(..)(..)/g;
450        $s = $1;
451        $s = $2;
452        is($2, 'cd',
453	   "Assigning to original string does not corrupt match vars; Bug 37038");
454    }
455
456    {
457        {
458            package wooosh;
459            sub gloople {"!"}
460        }
461        my $aeek = bless {} => 'wooosh';
462        is(do {$aeek -> gloople () =~ /(.)/g}, 1,
463	   "//g match against return value of sub [change e26a497577f3ce7b]");
464
465        sub gloople {"!"}
466        is(do{gloople () =~ /(.)/g}, 1,
467	   "change e26a497577f3ce7b didn't affect sub calls for some reason");
468    }
469
470    {
471        # [perl #78680]
472        # See changes 26925-26928, which reverted change 26410
473        {
474            package lv;
475            our $var = "abc";
476            sub variable : lvalue {$var}
477        }
478        my $o = bless [] => 'lv';
479        my $f = "";
480        my $r = eval {
481            for (1 .. 2) {
482                $f .= $1 if $o -> variable =~ /(.)/g;
483            }
484            1;
485        };
486        if ($r) {
487            is($f, "ab", "pos() retained between calls");
488        }
489        else {
490            ok 0, "Code failed: $@";
491        }
492
493        our $var = "abc";
494        sub variable : lvalue {$var}
495        my $g = "";
496        my $s = eval {
497            for (1 .. 2) {
498                $g .= $1 if variable =~ /(.)/g;
499            }
500            1;
501        };
502        if ($s) {
503            is($g, "ab", "pos() retained between calls");
504        }
505        else {
506            ok 0, "Code failed: $@";
507        }
508    }
509
510  SKIP:
511    {
512        skip "In EBCDIC and unclear what would trigger this bug there" if $::IS_EBCDIC;
513        fresh_perl_like(
514            'no warnings "utf8";
515             $_ = pack "U0C2", 0xa2, 0xf8;  # Ill-formed UTF-8
516             my $ret = 0;
517             do {!($ret = s/[a\0]+//g)}',
518             qr/Malformed UTF-8/,
519             {}, "Ill-formed UTF-8 doesn't match NUL in class; Bug 37836");
520    }
521
522    {
523        # chr(65535) should be allowed in regexes
524
525        no warnings 'utf8'; # To allow non-characters
526        my ($c, $r, $s);
527
528        $c = chr 0xffff;
529        $c =~ s/$c//g;
530        is($c, "", "U+FFFF, parsed as atom; Bug 38293");
531
532        $c = chr 0xffff;
533        $r = "\\$c";
534        $c =~ s/$r//g;
535        is($c, "", "U+FFFF backslashed, parsed as atom; Bug 38293");
536
537        $c = chr 0xffff;
538        $c =~ s/[$c]//g;
539        is($c, "", "U+FFFF, parsed in class; Bug 38293");
540
541        $c = chr 0xffff;
542        $r = "[\\$c]";
543        $c =~ s/$r//g;
544        is($c, "", "U+FFFF backslashed, parsed in class; Bug 38293");
545
546        $s = "A\x{ffff}B";
547        $s =~ s/\x{ffff}//i;
548        is($s, "AB", "U+FFFF, EXACTF; Bug 38293");
549
550        $s = "\x{ffff}A";
551        $s =~ s/\bA//;
552        is($s, "\x{ffff}", "U+FFFF, BOUND; Bug 38293");
553
554        $s = "\x{ffff}!";
555        $s =~ s/\B!//;
556        is($s, "\x{ffff}", "U+FFFF, NBOUND; Bug 38293");
557    }
558
559    {
560        
561        # The printing characters
562        my @chars = ("A" .. "Z");
563        my $delim = ",";
564        my $size = 32771 - 4;
565        my $str = '';
566
567        # Create some random junk. Inefficient, but it works.
568        for (my $i = 0; $i < $size; $ i++) {
569            $str .= $chars [rand @chars];
570        }
571
572        $str .= ($delim x 4);
573        my $res;
574        my $matched;
575        ok($str =~ s/^(.*?)${delim}{4}//s, "Pattern matches; Bug 39583");
576        is($str, "", "Empty string; Bug 39583");
577        ok(defined $1 && length ($1) == $size, '$1 is correct size; Bug 39583');
578    }
579
580    {
581        like("\0-A", qr/\c@-A/, '@- should not be interpolated in a pattern; Bug 27940');
582        like("\0\0A", qr/\c@+A/, '@+ should not be interpolated in a pattern; Bug 27940');
583        like("X\@-A", qr/X@-A/, '@- should not be interpolated in a pattern; Bug 27940');
584        like("X\@\@A", qr/X@+A/, '@+ should not be interpolated in a pattern; Bug 27940');
585
586        like("X\0A", qr/X\c@?A/,  '\c@?; Bug 27940');
587        like("X\0A", qr/X\c@*A/,  '\c@*; Bug 27940');
588        like("X\0A", qr/X\c@(A)/, '\c@(; Bug 27940');
589        like("X\0A", qr/X(\c@)A/, '\c@); Bug 27940');
590        like("X\0A", qr/X\c@|ZA/, '\c@|; Bug 27940');
591
592        like("X\@A", qr/X@?A/,  '@?; Bug 27940');
593        like("X\@A", qr/X@*A/,  '@*; Bug 27940');
594        like("X\@A", qr/X@(A)/, '@(; Bug 27940');
595        like("X\@A", qr/X(@)A/, '@); Bug 27940');
596        like("X\@A", qr/X@|ZA/, '@|; Bug 27940');
597
598        local $" = ','; # non-whitespace and non-RE-specific
599        like('abc', qr/(.)(.)(.)/, 'The last successful match is bogus; Bug 27940');
600        like("A@+B", qr/A@{+}B/,  'Interpolation of @+ in /@{+}/; Bug 27940');
601        like("A@-B", qr/A@{-}B/,  'Interpolation of @- in /@{-}/; Bug 27940');
602        like("A@+B", qr/A@{+}B/x, 'Interpolation of @+ in /@{+}/x; Bug 27940');
603        like("A@-B", qr/A@{-}B/x, 'Interpolation of @- in /@{-}/x; Bug 27940');
604    }
605
606    {
607        my $s = 'foo bar baz';
608        my (@k, @v, @fetch, $res);
609        my $count = 0;
610        my @names = qw ($+{A} $+{B} $+{C});
611        if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
612            while (my ($k, $v) = each (%+)) {
613                $count++;
614            }
615            @k = sort keys   (%+);
616            @v = sort values (%+);
617            $res = 1;
618            push @fetch,
619                ["$+{A}", "$1"],
620                ["$+{B}", "$2"],
621                ["$+{C}", "$3"],
622            ;
623        } 
624        foreach (0 .. 2) {
625            if ($fetch [$_]) {
626                is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496");
627            } else {
628                ok 0, $names[$_];
629            }
630        }
631        is($res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/; Bug 50496");
632        is($count, 3, "Got 3 keys in %+ via each; Bug 50496");
633        is(0 + @k, 3, "Got 3 keys in %+ via keys; Bug 50496");
634        is("@k", "A B C", "Got expected keys; Bug 50496");
635        is("@v", "bar baz foo", "Got expected values; Bug 50496");
636        eval '
637            no warnings "uninitialized";
638            print for $+ {this_key_doesnt_exist};
639        ';
640        is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496');
641    }
642
643    {
644        #
645        # Almost the same as the block above, except that the capture is nested.
646        #
647
648        my $s = 'foo bar baz';
649        my (@k, @v, @fetch, $res);
650        my $count = 0;
651        my @names = qw ($+{A} $+{B} $+{C} $+{D});
652        if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) {
653            while (my ($k,$v) = each(%+)) {
654                $count++;
655            }
656            @k = sort keys   (%+);
657            @v = sort values (%+);
658            $res = 1;
659            push @fetch,
660                ["$+{A}", "$2"],
661                ["$+{B}", "$3"],
662                ["$+{C}", "$4"],
663                ["$+{D}", "$1"],
664            ;
665        }
666        foreach (0 .. 3) {
667            if ($fetch [$_]) {
668                is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496");
669            } else {
670                ok 0, $names [$_];
671            }
672        }
673        is($res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/; Bug 50496");
674        is($count, 4, "Got 4 keys in %+ via each; Bug 50496");
675        is(@k, 4, "Got 4 keys in %+ via keys; Bug 50496");
676        is("@k", "A B C D", "Got expected keys; Bug 50496");
677        is("@v", "bar baz foo foo bar baz", "Got expected values; Bug 50496");
678        eval '
679            no warnings "uninitialized";
680            print for $+ {this_key_doesnt_exist};
681        ';
682        is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496');
683    }
684
685    {
686        my $str = 'abc'; 
687        my $count = 0;
688        my $mval = 0;
689        my $pval = 0;
690        while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++}
691        is($mval,  0, '@- should be empty; Bug 36046');
692        is($pval,  0, '@+ should be empty; Bug 36046');
693        is($count, 1, 'Should have matched once only; Bug 36046');
694    }
695
696    {
697        my $message = '/m in precompiled regexp; Bug 40684';
698        my $s = "abc\ndef";
699        my $rex = qr'^abc$'m;
700        ok($s =~ m/$rex/, $message);
701        ok($s =~ m/^abc$/m, $message);
702    }
703
704    {
705        my $message = '(?: ... )? should not lose $^R; Bug 36909';
706        $^R = 'Nothing';
707        {
708            local $^R = "Bad";
709            ok('x foofoo y' =~ m {
710                      (foo) # $^R correctly set
711                      (?{ "last regexp code result" })
712            }x, $message);
713            is($^R, 'last regexp code result', $message);
714        }
715        is($^R, 'Nothing', $message);
716
717        {
718            local $^R = "Bad";
719
720            ok('x foofoo y' =~ m {
721                      (?:foo|bar)+ # $^R correctly set
722                      (?{ "last regexp code result" })
723            }x, $message);
724            is($^R, 'last regexp code result', $message);
725        }
726        is($^R, 'Nothing', $message);
727
728        {
729            local $^R = "Bad";
730            ok('x foofoo y' =~ m {
731                      (foo|bar)\1+ # $^R undefined
732                      (?{ "last regexp code result" })
733            }x, $message);
734            is($^R, 'last regexp code result', $message);
735        }
736        is($^R, 'Nothing', $message);
737
738        {
739            local $^R = "Bad";
740            ok('x foofoo y' =~ m {
741                      (foo|bar)\1 # This time without the +
742                      (?{"last regexp code result"})
743            }x, $message);
744            is($^R, 'last regexp code result', $message);
745        }
746        is($^R, 'Nothing', $message);
747    }
748
749    {
750        my $message = 'Match is quadratic due to eval; See Bug 22395';
751        our $count;
752        for my $l (10, 100, 1000) {
753            $count = 0;
754            ('a' x $l) =~ /(.*)(?{ $count++ })[bc]/;
755            is($count, $l*($l+3)/2+1, $message);
756        }
757    }
758    {
759        my $message = 'Match is linear, not quadratic; Bug 22395.';
760        our $count;
761        my $ok= 0;
762        for my $l (10, 100, 1000) {
763            $count = 0;
764            ('a' x $l) =~ /(.*)(*{ $count++ })[bc]/;
765            $ok += is($count, $l + 1, $message);
766        }
767        is($ok,3, "Optimistic eval does not disable optimisations");
768    }
769
770    {
771        my $message = '@-/@+ should not have undefined values; Bug 22614';
772        local $_ = 'ab';
773        our @len = ();
774        /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/;
775        is("@len", "2 2 2", $message);
776    }
777
778    {
779        my $message = '$& set on s///; Bug 18209';
780        my $text = ' word1 word2 word3 word4 word5 word6 ';
781
782        my @words = ('word1', 'word3', 'word5');
783        my $count;
784        foreach my $word (@words) {
785            $text =~ s/$word\s//gi; # Leave a space to separate words
786                                    # in the resultant str.
787            # The following block is not working.
788            if ($&) {
789                $count ++;
790            }
791            # End bad block
792        }
793        is($count, 3, $message);
794        is($text, ' word2 word4 word6 ', $message);
795    }
796
797    {
798        # RT#6893
799
800        local $_ = qq (A\nB\nC\n); 
801        my @res;
802        while (m#(\G|\n)([^\n]*)\n#gsx) { 
803            push @res, "$2"; 
804            last if @res > 3;
805        }
806        is("@res", "A B C", "/g pattern shouldn't infinite loop; Bug 6893");
807    }
808
809    {
810        # No optimizer bug
811        my @tails  = ('', '(?(1))', '(|)', '()?');    
812        my @quants = ('*','+');
813        my $doit = sub {
814            my $pats = shift;
815            for (@_) {
816                for my $pat (@$pats) {
817                    for my $quant (@quants) {
818                        for my $tail (@tails) {
819                            my $re = "($pat$quant\$)$tail";
820                            ok(/$re/  && $1 eq $_, "'$_' =~ /$re/; Bug 41010");
821                            ok(/$re/m && $1 eq $_, "'$_' =~ /$re/m; Bug 41010");
822                        }
823                    }
824                }
825            }
826        };    
827        
828        my @dpats = ('\d',
829                     '[1234567890]',
830                     '(1|[23]|4|[56]|[78]|[90])',
831                     '(?:1|[23]|4|[56]|[78]|[90])',
832                     '(1|2|3|4|5|6|7|8|9|0)',
833                     '(?:1|2|3|4|5|6|7|8|9|0)');
834        my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s');
835        my @sstrs = ('  ');
836        my @dstrs = ('12345');
837        $doit -> (\@spats, @sstrs);
838        $doit -> (\@dpats, @dstrs);
839    }
840
841    {
842        # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string
843
844        my $utf_8 = "\xd6schel";
845        utf8::upgrade ($utf_8);
846        $utf_8 =~ m {(\xd6|&Ouml;)schel};
847        is($1, "\xd6", "Upgrade error; Bug 45605");
848    }
849
850    {
851        # Regardless of utf8ness any character matches itself when 
852        # doing a case insensitive match. See also [perl #36207] 
853
854        for my $o (0 .. 255) {
855            my @ch = (chr ($o), chr ($o));
856            utf8::upgrade ($ch [1]);
857            for my $u_str (0, 1) {
858                for my $u_pat (0, 1) {
859                    like($ch[$u_str], qr/\Q$ch[$u_pat]\E/i,
860			 "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat; Bug 36207");
861                    like($ch[$u_str], qr/\Q$ch[$u_pat]\E|xyz/i,
862			 "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat; Bug 36207");
863                }
864            }
865        }
866    }
867
868    {
869         my $message = '$REGMARK in replacement; Bug 49190';
870         our $REGMARK;
871         local $_ = "A";
872         ok(s/(*:B)A/$REGMARK/, $message);
873         is($_, "B", $message);
874         $_ = "CCCCBAA";
875         ok(s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g, $message);
876         is($_, "ZYX", $message);
877         # Use a longer name to force reallocation of $REGMARK.
878         $_ = "CCCCBAA";
879         ok(s/(*:X)A+|(*:YYYYYYYYYYYYYYYY)B+|(*:Z)C+/$REGMARK/g, $message);
880         is($_, "ZYYYYYYYYYYYYYYYYX", $message);
881    }
882
883    {
884        my $message = 'Substitution evaluation in list context; Bug 52658';
885        my $reg = '../xxx/';
886        my @te  = ($reg =~ m{^(/?(?:\.\./)*)},
887                   $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++');
888        is($reg, '../bbb/', $message);
889        is($te [0], '../', $message);
890    }
891
892    {
893        my $a = "xyzt" x 8192;
894        like($a, qr/\A(?>[a-z])*\z/,
895	     '(?>) does not cause wrongness on long string; Bug 60034');
896        my $b = $a . chr 256;
897        chop $b;
898	is($a, $b, 'Bug 60034');
899        like($b, qr/\A(?>[a-z])*\z/,
900	     '(?>) does not cause wrongness on long string with UTF-8; Bug 60034');
901    }
902
903    #
904    # Keep the following tests last -- they may crash perl
905    #
906    print "# Tests that follow may crash perl\n";
907    {   
908
909        my $message = 'Pattern in a loop, failure should not ' .
910                         'affect previous success; Bug 19049/38869';
911        my @list = (
912            'ab cdef',             # Matches regex
913            ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it
914        );
915        my $y;
916        my $x;
917        foreach (@list) {
918            m/ab(.+)cd/i; # The ignore-case seems to be important
919            $y = $1;      # Use $1, which might not be from the last match!
920            $x = substr ($list [0], $- [0], $+ [0] - $- [0]);
921        }
922        is($y, ' ', $message);
923        is($x, 'ab cd', $message);
924    }
925
926    SKIP: {
927        skip("Can run out of memory on os390", 1) if $^O eq 'os390';
928        ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker; Bug 24274");
929    }
930    {
931        ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/, 
932            "Regexp /^(??{'(.)'x 100})/ crashes older perls; Bug 24274");
933    }
934
935    {
936        # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache
937
938        local ${^UTF8CACHE} = -1;
939        my $message = "Shouldn't panic; Bug 45337";
940        my $s = "[a]a{2}";
941        utf8::upgrade $s;
942        like("aaa", qr/$s/, $message);
943    }
944    {
945	my $message = "Check if tree logic breaks \$^R; Bug 57042";
946	my $cond_re = qr/\s*
947	    \s* (?:
948		   \( \s* A  (?{1})
949		 | \( \s* B  (?{2})
950	       )
951	   /x;
952	my @res;
953	for my $line ("(A)","(B)") {
954	   if ($line =~ m/$cond_re/) {
955	       push @res, $^R ? "#$^R" : "UNDEF";
956	   }
957	}
958	is("@res","#1 #2", $message);
959    }
960    {
961	no warnings 'closure';
962	my $re = qr/A(??{"1"})/;
963	ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/;
964	ok $1 eq "A1";
965	ok $2 eq "B";
966    }
967
968    # This only works under -DEBUGGING because it relies on an assert().
969    {
970	# Check capture offset re-entrancy of utf8 code.
971
972        sub fswash { $_[0] =~ s/([>X])//g; }
973
974        my $k1 = "." x 4 . ">>";
975        fswash($k1);
976
977        my $k2 = "\x{f1}\x{2022}";
978        $k2 =~ s/([\360-\362])/>/g;
979        fswash($k2);
980
981        is($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks; Bug 60508");
982    }
983
984    {
985	# minimal CURLYM limited to 32767 matches
986	my @pat = (
987	    qr{a(x|y)*b},	# CURLYM
988	    qr{a(x|y)*?b},	# .. with minmod
989	    qr{a([wx]|[yz])*b},	# .. and without tries
990	    qr{a([wx]|[yz])*?b},
991	);
992	my $len = 32768;
993	my $s = join '', 'a', 'x' x $len, 'b';
994	for my $pat (@pat) {
995	    like($s, $pat, "$pat; Bug 65372");
996	}
997    }
998
999    {
1000        local $::TODO = "[perl #38133]";
1001
1002        "A" =~ /(((?:A))?)+/;
1003        my $first = $2;
1004
1005        "A" =~ /(((A))?)+/;
1006        my $second = $2;
1007
1008        is($first, $second);
1009    }    
1010
1011    {
1012       my $message
1013        = 'utf8 =~ /trie/ where trie matches a continuation octet; Bug 70998';
1014
1015       # Catch warnings:
1016       my $w;
1017       local $SIG{__WARN__} = sub { $w .= shift };
1018
1019       # This bug can be reduced to
1020       qq{\x{30ab}} =~ /\xab|\xa9/;
1021       # but it's nice to have a more 'real-world' test. The original test
1022       # case from the RT ticket follows:
1023
1024       my %conv = (
1025                   "\xab"     => "&lt;",
1026                   "\xa9"     => "(c)",
1027                  );
1028       my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')';
1029       $conv_rx = qr{$conv_rx};
1030
1031       my $x
1032        = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}}
1033        . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}}
1034        . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}}
1035        . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}}
1036        . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}}
1037        . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}}
1038        . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}}
1039        . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}};
1040
1041       $x =~ s{$conv_rx}{$conv{$1}}eg;
1042
1043       is($w, undef, $message);
1044    }
1045
1046    {
1047        # minimal CURLYM limited to 32767 matches
1048
1049        is(join("-", "   abc   def  " =~ /(?=(\S+))/g), "abc-bc-c-def-ef-f",
1050	   'stclass optimisation does not break + inside (?=); Bug 68564');
1051    }
1052
1053    {
1054        use charnames ":full";
1055        # Delayed interpolation of \N'
1056        my $r1 = qr/\N{THAI CHARACTER SARA I}/;
1057        my $r2 = qr'\N{THAI CHARACTER SARA I}';
1058        my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}";
1059
1060        # Bug #56444
1061        ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/';
1062        ok $s1 =~ /$r2+/, 'my $r2 = qr\'\N{THAI CHARACTER SARA I}\'; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ \'$r2+\'';
1063
1064        # Bug #62056
1065        ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/';
1066
1067        ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"';
1068        ok "abbbbc" =~ m'\N{1}' && $& eq "a", '"abbbbc" =~ m\'\N{1}\' && $& eq "a"';
1069        ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"';
1070        ok "abbbbc" =~ m'\N{3,4}' && $& eq "abbb", '"abbbbc" =~ m\'\N{3,4}\' && $& eq "abbb"';
1071    }
1072
1073    {
1074        use charnames ":full";
1075        my $message = '[perl #74982] Period coming after \N{}';
1076        ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message);
1077        ok("\x{ff08}." =~ m'\N{FULLWIDTH LEFT PARENTHESIS}.' && $& eq "\x{ff08}.", $message);
1078        ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message);
1079        ok("\x{ff08}." =~ m'[\N{FULLWIDTH LEFT PARENTHESIS}].' && $& eq "\x{ff08}.", $message);
1080    }
1081
1082SKIP: {
1083    ######## "Segfault using HTML::Entities", Richard Jolly <richardjolly@mac.com>, <A3C7D27E-C9F4-11D8-B294-003065AE00B6@mac.com> in perl-unicode@perl.org
1084
1085    skip('Perl configured without Encode module', 1)
1086	unless $Config{extensions} =~ / Encode /;
1087
1088    # Test case cut down by jhi
1089    fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\)!, {}, 'Segfault using HTML::Entities');
1090use Encode;
1091my $t = ord('A') == 193 ? "\xEA" : "\xE9";
1092Encode::_utf8_on($t);
1093substr($t,0);
1094$t =~ s/([^a])//ge;
1095EOP
1096    }
1097
1098    {
1099        # pattern must be compiled late or we can break the test file
1100        my $message = '[perl #115050] repeated nothings in a trie can cause panic';
1101        my $pattern;
1102        $pattern = '[xyz]|||';
1103        ok("blah blah" =~ /$pattern/, $message);
1104        ok("blah blah" =~ /(?:$pattern)h/, $message);
1105        $pattern = '|||[xyz]';
1106        ok("blah blah" =~ /$pattern/, $message);
1107        ok("blah blah" =~ /(?:$pattern)h/, $message);
1108    }
1109
1110    {
1111        # [perl #4289] First mention $& after a match
1112	local $::TODO = "these tests fail without Copy-on-Write enabled"
1113	    if $Config{ccflags} =~ /PERL_NO_COW/;
1114        fresh_perl_is(
1115            '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$&|, "\n"',
1116            "b\n", {}, '$& first mentioned after match');
1117        fresh_perl_is(
1118            '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$`|, "\n"',
1119            "a\n", {}, '$` first mentioned after match');
1120        fresh_perl_is(
1121            '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$\'|,"\n"',
1122            "c\n", {}, '$\' first mentioned after match');
1123    }
1124
1125    {
1126	# [perl #118175] threaded perl-5.18.0 fails pat_rt_report_thr.t
1127	# this tests some related failures
1128	#
1129	# The tests in the block *only* fail when run on 32-bit systems
1130	# with a malloc that allocates above the 2GB line.  On the system
1131	# in the report above that only happened in a thread.
1132	my $s = "\x{1ff}" . "f" x 32;
1133	ok($s =~ /\x{1ff}[[:alpha:]]+/gca, "POSIXA pointer wrap");
1134    }
1135
1136    {
1137        # RT #129012 heap-buffer-overflow Perl_fbm_instr.
1138        # This test is unlikely to not pass, but it used to fail
1139        # ASAN/valgrind
1140
1141        my $s ="\x{100}0000000";
1142        ok($s !~ /00000?\x80\x80\x80/, "RT #129012");
1143    }
1144
1145    {
1146        # RT #129085 heap-buffer-overflow Perl_re_intuit_start
1147        # this did fail under ASAN, but didn't under valgrind
1148        my $s = "\x{f2}\x{140}\x{fe}\x{ff}\x{ff}\x{ff}";
1149        ok($s !~ /^0000.\34500\376\377\377\377/, "RT #129085");
1150    }
1151    {
1152        # rt
1153        fresh_perl_is(
1154            'no warnings "regexp"; "foo"=~/((?1)){8,0}/; print "ok"',
1155            "ok", {},  'RT #130561 - allowing impossible quantifier should not cause SEGVs');
1156        my $s= "foo";
1157        no warnings 'regexp';
1158        ok($s=~/(foo){1,0}|(?1)/,
1159            "RT #130561 - allowing impossible quantifier should not break recursion");
1160    }
1161	{
1162		# RT #133892 Coredump in Perl_re_intuit_start
1163		# Second match flips to checking floating substring before fixed
1164		# substring, which triggers a pathway that failed to check there
1165		# was a non-utf8 version of the string before trying to use it
1166		# resulting in a SEGV.
1167		my $result = grep /b\x{1c0}ss0/i, qw{ xxxx xxxx0 };
1168		ok($result == 0);
1169	}
1170
1171} # End of sub run_tests
1172
11731;
1174