1#!./perl 2# 3# This is a home for regular expression tests that don't fit into 4# the format supported by re/regexp.t. If you want to add a test 5# that does fit that format, add it to re/re_tests, not here. 6 7use strict; 8use warnings; 9use Config; 10use 5.010; 11 12 13sub run_tests; 14 15$| = 1; 16 17 18BEGIN { 19 chdir 't' if -d 't'; 20 require './test.pl'; 21 set_up_inc('../lib'); 22 require './charset_tools.pl'; 23} 24 25our @global; 26 27 28plan tests => 527; # Update this when adding/deleting tests. 29 30run_tests() unless caller; 31 32# test that runtime code without 'use re eval' is trapped 33 34sub norun { 35 like($@, qr/Eval-group not allowed at runtime/, @_); 36} 37 38# 39# Tests start here. 40# 41sub run_tests { 42 { 43 my $message = "Call code from qr //"; 44 local $_ = 'var="foo"'; 45 $a = qr/(?{++$b})/; 46 $b = 7; 47 ok(/$a$a/ && $b eq '9', $message); 48 49 my $c="$a"; 50 ok(/$a$a/ && $b eq '11', $message); 51 52 undef $@; 53 eval {/$c/}; 54 norun("$message norun 1"); 55 56 57 { 58 eval {/$a$c$a/}; 59 norun("$message norun 2"); 60 use re "eval"; 61 /$a$c$a/; 62 is($b, '14', $message); 63 } 64 65 our $lex_a = 43; 66 our $lex_b = 17; 67 our $lex_c = 27; 68 my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/); 69 70 is($lex_res, 1, $message); 71 is($lex_a, 44, $message); 72 is($lex_c, 43, $message); 73 74 undef $@; 75 my $d = '(?{1})'; 76 my $match = eval { /$a$c$a$d/ }; 77 ok($@ && $@ =~ /Eval-group not allowed/ && !$match, $message); 78 is($b, '14', $message); 79 80 $lex_a = 2; 81 $lex_a = 43; 82 $lex_b = 17; 83 $lex_c = 27; 84 $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/); 85 86 is($lex_res, 1, $message); 87 is($lex_a, 44, $message); 88 is($lex_c, 43, $message); 89 90 } 91 92 { 93 our $a = bless qr /foo/ => 'Foo'; 94 ok 'goodfood' =~ $a, "Reblessed qr // matches"; 95 is($a, '(?^:foo)', "Reblessed qr // stringifies"); 96 my $x = "\x{3fe}"; 97 my $z = my $y = byte_utf8a_to_utf8n("\317\276"); # Byte representation 98 # of $x 99 $a = qr /$x/; 100 ok $x =~ $a, "UTF-8 interpolation in qr //"; 101 ok "a$a" =~ $x, "Stringified qr // preserves UTF-8"; 102 ok "a$x" =~ /^a$a\z/, "Interpolated qr // preserves UTF-8"; 103 ok "a$x" =~ /^a(??{$a})\z/, 104 "Postponed interpolation of qr // preserves UTF-8"; 105 106 107 is(length qr /##/x, 9, "## in qr // doesn't corrupt memory; Bug 17776"); 108 109 { 110 ok "$x$x" =~ /^$x(??{$x})\z/, 111 "Postponed UTF-8 string in UTF-8 re matches UTF-8"; 112 ok "$y$x" =~ /^$y(??{$x})\z/, 113 "Postponed UTF-8 string in non-UTF-8 re matches UTF-8"; 114 ok "$y$x" !~ /^$y(??{$y})\z/, 115 "Postponed non-UTF-8 string in non-UTF-8 re doesn't match UTF-8"; 116 ok "$x$x" !~ /^$x(??{$y})\z/, 117 "Postponed non-UTF-8 string in UTF-8 re doesn't match UTF-8"; 118 ok "$y$y" =~ /^$y(??{$y})\z/, 119 "Postponed non-UTF-8 string in non-UTF-8 re matches non-UTF8"; 120 ok "$x$y" =~ /^$x(??{$y})\z/, 121 "Postponed non-UTF-8 string in UTF-8 re matches non-UTF8"; 122 123 $y = $z; # Reset $y after upgrade. 124 ok "$x$y" !~ /^$x(??{$x})\z/, 125 "Postponed UTF-8 string in UTF-8 re doesn't match non-UTF-8"; 126 ok "$y$y" !~ /^$y(??{$x})\z/, 127 "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8"; 128 } 129 } 130 { 131 our $this_counter; 132 ok( "ABDE" =~ /(A(A|B(*ACCEPT)|C)+D)(E)(?{ $this_counter++ })/, 133 "ACCEPT/CURLYX/EVAL - pattern should match"); 134 is( "$1-$2", "AB-B", 135 "Make sure that ACCEPT works in CURLYX by using EVAL"); 136 } 137 { 138 ok( "AB"=~/(A)(?(*{ 1 })B|C)/, "(?(*{ ... })yes|no) works as expected"); 139 ok( "AC"=~/(A)(?(*{ 0 })B|C)/, "(?(*{ ... })yes|no) works as expected"); 140 } 141 142 { 143 # Test if $^N and $+ work in (*{ }) (optimistic eval) 144 our @ctl_n = (); 145 our @plus = (); 146 my $nested_tags = qr{ 147 (?<nested_tags> 148 < 149 ((\w)+) 150 (*{ 151 push @ctl_n, (defined $^N ? $^N : "undef"); 152 push @plus, (defined $+ ? $+ : "undef"); 153 }) 154 > 155 (?&nested_tags)* 156 </\s* \w+ \s*> 157 ) 158 }x; 159 160 # note the results of this may change from perl to perl as different optimisations 161 # are added or enabled. It is testing that things *work*, not that they produce 162 # a specific output. The whole idea of optimistic eval is to have an eval that 163 # does not disable optimizations in the way a normal eval does. 164 my $c = 0; 165 for my $test ( 166 # Test structure: 167 # [ Expected result, Regex, Expected value(s) of $^N, Expected value(s) of $+, "note" ] 168 [ 1, qr#^$nested_tags$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ], 169 [ 1, qr#^($nested_tags)$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ], 170 [ 1, qr#^(|)$nested_tags$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ], 171 [ 1, qr#^(?:|)$nested_tags$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ], 172 [ 1, qr#^<(bl|bla)>$nested_tags<(/\1)>$#, "blubb /bla", "b /bla" ], 173 ) { #"#silence vim highlighting 174 $c++; 175 @ctl_n = (); 176 @plus = (); 177 my $match = (("<bla><blubb></blubb></bla>" =~ $test->[1]) ? 1 : 0); 178 push @ctl_n, (defined $^N ? $^N : "undef"); 179 push @plus, (defined $+ ? $+ : "undef"); 180 ok($test->[0] == $match, "(*{ ... }) match $c"); 181 if ($test->[0] != $match) { 182 # unset @ctl_n and @plus 183 @ctl_n = @plus = (); 184 } 185 my $note = $test->[4] ? " - $test->[4]" : ""; 186 is("@ctl_n", $test->[2], "(*{ ... }) ctl_n $c$note"); 187 is("@plus", $test->[3], "(*{ ... }) plus $c$note"); 188 } 189 } 190 191 { 192 # Test if $^N and $+ work in (?{}) 193 our @ctl_n = (); 194 our @plus = (); 195 our $nested_tags; 196 $nested_tags = qr{ 197 < 198 ((\w)+) 199 (?{ 200 push @ctl_n, (defined $^N ? $^N : "undef"); 201 push @plus, (defined $+ ? $+ : "undef"); 202 }) 203 > 204 (??{$nested_tags})* 205 </\s* \w+ \s*> 206 }x; 207 208 209 my $c = 0; 210 for my $test ( 211 # Test structure: 212 # [ Expected result, Regex, Expected value(s) of $^N, Expected value(s) of $+ ] 213 [ 1, qr#^$nested_tags$#, "bla blubb bla", "a b a" ], 214 [ 1, qr#^($nested_tags)$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ], 215 [ 1, qr#^(|)$nested_tags$#, "bla blubb bla", "a b a" ], 216 [ 1, qr#^(?:|)$nested_tags$#, "bla blubb bla", "a b a" ], 217 [ 1, qr#^<(bl|bla)>$nested_tags<(/\1)>$#, "blubb /bla", "b /bla" ], 218 [ 1, qr#(??{"(|)"})$nested_tags$#, "bla blubb bla", "a b a" ], 219 [ 1, qr#^(??{"(bla|)"})$nested_tags$#, "bla blubb bla", "a b a" ], 220 [ 1, qr#^(??{"(|)"})(??{$nested_tags})$#, "bla blubb undef", "a b undef" ], 221 [ 1, qr#^(??{"(?:|)"})$nested_tags$#, "bla blubb bla", "a b a" ], 222 [ 1, qr#^((??{"(?:bla|)"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], 223 [ 1, qr#^((??{"(?!)?"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], 224 [ 1, qr#^((??{"(?:|<(/?bla)>)"}))((??{$nested_tags}))\1$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], 225 [ 0, qr#^((??{"(?!)"}))?((??{$nested_tags}))(?!)$#, # changed in perl 5.37.7 226 "bla blubb blub blu bl b bl b undef", 227 "a b b u l b l b undef" ], 228 229 ) { #"#silence vim highlighting 230 $c++; 231 @ctl_n = (); 232 @plus = (); 233 my $match = (("<bla><blubb></blubb></bla>" =~ $test->[1]) ? 1 : 0); 234 push @ctl_n, (defined $^N ? $^N : "undef"); 235 push @plus, (defined $+ ? $+ : "undef"); 236 ok($test->[0] == $match, "match $c"); 237 if ($test->[0] != $match) { 238 # unset @ctl_n and @plus 239 @ctl_n = @plus = (); 240 } 241 is("@ctl_n", $test->[2], "ctl_n $c"); 242 is("@plus", $test->[3], "plus $c"); 243 } 244 } 245 246 { 247 our $f; 248 local $f; 249 $f = sub { 250 defined $_[0] ? $_[0] : "undef"; 251 }; 252 253 like("123", qr/^(\d)(((??{1 + $^N})))+$/, 'Bug 56194'); 254 255 our @ctl_n; 256 our @plus; 257 258 my $re = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})#; 259 my $re2 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})(|a(b)c|def)(??{"$^R"})#; 260 my $re3 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})(|a(b)c|def)(??{"$^R"})#; 261 our $re5; 262 local $re5 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})#; 263 my $re6 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; 264 my $re7 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; 265 my $re8 = qr/(\d+)/; 266 my $c = 0; 267 for my $test ( 268 # Test structure: 269 # [ 270 # String to match 271 # Regex too match 272 # Expected values of $^N 273 # Expected values of $+ 274 # Expected values of $1, $2, $3, $4 and $5 275 # ] 276 [ 277 "1233", 278 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(??{$^N})$#, 279 "1 2 3 3", 280 "1 2 3 3", 281 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", 282 ], 283 [ 284 "1233", 285 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$+})$#, 286 "1 2 3 3", 287 "1 2 3 3", 288 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", 289 ], 290 [ 291 "1233", 292 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$+})$#, 293 "1 2 3 3", 294 "1 2 3 3", 295 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", 296 ], 297 [ 298 "1233", 299 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$^N})$#, 300 "1 2 3 3", 301 "1 2 3 3", 302 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", 303 ], 304 [ 305 "1233", 306 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$^N})$#, 307 "1 2 3 3", 308 "1 2 3 3", 309 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", 310 ], 311 [ 312 "123abc3", 313 qr#^($re)(|a(b)c|def)(??{$^R})$#, 314 "1 2 3 abc", 315 "1 2 3 b", 316 "\$1 = 123, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", 317 ], 318 [ 319 "123abc3", 320 qr#^($re2)$#, 321 "1 2 3 123abc3", 322 "1 2 3 b", 323 "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", 324 ], 325 [ 326 "123abc3", 327 qr#^($re3)$#, 328 "1 2 123abc3", 329 "1 2 b", 330 "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", 331 ], 332 [ 333 "123abc3", 334 qr#^(??{$re5})(|abc|def)(??{"$^R"})$#, 335 "1 2 abc", 336 "1 2 abc", 337 "\$1 = abc, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef", 338 ], 339 [ 340 "123abc3", 341 qr#^(??{$re5})(|a(b)c|def)(??{"$^R"})$#, 342 "1 2 abc", 343 "1 2 b", 344 "\$1 = abc, \$2 = b, \$3 = undef, \$4 = undef, \$5 = undef", 345 ], 346 [ 347 "1234", 348 qr#^((\d+)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})))$#, 349 "1234 123 12 1 2 3 1234", 350 "1234 123 12 1 2 3 4", 351 "\$1 = 1234, \$2 = 1, \$3 = 2, \$4 = 3, \$5 = 4", 352 ], 353 [ 354 "1234556", 355 qr#^(\d+)($re6)($re6)($re6)$re6(($re6)$re6)$#, 356 "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 56", 357 "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 5", 358 "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 56", 359 ], 360 [ 361 "12345562", 362 qr#^((??{$re8}))($re7)($re7)($re7)$re7($re7)($re7(\2))$#, 363 "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 62", 364 "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 2", 365 "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 5", 366 ], 367 ) { 368 $c++; 369 @ctl_n = (); 370 @plus = (); 371 undef $^R; 372 my $match = $test->[0] =~ $test->[1]; 373 my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5)); 374 push @ctl_n, $f->($^N); 375 push @plus, $f->($+); 376 ok($match, "match $c; Bug 56194"); 377 if (not $match) { 378 # unset $str, @ctl_n and @plus 379 $str = ""; 380 @ctl_n = @plus = (); 381 } 382 is("@ctl_n", $test->[2], "ctl_n $c; Bug 56194"); 383 is("@plus", $test->[3], "plus $c; Bug 56194"); 384 is($str, $test->[4], "str $c; Bug 56194"); 385 } 386 387 { 388 @ctl_n = (); 389 @plus = (); 390 391 our $re4; 392 local $re4 = qr#(1)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})){2}(?{$^N})(|abc|def)(??{"$^R"})#; 393 undef $^R; 394 my $match = "123abc3" =~ m/^(??{$re4})$/; 395 my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5),'$^R = '.$f->($^R)); 396 push @ctl_n, $f->($^N); 397 push @plus, $f->($+); 398 ok($match, 'Bug 56194'); 399 if (not $match) { 400 # unset $str 401 @ctl_n = (); 402 @plus = (); 403 $str = ""; 404 } 405 is("@ctl_n", "1 2 undef", 'Bug 56194'); 406 is("@plus", "1 2 undef", 'Bug 56194'); 407 is($str, 408 "\$1 = undef, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef, \$^R = 3", 409 'Bug 56194 ($^R tweaked by 121070)'); 410 } 411 { 412 undef $^R; 413 "abcd"=~/(?<Char>.)(?&Char)(?{ 42 })/; 414 is("$^R", 42, 'Bug 121070 - use of (?&Char) should not clobber $^R'); 415 "abcd"=~/(?<Char>.)(?&Char)(?{ 42 })(?{ 43 })/; 416 is("$^R", 43, 'related to 121070 - use of (?&Char) should not clobber $^R'); 417 } 418 } 419 420 { 421 # re evals within \U, \Q etc shouldn't be seen by the lexer 422 local our $a = "i"; 423 local our $B = "J"; 424 ok('(?{1})' =~ /^\Q(?{1})\E$/, '\Q(?{1})\E'); 425 ok('(?{1})' =~ /^\Q(?{\E1\}\)$/, '\Q(?{\E1\}\)'); 426 eval {/^\U(??{"$a\Ea"})$/ }; norun('^\U(??{"$a\Ea"})$ norun'); 427 eval {/^\L(??{"$B\Ea"})$/ }; norun('^\L(??{"$B\Ea"})$ norun'); 428 use re 'eval'; 429 ok('Ia' =~ /^\U(??{"$a\Ea"})$/, '^\U(??{"$a\Ea"})$'); 430 ok('ja' =~ /^\L(??{"$B\Ea"})$/, '^\L(??{"$B\Ea"})$'); 431 } 432 433 { 434 # Comprehensive (hopefully) tests of closure behaviour: 435 # i.e. when do (?{}) blocks get (re)compiled, and what instances 436 # of lexical vars do they close over? 437 438 # if the pattern string gets utf8 upgraded while concatenating, 439 # make sure a literal code block is still detected (by still 440 # compiling in the absence of use re 'eval') 441 442 { 443 my $s1 = "\x{80}"; 444 my $s2 = "\x{100}"; 445 ok("\x{80}\x{100}" =~ /^$s1(?{1})$s2$/, "utf8 upgrade"); 446 } 447 448 my ($cr1, $cr2, $cr3, $cr4); 449 450 for my $x (qw(a b c)) { 451 my $bc = ($x ne 'a'); 452 my $c80 = chr(0x80); 453 454 # the most basic: literal code should be in same scope 455 # as the parent 456 457 ok("A$x" =~ /^A(??{$x})$/, "[$x] literal code"); 458 ok("\x{100}$x" =~ /^\x{100}(??{$x})$/, "[$x] literal code UTF8"); 459 460 # the "don't recompile if pattern unchanged" mechanism 461 # shouldn't apply to code blocks - recompile every time 462 # to pick up new instances of variables 463 464 my $code1 = 'B(??{$x})'; 465 my $code1u = $c80 . "\x{100}" . '(??{$x})'; 466 467 eval {/^A$code1$/}; 468 norun("[$x] unvarying runtime code AA norun"); 469 eval {/^A$code1u$/}; 470 norun("[$x] unvarying runtime code AU norun"); 471 eval {/^$c80\x{100}$code1$/}; 472 norun("[$x] unvarying runtime code UA norun"); 473 eval {/^$c80\x{101}$code1u$/}; 474 norun("[$x] unvarying runtime code UU norun"); 475 476 { 477 use re 'eval'; 478 ok("AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA"); 479 ok("A$c80\x{100}$x" =~ /^A$code1u$/, 480 "[$x] unvarying runtime code AU"); 481 ok("$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/, 482 "[$x] unvarying runtime code UA"); 483 ok("$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/, 484 "[$x] unvarying runtime code UU"); 485 } 486 487 # mixed literal and run-time code blocks 488 489 my $code2 = 'B(??{$x})'; 490 my $code2u = $c80 . "\x{100}" . '(??{$x})'; 491 492 eval {/^A(??{$x})-$code2$/}; 493 norun("[$x] literal+runtime AA norun"); 494 eval {/^A(??{$x})-$code2u$/}; 495 norun("[$x] literal+runtime AU norun"); 496 eval {/^$c80\x{100}(??{$x})-$code2$/}; 497 norun("[$x] literal+runtime UA norun"); 498 eval {/^$c80\x{101}(??{$x})-$code2u$/}; 499 norun("[$x] literal+runtime UU norun"); 500 501 { 502 use re 'eval'; 503 ok("A$x-B$x" =~ /^A(??{$x})-$code2$/, 504 "[$x] literal+runtime AA"); 505 ok("A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/, 506 "[$x] literal+runtime AU"); 507 ok("$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/, 508 "[$x] literal+runtime UA"); 509 ok("$c80\x{101}$x-$c80\x{100}$x" 510 =~ /^$c80\x{101}(??{$x})-$code2u$/, 511 "[$x] literal+runtime UU"); 512 } 513 514 # literal qr code only created once, naked 515 516 $cr1 //= qr/^A(??{$x})$/; 517 ok("Aa" =~ $cr1, "[$x] literal qr once naked"); 518 519 # literal qr code only created once, embedded with text 520 521 $cr2 //= qr/B(??{$x})$/; 522 ok("ABa" =~ /^A$cr2/, "[$x] literal qr once embedded text"); 523 524 # literal qr code only created once, embedded with text + lit code 525 526 $cr3 //= qr/C(??{$x})$/; 527 ok("A$x-BCa" =~ /^A(??{$x})-B$cr3/, 528 "[$x] literal qr once embedded text + lit code"); 529 530 # literal qr code only created once, embedded with text + run code 531 532 $cr4 //= qr/C(??{$x})$/; 533 my $code3 = 'A(??{$x})'; 534 535 eval {/^$code3-B$cr4/}; 536 norun("[$x] literal qr once embedded text + run code norun"); 537 { 538 use re 'eval'; 539 ok("A$x-BCa" =~ /^$code3-B$cr4/, 540 "[$x] literal qr once embedded text + run code"); 541 } 542 543 # literal qr code, naked 544 545 my $r1 = qr/^A(??{$x})$/; 546 ok("A$x" =~ $r1, "[$x] literal qr naked"); 547 548 # literal qr code, embedded with text 549 550 my $r2 = qr/B(??{$x})$/; 551 ok("AB$x" =~ /^A$r2/, "[$x] literal qr embedded text"); 552 553 # literal qr code, embedded with text + lit code 554 555 my $r3 = qr/C(??{$x})$/; 556 ok("A$x-BC$x" =~ /^A(??{$x})-B$r3/, 557 "[$x] literal qr embedded text + lit code"); 558 559 # literal qr code, embedded with text + run code 560 561 my $r4 = qr/C(??{$x})$/; 562 my $code4 = '(??{$x})'; 563 564 eval {/^A$code4-B$r4/}; 565 norun("[$x] literal qr embedded text + run code"); 566 { 567 use re 'eval'; 568 ok("A$x-BC$x" =~ /^A$code4-B$r4/, 569 "[$x] literal qr embedded text + run code"); 570 } 571 572 # nested qr in different scopes 573 574 my $code5 = '(??{$x})'; 575 my $r5 = qr/C(??{$x})/; 576 577 my $r6; 578 eval {qr/$code5-C(??{$x})/}; norun("r6 norun"); 579 { 580 use re 'eval'; 581 $r6 = qr/$code5-C(??{$x})/; 582 } 583 584 my @rr5; 585 my @rr6; 586 587 for my $y (qw(d e f)) { 588 589 my $rr5 = qr/^A(??{"$x$y"})-$r5/; 590 push @rr5, $rr5; 591 ok("A$x$y-C$x" =~ $rr5, 592 "[$x-$y] literal qr + r5"); 593 594 my $rr6 = qr/^A(??{"$x$y"})-$r6/; 595 push @rr6, $rr6; 596 ok("A$x$y-$x-C$x" =~ $rr6, 597 "[$x-$y] literal qr + r6"); 598 } 599 600 for my $i (0,1,2) { 601 my $y = 'Y'; 602 my $yy = (qw(d e f))[$i]; 603 my $rr5 = $rr5[$i]; 604 ok("A$x$yy-C$x" =~ $rr5, "[$x-$yy] literal qr + r5, outside"); 605 ok("A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})$/, 606 "[$x-$yy] literal qr + r5 + lit, outside"); 607 608 609 my $rr6 = $rr6[$i]; 610 push @rr6, $rr6; 611 ok("A$x$yy-$x-C$x" =~ $rr6, 612 "[$x-$yy] literal qr + r6, outside"); 613 ok("A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/, 614 "[$x-$yy] literal qr + r6 +lit, outside"); 615 } 616 } 617 618 # recursive subs should get lexical from the correct pad depth 619 620 sub recurse { 621 my ($n) = @_; 622 return if $n > 2; 623 ok("A$n" =~ /^A(??{$n})$/, "recurse($n)"); 624 recurse($n+1); 625 } 626 recurse(0); 627 628 # for qr// containing run-time elements but with a compile-time 629 # code block, make sure the run-time bits are executed in the same 630 # pad they were compiled in 631 { 632 my $a = 'a'; # ensure outer and inner pads don't align 633 my $b = 'b'; 634 my $c = 'c'; 635 my $d = 'd'; 636 my $r = qr/^$b(??{$c})$d$/; 637 ok("bcd" =~ $r, "qr with run-time elements and code block"); 638 } 639 640 # check that cascaded embedded regexes all see their own lexical 641 # environment 642 643 { 644 my ($r1, $r2, $r3, $r4); 645 my ($x1, $x2, $x3, $x4) = (5,6,7,8); 646 { my $x1 = 1; $r1 = qr/A(??{$x1})/; } 647 { my $x2 = 2; $r2 = qr/$r1(??{$x2})/; } 648 { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; } 649 { my $x4 = 4; $r4 = qr/$r3(??{$x4})/; } 650 ok("A1234" =~ /^$r4$/, "cascaded qr"); 651 } 652 653 # and again, but in a loop, with no external references 654 # being maintained to the qr's 655 656 { 657 my $r = 'A'; 658 for my $x (1..4) { 659 $r = qr/$r(??{$x})/; 660 } 661 my $x = 5; 662 ok("A1234" =~ /^$r$/, "cascaded qr loop"); 663 } 664 665 666 # and again, but compiling the qrs in an eval so there 667 # aren't even refs to the qrs from any ops 668 669 { 670 my $r = 'A'; 671 for my $x (1..4) { 672 $r = eval q[ qr/$r(??{$x})/; ]; 673 } 674 my $x = 5; 675 ok("A1234" =~ /^$r$/, "cascaded qr loop"); 676 } 677 678 # have qrs with either literal code blocks or only embedded 679 # code blocks, but not both 680 681 { 682 my ($r1, $r2, $r3, $r4); 683 my ($x1, $x3) = (7,8); 684 { my $x1 = 1; $r1 = qr/A(??{$x1})/; } 685 { $r2 = qr/${r1}2/; } 686 { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; } 687 { $r4 = qr/${r3}4/; } 688 ok("A1234" =~ /^$r4$/, "cascaded qr mix 1"); 689 ok("A12345" =~ /^${r4}5$/, "cascaded qr mix 2"); 690 ok("A1234" =~ qr/^$r4$/ , "cascaded qr mix 3"); 691 ok("A12345" =~ qr/^${r4}5$/, "cascaded qr mix 4"); 692 } 693 694 # and make sure things are freed at the right time 695 { 696 sub Foo99::DESTROY { $Foo99::d++ } 697 $Foo99::d = 0; 698 my $r1; 699 { 700 my $x = bless [1], 'Foo99'; 701 $r1 = eval 'qr/(??{$x->[0]})/'; 702 } 703 my $r2 = eval 'qr/a$r1/'; 704 my $x = 2; 705 ok(eval '"a1" =~ qr/^$r2$/', "match while in scope"); 706 # make sure PL_reg_curpm isn't holding on to anything 707 "a" =~ /a(?{1})/; 708 is($Foo99::d, 0, "before scope exit"); 709 } 710 ::is($Foo99::d, 1, "after scope exit"); 711 712 # forward declared subs should Do The Right Thing with any anon CVs 713 # within them (i.e. pad_fixup_inner_anons() should work) 714 715 sub forward; 716 sub forward { 717 my $x = "a"; 718 my $A = "A"; 719 ok("Aa" =~ qr/^A(??{$x})$/, "forward qr compiletime"); 720 ok("Aa" =~ qr/^$A(??{$x})$/, "forward qr runtime"); 721 } 722 forward; 723 } 724 725 # test that run-time embedded code, when re-fed into toker, 726 # does all the right escapes 727 728 { 729 my $enc; 730 $enc = eval 'use Encode; find_encoding("ascii")' unless $::IS_EBCDIC; 731 732 my $x = 0; 733 my $y = 'bad'; 734 735 # note that most of the strings below are single-quoted, and the 736 # things within them, like '$y', *aren't* intended to interpolate 737 738 my $s1 = 739 'a\\$y(?# (??{BEGIN{$x=1} "X1"})b(?# \Ux2\E)c\'d\\\\e\\\\Uf\\\\E'; 740 741 ok(q{a$ybc'd\e\Uf\E} =~ /^$s1$/, "reparse"); 742 is($x, 0, "reparse no BEGIN"); 743 744 my $s2 = 'g\\$y# (??{{BEGIN{$x=2} "X3"}) \Ux3\E' . "\nh"; 745 746 ok(q{a$ybc'd\\e\\Uf\\Eg$yh} =~ /^$s1$s2$/x, "reparse /x"); 747 is($x, 0, "reparse /x no BEGIN"); 748 749 my $b = '\\'; 750 my $q = '\''; 751 752 # non-ascii in string as "<0xNNN>" 753 sub esc_str { 754 my $s = shift; 755 $s =~ s{(.)}{ 756 my $c = ord($1); 757 (utf8::native_to_unicode($c)< 32 758 || utf8::native_to_unicode($c) > 127) 759 ? sprintf("<0x%x>", $c) : $1; 760 }ge; 761 $s; 762 } 763 sub fmt { sprintf "hairy backslashes %s [%s] =~ /^%s/", 764 $_[0], esc_str($_[1]), esc_str($_[2]); 765 } 766 767 768 for my $u ( 769 [ '', '', 'blank ' ], 770 [ "\x{100}", '\x{100}', 'single' ], 771 [ "\x{100}", "\x{100}", 'double' ]) 772 { 773 for my $pair ( 774 [ "$b", "$b$b" ], 775 [ "$q", "$q" ], 776 [ "$b$q", "$b$b$b$q" ], 777 [ "$b$b$q", "$b$b$b$b$q" ], 778 [ "$b$b$b$q", "$b$b$b$b$b$b$q" ], 779 [ "$b$b$b$b$q","$b$b$b$b$b$b$b$b$q" ], 780 ) { 781 my ($s, $r) = @$pair; 782 $s = "9$s"; 783 my $ss = "$u->[0]$s"; 784 785 my $c = '9' . $r; 786 my $cc = "$u->[1]$c"; 787 788 ok($ss =~ /^$cc/, fmt("plain $u->[2]", $ss, $cc)); 789 790 no strict; 791 $nine = $nine = "bad"; 792 $ss = "$u->[0]\t${q}\x41${b}x42$s" if $::IS_ASCII; 793 $ss = "$u->[0]\t${q}\xC1${b}xC2$s" if $::IS_EBCDIC; 794 for my $use_qr ('', 'qr') { 795 $c = qq[(??{my \$z='{';] 796 . (($::IS_ASCII) 797 ? qq[$use_qr"$b${b}t$b$q$b${b}x41$b$b$b${b}x42"] 798 : qq[$use_qr"$b${b}t$b$q$b${b}xC1$b$b$b${b}xC2"]) 799 . qq[. \$nine})]; 800 # (??{ qr/str/ }) goes through one less interpolation 801 # stage than (??{ qq/str/ }) 802 $c =~ s{\\\\}{\\}g if ($use_qr eq 'qr'); 803 $c .= $r; 804 $cc = "$u->[1]$c"; 805 my $nine = 9; 806 807 eval {/^$cc/}; norun(fmt("code norun $u->[2]", $ss, $cc)); 808 { 809 use re 'eval'; 810 ok($ss =~ /^$cc/, fmt("code $u->[2]", $ss, $cc)); 811 } 812 } 813 } 814 } 815 816 my $code1u = "(??{qw(\x{100})})"; 817 eval {/^$code1u$/}; norun("reparse embedded unicode norun"); 818 { 819 use re 'eval'; 820 ok("\x{100}" =~ /^$code1u$/, "reparse embedded unicode"); 821 } 822 } 823 824 # a non-pattern literal won't get code blocks parsed at compile time; 825 # but they must get parsed later on if 'use re eval' is in scope 826 # also check that unbalanced {}'s are parsed ok 827 828 { 829 eval q["a{" =~ '^(??{"a{"})$']; 830 norun("non-pattern literal code norun"); 831 eval {/^${\'(??{"a{"})'}$/}; 832 norun("runtime code with unbalanced {} norun"); 833 834 use re 'eval'; 835 ok("a{" =~ '^a(??{"{"})$', "non-pattern literal code"); 836 ok("a{" =~ /^a${\'(??{"{"})'}$/, "runtime code with unbalanced {}"); 837 } 838 839 # make sure warnings come from the right place 840 841 { 842 use warnings; 843 my ($s, $t, $w); 844 local $SIG{__WARN__} = sub { $w .= "@_" }; 845 846 $w = ''; $s = 's'; 847 my $r = qr/(?{$t=$s+1})/; 848 "a" =~ /a$r/; 849 like($w, qr/pat_re_eval/, "warning main file"); 850 851 # do it in an eval to get predictable line numbers 852 eval q[ 853 854 $r = qr/(?{$t=$s+1})/; 855 ]; 856 $w = ''; $s = 's'; 857 "a" =~ /a$r/; 858 like($w, qr/ at \(eval \d+\) line 3/, "warning eval A"); 859 860 $w = ''; $s = 's'; 861 eval q[ 862 use re 'eval'; 863 my $c = '(?{$t=$s+1})'; 864 "a" =~ /a$c/; 865 1; 866 ]; 867 like($w, qr/ at \(eval \d+\) line 1/, "warning eval B"); 868 } 869 870 # jumbo test for: 871 # * recursion; 872 # * mixing all the different types of blocks (literal, qr/literal/, 873 # runtime); 874 # * backtracking (the Z+ alternation ensures CURLYX and full 875 # scope popping on backtracking) 876 877 { 878 sub recurse2 { 879 my ($depth)= @_; 880 return unless $depth; 881 my $s1 = '3-LMN'; 882 my $r1 = qr/(??{"$s1-$depth"})/; 883 884 my $s2 = '4-PQR'; 885 my $c1 = '(??{"$s2-$depth"})'; 886 use re 'eval'; 887 ok( "<12345-ABC-$depth-123-LMN-$depth-1234-PQR-$depth>" 888 . "<12345-ABC-$depth-123-LMN-$depth-1234-PQR-$depth>" 889 =~ 890 /^<(\d|Z+)+(??{"45-ABC-$depth-"})(\d|Z+)+$r1-\d+$c1> 891 <(\d|Z+)+(??{"45-ABC-$depth-"})(\d|Z+)+$r1-\d+$c1>$/x, 892 "recurse2($depth)"); 893 recurse2($depth-1); 894 } 895 recurse2(5); 896 } 897 898 # nested (??{}) called from various levels of a recursive function 899 900 { 901 sub recurse3 { 902 my ($n) = @_; 903 return if $n > 3; 904 ok("A$n" =~ m{^A(??{ "0123" =~ /((??{$n}))/; $1 })$}, 905 "recurse3($n)"); 906 ok("A$n" !~ m{^A(??{ "0123" =~ /((??{$n}))/; "X" })$}, 907 "recurse3($n) nomatch"); 908 recurse3($n+1); 909 } 910 recurse3(0); 911 } 912 913 # nested (??{}) being invoked recursively via a function 914 915 { 916 my $s = ''; 917 our $recurse4; 918 my @alpha = qw(A B C D E); 919 $recurse4 = sub { 920 my ($n) = @_; 921 $s .= "(n=$n:"; 922 if ($n < 4) { 923 my $m = ("$alpha[$n]" . substr("0123", 0, $n+1)) =~ 924 m{^([A-Z]) 925 (??{ 926 $s .= "1=$1:"; 927 "$n-0123" =~ m{^(\d)-(((??{$recurse4->($n+1)})))}; 928 $s .= "i1=$1:<=[$2]"; 929 $3; # NB - not stringified 930 }) 931 $ 932 }x; 933 $s .= "1a=$1:"; 934 $s .= $m ? 'M' : '!M'; 935 } 936 my $ret = '.*?' . ($n-1); 937 $s .= "<=[$ret])"; 938 return $ret; 939 }; 940 $recurse4->(0); 941 my $exp = '(n=0:1=A:(n=1:1=B:(n=2:1=C:(n=3:1=D:(n=4:<=[.*?3])' 942 . 'i1=3:<=[0123]1a=D:M<=[.*?2])i1=2:<=[012]1a=C:M<=[.*?1])' 943 . 'i1=1:<=[01]1a=B:M<=[.*?0])i1=0:<=[0]1a=A:M<=[.*?-1])'; 944 is($s, $exp, 'recurse4'); 945 } 946 947 # single (??{}) being invoked recursively via a function 948 949 { 950 my $s = ''; 951 our $recurse5; 952 my @alpha = qw(A B C D E); 953 $recurse5 = sub { 954 my ($n) = @_; 955 $s .= "(n=$n:"; 956 if ($n < 4) { 957 my $m = ("$alpha[$n]" . substr("0123", 0, $n+1)) =~ 958 m{^([A-Z]) 959 ((??{ 960 $s .= "1=$1:"; 961 $recurse5->($n+1); 962 })) 963 $ 964 }x; 965 $s .= "1a=$1:2=$2:"; 966 $s .= $m ? 'M' : '!M'; 967 } 968 my $ret = '.*?' . ($n-1); 969 $s .= "<=[$ret])"; 970 return $ret; 971 }; 972 $recurse5->(0); 973 my $exp = '(n=0:1=A:(n=1:1=B:(n=2:1=C:(n=3:1=D:(n=4:<=[.*?3])' 974 . '1a=D:2=0123:M<=[.*?2])1a=C:2=012:M<=[.*?1])' 975 . '1a=B:2=01:M<=[.*?0])1a=A:2=0:M<=[.*?-1])'; 976 is($s, $exp, 'recurse5'); 977 } 978 979 980 # make sure that errors during compiling run-time code get trapped 981 982 { 983 use re 'eval'; 984 985 my $code = '(?{$x=})'; 986 eval { "a" =~ /^a$code/ }; 987 like($@, qr/syntax error at \(eval \d+\) line \d+/, 'syntax error'); 988 989 $code = '(?{BEGIN{die})'; 990 eval { "a" =~ /^a$code/ }; 991 like($@, 992 qr/BEGIN failed--compilation aborted at \(eval \d+\) line \d+/, 993 'syntax error'); 994 995 use utf8; 996 $code = '(?{���������::$bar})'; 997 eval { "a" =~ /^a$code/ }; 998 like($@, qr/Bad name after ���������:: at \(eval \d+\) line \d+/, 'UTF8 sytax error'); 999 } 1000 1001 # make sure that 'use re eval' is propagated into compiling the 1002 # pattern returned by (??{}) 1003 1004 { 1005 use re 'eval'; 1006 my $pat = 'B(??{1})C'; 1007 my $A = 'A'; 1008 # compile-time outer code-block 1009 ok("AB1CD" =~ /^A(??{$pat})D$/, "re eval propagated compile-time"); 1010 # run-time outer code-block 1011 ok("AB1CD" =~ /^$A(??{$pat})D$/, "re eval propagated run-time"); 1012 } 1013 1014 # returning a ref to something that had set magic but wasn't 1015 # PERL_MAGIC_qr triggered a false positive assertion failure 1016 # The test is not so much concerned with it not matching, 1017 # as with not failing the assertion 1018 1019 { 1020 ok("a" !~ /^(a)(??{ \$1 })/, '(??{ ref })'); 1021 } 1022 1023 # make sure the uninit warning from returning an undef var 1024 # sees the right var 1025 1026 { 1027 my ($u1, $u2); 1028 my $warn = ''; 1029 local $SIG{__WARN__} = sub { $warn .= $_[0] }; 1030 $u1 =~ /(??{$u2})/ or die; 1031 like($warn, qr/value \$u1 in pattern match.*\n.*value at/, 'uninit'); 1032 } 1033 1034 # test that code blocks are called in scalar context 1035 1036 { 1037 my @a = (0); 1038 ok("" =~ /^(?{@a})$/, '(?{}) in scalar context'); 1039 is($^R, 1, '(?{}) in scalar context: $^R'); 1040 ok("1" =~ /^(??{@a})$/, '(??{}) in scalar context'); 1041 ok("foo" =~ /^(?(?{@a})foo|bar)$/, '(?(?{})|) in scalar context'); 1042 } 1043 1044 # BEGIN in compiled blocks shouldn't mess with $1 et al 1045 1046 { 1047 use re 'eval'; 1048 my $code1 = '(B)(??{ BEGIN { "X" =~ /X/ } $1})(C)'; 1049 ok("ABBCA" =~ /^(.)(??{$code1})\1$/, '(?{}) BEGIN and $1'); 1050 my $code2 = '(B)(??{ BEGIN { "X" =~ /X/ } $1 =~ /(.)/ ? $1 : ""})(C)'; 1051 ok("ABBCA" =~ /^(.)(??{$code2})\1$/, '(?{}) BEGIN and $1 mark 2'); 1052 } 1053 1054 # check that the optimiser is applied to code blocks: see if aelem has 1055 # been converted to aelemfast 1056 1057 { 1058 my $out; 1059 for my $prog ( 1060 '/(?{$a[0]})/', 1061 'q() =~ qr/(?{$a[0]})/', 1062 'use re q(eval); q() =~ q{(?{$a[0]})}', 1063 'use re q(eval); $c = q{(?{$a[0]})}; /$c/', 1064 'use re q(eval); $c = q{(?{$a[0]})}; /(?{1;})$c/', 1065 ) { 1066 $out = runperl(switches => ["-Dt"], prog => $prog, stderr => 1); 1067 like($out, qr/aelemfast|Recompile perl with -DDEBUGGING/, 1068 "optimise: '$prog'"); 1069 } 1070 } 1071 1072 # [perl #115080] 1073 # Ensure that ?pat? matches exactly once, even when the run-time 1074 # pattern changes, and even when the presence of run-time (?{}) affects 1075 # how and when patterns are recompiled 1076 1077 { 1078 my $m; 1079 1080 $m = ''; 1081 for (qw(a a a)) { 1082 $m .= $_ if m?$_?; 1083 } 1084 is($m, 'a', '?pat? with a,a,a'); 1085 1086 $m = ''; 1087 for (qw(a b c)) { 1088 $m .= $_ if m?$_?; 1089 } 1090 is($m, 'a', '?pat? with a,b,c'); 1091 1092 use re 'eval'; 1093 1094 $m = ''; 1095 for (qw(a a a)) { 1096 my $e = qq[(??{"$_"})]; 1097 $m .= $_ if m?$e?; 1098 } 1099 is($m, 'a', '?pat? with (??{a,a,a})'); 1100 1101 $m = ''; 1102 for (qw(a b c)) { 1103 my $e = qq[(??{"$_"})]; 1104 $m .= $_ if m?$e?; 1105 } 1106 is($m, 'a', '?pat? with (??{a,b,c})'); 1107 } 1108 1109 { 1110 # this code won't actually fail, but it used to fail valgrind, 1111 # so its here just to make sure valgrind doesn't fail again 1112 # While examining the ops of the secret anon sub wrapped around 1113 # the qr//, the pad of the sub was in scope, so cSVOPo_sv 1114 # got the const from the wrong pad. By having lots of $s's 1115 # (aka gvsv(*s), this forces the targs of the consts which have 1116 # been moved to the pad, to have high indices. 1117 1118 sub { 1119 local our $s = "abc"; 1120 my $qr = qr/^(?{1})$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s/; 1121 }->(); 1122 pass("cSVOPo_sv"); 1123 } 1124 1125 # [perl #115004] 1126 # code blocks in qr objects that are interpolated in arrays need 1127 # handling the same as if they were interpolated from scalar vars 1128 # (before this code would need 'use re "eval"') 1129 1130 { 1131 use Tie::Array; 1132 1133 local @global; 1134 my @array; 1135 my @refs = (0, \@array, 2); 1136 my @tied; 1137 tie @tied, 'Tie::StdArray'; 1138 { 1139 my $bb = 'B'; 1140 my $dd = 'D'; 1141 @array = ('A', qr/(??{$bb})/, 'C', qr/(??{$dd})/, 'E'); 1142 @tied = @array; 1143 @global = @array; 1144 } 1145 my $bb = 'X'; 1146 my $dd = 'Y'; 1147 ok("A B C D E=" =~ /@array/, 'bare interpolated array match'); 1148 ok("A B C D E=" =~ qr/@array/, 'qr bare interpolated array match'); 1149 ok("A B C D E=" =~ /@global/, 'bare interpolated global array match'); 1150 ok("A B C D E=" =~ qr/@global/, 1151 'qr bare interpolated global array match'); 1152 ok("A B C D E=" =~ /@{$refs[1]}/, 'bare interpolated ref array match'); 1153 ok("A B C D E=" =~ qr/@{$refs[1]}/, 1154 'qr bare interpolated ref array match'); 1155 ok("A B C D E=" =~ /@tied/, 'bare interpolated tied array match'); 1156 ok("A B C D E=" =~ qr/@tied/, 'qr bare interpolated tied array match'); 1157 ok("aA B C D E=" =~ /^a@array=$/, 'interpolated array match'); 1158 ok("aA B C D E=" =~ qr/^a@array=$/, 'qr interpolated array match'); 1159 ok("aA B C D E=" =~ /^a@global=$/, 'interpolated global array match'); 1160 ok("aA B C D E=" =~ qr/^a@global=$/, 1161 'qr interpolated global array match'); 1162 ok("aA B C D E=" =~ /^a@{$refs[1]}=$/, 'interpolated ref array match'); 1163 ok("aA B C D E=" =~ qr/^a@{$refs[1]}=$/, 1164 'qr interpolated ref array match'); 1165 ok("aA B C D E=" =~ /^a@tied=$/, 'interpolated tied array match'); 1166 ok("aA B C D E=" =~ qr/^a@tied=$/, 'qr interpolated tied array match'); 1167 1168 { 1169 local $" = '-'; 1170 ok("aA-B-C-D-E=" =~ /^a@{array}=$/, 1171 'interpolated array match with local sep'); 1172 ok("aA-B-C-D-E=" =~ qr/^a@{array}=$/, 1173 'qr interpolated array match with local sep'); 1174 ok("aA-B-C-D-E=" =~ /^a@{global}=$/, 1175 'interpolated global array match with local sep'); 1176 ok("aA-B-C-D-E=" =~ qr/^a@{global}=$/, 1177 'qr interpolated global array match with local sep'); 1178 ok("aA-B-C-D-E=" =~ /^a@{tied}=$/, 1179 'interpolated tied array match with local sep'); 1180 ok("aA-B-C-D-E=" =~ qr/^a@{tied}=$/, 1181 'qr interpolated tied array match with local sep'); 1182 } 1183 1184 # but don't handle the array ourselves in the presence of \Q etc 1185 1186 @array = ('A', '(?{})'); 1187 @global = @array; 1188 @tied = @array; 1189 ok("aA (?{})=" =~ /^a\Q@{array}\E=$/, 1190 'interpolated array match with \Q'); 1191 ok("aA (?{})=" =~ qr/^a\Q@{array}\E=$/, 1192 'qr interpolated array match with \Q'); 1193 ok("aA (?{})=" =~ /^a\Q@{global}\E=$/, 1194 'interpolated global array match with \Q'); 1195 ok("aA (?{})=" =~ qr/^a\Q@{global}\E=$/, 1196 'qr interpolated global array match with \Q'); 1197 ok("aA (?{})=" =~ /^a\Q@{$refs[1]}\E=$/, 1198 'interpolated ref array match with \Q'); 1199 ok("aA (?{})=" =~ qr/^a\Q@{$refs[1]}\E=$/, 1200 'qr interpolated ref array match with \Q'); 1201 ok("aA (?{})=" =~ /^a\Q@{tied}\E=$/, 1202 'interpolated tied array match with \Q'); 1203 ok("aA (?{})=" =~ qr/^a\Q@{tied}\E=$/, 1204 'qr interpolated tied array match with \Q'); 1205 1206 # and check it works with an empty array 1207 1208 @array = (); 1209 @global = (); 1210 @tied = (); 1211 ok("a=" =~ /^a@array=$/, 'empty array match'); 1212 ok("a=" =~ qr/^a@array=$/, 'qr empty array match'); 1213 ok("a=" =~ /^a@global=$/, 'empty global array match'); 1214 ok("a=" =~ qr/^a@global=$/, 'qr empty global array match'); 1215 ok("a=" =~ /^a@tied=$/, 'empty tied array match'); 1216 ok("a=" =~ qr/^a@tied=$/, 'qr empty tied array match'); 1217 ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q'); 1218 ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q'); 1219 ok("a=" =~ qr/^a\Q@{global}\E=$/, 1220 'qr empty global array match with \Q'); 1221 ok("a=" =~ /^a\Q@{tied}\E=$/, 'empty tied array match with \Q'); 1222 ok("a=" =~ qr/^a\Q@{tied}\E=$/, 'qr empty tied array match with \Q'); 1223 1224 # NB: these below are empty patterns, so they happen to use the 1225 # successful match from the line above 1226 1227 ok("a=" =~ /@array/, 'empty array pattern'); 1228 ok("a=" =~ qr/@array/, 'qr empty array pattern'); 1229 ok("a=" =~ /@global/, 'empty global array pattern'); 1230 ok("a=" =~ qr/@global/, 'qr empty global array pattern'); 1231 ok("a=" =~ /@tied/, 'empty tied pattern'); 1232 ok("a=" =~ qr/@tied/, 'qr empty tied pattern'); 1233 ok("a=" =~ /\Q@array\E/, 'empty array pattern with \Q'); 1234 ok("a=" =~ qr/\Q@array\E/, 'qr empty array pattern with \Q'); 1235 ok("a=" =~ /\Q@global\E/, 'empty global array pattern with \Q'); 1236 ok("a=" =~ qr/\Q@global\E/, 'qr empty global array pattern with \Q'); 1237 ok("a=" =~ /\Q@tied\E/, 'empty tied pattern with \Q'); 1238 ok("a=" =~ qr/\Q@tied\E/, 'qr empty tied pattern with \Q'); 1239 ok("a=" =~ //, 'completely empty pattern'); 1240 ok("a=" =~ qr//, 'qr completely empty pattern'); 1241 } 1242 1243 { 1244 { package o; use overload '""'=>sub { "abc" } } 1245 my $x = bless [],"o"; 1246 my $y = \$x; 1247 (my $y_addr = "$y") =~ y/()//d; # REF(0x7fcb9c02) -> REF0x7fcb9c02 1248 # $y_addr =~ $y should be true, as should $y_addr =~ /(??{$y})/ 1249 "abc$y_addr" =~ /(??{$x})(??{$y})/; 1250 is "$&", "abc$y_addr", 1251 '(??{$x}) does not leak cached qr to (??{\$x}) (match)'; 1252 is scalar "abcabc" =~ /(??{$x})(??{$y})/, "", 1253 '(??{$x}) does not leak cached qr to (??{\$x}) (no match)'; 1254 } 1255 1256 { 1257 sub ReEvalTieTest::TIESCALAR {bless[], "ReEvalTieTest"} 1258 sub ReEvalTieTest::STORE{} 1259 sub ReEvalTieTest::FETCH { "$1" } 1260 tie my $t, "ReEvalTieTest"; 1261 $t = bless [], "o"; 1262 "aab" =~ /(a)((??{"b" =~ m|(.)|; $t}))/; 1263 is "[$1 $2]", "[a b]", 1264 '(??{$tied_former_overload}) sees the right $1 in FETCH'; 1265 } 1266 1267 { 1268 my @matchsticks; 1269 my $ref = bless \my $o, "o"; 1270 my $foo = sub { push @matchsticks, scalar "abc" =~ /(??{$ref})/ }; 1271 &$foo; 1272 bless \$o; 1273 () = "$ref"; # flush AMAGIC flag on main 1274 &$foo; 1275 is "@matchsticks", "1 ", 'qr magic is not cached on refs'; 1276 } 1277 1278 { 1279 my ($foo, $bar) = ("foo"x1000, "bar"x1000); 1280 "$foo$bar" =~ /(??{".*"})/; 1281 is "$&", "foo"x1000 . "bar"x1000, 1282 'padtmp swiping does not affect "$a$b" =~ /(??{})/' 1283 } 1284 1285 { 1286 # [perl #129140] 1287 # this used to cause a double-free of the code_block struct 1288 # when re-running the compilation after spotting utf8. 1289 # This test doesn't catch it, but might panic, or fail under 1290 # valgrind etc 1291 1292 my $s = ''; 1293 /$s(?{})\x{100}/ for '', ''; 1294 pass "RT #129140"; 1295 } 1296 1297 # RT #130650 code blocks could get double-freed during a pattern 1298 # compilation croak 1299 1300 { 1301 # this used to panic or give ASAN errors 1302 eval 'qr/(?{})\6/'; 1303 like $@, qr/Reference to nonexistent group/, "RT #130650"; 1304 } 1305 1306 # RT #129881 1307 # on exit from a pattern with multiple code blocks from different 1308 # CVs, PL_comppad wasn't being restored correctly 1309 1310 sub { 1311 # give first few pad slots known values 1312 my ($x1, $x2, $x3, $x4, $x5) = 101..105; 1313 # these vars are in a separate pad 1314 my $r = qr/((?{my ($y1, $y2) = 201..202; 1;})A){2}X/; 1315 # the first alt fails, causing a switch to this anon 1316 # sub's pad 1317 "AAA" =~ /$r|(?{my ($z1, $z2) = 301..302; 1;})A/; 1318 is $x1, 101, "RT #129881: x1"; 1319 is $x2, 102, "RT #129881: x2"; 1320 is $x3, 103, "RT #129881: x3"; 1321 }->(); 1322 1323 1324 # RT #126697 1325 # savestack wasn't always being unwound on EVAL failure 1326 { 1327 local our $i = 0; 1328 my $max = 0; 1329 1330 'ABC' =~ m{ 1331 \A 1332 (?: 1333 (?: AB | A | BC ) 1334 (?{ 1335 local $i = $i + 1; 1336 $max = $i if $max < $i; 1337 }) 1338 )* 1339 \z 1340 }x; 1341 is $max, 2, "RT #126697"; 1342 } 1343 1344 # RT #132772 1345 # 1346 # Ensure that optimisation of OP_CONST into OP_MULTICONCAT doesn't 1347 # leave any freed ops in the execution path. This is associated 1348 # with rpeep() being called before optimize_optree(), which causes 1349 # gv/rv2sv to be prematurely optimised into gvsv, confusing 1350 # S_maybe_multiconcat when it tries to reorganise a concat subtree 1351 # into a multiconcat list 1352 1353 { 1354 my $a = "a"; 1355 local $b = "b"; # not lexical, so optimised to OP_GVSV 1356 local $_ = "abc"; 1357 ok /^a(??{ $b."c" })$/, "RT #132772 - compile time"; 1358 ok /^$a(??{ $b."c" })$/, "RT #132772 - run time"; 1359 my $qr = qr/^a(??{ $b."c" })$/; 1360 ok /$qr/, "RT #132772 - compile time qr//"; 1361 $qr = qr/(??{ $b."c" })$/; 1362 ok /^a$qr$/, "RT #132772 - compile time qr// compound"; 1363 $qr = qr/$a(??{ $b."c" })$/; 1364 ok /^$qr$/, "RT #132772 - run time qr//"; 1365 } 1366 1367 # RT #133687 1368 # mixing compile-time (?(?{code})) with run-time code blocks 1369 # was failing, because the second pass through the parser 1370 # (which compiles the runtime code blocks) was failing to adequately 1371 # mask the compile-time code blocks to shield them from a second 1372 # compile: /X(?{...})Y/ was being correctly masked as /X________Y/ 1373 # but /X(?(?{...}))Y/ was being incorrectly masked as 1374 # /X(?________)Y/ 1375 1376 { 1377 use re 'eval'; 1378 my $runtime_re = '(??{ "A"; })'; 1379 ok "ABC" =~ /^ $runtime_re (?(?{ 1; })BC) $/x, 'RT #133687 yes'; 1380 ok "ABC" =~ /^ $runtime_re (?(?{ 0; })xy|BC) $/x, 'RT #133687 yes|no'; 1381 } 1382 1383 # RT #134208 1384 # when the string being matched was an SvTEMP and the re_eval died, 1385 # the SV's magic was being restored after the SV was freed. 1386 # Give ASan something to play with. 1387 1388 { 1389 my $a; 1390 no warnings 'uninitialized'; 1391 eval { "$a $1" =~ /(?{ die })/ }; 1392 pass("SvTEMP 1"); 1393 eval { sub { " " }->() =~ /(?{ die })/ }; 1394 pass("SvTEMP 2"); 1395 } 1396 1397 # GH #19680 "panic: restartop in perl_run" 1398 # The eval block embedded within the (?{}) - but with no more code 1399 # following it - causes the next op after the OP_LEAVETRY to be NULL 1400 # (not even an OP_LEAVE). This confused the exception-catching and 1401 # rethrowing code: it was incorrectly rethrowing the exception rather 1402 # than just stopping at that point. 1403 1404 ok("test" =~ m{^ (?{eval {die "boo!"}}) test $}x, "GH #19680"); 1405 1406 # GH #19390 Segmentation fault with use re 'eval' 1407 # Similar to GH #19680 above, but exiting the eval via a syntax error 1408 # rather than throwing an exception 1409 1410 ok("" =~ m{^ (?{eval q{$x=}})}x, "GH #19390"); 1411 1412} # End of sub run_tests 1413 14141; 1415