regex_sets.t revision 1.3
1#!./perl 2 3# This tests (?[...]). XXX These are just basic tests, as full ones would be 4# best done with an infrastructure change to allow getting out the inversion 5# list of the constructed set and then comparing it character by character 6# with the expected result. 7 8BEGIN { 9 chdir 't' if -d 't'; 10 @INC = ('../lib','.','../ext/re'); 11 require './test.pl'; 12 require './charset_tools.pl'; 13 require './loc_tools.pl'; 14 skip_all_without_unicode_tables(); 15} 16 17use strict; 18use warnings; 19 20$| = 1; 21 22use utf8; 23no warnings 'experimental::regex_sets'; 24 25like("a", qr/(?[ [a] # This is a comment 26 ])/, 'Can ignore a comment'); 27like("a", qr/(?[ [a] # [[:notaclass:]] 28 ])/, 'A comment isn\'t parsed'); 29unlike(uni_to_native("\x85"), qr/(?[ \t�� ])/, 'NEL is white space'); 30like(uni_to_native("\x85"), qr/(?[ \t + \�� ])/, 'can escape NEL to match'); 31like(uni_to_native("\x85"), qr/(?[ [\��] ])/, '... including within nested []'); 32like("\t", qr/(?[ \t + \�� ])/, 'can do basic union'); 33like("\cK", qr/(?[ \s ])/, '\s matches \cK'); 34unlike("\cK", qr/(?[ \s - \cK ])/, 'can do basic subtraction'); 35like(" ", qr/(?[ \s - \cK ])/, 'can do basic subtraction'); 36like(":", qr/(?[ [:] ])/, '[:] is not a posix class'); 37unlike("\t", qr/(?[ ! \t ])/, 'can do basic complement'); 38like("\t", qr/(?[ ! [ ^ \t ] ])/, 'can do basic complement'); 39unlike("\r", qr/(?[ \t ])/, '\r doesn\'t match \t '); 40like("\r", qr/(?[ ! \t ])/, 'can do basic complement'); 41like("0", qr/(?[ [:word:] & [:digit:] ])/, 'can do basic intersection'); 42unlike("A", qr/(?[ [:word:] & [:digit:] ])/, 'can do basic intersection'); 43like("0", qr/(?[[:word:]&[:digit:]])/, 'spaces around internal [] aren\'t required'); 44 45like("a", qr/(?[ [a] | [b] ])/, '| means union'); 46like("b", qr/(?[ [a] | [b] ])/, '| means union'); 47unlike("c", qr/(?[ [a] | [b] ])/, '| means union'); 48 49like("a", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works'); 50unlike("b", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works'); 51like("c", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works'); 52 53like("2", qr/(?[ ( ( \pN & ( [a] + [2] ) ) ) ])/, 'Nesting parens and grouping'); 54unlike("a", qr/(?[ ( ( \pN & ( [a] + [2] ) ) ) ])/, 'Nesting parens and grouping'); 55 56unlike("\x{17f}", qr/(?[ [k] + \p{Blk=ASCII} ])/i, '/i doesn\'t affect \p{}'); 57like("\N{KELVIN SIGN}", qr/(?[ [k] + \p{Blk=ASCII} ])/i, '/i does affect literals'); 58 59my $thai_or_lao = qr/(?[ \p{Thai} + \p{Lao} ])/; 60my $thai_or_lao_digit = qr/(?[ \p{Digit} & $thai_or_lao ])/; 61like("\N{THAI DIGIT ZERO}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); 62unlike(chr(ord("\N{THAI DIGIT ZERO}") - 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); 63like("\N{THAI DIGIT NINE}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); 64unlike(chr(ord("\N{THAI DIGIT NINE}") + 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); 65like("\N{LAO DIGIT ZERO}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); 66unlike(chr(ord("\N{LAO DIGIT ZERO}") - 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); 67like("\N{LAO DIGIT NINE}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); 68unlike(chr(ord("\N{LAO DIGIT NINE}") + 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works'); 69 70my $ascii_word = qr/(?[ \w ])/a; 71my $ascii_digits_plus_all_of_arabic = qr/(?[ \p{Arabic} + \p{Digit} & $ascii_word ])/; 72like("9", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for ASCII in the set"); 73unlike("A", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for ASCII not in the set"); 74unlike("\N{BENGALI DIGIT ZERO}", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for non-ASCII not in either set"); 75unlike("\N{BENGALI LETTER A}", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for non-ASCII in one set"); 76like("\N{ARABIC LETTER HAMZA}", $ascii_digits_plus_all_of_arabic, "intersection has higher precedence than union"); 77like("\N{EXTENDED ARABIC-INDIC DIGIT ZERO}", $ascii_digits_plus_all_of_arabic, "intersection has higher precedence than union"); 78 79like("\r", qr/(?[ \p{lb=cr} ])/, '\r matches \p{lb=cr}'); 80unlike("\r", qr/(?[ ! \p{lb=cr} ])/, '\r doesnt match ! \p{lb=cr}'); 81like("\r", qr/(?[ ! ! \p{lb=cr} ])/, 'Two ! ! are the original'); 82unlike("\r", qr/(?[ ! ! ! \p{lb=cr} ])/, 'Three ! ! ! are the complement'); 83# left associatve 84 85my $kelvin = qr/(?[ \N{KELVIN SIGN} ])/; 86my $fold = qr/(?[ $kelvin ])/i; 87like("\N{KELVIN SIGN}", $kelvin, '"\N{KELVIN SIGN}" matches compiled qr/(?[ \N{KELVIN SIGN} ])/'); 88unlike("K", $fold, "/i on outer (?[ ]) doesn't leak to interpolated one"); 89unlike("k", $fold, "/i on outer (?[ ]) doesn't leak to interpolated one"); 90 91my $kelvin_fold = qr/(?[ \N{KELVIN SIGN} ])/i; 92my $still_fold = qr/(?[ $kelvin_fold ])/; 93like("K", $still_fold, "/i on interpolated (?[ ]) is retained in outer without /i"); 94like("k", $still_fold, "/i on interpolated (?[ ]) is retained in outer without /i"); 95 96eval 'my $x = qr/(?[ [a] ])/; qr/(?[ $x ])/'; 97is($@, "", 'qr/(?[ [a] ])/ can be interpolated'); 98 99like("B", qr/(?[ [B] | ! ( [^B] ) ])/, "[perl #125892]"); 100 101like("a", qr/(?[ (?#comment) [a]])/, "Can have (?#comments)"); 102 103if (! is_miniperl() && locales_enabled('LC_CTYPE')) { 104 my $utf8_locale = find_utf8_ctype_locale; 105 SKIP: { 106 skip("No utf8 locale available on this platform", 8) unless $utf8_locale; 107 108 setlocale(&POSIX::LC_ALL, "C"); 109 use locale; 110 111 $kelvin_fold = qr/(?[ \N{KELVIN SIGN} ])/i; 112 my $single_char_class = qr/(?[ \: ])/; 113 114 setlocale(&POSIX::LC_ALL, $utf8_locale); 115 116 like("\N{KELVIN SIGN}", $kelvin_fold, 117 '(?[ \N{KELVIN SIGN} ]) matches itself under /i in UTF8-locale'); 118 like("K", $kelvin_fold, 119 '(?[ \N{KELVIN SIGN} ]) matches "K" under /i in UTF8-locale'); 120 like("k", $kelvin_fold, 121 '(?[ \N{KELVIN SIGN} ]) matches "k" under /i in UTF8-locale'); 122 like(":", $single_char_class, 123 '(?[ : ]) matches itself in UTF8-locale (a single character class)'); 124 125 setlocale(&POSIX::LC_ALL, "C"); 126 127 # These should generate warnings (the above 4 shouldn't), but like() 128 # suppresses them, so the warnings tests are in t/lib/warnings/regexec 129 $^W = 0; # Suppress the warnings that occur when run by hand with 130 # the -w option 131 like("\N{KELVIN SIGN}", $kelvin_fold, 132 '(?[ \N{KELVIN SIGN} ]) matches itself under /i in C locale'); 133 like("K", $kelvin_fold, 134 '(?[ \N{KELVIN SIGN} ]) matches "K" under /i in C locale'); 135 like("k", $kelvin_fold, 136 '(?[ \N{KELVIN SIGN} ]) matches "k" under /i in C locale'); 137 like(":", $single_char_class, 138 '(?[ : ]) matches itself in C locale (a single character class)'); 139 } 140} 141 142# Tests that no warnings given for valid Unicode digit range. 143my $arabic_digits = qr/(?[ [ �� - �� ] ])/; 144for my $char ("��", "��", "��") { 145 use charnames (); 146 my @got = capture_warnings(sub { 147 like("��", $arabic_digits, "Matches " 148 . charnames::viacode(ord $char)); 149 }); 150 is (@got, 0, "... without warnings"); 151} 152 153# RT #126181: \cX behaves strangely inside (?[]) 154{ 155 no warnings qw(syntax regexp); 156 157 eval { $_ = '/(?[(\c]) /'; qr/$_/ }; 158 like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic'); 159 eval { $_ = '(?[\c#]' . "\n])"; qr/$_/ }; 160 like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic'); 161 eval { $_ = '(?[(\c])'; qr/$_/ }; 162 like($@, qr/^Syntax error/, '/(?[(\c])/ should be a syntax error'); 163 eval { $_ = '(?[(\c]) ]\b'; qr/$_/ }; 164 like($@, qr/^Syntax error/, '/(?[(\c]) ]\b/ should be a syntax error'); 165 eval { $_ = '(?[\c[]](])'; qr/$_/ }; 166 like($@, qr/^Syntax error/, '/(?[\c[]](])/ should be a syntax error'); 167 like("\c#", qr/(?[\c#])/, '\c# should match itself'); 168 like("\c[", qr/(?[\c[])/, '\c[ should match itself'); 169 like("\c\ ", qr/(?[\c\])/, '\c\ should match itself'); 170 like("\c]", qr/(?[\c]])/, '\c] should match itself'); 171} 172 173# RT #126481 !! with syntax error panics 174{ 175 fresh_perl_like('no warnings "experimental::regex_sets"; qr/(?[ ! ! (\w])/', 176 qr/^Unmatched \(/, {}, 177 'qr/(?[ ! ! (\w])/ doesnt panic'); 178 # The following didn't panic before, but easy to add this here with a 179 # paren between the !! 180 fresh_perl_like('no warnings "experimental::regex_sets";qr/(?[ ! ( ! (\w)])/', 181 qr/^Unmatched \(/, {}, 182 'qr/qr/(?[ ! ( ! (\w)])/'); 183} 184 185{ # RT #129122 186 my $pat = '(?[ ( [ABC] - [B] ) + ( [abc] - [b] ) + [def] ])'; 187 like("A", qr/$pat/, "'A' matches /$pat/"); 188 unlike("B", qr/$pat/, "'B' doesn't match /$pat/"); 189 like("C", qr/$pat/, "'C' matches /$pat/"); 190 unlike("D", qr/$pat/, "'D' doesn't match /$pat/"); 191 like("a", qr/$pat/, "'a' matches /$pat/"); 192 unlike("b", qr/$pat/, "'b' doesn't match /$pat/"); 193 like("c", qr/$pat/, "'c' matches /$pat/"); 194 like("d", qr/$pat/, "'d' matches /$pat/"); 195 like("e", qr/$pat/, "'e' matches /$pat/"); 196 like("f", qr/$pat/, "'f' matches /$pat/"); 197 unlike("g", qr/$pat/, "'g' doesn't match /$pat/"); 198} 199 200done_testing(); 201 2021; 203