regex_sets.t revision 1.8
1#!./perl
2
3# This tests (?[...]).  XXX These are just basic tests, as full ones would be
4# best done with an infrastructure change to allow getting out the inversion
5# list of the constructed set and then comparing it character by character
6# with the expected result.
7
8BEGIN {
9    chdir 't' if -d 't';
10    require './test.pl';
11    set_up_inc( '../lib','.','../ext/re' );
12    require './charset_tools.pl';
13    require './loc_tools.pl';
14}
15
16skip_all_without_unicode_tables();
17
18use strict;
19use warnings;
20
21$| = 1;
22
23use utf8;
24
25like("a", qr/(?[ [a]      # This is a comment
26                    ])/, 'Can ignore a comment');
27like("a", qr/(?[ [a]      # [[:notaclass:]]
28                    ])/, 'A comment isn\'t parsed');
29unlike(uni_to_native("\x85"), qr/(?[ \t�� ])/, 'NEL is white space');
30like(uni_to_native("\x85"), qr/(?[ \t + \�� ])/, 'can escape NEL to match');
31like(uni_to_native("\x85"), qr/(?[ [\��] ])/, '... including within nested []');
32like("\t", qr/(?[ \t + \�� ])/, 'can do basic union');
33like("\cK", qr/(?[ \s ])/, '\s matches \cK');
34unlike("\cK", qr/(?[ \s - \cK ])/, 'can do basic subtraction');
35like(" ", qr/(?[ \s - \cK ])/, 'can do basic subtraction');
36like(":", qr/(?[ [:] ])/, '[:] is not a posix class');
37unlike("\t", qr/(?[ ! \t ])/, 'can do basic complement');
38like("\t", qr/(?[ ! [ ^ \t ] ])/, 'can do basic complement');
39unlike("\r", qr/(?[ \t ])/, '\r doesn\'t match \t ');
40like("\r", qr/(?[ ! \t ])/, 'can do basic complement');
41like("0", qr/(?[ [:word:] & [:digit:] ])/, 'can do basic intersection');
42unlike("A", qr/(?[ [:word:] & [:digit:] ])/, 'can do basic intersection');
43like("0", qr/(?[[:word:]&[:digit:]])/, 'spaces around internal [] aren\'t required');
44
45like("a", qr/(?[ [a] | [b] ])/, '| means union');
46like("b", qr/(?[ [a] | [b] ])/, '| means union');
47unlike("c", qr/(?[ [a] | [b] ])/, '| means union');
48
49like("a", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works');
50unlike("b", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works');
51like("c", qr/(?[ [ab] ^ [bc] ])/, 'basic symmetric difference works');
52
53like("2", qr/(?[ ( ( \pN & ( [a] + [2] ) ) ) ])/, 'Nesting parens and grouping');
54unlike("a", qr/(?[ ( ( \pN & ( [a] + [2] ) ) ) ])/, 'Nesting parens and grouping');
55
56unlike("\x{17f}", qr/(?[ [k] + \p{Blk=ASCII} ])/i, '/i doesn\'t affect \p{}');
57like("\N{KELVIN SIGN}", qr/(?[ [k] + \p{Blk=ASCII} ])/i, '/i does affect literals');
58
59my $thai_or_lao = qr/(?[ \p{Thai} + \p{Lao} ])/;
60my $thai_or_lao_digit = qr/(?[ \p{Digit} & $thai_or_lao ])/;
61like("\N{THAI DIGIT ZERO}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
62unlike(chr(ord("\N{THAI DIGIT ZERO}") - 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
63like("\N{THAI DIGIT NINE}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
64unlike(chr(ord("\N{THAI DIGIT NINE}") + 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
65like("\N{LAO DIGIT ZERO}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
66unlike(chr(ord("\N{LAO DIGIT ZERO}") - 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
67like("\N{LAO DIGIT NINE}", $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
68unlike(chr(ord("\N{LAO DIGIT NINE}") + 1), $thai_or_lao_digit, 'embedded qr/(?[ ])/ works');
69
70my $ascii_word = qr/(?[ \w ])/a;
71my $ascii_digits_plus_all_of_arabic = qr/(?[ \p{Arabic} + \p{Digit} & $ascii_word ])/;
72like("9", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for ASCII in the set");
73unlike("A", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for ASCII not in the set");
74unlike("\N{BENGALI DIGIT ZERO}", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for non-ASCII not in either set");
75unlike("\N{BENGALI LETTER A}", $ascii_digits_plus_all_of_arabic, "/a, then interpolating and intersection works for non-ASCII in one set");
76like("\N{ARABIC LETTER HAMZA}", $ascii_digits_plus_all_of_arabic, "intersection has higher precedence than union");
77like("\N{EXTENDED ARABIC-INDIC DIGIT ZERO}", $ascii_digits_plus_all_of_arabic, "intersection has higher precedence than union");
78
79like("\r", qr/(?[ \p{lb=cr} ])/, '\r matches \p{lb=cr}');
80unlike("\r", qr/(?[ ! \p{lb=cr} ])/, '\r doesnt match ! \p{lb=cr}');
81like("\r", qr/(?[ ! ! \p{lb=cr} ])/, 'Two ! ! are the original');
82unlike("\r", qr/(?[ ! ! ! \p{lb=cr} ])/, 'Three ! ! ! are the complement');
83# left associatve
84
85my $kelvin = qr/(?[ \N{KELVIN SIGN} ])/;
86my $fold = qr/(?[ $kelvin ])/i;
87like("\N{KELVIN SIGN}", $kelvin, '"\N{KELVIN SIGN}" matches compiled qr/(?[ \N{KELVIN SIGN} ])/');
88unlike("K", $fold, "/i on outer (?[ ]) doesn't leak to interpolated one");
89unlike("k", $fold, "/i on outer (?[ ]) doesn't leak to interpolated one");
90
91my $kelvin_fold = qr/(?[ \N{KELVIN SIGN} ])/i;
92my $still_fold = qr/(?[ $kelvin_fold ])/;
93like("K", $still_fold, "/i on interpolated (?[ ]) is retained in outer without /i");
94like("k", $still_fold, "/i on interpolated (?[ ]) is retained in outer without /i");
95
96eval 'my $x = qr/(?[ [a] ])/; qr/(?[ $x ])/';
97is($@, "", 'qr/(?[ [a] ])/ can be interpolated');
98
99like("B", qr/(?[ [B] | ! ( [^B] ) ])/, "[perl #125892]");
100
101like("a", qr/(?[ (?#comment) [a]])/, "Can have (?#comments)");
102
103if (! is_miniperl() && locales_enabled('LC_CTYPE')) {
104    my $utf8_locale = find_utf8_ctype_locale;
105    SKIP: {
106        skip("No utf8 locale available on this platform", 8) unless $utf8_locale;
107
108        setlocale(&POSIX::LC_ALL, "C");
109        use locale;
110
111        $kelvin_fold = qr/(?[ \N{KELVIN SIGN} ])/i;
112        my $single_char_class = qr/(?[ \: ])/;
113
114        setlocale(&POSIX::LC_ALL, $utf8_locale);
115
116        like("\N{KELVIN SIGN}", $kelvin_fold,
117             '(?[ \N{KELVIN SIGN} ]) matches itself under /i in UTF8-locale');
118        like("K", $kelvin_fold,
119                '(?[ \N{KELVIN SIGN} ]) matches "K" under /i in UTF8-locale');
120        like("k", $kelvin_fold,
121                '(?[ \N{KELVIN SIGN} ]) matches "k" under /i in UTF8-locale');
122        like(":", $single_char_class,
123             '(?[ : ]) matches itself in UTF8-locale (a single character class)');
124
125        setlocale(&POSIX::LC_ALL, "C");
126
127        # These should generate warnings (the above 4 shouldn't), but like()
128        # suppresses them, so the warnings tests are in t/lib/warnings/regexec
129        $^W = 0;   # Suppress the warnings that occur when run by hand with
130                   # the -w option
131        like("\N{KELVIN SIGN}", $kelvin_fold,
132             '(?[ \N{KELVIN SIGN} ]) matches itself under /i in C locale');
133        like("K", $kelvin_fold,
134                '(?[ \N{KELVIN SIGN} ]) matches "K" under /i in C locale');
135        like("k", $kelvin_fold,
136                '(?[ \N{KELVIN SIGN} ]) matches "k" under /i in C locale');
137        like(":", $single_char_class,
138             '(?[ : ]) matches itself in C locale (a single character class)');
139    }
140}
141
142# Tests that no warnings given for valid Unicode digit range.
143my $arabic_digits = qr/(?[ [ �� - �� ] ])/;
144for my $char ("��", "��", "��") {
145    use charnames ();
146    my @got = capture_warnings(sub {
147                like("��", $arabic_digits, "Matches "
148                                                . charnames::viacode(ord $char));
149            });
150    is (@got, 0, "... without warnings");
151}
152
153# RT #126181: \cX behaves strangely inside (?[])
154{
155	no warnings qw(syntax regexp);
156
157	eval { $_ = '/(?[(\c]) /'; qr/$_/ };
158	like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic');
159	eval { $_ = '(?[\c#]' . "\n])"; qr/$_/ };
160	like($@, qr/^Unexpected/, '/(?[(\c]) / should not panic');
161	eval { $_ = '(?[(\c])'; qr/$_/ };
162	like($@, qr/^Syntax error/, '/(?[(\c])/ should be a syntax error');
163	eval { $_ = '(?[(\c]) ]\b'; qr/$_/ };
164	like($@, qr/^Unexpected/, '/(?[(\c]) ]\b/ should be a syntax error');
165	eval { $_ = '(?[\c[]](])'; qr/$_/ };
166	like($@, qr/^Unexpected/, '/(?[\c[]](])/ should be a syntax error');
167	like("\c#", qr/(?[\c#])/, '\c# should match itself');
168	like("\c[", qr/(?[\c[])/, '\c[ should match itself');
169	like("\c\ ", qr/(?[\c\])/, '\c\ should match itself');
170	like("\c]", qr/(?[\c]])/, '\c] should match itself');
171}
172
173# RT #126481 !! with syntax error panics
174{
175    fresh_perl_like('qr/(?[ ! ! (\w])/',
176                    qr/^Unmatched \(/, {},
177                    'qr/(?[ ! ! (\w])/ doesnt panic');
178
179    # The following didn't panic before, but easy to add this here with a
180    # paren between the !!
181    fresh_perl_like('qr/(?[ ! ( ! (\w)])/',
182                    qr/^Unmatched \(/, {},
183                    'qr/qr/(?[ ! ( ! (\w)])/');
184}
185
186{   # RT #129122
187    my $pat = '(?[ ( [ABC] - [B] ) + ( [abc] - [b] ) + [def] ])';
188    like("A", qr/$pat/, "'A' matches /$pat/");
189    unlike("B", qr/$pat/, "'B' doesn't match /$pat/");
190    like("C", qr/$pat/, "'C' matches /$pat/");
191    unlike("D", qr/$pat/, "'D' doesn't match /$pat/");
192    like("a", qr/$pat/, "'a' matches /$pat/");
193    unlike("b", qr/$pat/, "'b' doesn't match /$pat/");
194    like("c", qr/$pat/, "'c' matches /$pat/");
195    like("d", qr/$pat/, "'d' matches /$pat/");
196    like("e", qr/$pat/, "'e' matches /$pat/");
197    like("f", qr/$pat/, "'f' matches /$pat/");
198    unlike("g", qr/$pat/, "'g' doesn't match /$pat/");
199}
200
201{   # [perl #129322 ]  This crashed perl, so keep after the ones that don't
202    my $pat = '(?[[!]&[0]^[!]&[0]+[a]])';
203    like("a", qr/$pat/, "/$pat/ compiles and matches 'a'");
204}
205
206{   # [perl #132167]
207    fresh_perl_is(
208        'print "c" =~ qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b]))  ])/;',
209        1, {},
210        'qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b]))  ]) compiles and properly matches');
211    fresh_perl_is(
212        'print "b" =~ qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b]))  ])/;',
213        "", {},
214        'qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b]))  ]) compiles and properly matches');
215}
216
217{   # [perl #133889]    Caused assertion failure
218    fresh_perl_like(
219        'qr/(?[\P{Is0}])/', qr/\QUnknown user-defined property name "Is0"/, {}, "[perl #133889]");
220}
221
222{
223    my $s = qr/(?x:(?[ [ x ] ]))/;
224    like("x", qr/(?[ $s ])/ , "Modifier flags in interpolated set don't"
225                            . " disrupt");
226}
227
228{   # GH #16779
229    like("x", qr/(?[ (?^x:(?[ [x] ])) ])/ ,
230         "Can use '^' flag in a nested call");
231    like("x", qr/(?[ (?x-imns:(?[ [x] ])) ])/ ,
232         "Can use various flags in a nested call");
233}
234
235done_testing();
236
2371;
238