1#!./perl
2
3# Checks if the parser behaves correctly in edge cases
4# (including weird syntax errors)
5
6BEGIN {
7    chdir 't' if -d 't';
8    require './test.pl';
9    require './charset_tools.pl';
10    skip_all_without_unicode_tables();
11}
12
13plan (tests => 58);
14
15use utf8;
16use open qw( :utf8 :std );
17
18is *t��st, "*main::t��st", "sanity check.";
19ok $::{"t��st"}, "gets the right glob in the stash.";
20
21my $glob_by_sub = sub { *������������::method }->();
22
23is *������������::method, "*������������::method", "glob stringy works";
24is "" . *������������::method, "*������������::method", "glob stringify-through-concat works";
25is $glob_by_sub, "*������������::method", "glob stringy works";
26is "" . $glob_by_sub, "*������������::method", "";
27
28sub gimme_glob {
29    no strict 'refs';
30    is *{$_[0]}, "*main::$_[0]";
31    *{$_[0]};
32}
33
34is "" . gimme_glob("������"), "*main::������";
35$a = *������;
36is "" . $a, "*main::������";
37
38*{gimme_glob("������")} = sub {};
39
40{
41    ok defined *{"������"}{CODE};
42    ok !defined *{"\344\270\213\351\203\216"}{CODE};
43}
44
45$L��on = 1;
46is ${*L��on{SCALAR}}, 1, "scalar define in the right glob,";
47ok !${*{"L\303\250on"}{SCALAR}}, "..and nothing in the wrong one.";
48
49my $a = "foo" . chr(190);
50my $b = $a    . chr(256);
51chop $b; # $b is $a with utf8 on
52
53is $a, $b, '$a equals $b';
54
55*$b = sub { 5 };
56
57is eval { main->$a }, 5, q!$a can call $b's sub!;
58ok !$@, "..and there's no error.";
59
60my $c = $b;
61utf8::encode($c);
62ok $b ne $c, '$b unequal $c';
63eval { main->$c };
64ok $@, q!$c can't call $b's sub.!;
65
66# Now define another sub under the downgraded name:
67*$a = sub { 6 };
68# Call it:
69is eval { main->$a }, 6, "Adding a new sub to *a and calling it works,";
70ok !$@, "..without errors.";
71eval { main->$c };
72ok $@, "but it's still unreachable through *c";
73
74*$b = \10;
75is ${*$a{SCALAR}}, 10;
76is ${*$b{SCALAR}}, 10;
77is ${*$c{SCALAR}}, undef;
78
79opendir F����, ".";
80closedir F����;
81::ok($::{"F����"}, "Bareword generates the right glob.");
82::ok(!$::{"F\303\222\303\222"});
83
84sub ������������������ { 1 }
85
86ok $::{"������������������"}, "non-const sub declarations generate the right glob";
87is $::{"������������������"}->(), 1;
88
89sub ��� () { 1 }
90
91is grep({ $_ eq "\x{539f}"     } keys %::), 1, "Constant subs generate the right glob.";
92is grep({ $_ eq "\345\216\237" } keys %::), 0;
93
94#These should probably go elsewhere.
95eval q{ sub wr��ng1 (_$); wr��ng1(1,2) };
96like( $@, qr/Malformed prototype for main::wr��ng1/, 'Malformed prototype croak is clean.' );
97
98eval q{ sub ����::������ ($__); ����::������(1,2) };
99like( $@, qr/Malformed prototype for ����::������/ );
100
101our $��� = 10;
102is $���, 10, "our works";
103is $main::���, 10, "...as does getting the same variable through the fully qualified name";
104is ${"main::\345\225\217"}, undef, "..and using the encoded form doesn't";
105
106{
107    use charnames qw( :full );
108
109    eval qq! my \$\x{30cb} \N{DROMEDARY CAMEL} !;
110    $@ =~ s/eval \d+/eval 11/;
111    is $@, 'Unrecognized character \x{1f42a}; marked by <-- HERE after  my $��� <-- HERE near column 8 at (eval 11) line 1.
112', "'Unrecognized character' croak is UTF-8 clean";
113
114    eval "q\0foobar\0 \x{FFFF}+1";
115    $@ =~ s/eval \d+/eval 11/;
116    is(
117        $@,
118       "Unrecognized character \\x{ffff}; marked by <-- HERE after q\0foobar\0 <-- HERE near column 11 at (eval 11) line 1.\n",
119       "...and nul-clean"
120    );
121
122    {
123        use re 'eval';
124        my $f = qq{(?{\$���+ 1; \x{1F42A} })};
125        eval { "a" =~ /^a$f/ };
126        my $e = $@;
127        $e =~ s/eval \d+/eval 11/;
128        is(
129            $e,
130            "Unrecognized character \\x{1f42a}; marked by <-- HERE after (?{\$���+ 1; <-- HERE near column 13 at (eval 11) line 1.\n",
131            "Messages from a re-eval are UTF-8 clean"
132        );
133
134        $f = qq{(?{q\0foobar\0 \x{FFFF}+1 })};
135        eval { "a" =~ /^a$f/ };
136        my $e = $@;
137        $e =~ s/eval \d+/eval 11/;
138        is(
139            $e,
140            "Unrecognized character \\x{ffff}; marked by <-- HERE after q\x{0}foobar\x{0} <-- HERE near column 16 at (eval 11) line 1.\n",
141           "...and nul-clean"
142        );
143    }
144    
145    {
146        eval qq{\$���+ 1; \x{1F42A}};
147        $@ =~ s/eval \d+/eval 11/;
148        is(
149            $@,
150            "Unrecognized character \\x{1f42a}; marked by <-- HERE after \$���+ 1; <-- HERE near column 8 at (eval 11) line 1.\n",
151            "Unrecognized character error doesn't cut off in the middle of characters"
152        )
153    }
154
155}
156
157{
158    use feature 'state';
159    for ( qw( my state our ) ) {
160        local $@;
161        eval "$_ ��������� $x = 1;";
162        like $@, qr/No such class ���������/u, "'No such class' warning for $_ is UTF-8 clean";
163    }
164}
165
166{
167    local $@;
168    eval "our \$main::\x{30cb};";
169    like $@, qr!No package name allowed for variable \$main::\x{30cb} in "our"!, "'No such package name allowed for variable' is UTF-8 clean";
170}
171
172{
173    use feature 'state';
174    local $@;
175    for ( qw( my state ) ) {
176        eval "$_ \$::\x{30cb};";
177        like $@, qr!"$_" variable \$::\x{30cb} can't be in a package!, qq!'"$_" variable %s can't be in a package' is UTF-8 clean!;
178    }
179}
180
181{
182    local $@;
183    eval qq!print \x{30cb}, "comma""!;
184    like $@, qr/No comma allowed after filehandle/, "No comma allowed after filehandle triggers correctly for UTF-8 filehandles.";
185}
186
187# tests for "Bad name"
188eval q{ ���������::$bar };
189like( $@, qr/Bad name after ���������::/, 'Bad name after ���������::' );
190eval q{ ���������''bar };
191like( $@, qr/Bad name after ���������'/, 'Bad name after ���������\'' );
192
193{
194    no warnings 'utf8';
195    local $SIG{__WARN__} = sub { }; # The eval will also output a warning,
196                                    # which we ignore
197    my $malformed_to_be = ($::IS_EBCDIC)   # Overlong sequence
198                           ? "\x{74}\x{41}"
199                           : "\x{c0}\x{a0}";
200    CORE::evalbytes "use charnames ':full'; use utf8; my \$x = \"\\N{abc$malformed_to_be}\"";
201    like( $@, qr/Malformed UTF-8 character \(fatal\) at /, 'Malformed UTF-8 input to \N{}');
202}
203
204# RT# 124216: Perl_sv_clear: Assertion
205# If a parsing error occurred during a forced token within an interpolated
206# context, the stack unwinding failed to restore PL_lex_defer and so after
207# error recovery the state restored after the forced token was processed
208# was the wrong one, resulting in the lexer thinking we're still inside a
209# quoted string and things getting freed multiple times.
210#
211# The \x{3030} char isn't a legal var name, and this triggers the error.
212#
213# NB: this only failed if the closing quote of the interpolated string is
214# the last char of the file (i.e. no trailing \n).
215
216{
217    my $bad = "\x{3030}";
218    # Write out the individual utf8 bytes making up \x{3030}. This
219    # avoids 'Wide char in print' warnings from test.pl. (We may still
220    # get that warning when compiling the prog itself, since the
221    # error it prints to stderr contains a wide char.)
222    utf8::encode($bad);
223
224    fresh_perl_like(qq{use utf8; "\$$bad"},
225        qr/
226            \A
227            ( \QWide character in print at - line 1.\E\n )?
228            \Qsyntax error at - line 1, near \E"\$.*"\n
229            \QExecution of - aborted due to compilation errors.\E\z
230        /xm,
231
232        {stderr => 1}, "RT# 124216");
233}
234
235SKIP: {
236
237    use Config;
238    if ($Config{uvsize} < 8) {
239        skip("test is only valid on 64-bit ints", 4);
240    }
241    else {
242        my $a;
243        my $b;
244
245        # This caused a memory fault [perl #128738]
246        $b = byte_utf8a_to_utf8n("\xFE\x82\x80\x80\x80\x80\x80"); # 0x80000000
247        eval "\$a = q ${b}abc${b}";
248        is $@, "",
249               "No errors in eval'ing a string with large code point delimiter";
250        is $a, 'abc',
251               "Got expected result in eval'ing a string with a large code point"
252            . " delimiter";
253
254        $b = byte_utf8a_to_utf8n("\xFE\x83\xBF\xBF\xBF\xBF\xBF"); # 0xFFFFFFFF
255        eval "\$a = q ${b}Hello, \\\\whirled!${b}";
256        is $@, "",
257               "No errors in eval'ing a string with large code point delimiter";
258        is $a, 'Hello, \whirled!',
259               "Got expected result in eval'ing a string with a large code point"
260            . " delimiter";
261    }
262}
263
264fresh_perl_is(<<'EOS', <<'EXPECT', {}, 'no panic in pad_findmy_pvn (#134061)');
265use utf8;
266eval "sort \x{100}%";
267die $@;
268EOS
269syntax error at (eval 1) line 1, at EOF
270Execution of (eval 1) aborted due to compilation errors.
271EXPECT
272
273# New tests go here ^^^^^
274
275# Keep this test last, as it will mess up line number reporting for any
276# subsequent tests.
277
278<<END;
279${
280#line 57
281qq ���� }
282END
283is __LINE__, 59, '#line directive and qq with uni delims inside heredoc';
284
285# Put new tests above the line number tests.
286