1/*    toke.c
2 *
3 *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 *    You may distribute under the terms of either the GNU General Public
7 *    License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 *  'It all comes from here, the stench and the peril.'    --Frodo
13 *
14 *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15 */
16
17/*
18 * This file is the lexer for Perl.  It's closely linked to the
19 * parser, perly.y.
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
24/*
25=head1 Lexer interface
26This is the lower layer of the Perl parser, managing characters and tokens.
27
28=for apidoc AmnU|yy_parser *|PL_parser
29
30Pointer to a structure encapsulating the state of the parsing operation
31currently in progress.  The pointer can be locally changed to perform
32a nested parse without interfering with the state of an outer parse.
33Individual members of C<PL_parser> have their own documentation.
34
35=cut
36*/
37
38#include "EXTERN.h"
39#define PERL_IN_TOKE_C
40#include "perl.h"
41#include "invlist_inline.h"
42
43#define new_constant(a,b,c,d,e,f,g, h)	\
44        S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
45
46#define pl_yylval	(PL_parser->yylval)
47
48/* XXX temporary backwards compatibility */
49#define PL_lex_brackets		(PL_parser->lex_brackets)
50#define PL_lex_allbrackets	(PL_parser->lex_allbrackets)
51#define PL_lex_fakeeof		(PL_parser->lex_fakeeof)
52#define PL_lex_brackstack	(PL_parser->lex_brackstack)
53#define PL_lex_casemods		(PL_parser->lex_casemods)
54#define PL_lex_casestack        (PL_parser->lex_casestack)
55#define PL_lex_dojoin		(PL_parser->lex_dojoin)
56#define PL_lex_formbrack        (PL_parser->lex_formbrack)
57#define PL_lex_inpat		(PL_parser->lex_inpat)
58#define PL_lex_inwhat		(PL_parser->lex_inwhat)
59#define PL_lex_op		(PL_parser->lex_op)
60#define PL_lex_repl		(PL_parser->lex_repl)
61#define PL_lex_starts		(PL_parser->lex_starts)
62#define PL_lex_stuff		(PL_parser->lex_stuff)
63#define PL_multi_start		(PL_parser->multi_start)
64#define PL_multi_open		(PL_parser->multi_open)
65#define PL_multi_close		(PL_parser->multi_close)
66#define PL_preambled		(PL_parser->preambled)
67#define PL_linestr		(PL_parser->linestr)
68#define PL_expect		(PL_parser->expect)
69#define PL_copline		(PL_parser->copline)
70#define PL_bufptr		(PL_parser->bufptr)
71#define PL_oldbufptr		(PL_parser->oldbufptr)
72#define PL_oldoldbufptr		(PL_parser->oldoldbufptr)
73#define PL_linestart		(PL_parser->linestart)
74#define PL_bufend		(PL_parser->bufend)
75#define PL_last_uni		(PL_parser->last_uni)
76#define PL_last_lop		(PL_parser->last_lop)
77#define PL_last_lop_op		(PL_parser->last_lop_op)
78#define PL_lex_state		(PL_parser->lex_state)
79#define PL_rsfp			(PL_parser->rsfp)
80#define PL_rsfp_filters		(PL_parser->rsfp_filters)
81#define PL_in_my		(PL_parser->in_my)
82#define PL_in_my_stash		(PL_parser->in_my_stash)
83#define PL_tokenbuf		(PL_parser->tokenbuf)
84#define PL_multi_end		(PL_parser->multi_end)
85#define PL_error_count		(PL_parser->error_count)
86
87#  define PL_nexttoke		(PL_parser->nexttoke)
88#  define PL_nexttype		(PL_parser->nexttype)
89#  define PL_nextval		(PL_parser->nextval)
90
91
92#define SvEVALED(sv) \
93    (SvTYPE(sv) >= SVt_PVNV \
94    && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
95
96static const char ident_too_long[] = "Identifier too long";
97static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
98
99#  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100
101#define XENUMMASK  0x3f
102#define XFAKEEOF   0x40
103#define XFAKEBRACK 0x80
104
105#ifdef USE_UTF8_SCRIPTS
106#   define UTF cBOOL(!IN_BYTES)
107#else
108#   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
109#endif
110
111/* The maximum number of characters preceding the unrecognized one to display */
112#define UNRECOGNIZED_PRECEDE_COUNT 10
113
114/* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116#define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
117
118/* Non-identifier plugin infix operators are allowed any printing character
119 * except spaces, digits, or identifier chars
120 */
121#define isPLUGINFIX(c) (c && !isSPACE(c) && !isDIGIT(c) && !isALPHA(c))
122/* Plugin infix operators may not begin with a quote symbol */
123#define isPLUGINFIX_FIRST(c) (isPLUGINFIX(c) && c != '"' && c != '\'')
124
125#define PLUGINFIX_IS_ENABLED  UNLIKELY(PL_infix_plugin != &Perl_infix_plugin_standard)
126
127#define SPACE_OR_TAB(c) isBLANK_A(c)
128
129#define HEXFP_PEEK(s)     \
130    (((s[0] == '.') && \
131      (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
132     isALPHA_FOLD_EQ(s[0], 'p'))
133
134/* LEX_* are values for PL_lex_state, the state of the lexer.
135 * They are arranged oddly so that the guard on the switch statement
136 * can get by with a single comparison (if the compiler is smart enough).
137 *
138 * These values refer to the various states within a sublex parse,
139 * i.e. within a double quotish string
140 */
141
142/* #define LEX_NOTPARSING		11 is done in perl.h. */
143
144#define LEX_NORMAL		10 /* normal code (ie not within "...")     */
145#define LEX_INTERPNORMAL	 9 /* code within a string, eg "$foo[$x+1]" */
146#define LEX_INTERPCASEMOD	 8 /* expecting a \U, \Q or \E etc          */
147#define LEX_INTERPPUSH		 7 /* starting a new sublex parse level     */
148#define LEX_INTERPSTART		 6 /* expecting the start of a $var         */
149
150                                   /* at end of code, eg "$x" followed by:  */
151#define LEX_INTERPEND		 5 /* ... eg not one of [, { or ->          */
152#define LEX_INTERPENDMAYBE	 4 /* ... eg one of [, { or ->              */
153
154#define LEX_INTERPCONCAT	 3 /* expecting anything, eg at start of
155                                        string or after \E, $foo, etc       */
156#define LEX_INTERPCONST		 2 /* NOT USED */
157#define LEX_FORMLINE		 1 /* expecting a format line               */
158
159/* returned to yyl_try() to request it to retry the parse loop, expected to only
160   be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
161   can also return it.
162
163   yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
164   other token values are 258 or higher (see perly.h), so -1 should be
165   a safe value here.
166*/
167#define YYL_RETRY (-1)
168
169#ifdef DEBUGGING
170static const char* const lex_state_names[] = {
171    "KNOWNEXT",
172    "FORMLINE",
173    "INTERPCONST",
174    "INTERPCONCAT",
175    "INTERPENDMAYBE",
176    "INTERPEND",
177    "INTERPSTART",
178    "INTERPPUSH",
179    "INTERPCASEMOD",
180    "INTERPNORMAL",
181    "NORMAL"
182};
183#endif
184
185#include "keywords.h"
186
187/* CLINE is a macro that ensures PL_copline has a sane value */
188
189#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190
191/*
192 * Convenience functions to return different tokens and prime the
193 * lexer for the next token.  They all take an argument.
194 *
195 * TOKEN        : generic token (used for '(', DOLSHARP, etc)
196 * OPERATOR     : generic operator
197 * AOPERATOR    : assignment operator
198 * PREBLOCK     : beginning the block after an if, while, foreach, ...
199 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
200 * PREREF       : *EXPR where EXPR is not a simple identifier
201 * TERM         : expression term
202 * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
203 * LOOPX        : loop exiting command (goto, last, dump, etc)
204 * FTST         : file test operator
205 * FUN0         : zero-argument function
206 * FUN0OP       : zero-argument function, with its op created in this file
207 * FUN1         : not used, except for not, which isn't a UNIOP
208 * BOop         : bitwise or or xor
209 * BAop         : bitwise and
210 * BCop         : bitwise complement
211 * SHop         : shift operator
212 * PWop         : power operator
213 * PMop         : pattern-matching operator
214 * Aop          : addition-level operator
215 * AopNOASSIGN  : addition-level operator that is never part of .=
216 * Mop          : multiplication-level operator
217 * ChEop        : chaining equality-testing operator
218 * NCEop        : non-chaining comparison operator at equality precedence
219 * ChRop        : chaining relational operator <= != gt
220 * NCRop        : non-chaining relational operator isa
221 *
222 * Also see LOP and lop() below.
223 */
224
225#ifdef DEBUGGING /* Serve -DT. */
226#   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
227#else
228#   define REPORT(retval) (retval)
229#endif
230
231#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
232#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
233#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
234#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
235#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
236#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
237#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
238#define PHASERBLOCK(f) return (pl_yylval.ival=f, PL_expect = XBLOCK, PL_bufptr = s, REPORT((int)PHASER))
239#define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
240#define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
241                         pl_yylval.ival=f, \
242                         PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
243                         REPORT((int)LOOPEX))
244#define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
245#define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
246#define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
247#define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
248#define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
249#define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
250#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
251                       REPORT(PERLY_TILDE)
252#define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
253#define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
254#define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
255#define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
256#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
257#define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
258#define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
259#define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
260#define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
261#define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
262
263/* This bit of chicanery makes a unary function followed by
264 * a parenthesis into a function with one argument, highest precedence.
265 * The UNIDOR macro is for unary functions that can be followed by the //
266 * operator (such as C<shift // 0>).
267 */
268#define UNI3(f,x,have_x) { \
269        pl_yylval.ival = f; \
270        if (have_x) PL_expect = x; \
271        PL_bufptr = s; \
272        PL_last_uni = PL_oldbufptr; \
273        PL_last_lop_op = (f) < 0 ? -(f) : (f); \
274        if (*s == '(') \
275            return REPORT( (int)FUNC1 ); \
276        s = skipspace(s); \
277        return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278        }
279#define UNI(f)    UNI3(f,XTERM,1)
280#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
281#define UNIPROTO(f,optional) { \
282        if (optional) PL_last_uni = PL_oldbufptr; \
283        OPERATOR(f); \
284        }
285
286#define UNIBRACK(f) UNI3(f,0,0)
287
288/* return has special case parsing.
289 *
290 * List operators have low precedence. Functions have high precedence.
291 * Every built in, *except return*, if written with () around its arguments, is
292 * parsed as a function. Hence every other list built in:
293 *
294 * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
295 * 429
296 * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
297 * 639
298 * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
299 * Useless use of a constant (2) in void context at -e line 1.
300 * Useless use of a constant (4) in void context at -e line 1.
301 *
302 * $
303 *
304 * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
305 * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
306 *
307 * Whereas return:
308 *
309 * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
310 * 2
311 * 4
312 * 9
313 * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
314 * Useless use of a constant (2) in void context at -e line 1.
315 * Useless use of a constant (4) in void context at -e line 1.
316 * 9
317 * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
318 * Useless use of a constant (2) in void context at -e line 1.
319 * Useless use of a constant (4) in void context at -e line 1.
320 * 9
321 * $
322 *
323 * and:
324 * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
325 * 2
326 * 4
327 * 6
328 *
329 * This last example is what we expect, but it's clearly inconsistent with how
330 * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
331 * followed.
332 *
333 *
334 * Perl 3 attempted to be consistent:
335 *
336 *   The rules are more consistent about where parens are needed and
337 *   where they are not.  In particular, unary operators and list operators now
338 *   behave like functions if they're called like functions.
339 *
340 * However, the behaviour for return was reverted to the "old" parsing with
341 * patches 9-12:
342 *
343 *   The construct
344 *   return (1,2,3);
345 *   did not do what was expected, since return was swallowing the
346 *   parens in order to consider itself a function.  The solution,
347 *   since return never wants any trailing expression such as
348 *   return (1,2,3) + 2;
349 *   is to simply make return an exception to the paren-makes-a-function
350 *   rule, and treat it the way it always was, so that it doesn't
351 *   strip the parens.
352 *
353 * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
354 * LOP(OP_RETURN, XTERM);
355 *
356 * and constructs such as
357 *
358 *     return (Internals::V())[2]
359 *
360 * turn into syntax errors
361 */
362
363#define OLDLOP(f) \
364        do { \
365            if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
366                PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
367            pl_yylval.ival = (f); \
368            PL_expect = XTERM; \
369            PL_bufptr = s; \
370            return (int)LSTOP; \
371        } while(0)
372
373#define COPLINE_INC_WITH_HERELINES		    \
374    STMT_START {				     \
375        CopLINE_inc(PL_curcop);			      \
376        if (PL_parser->herelines)		       \
377            CopLINE(PL_curcop) += PL_parser->herelines, \
378            PL_parser->herelines = 0;			 \
379    } STMT_END
380/* Called after scan_str to update CopLINE(PL_curcop), but only when there
381 * is no sublex_push to follow. */
382#define COPLINE_SET_FROM_MULTI_END	      \
383    STMT_START {			       \
384        CopLINE_set(PL_curcop, PL_multi_end);	\
385        if (PL_multi_end != PL_multi_start)	 \
386            PL_parser->herelines = 0;		  \
387    } STMT_END
388
389
390/* A file-local structure for passing around information about subroutines and
391 * related definable words */
392struct code {
393    SV *sv;
394    CV *cv;
395    GV *gv, **gvp;
396    OP *rv2cv_op;
397    PADOFFSET off;
398    bool lex;
399};
400
401static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
402
403#ifdef DEBUGGING
404
405/* how to interpret the pl_yylval associated with the token */
406enum token_type {
407    TOKENTYPE_NONE,
408    TOKENTYPE_IVAL,
409    TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
410    TOKENTYPE_PVAL,
411    TOKENTYPE_OPVAL
412};
413
414#define DEBUG_TOKEN(Type, Name)                                         \
415    { Name, TOKENTYPE_##Type, #Name }
416
417static struct debug_tokens {
418    const int token;
419    enum token_type type;
420    const char *name;
421} const debug_tokens[] =
422{
423    DEBUG_TOKEN (OPNUM, ADDOP),
424    DEBUG_TOKEN (NONE,  ANDAND),
425    DEBUG_TOKEN (NONE,  ANDOP),
426    DEBUG_TOKEN (NONE,  ARROW),
427    DEBUG_TOKEN (OPNUM, ASSIGNOP),
428    DEBUG_TOKEN (OPNUM, BITANDOP),
429    DEBUG_TOKEN (OPNUM, BITOROP),
430    DEBUG_TOKEN (OPNUM, CHEQOP),
431    DEBUG_TOKEN (OPNUM, CHRELOP),
432    DEBUG_TOKEN (NONE,  COLONATTR),
433    DEBUG_TOKEN (NONE,  DOLSHARP),
434    DEBUG_TOKEN (NONE,  DORDOR),
435    DEBUG_TOKEN (IVAL,  DOTDOT),
436    DEBUG_TOKEN (NONE,  FORMLBRACK),
437    DEBUG_TOKEN (NONE,  FORMRBRACK),
438    DEBUG_TOKEN (OPNUM, FUNC),
439    DEBUG_TOKEN (OPNUM, FUNC0),
440    DEBUG_TOKEN (OPVAL, FUNC0OP),
441    DEBUG_TOKEN (OPVAL, FUNC0SUB),
442    DEBUG_TOKEN (OPNUM, FUNC1),
443    DEBUG_TOKEN (NONE,  HASHBRACK),
444    DEBUG_TOKEN (IVAL,  KW_CATCH),
445    DEBUG_TOKEN (IVAL,  KW_CLASS),
446    DEBUG_TOKEN (IVAL,  KW_CONTINUE),
447    DEBUG_TOKEN (IVAL,  KW_DEFAULT),
448    DEBUG_TOKEN (IVAL,  KW_DO),
449    DEBUG_TOKEN (IVAL,  KW_ELSE),
450    DEBUG_TOKEN (IVAL,  KW_ELSIF),
451    DEBUG_TOKEN (IVAL,  KW_FIELD),
452    DEBUG_TOKEN (IVAL,  KW_GIVEN),
453    DEBUG_TOKEN (IVAL,  KW_FOR),
454    DEBUG_TOKEN (IVAL,  KW_FORMAT),
455    DEBUG_TOKEN (IVAL,  KW_IF),
456    DEBUG_TOKEN (IVAL,  KW_LOCAL),
457    DEBUG_TOKEN (IVAL,  KW_METHOD_anon),
458    DEBUG_TOKEN (IVAL,  KW_METHOD_named),
459    DEBUG_TOKEN (IVAL,  KW_MY),
460    DEBUG_TOKEN (IVAL,  KW_PACKAGE),
461    DEBUG_TOKEN (IVAL,  KW_REQUIRE),
462    DEBUG_TOKEN (IVAL,  KW_SUB_anon),
463    DEBUG_TOKEN (IVAL,  KW_SUB_anon_sig),
464    DEBUG_TOKEN (IVAL,  KW_SUB_named),
465    DEBUG_TOKEN (IVAL,  KW_SUB_named_sig),
466    DEBUG_TOKEN (IVAL,  KW_TRY),
467    DEBUG_TOKEN (IVAL,  KW_USE_or_NO),
468    DEBUG_TOKEN (IVAL,  KW_UNLESS),
469    DEBUG_TOKEN (IVAL,  KW_UNTIL),
470    DEBUG_TOKEN (IVAL,  KW_WHEN),
471    DEBUG_TOKEN (IVAL,  KW_WHILE),
472    DEBUG_TOKEN (OPVAL, LABEL),
473    DEBUG_TOKEN (OPNUM, LOOPEX),
474    DEBUG_TOKEN (OPNUM, LSTOP),
475    DEBUG_TOKEN (OPVAL, LSTOPSUB),
476    DEBUG_TOKEN (OPNUM, MATCHOP),
477    DEBUG_TOKEN (OPVAL, METHCALL),
478    DEBUG_TOKEN (OPVAL, METHCALL0),
479    DEBUG_TOKEN (OPNUM, MULOP),
480    DEBUG_TOKEN (OPNUM, NCEQOP),
481    DEBUG_TOKEN (OPNUM, NCRELOP),
482    DEBUG_TOKEN (NONE,  NOAMP),
483    DEBUG_TOKEN (NONE,  NOTOP),
484    DEBUG_TOKEN (IVAL,  OROP),
485    DEBUG_TOKEN (NONE,  OROR),
486    DEBUG_TOKEN (IVAL,  PERLY_AMPERSAND),
487    DEBUG_TOKEN (IVAL,  PERLY_BRACE_CLOSE),
488    DEBUG_TOKEN (IVAL,  PERLY_BRACE_OPEN),
489    DEBUG_TOKEN (IVAL,  PERLY_BRACKET_CLOSE),
490    DEBUG_TOKEN (IVAL,  PERLY_BRACKET_OPEN),
491    DEBUG_TOKEN (IVAL,  PERLY_COLON),
492    DEBUG_TOKEN (IVAL,  PERLY_COMMA),
493    DEBUG_TOKEN (IVAL,  PERLY_DOT),
494    DEBUG_TOKEN (IVAL,  PERLY_EQUAL_SIGN),
495    DEBUG_TOKEN (IVAL,  PERLY_EXCLAMATION_MARK),
496    DEBUG_TOKEN (IVAL,  PERLY_MINUS),
497    DEBUG_TOKEN (IVAL,  PERLY_PAREN_OPEN),
498    DEBUG_TOKEN (IVAL,  PERLY_PERCENT_SIGN),
499    DEBUG_TOKEN (IVAL,  PERLY_PLUS),
500    DEBUG_TOKEN (IVAL,  PERLY_QUESTION_MARK),
501    DEBUG_TOKEN (IVAL,  PERLY_SEMICOLON),
502    DEBUG_TOKEN (IVAL,  PERLY_SLASH),
503    DEBUG_TOKEN (IVAL,  PERLY_SNAIL),
504    DEBUG_TOKEN (IVAL,  PERLY_STAR),
505    DEBUG_TOKEN (IVAL,  PERLY_TILDE),
506    DEBUG_TOKEN (OPVAL, PLUGEXPR),
507    DEBUG_TOKEN (OPVAL, PLUGSTMT),
508    DEBUG_TOKEN (PVAL,  PLUGIN_ADD_OP),
509    DEBUG_TOKEN (PVAL,  PLUGIN_ASSIGN_OP),
510    DEBUG_TOKEN (PVAL,  PLUGIN_HIGH_OP),
511    DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_AND_OP),
512    DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_OR_OP),
513    DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_AND_LOW_OP),
514    DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_OR_LOW_OP),
515    DEBUG_TOKEN (PVAL,  PLUGIN_LOW_OP),
516    DEBUG_TOKEN (PVAL,  PLUGIN_MUL_OP),
517    DEBUG_TOKEN (PVAL,  PLUGIN_POW_OP),
518    DEBUG_TOKEN (PVAL,  PLUGIN_REL_OP),
519    DEBUG_TOKEN (OPVAL, PMFUNC),
520    DEBUG_TOKEN (NONE,  POSTJOIN),
521    DEBUG_TOKEN (NONE,  POSTDEC),
522    DEBUG_TOKEN (NONE,  POSTINC),
523    DEBUG_TOKEN (OPNUM, POWOP),
524    DEBUG_TOKEN (NONE,  PREDEC),
525    DEBUG_TOKEN (NONE,  PREINC),
526    DEBUG_TOKEN (OPVAL, PRIVATEREF),
527    DEBUG_TOKEN (OPVAL, QWLIST),
528    DEBUG_TOKEN (NONE,  REFGEN),
529    DEBUG_TOKEN (OPNUM, SHIFTOP),
530    DEBUG_TOKEN (NONE,  SUBLEXEND),
531    DEBUG_TOKEN (NONE,  SUBLEXSTART),
532    DEBUG_TOKEN (OPVAL, THING),
533    DEBUG_TOKEN (NONE,  UMINUS),
534    DEBUG_TOKEN (OPNUM, UNIOP),
535    DEBUG_TOKEN (OPVAL, UNIOPSUB),
536    DEBUG_TOKEN (OPVAL, BAREWORD),
537    DEBUG_TOKEN (IVAL,  YADAYADA),
538    { 0,		TOKENTYPE_NONE,		NULL }
539};
540
541#undef DEBUG_TOKEN
542
543/* dump the returned token in rv, plus any optional arg in pl_yylval */
544
545STATIC int
546S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
547{
548    PERL_ARGS_ASSERT_TOKEREPORT;
549
550    if (DEBUG_T_TEST) {
551        const char *name = NULL;
552        enum token_type type = TOKENTYPE_NONE;
553        const struct debug_tokens *p;
554        SV* const report = newSVpvs("<== ");
555
556        for (p = debug_tokens; p->token; p++) {
557            if (p->token == (int)rv) {
558                name = p->name;
559                type = p->type;
560                break;
561            }
562        }
563        if (name)
564            Perl_sv_catpv(aTHX_ report, name);
565        else if (isGRAPH(rv))
566        {
567            Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
568            if ((char)rv == 'p')
569                sv_catpvs(report, " (pending identifier)");
570        }
571        else if (!rv)
572            sv_catpvs(report, "EOF");
573        else
574            Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
575        switch (type) {
576        case TOKENTYPE_NONE:
577            break;
578        case TOKENTYPE_IVAL:
579            Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
580            break;
581        case TOKENTYPE_OPNUM:
582            Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
583                                    PL_op_name[lvalp->ival]);
584            break;
585        case TOKENTYPE_PVAL:
586            Perl_sv_catpvf(aTHX_ report, "(pval=%p)", lvalp->pval);
587            break;
588        case TOKENTYPE_OPVAL:
589            if (lvalp->opval) {
590                Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
591                                    PL_op_name[lvalp->opval->op_type]);
592                if (lvalp->opval->op_type == OP_CONST) {
593                    Perl_sv_catpvf(aTHX_ report, " %s",
594                        SvPEEK(cSVOPx_sv(lvalp->opval)));
595                }
596
597            }
598            else
599                sv_catpvs(report, "(opval=null)");
600            break;
601        }
602        PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
603    };
604    return (int)rv;
605}
606
607
608/* print the buffer with suitable escapes */
609
610STATIC void
611S_printbuf(pTHX_ const char *const fmt, const char *const s)
612{
613    SV* const tmp = newSVpvs("");
614
615    PERL_ARGS_ASSERT_PRINTBUF;
616
617    GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
618    PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
619    GCC_DIAG_RESTORE_STMT;
620    SvREFCNT_dec(tmp);
621}
622
623#endif
624
625/*
626 * S_ao
627 *
628 * This subroutine looks for an '=' next to the operator that has just been
629 * parsed and turns it into an ASSIGNOP if it finds one.
630 */
631
632STATIC int
633S_ao(pTHX_ int toketype)
634{
635    if (*PL_bufptr == '=') {
636        PL_bufptr++;
637
638        switch (toketype) {
639            case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
640            case OROR:   pl_yylval.ival = OP_ORASSIGN;  break;
641            case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
642        }
643
644        toketype = ASSIGNOP;
645    }
646    return REPORT(toketype);
647}
648
649/*
650 * S_no_op
651 * When Perl expects an operator and finds something else, no_op
652 * prints the warning.  It always prints "<something> found where
653 * operator expected.  It prints "Missing semicolon on previous line?"
654 * if the surprise occurs at the start of the line.  "do you need to
655 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
656 * where the compiler doesn't know if foo is a method call or a function.
657 * It prints "Missing operator before end of line" if there's nothing
658 * after the missing operator, or "... before <...>" if there is something
659 * after the missing operator.
660 *
661 * PL_bufptr is expected to point to the start of the thing that was found,
662 * and s after the next token or partial token.
663 */
664
665STATIC void
666S_no_op(pTHX_ const char *const what, char *s)
667{
668    char * const oldbp = PL_bufptr;
669    const bool is_first = (PL_oldbufptr == PL_linestart);
670    SV *message = sv_2mortal( newSVpvf(
671                   PERL_DIAG_WARN_SYNTAX("%s found where operator expected"),
672                   what
673                  ) );
674
675    PERL_ARGS_ASSERT_NO_OP;
676
677    if (!s)
678        s = oldbp;
679    else
680        PL_bufptr = s;
681
682    if (ckWARN_d(WARN_SYNTAX)) {
683        bool has_more = FALSE;
684        if (is_first) {
685            has_more = TRUE;
686            sv_catpvs(message,
687                    " (Missing semicolon on previous line?)");
688        }
689        else if (PL_oldoldbufptr) {
690            /* yyerror (via yywarn) would do this itself, so we should too */
691            const char *t;
692            for (t = PL_oldoldbufptr;
693                 t < PL_bufptr && isSPACE(*t);
694                 t += UTF ? UTF8SKIP(t) : 1)
695            {
696                NOOP;
697            }
698            /* see if we can identify the cause of the warning */
699            if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF))
700            {
701                const char *t_start= t;
702                for ( ;
703                     (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
704                     t += UTF ? UTF8SKIP(t) : 1)
705                {
706                    NOOP;
707                }
708                if (t < PL_bufptr && isSPACE(*t)) {
709                    has_more = TRUE;
710                    sv_catpvf( message,
711                            " (Do you need to predeclare \"%" UTF8f "\"?)",
712                          UTF8fARG(UTF, t - t_start, t_start));
713                }
714            }
715        }
716        if (!has_more) {
717            const char *t= oldbp;
718            assert(s >= oldbp);
719            while (t < s && isSPACE(*t)) {
720                t += UTF ? UTF8SKIP(t) : 1;
721            }
722
723            sv_catpvf(message,
724                    " (Missing operator before \"%" UTF8f "\"?)",
725                     UTF8fARG(UTF, s - t, t));
726        }
727    }
728    yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0);
729    PL_bufptr = oldbp;
730}
731
732/*
733 * S_missingterm
734 * Complain about missing quote/regexp/heredoc terminator.
735 * If it's called with NULL then it cauterizes the line buffer.
736 * If we're in a delimited string and the delimiter is a control
737 * character, it's reformatted into a two-char sequence like ^C.
738 * This is fatal.
739 */
740
741STATIC void
742S_missingterm(pTHX_ char *s, STRLEN len)
743{
744    char tmpbuf[UTF8_MAXBYTES + 1];
745    char q;
746    bool uni = FALSE;
747    if (s) {
748        char * const nl = (char *) my_memrchr(s, '\n', len);
749        if (nl) {
750            *nl = '\0';
751            len = nl - s;
752        }
753        uni = UTF;
754    }
755    else if (PL_multi_close < 32) {
756        *tmpbuf = '^';
757        tmpbuf[1] = (char)toCTRL(PL_multi_close);
758        tmpbuf[2] = '\0';
759        s = tmpbuf;
760        len = 2;
761    }
762    else {
763        if (! UTF && LIKELY(PL_multi_close < 256)) {
764            *tmpbuf = (char)PL_multi_close;
765            tmpbuf[1] = '\0';
766            len = 1;
767        }
768        else {
769            char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
770            *end = '\0';
771            len = end - tmpbuf;
772            uni = TRUE;
773        }
774        s = tmpbuf;
775    }
776    q = memchr(s, '"', len) ? '\'' : '"';
777    Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
778                     " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
779}
780
781#include "feature.h"
782
783/*
784 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
785 * utf16-to-utf8-reversed.
786 */
787
788#ifdef PERL_CR_FILTER
789static void
790strip_return(SV *sv)
791{
792    const char *s = SvPVX_const(sv);
793    const char * const e = s + SvCUR(sv);
794
795    PERL_ARGS_ASSERT_STRIP_RETURN;
796
797    /* outer loop optimized to do nothing if there are no CR-LFs */
798    while (s < e) {
799        if (*s++ == '\r' && *s == '\n') {
800            /* hit a CR-LF, need to copy the rest */
801            char *d = s - 1;
802            *d++ = *s++;
803            while (s < e) {
804                if (*s == '\r' && s[1] == '\n')
805                    s++;
806                *d++ = *s++;
807            }
808            SvCUR(sv) -= s - d;
809            return;
810        }
811    }
812}
813
814STATIC I32
815S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
816{
817    const I32 count = FILTER_READ(idx+1, sv, maxlen);
818    if (count > 0 && !maxlen)
819        strip_return(sv);
820    return count;
821}
822#endif
823
824/*
825=for apidoc lex_start
826
827Creates and initialises a new lexer/parser state object, supplying
828a context in which to lex and parse from a new source of Perl code.
829A pointer to the new state object is placed in L</PL_parser>.  An entry
830is made on the save stack so that upon unwinding, the new state object
831will be destroyed and the former value of L</PL_parser> will be restored.
832Nothing else need be done to clean up the parsing context.
833
834The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
835non-null, provides a string (in SV form) containing code to be parsed.
836A copy of the string is made, so subsequent modification of C<line>
837does not affect parsing.  C<rsfp>, if non-null, provides an input stream
838from which code will be read to be parsed.  If both are non-null, the
839code in C<line> comes first and must consist of complete lines of input,
840and C<rsfp> supplies the remainder of the source.
841
842The C<flags> parameter is reserved for future use.  Currently it is only
843used by perl internally, so extensions should always pass zero.
844
845=cut
846*/
847
848/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
849   can share filters with the current parser.
850   LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
851   caller, hence isn't owned by the parser, so shouldn't be closed on parser
852   destruction. This is used to handle the case of defaulting to reading the
853   script from the standard input because no filename was given on the command
854   line (without getting confused by situation where STDIN has been closed, so
855   the script handle is opened on fd 0)  */
856
857void
858Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
859{
860    const char *s = NULL;
861    yy_parser *parser, *oparser;
862
863    if (flags && flags & ~LEX_START_FLAGS)
864        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
865
866    /* create and initialise a parser */
867
868    Newxz(parser, 1, yy_parser);
869    parser->old_parser = oparser = PL_parser;
870    PL_parser = parser;
871
872    parser->stack = NULL;
873    parser->stack_max1 = NULL;
874    parser->ps = NULL;
875
876    /* on scope exit, free this parser and restore any outer one */
877    SAVEPARSER(parser);
878    parser->saved_curcop = PL_curcop;
879
880    /* initialise lexer state */
881
882    parser->nexttoke = 0;
883    parser->error_count = oparser ? oparser->error_count : 0;
884    parser->copline = parser->preambling = NOLINE;
885    parser->lex_state = LEX_NORMAL;
886    parser->expect = XSTATE;
887    parser->rsfp = rsfp;
888    parser->recheck_utf8_validity = TRUE;
889    parser->rsfp_filters =
890      !(flags & LEX_START_SAME_FILTER) || !oparser
891        ? NULL
892        : MUTABLE_AV(SvREFCNT_inc(
893            oparser->rsfp_filters
894             ? oparser->rsfp_filters
895             : (oparser->rsfp_filters = newAV())
896          ));
897
898    Newx(parser->lex_brackstack, 120, char);
899    Newx(parser->lex_casestack, 12, char);
900    *parser->lex_casestack = '\0';
901    Newxz(parser->lex_shared, 1, LEXSHARED);
902
903    if (line) {
904        Size_t len;
905        const U8* first_bad_char_loc;
906
907        s = SvPV_const(line, len);
908
909        if (   SvUTF8(line)
910            && UNLIKELY(! is_utf8_string_loc((U8 *) s,
911                                             SvCUR(line),
912                                             &first_bad_char_loc)))
913        {
914            _force_out_malformed_utf8_message(first_bad_char_loc,
915                                              (U8 *) s + SvCUR(line),
916                                              0,
917                                              1 /* 1 means die */ );
918            NOT_REACHED; /* NOTREACHED */
919        }
920
921        parser->linestr = flags & LEX_START_COPIED
922                            ? SvREFCNT_inc_simple_NN(line)
923                            : newSVpvn_flags(s, len, SvUTF8(line));
924        if (!rsfp)
925            sv_catpvs(parser->linestr, "\n;");
926    } else {
927        parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
928    }
929
930    parser->oldoldbufptr =
931        parser->oldbufptr =
932        parser->bufptr =
933        parser->linestart = SvPVX(parser->linestr);
934    parser->bufend = parser->bufptr + SvCUR(parser->linestr);
935    parser->last_lop = parser->last_uni = NULL;
936
937    STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
938                                                        |LEX_DONT_CLOSE_RSFP));
939    parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
940                                                        |LEX_DONT_CLOSE_RSFP));
941
942    parser->in_pod = parser->filtered = 0;
943}
944
945
946/* delete a parser object */
947
948void
949Perl_parser_free(pTHX_  const yy_parser *parser)
950{
951    PERL_ARGS_ASSERT_PARSER_FREE;
952
953    PL_curcop = parser->saved_curcop;
954    SvREFCNT_dec(parser->linestr);
955
956    if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
957        PerlIO_clearerr(parser->rsfp);
958    else if (parser->rsfp && (!parser->old_parser
959          || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
960        PerlIO_close(parser->rsfp);
961    SvREFCNT_dec(parser->rsfp_filters);
962    SvREFCNT_dec(parser->lex_stuff);
963    SvREFCNT_dec(parser->lex_sub_repl);
964
965    Safefree(parser->lex_brackstack);
966    Safefree(parser->lex_casestack);
967    Safefree(parser->lex_shared);
968    PL_parser = parser->old_parser;
969    Safefree(parser);
970}
971
972void
973Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
974{
975    I32 nexttoke = parser->nexttoke;
976    PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
977    while (nexttoke--) {
978        if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
979         && parser->nextval[nexttoke].opval
980         && parser->nextval[nexttoke].opval->op_slabbed
981         && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
982            op_free(parser->nextval[nexttoke].opval);
983            parser->nextval[nexttoke].opval = NULL;
984        }
985    }
986}
987
988
989/*
990=for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
991
992Buffer scalar containing the chunk currently under consideration of the
993text currently being lexed.  This is always a plain string scalar (for
994which C<SvPOK> is true).  It is not intended to be used as a scalar by
995normal scalar means; instead refer to the buffer directly by the pointer
996variables described below.
997
998The lexer maintains various C<char*> pointers to things in the
999C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
1000reallocated, all of these pointers must be updated.  Don't attempt to
1001do this manually, but rather use L</lex_grow_linestr> if you need to
1002reallocate the buffer.
1003
1004The content of the text chunk in the buffer is commonly exactly one
1005complete line of input, up to and including a newline terminator,
1006but there are situations where it is otherwise.  The octets of the
1007buffer may be intended to be interpreted as either UTF-8 or Latin-1.
1008The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
1009flag on this scalar, which may disagree with it.
1010
1011For direct examination of the buffer, the variable
1012L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
1013lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
1014of these pointers is usually preferable to examination of the scalar
1015through normal scalar means.
1016
1017=for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
1018
1019Direct pointer to the end of the chunk of text currently being lexed, the
1020end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
1021+ SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
1022always located at the end of the buffer, and does not count as part of
1023the buffer's contents.
1024
1025=for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
1026
1027Points to the current position of lexing inside the lexer buffer.
1028Characters around this point may be freely examined, within
1029the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
1030L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
1031interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
1032
1033Lexing code (whether in the Perl core or not) moves this pointer past
1034the characters that it consumes.  It is also expected to perform some
1035bookkeeping whenever a newline character is consumed.  This movement
1036can be more conveniently performed by the function L</lex_read_to>,
1037which handles newlines appropriately.
1038
1039Interpretation of the buffer's octets can be abstracted out by
1040using the slightly higher-level functions L</lex_peek_unichar> and
1041L</lex_read_unichar>.
1042
1043=for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
1044
1045Points to the start of the current line inside the lexer buffer.
1046This is useful for indicating at which column an error occurred, and
1047not much else.  This must be updated by any lexing code that consumes
1048a newline; the function L</lex_read_to> handles this detail.
1049
1050=cut
1051*/
1052
1053/*
1054=for apidoc lex_bufutf8
1055
1056Indicates whether the octets in the lexer buffer
1057(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1058of Unicode characters.  If not, they should be interpreted as Latin-1
1059characters.  This is analogous to the C<SvUTF8> flag for scalars.
1060
1061In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1062contains valid UTF-8.  Lexing code must be robust in the face of invalid
1063encoding.
1064
1065The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1066is significant, but not the whole story regarding the input character
1067encoding.  Normally, when a file is being read, the scalar contains octets
1068and its C<SvUTF8> flag is off, but the octets should be interpreted as
1069UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
1070however, the scalar may have the C<SvUTF8> flag on, and in this case its
1071octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1072is in effect.  This logic may change in the future; use this function
1073instead of implementing the logic yourself.
1074
1075=cut
1076*/
1077
1078bool
1079Perl_lex_bufutf8(pTHX)
1080{
1081    return UTF;
1082}
1083
1084/*
1085=for apidoc lex_grow_linestr
1086
1087Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1088at least C<len> octets (including terminating C<NUL>).  Returns a
1089pointer to the reallocated buffer.  This is necessary before making
1090any direct modification of the buffer that would increase its length.
1091L</lex_stuff_pvn> provides a more convenient way to insert text into
1092the buffer.
1093
1094Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1095this function updates all of the lexer's variables that point directly
1096into the buffer.
1097
1098=cut
1099*/
1100
1101char *
1102Perl_lex_grow_linestr(pTHX_ STRLEN len)
1103{
1104    SV *linestr;
1105    char *buf;
1106    STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1107    STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1108    bool current;
1109
1110    linestr = PL_parser->linestr;
1111    buf = SvPVX(linestr);
1112    if (len <= SvLEN(linestr))
1113        return buf;
1114
1115    /* Is the lex_shared linestr SV the same as the current linestr SV?
1116     * Only in this case does re_eval_start need adjusting, since it
1117     * points within lex_shared->ls_linestr's buffer */
1118    current = (   !PL_parser->lex_shared->ls_linestr
1119               || linestr == PL_parser->lex_shared->ls_linestr);
1120
1121    bufend_pos = PL_parser->bufend - buf;
1122    bufptr_pos = PL_parser->bufptr - buf;
1123    oldbufptr_pos = PL_parser->oldbufptr - buf;
1124    oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1125    linestart_pos = PL_parser->linestart - buf;
1126    last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1127    last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1128    re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1129                            PL_parser->lex_shared->re_eval_start - buf : 0;
1130
1131    buf = sv_grow(linestr, len);
1132
1133    PL_parser->bufend = buf + bufend_pos;
1134    PL_parser->bufptr = buf + bufptr_pos;
1135    PL_parser->oldbufptr = buf + oldbufptr_pos;
1136    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1137    PL_parser->linestart = buf + linestart_pos;
1138    if (PL_parser->last_uni)
1139        PL_parser->last_uni = buf + last_uni_pos;
1140    if (PL_parser->last_lop)
1141        PL_parser->last_lop = buf + last_lop_pos;
1142    if (current && PL_parser->lex_shared->re_eval_start)
1143        PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
1144    return buf;
1145}
1146
1147/*
1148=for apidoc lex_stuff_pvn
1149
1150Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1151immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1152reallocating the buffer if necessary.  This means that lexing code that
1153runs later will see the characters as if they had appeared in the input.
1154It is not recommended to do this as part of normal parsing, and most
1155uses of this facility run the risk of the inserted characters being
1156interpreted in an unintended manner.
1157
1158The string to be inserted is represented by C<len> octets starting
1159at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1160according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1161The characters are recoded for the lexer buffer, according to how the
1162buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1163to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1164function is more convenient.
1165
1166=for apidoc Amnh||LEX_STUFF_UTF8
1167
1168=cut
1169*/
1170
1171void
1172Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1173{
1174    char *bufptr;
1175    PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1176    if (flags & ~(LEX_STUFF_UTF8))
1177        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1178    if (UTF) {
1179        if (flags & LEX_STUFF_UTF8) {
1180            goto plain_copy;
1181        } else {
1182            STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1183                                                       (U8 *) pv + len);
1184            const char *p, *e = pv+len;;
1185            if (!highhalf)
1186                goto plain_copy;
1187            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1188            bufptr = PL_parser->bufptr;
1189            Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1190            SvCUR_set(PL_parser->linestr,
1191                SvCUR(PL_parser->linestr) + len+highhalf);
1192            PL_parser->bufend += len+highhalf;
1193            for (p = pv; p != e; p++) {
1194                append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1195            }
1196        }
1197    } else {
1198        if (flags & LEX_STUFF_UTF8) {
1199            STRLEN highhalf = 0;
1200            const char *p, *e = pv+len;
1201            for (p = pv; p != e; p++) {
1202                U8 c = (U8)*p;
1203                if (UTF8_IS_ABOVE_LATIN1(c)) {
1204                    Perl_croak(aTHX_ "Lexing code attempted to stuff "
1205                                "non-Latin-1 character into Latin-1 input");
1206                } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1207                    p++;
1208                    highhalf++;
1209                } else assert(UTF8_IS_INVARIANT(c));
1210            }
1211            if (!highhalf)
1212                goto plain_copy;
1213            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1214            bufptr = PL_parser->bufptr;
1215            Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1216            SvCUR_set(PL_parser->linestr,
1217                SvCUR(PL_parser->linestr) + len-highhalf);
1218            PL_parser->bufend += len-highhalf;
1219            p = pv;
1220            while (p < e) {
1221                if (UTF8_IS_INVARIANT(*p)) {
1222                    *bufptr++ = *p;
1223                    p++;
1224                }
1225                else {
1226                    assert(p < e -1 );
1227                    *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1228                    p += 2;
1229                }
1230            }
1231        } else {
1232          plain_copy:
1233            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1234            bufptr = PL_parser->bufptr;
1235            Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1236            SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1237            PL_parser->bufend += len;
1238            Copy(pv, bufptr, len, char);
1239        }
1240    }
1241}
1242
1243/*
1244=for apidoc lex_stuff_pv
1245
1246Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1247immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1248reallocating the buffer if necessary.  This means that lexing code that
1249runs later will see the characters as if they had appeared in the input.
1250It is not recommended to do this as part of normal parsing, and most
1251uses of this facility run the risk of the inserted characters being
1252interpreted in an unintended manner.
1253
1254The string to be inserted is represented by octets starting at C<pv>
1255and continuing to the first nul.  These octets are interpreted as either
1256UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1257in C<flags>.  The characters are recoded for the lexer buffer, according
1258to how the buffer is currently being interpreted (L</lex_bufutf8>).
1259If it is not convenient to nul-terminate a string to be inserted, the
1260L</lex_stuff_pvn> function is more appropriate.
1261
1262=cut
1263*/
1264
1265void
1266Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1267{
1268    PERL_ARGS_ASSERT_LEX_STUFF_PV;
1269    lex_stuff_pvn(pv, strlen(pv), flags);
1270}
1271
1272/*
1273=for apidoc lex_stuff_sv
1274
1275Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1276immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1277reallocating the buffer if necessary.  This means that lexing code that
1278runs later will see the characters as if they had appeared in the input.
1279It is not recommended to do this as part of normal parsing, and most
1280uses of this facility run the risk of the inserted characters being
1281interpreted in an unintended manner.
1282
1283The string to be inserted is the string value of C<sv>.  The characters
1284are recoded for the lexer buffer, according to how the buffer is currently
1285being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1286not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1287need to construct a scalar.
1288
1289=cut
1290*/
1291
1292void
1293Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1294{
1295    char *pv;
1296    STRLEN len;
1297    PERL_ARGS_ASSERT_LEX_STUFF_SV;
1298    if (flags)
1299        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1300    pv = SvPV(sv, len);
1301    lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1302}
1303
1304/*
1305=for apidoc lex_unstuff
1306
1307Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1308C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1309This hides the discarded text from any lexing code that runs later,
1310as if the text had never appeared.
1311
1312This is not the normal way to consume lexed text.  For that, use
1313L</lex_read_to>.
1314
1315=cut
1316*/
1317
1318void
1319Perl_lex_unstuff(pTHX_ char *ptr)
1320{
1321    char *buf, *bufend;
1322    STRLEN unstuff_len;
1323    PERL_ARGS_ASSERT_LEX_UNSTUFF;
1324    buf = PL_parser->bufptr;
1325    if (ptr < buf)
1326        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1327    if (ptr == buf)
1328        return;
1329    bufend = PL_parser->bufend;
1330    if (ptr > bufend)
1331        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1332    unstuff_len = ptr - buf;
1333    Move(ptr, buf, bufend+1-ptr, char);
1334    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1335    PL_parser->bufend = bufend - unstuff_len;
1336}
1337
1338/*
1339=for apidoc lex_read_to
1340
1341Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1342to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1343performing the correct bookkeeping whenever a newline character is passed.
1344This is the normal way to consume lexed text.
1345
1346Interpretation of the buffer's octets can be abstracted out by
1347using the slightly higher-level functions L</lex_peek_unichar> and
1348L</lex_read_unichar>.
1349
1350=cut
1351*/
1352
1353void
1354Perl_lex_read_to(pTHX_ char *ptr)
1355{
1356    char *s;
1357    PERL_ARGS_ASSERT_LEX_READ_TO;
1358    s = PL_parser->bufptr;
1359    if (ptr < s || ptr > PL_parser->bufend)
1360        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1361    for (; s != ptr; s++)
1362        if (*s == '\n') {
1363            COPLINE_INC_WITH_HERELINES;
1364            PL_parser->linestart = s+1;
1365        }
1366    PL_parser->bufptr = ptr;
1367}
1368
1369/*
1370=for apidoc lex_discard_to
1371
1372Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1373up to C<ptr>.  The remaining content of the buffer will be moved, and
1374all pointers into the buffer updated appropriately.  C<ptr> must not
1375be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1376it is not permitted to discard text that has yet to be lexed.
1377
1378Normally it is not necessarily to do this directly, because it suffices to
1379use the implicit discarding behaviour of L</lex_next_chunk> and things
1380based on it.  However, if a token stretches across multiple lines,
1381and the lexing code has kept multiple lines of text in the buffer for
1382that purpose, then after completion of the token it would be wise to
1383explicitly discard the now-unneeded earlier lines, to avoid future
1384multi-line tokens growing the buffer without bound.
1385
1386=cut
1387*/
1388
1389void
1390Perl_lex_discard_to(pTHX_ char *ptr)
1391{
1392    char *buf;
1393    STRLEN discard_len;
1394    PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1395    buf = SvPVX(PL_parser->linestr);
1396    if (ptr < buf)
1397        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1398    if (ptr == buf)
1399        return;
1400    if (ptr > PL_parser->bufptr)
1401        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1402    discard_len = ptr - buf;
1403    if (PL_parser->oldbufptr < ptr)
1404        PL_parser->oldbufptr = ptr;
1405    if (PL_parser->oldoldbufptr < ptr)
1406        PL_parser->oldoldbufptr = ptr;
1407    if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1408        PL_parser->last_uni = NULL;
1409    if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1410        PL_parser->last_lop = NULL;
1411    Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1412    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1413    PL_parser->bufend -= discard_len;
1414    PL_parser->bufptr -= discard_len;
1415    PL_parser->oldbufptr -= discard_len;
1416    PL_parser->oldoldbufptr -= discard_len;
1417    if (PL_parser->last_uni)
1418        PL_parser->last_uni -= discard_len;
1419    if (PL_parser->last_lop)
1420        PL_parser->last_lop -= discard_len;
1421}
1422
1423void
1424Perl_notify_parser_that_changed_to_utf8(pTHX)
1425{
1426    /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1427     * off to on.  At compile time, this has the effect of entering a 'use
1428     * utf8' section.  This means that any input was not previously checked for
1429     * UTF-8 (because it was off), but now we do need to check it, or our
1430     * assumptions about the input being sane could be wrong, and we could
1431     * segfault.  This routine just sets a flag so that the next time we look
1432     * at the input we do the well-formed UTF-8 check.  If we aren't in the
1433     * proper phase, there may not be a parser object, but if there is, setting
1434     * the flag is harmless */
1435
1436    if (PL_parser) {
1437        PL_parser->recheck_utf8_validity = TRUE;
1438    }
1439}
1440
1441/*
1442=for apidoc lex_next_chunk
1443
1444Reads in the next chunk of text to be lexed, appending it to
1445L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1446looked to the end of the current chunk and wants to know more.  It is
1447usual, but not necessary, for lexing to have consumed the entirety of
1448the current chunk at this time.
1449
1450If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1451chunk (i.e., the current chunk has been entirely consumed), normally the
1452current chunk will be discarded at the same time that the new chunk is
1453read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1454will not be discarded.  If the current chunk has not been entirely
1455consumed, then it will not be discarded regardless of the flag.
1456
1457Returns true if some new text was added to the buffer, or false if the
1458buffer has reached the end of the input text.
1459
1460=for apidoc Amnh||LEX_KEEP_PREVIOUS
1461
1462=cut
1463*/
1464
1465#define LEX_FAKE_EOF 0x80000000
1466#define LEX_NO_TERM  0x40000000 /* here-doc */
1467
1468bool
1469Perl_lex_next_chunk(pTHX_ U32 flags)
1470{
1471    SV *linestr;
1472    char *buf;
1473    STRLEN old_bufend_pos, new_bufend_pos;
1474    STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1475    STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1476    bool got_some_for_debugger = 0;
1477    bool got_some;
1478
1479    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1480        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1481    if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1482        return FALSE;
1483    linestr = PL_parser->linestr;
1484    buf = SvPVX(linestr);
1485    if (!(flags & LEX_KEEP_PREVIOUS)
1486          && PL_parser->bufptr == PL_parser->bufend)
1487    {
1488        old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1489        linestart_pos = 0;
1490        if (PL_parser->last_uni != PL_parser->bufend)
1491            PL_parser->last_uni = NULL;
1492        if (PL_parser->last_lop != PL_parser->bufend)
1493            PL_parser->last_lop = NULL;
1494        last_uni_pos = last_lop_pos = 0;
1495        *buf = 0;
1496        SvCUR_set(linestr, 0);
1497    } else {
1498        old_bufend_pos = PL_parser->bufend - buf;
1499        bufptr_pos = PL_parser->bufptr - buf;
1500        oldbufptr_pos = PL_parser->oldbufptr - buf;
1501        oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1502        linestart_pos = PL_parser->linestart - buf;
1503        last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1504        last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1505    }
1506    if (flags & LEX_FAKE_EOF) {
1507        goto eof;
1508    } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1509        got_some = 0;
1510    } else if (filter_gets(linestr, old_bufend_pos)) {
1511        got_some = 1;
1512        got_some_for_debugger = 1;
1513    } else if (flags & LEX_NO_TERM) {
1514        got_some = 0;
1515    } else {
1516        if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1517            SvPVCLEAR(linestr);
1518        eof:
1519        /* End of real input.  Close filehandle (unless it was STDIN),
1520         * then add implicit termination.
1521         */
1522        if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1523            PerlIO_clearerr(PL_parser->rsfp);
1524        else if (PL_parser->rsfp)
1525            (void)PerlIO_close(PL_parser->rsfp);
1526        PL_parser->rsfp = NULL;
1527        PL_parser->in_pod = PL_parser->filtered = 0;
1528        if (!PL_in_eval && PL_minus_p) {
1529            sv_catpvs(linestr,
1530                /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1531            PL_minus_n = PL_minus_p = 0;
1532        } else if (!PL_in_eval && PL_minus_n) {
1533            sv_catpvs(linestr, /*{*/";}");
1534            PL_minus_n = 0;
1535        } else
1536            sv_catpvs(linestr, ";");
1537        got_some = 1;
1538    }
1539    buf = SvPVX(linestr);
1540    new_bufend_pos = SvCUR(linestr);
1541    PL_parser->bufend = buf + new_bufend_pos;
1542    PL_parser->bufptr = buf + bufptr_pos;
1543
1544    if (UTF) {
1545        const U8* first_bad_char_loc;
1546        if (UNLIKELY(! is_utf8_string_loc(
1547                            (U8 *) PL_parser->bufptr,
1548                                   PL_parser->bufend - PL_parser->bufptr,
1549                                   &first_bad_char_loc)))
1550        {
1551            _force_out_malformed_utf8_message(first_bad_char_loc,
1552                                              (U8 *) PL_parser->bufend,
1553                                              0,
1554                                              1 /* 1 means die */ );
1555            NOT_REACHED; /* NOTREACHED */
1556        }
1557    }
1558
1559    PL_parser->oldbufptr = buf + oldbufptr_pos;
1560    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1561    PL_parser->linestart = buf + linestart_pos;
1562    if (PL_parser->last_uni)
1563        PL_parser->last_uni = buf + last_uni_pos;
1564    if (PL_parser->last_lop)
1565        PL_parser->last_lop = buf + last_lop_pos;
1566    if (PL_parser->preambling != NOLINE) {
1567        CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1568        PL_parser->preambling = NOLINE;
1569    }
1570    if (   got_some_for_debugger
1571        && PERLDB_LINE_OR_SAVESRC
1572        && PL_curstash != PL_debstash)
1573    {
1574        /* debugger active and we're not compiling the debugger code,
1575         * so store the line into the debugger's array of lines
1576         */
1577        update_debugger_info(NULL, buf+old_bufend_pos,
1578            new_bufend_pos-old_bufend_pos);
1579    }
1580    return got_some;
1581}
1582
1583/*
1584=for apidoc lex_peek_unichar
1585
1586Looks ahead one (Unicode) character in the text currently being lexed.
1587Returns the codepoint (unsigned integer value) of the next character,
1588or -1 if lexing has reached the end of the input text.  To consume the
1589peeked character, use L</lex_read_unichar>.
1590
1591If the next character is in (or extends into) the next chunk of input
1592text, the next chunk will be read in.  Normally the current chunk will be
1593discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1594bit set, then the current chunk will not be discarded.
1595
1596If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1597is encountered, an exception is generated.
1598
1599=cut
1600*/
1601
1602I32
1603Perl_lex_peek_unichar(pTHX_ U32 flags)
1604{
1605    char *s, *bufend;
1606    if (flags & ~(LEX_KEEP_PREVIOUS))
1607        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1608    s = PL_parser->bufptr;
1609    bufend = PL_parser->bufend;
1610    if (UTF) {
1611        U8 head;
1612        I32 unichar;
1613        STRLEN len, retlen;
1614        if (s == bufend) {
1615            if (!lex_next_chunk(flags))
1616                return -1;
1617            s = PL_parser->bufptr;
1618            bufend = PL_parser->bufend;
1619        }
1620        head = (U8)*s;
1621        if (UTF8_IS_INVARIANT(head))
1622            return head;
1623        if (UTF8_IS_START(head)) {
1624            len = UTF8SKIP(&head);
1625            while ((STRLEN)(bufend-s) < len) {
1626                if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1627                    break;
1628                s = PL_parser->bufptr;
1629                bufend = PL_parser->bufend;
1630            }
1631        }
1632        unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1633        if (retlen == (STRLEN)-1) {
1634            _force_out_malformed_utf8_message((U8 *) s,
1635                                              (U8 *) bufend,
1636                                              0,
1637                                              1 /* 1 means die */ );
1638            NOT_REACHED; /* NOTREACHED */
1639        }
1640        return unichar;
1641    } else {
1642        if (s == bufend) {
1643            if (!lex_next_chunk(flags))
1644                return -1;
1645            s = PL_parser->bufptr;
1646        }
1647        return (U8)*s;
1648    }
1649}
1650
1651/*
1652=for apidoc lex_read_unichar
1653
1654Reads the next (Unicode) character in the text currently being lexed.
1655Returns the codepoint (unsigned integer value) of the character read,
1656and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1657if lexing has reached the end of the input text.  To non-destructively
1658examine the next character, use L</lex_peek_unichar> instead.
1659
1660If the next character is in (or extends into) the next chunk of input
1661text, the next chunk will be read in.  Normally the current chunk will be
1662discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1663bit set, then the current chunk will not be discarded.
1664
1665If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1666is encountered, an exception is generated.
1667
1668=cut
1669*/
1670
1671I32
1672Perl_lex_read_unichar(pTHX_ U32 flags)
1673{
1674    I32 c;
1675    if (flags & ~(LEX_KEEP_PREVIOUS))
1676        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1677    c = lex_peek_unichar(flags);
1678    if (c != -1) {
1679        if (c == '\n')
1680            COPLINE_INC_WITH_HERELINES;
1681        if (UTF)
1682            PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1683        else
1684            ++(PL_parser->bufptr);
1685    }
1686    return c;
1687}
1688
1689/*
1690=for apidoc lex_read_space
1691
1692Reads optional spaces, in Perl style, in the text currently being
1693lexed.  The spaces may include ordinary whitespace characters and
1694Perl-style comments.  C<#line> directives are processed if encountered.
1695L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1696at a non-space character (or the end of the input text).
1697
1698If spaces extend into the next chunk of input text, the next chunk will
1699be read in.  Normally the current chunk will be discarded at the same
1700time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1701chunk will not be discarded.
1702
1703=cut
1704*/
1705
1706#define LEX_NO_INCLINE    0x40000000
1707#define LEX_NO_NEXT_CHUNK 0x80000000
1708
1709void
1710Perl_lex_read_space(pTHX_ U32 flags)
1711{
1712    char *s, *bufend;
1713    const bool can_incline = !(flags & LEX_NO_INCLINE);
1714    bool need_incline = 0;
1715    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1716        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1717    s = PL_parser->bufptr;
1718    bufend = PL_parser->bufend;
1719    while (1) {
1720        char c = *s;
1721        if (c == '#') {
1722            do {
1723                c = *++s;
1724            } while (!(c == '\n' || (c == 0 && s == bufend)));
1725        } else if (c == '\n') {
1726            s++;
1727            if (can_incline) {
1728                PL_parser->linestart = s;
1729                if (s == bufend)
1730                    need_incline = 1;
1731                else
1732                    incline(s, bufend);
1733            }
1734        } else if (isSPACE(c)) {
1735            s++;
1736        } else if (c == 0 && s == bufend) {
1737            bool got_more;
1738            line_t l;
1739            if (flags & LEX_NO_NEXT_CHUNK)
1740                break;
1741            PL_parser->bufptr = s;
1742            l = CopLINE(PL_curcop);
1743            CopLINE(PL_curcop) += PL_parser->herelines + 1;
1744            got_more = lex_next_chunk(flags);
1745            CopLINE_set(PL_curcop, l);
1746            s = PL_parser->bufptr;
1747            bufend = PL_parser->bufend;
1748            if (!got_more)
1749                break;
1750            if (can_incline && need_incline && PL_parser->rsfp) {
1751                incline(s, bufend);
1752                need_incline = 0;
1753            }
1754        } else if (!c) {
1755            s++;
1756        } else {
1757            break;
1758        }
1759    }
1760    PL_parser->bufptr = s;
1761}
1762
1763/*
1764
1765=for apidoc validate_proto
1766
1767This function performs syntax checking on a prototype, C<proto>.
1768If C<warn> is true, any illegal characters or mismatched brackets
1769will trigger illegalproto warnings, declaring that they were
1770detected in the prototype for C<name>.
1771
1772The return value is C<true> if this is a valid prototype, and
1773C<false> if it is not, regardless of whether C<warn> was C<true> or
1774C<false>.
1775
1776Note that C<NULL> is a valid C<proto> and will always return C<true>.
1777
1778=cut
1779
1780 */
1781
1782bool
1783Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1784{
1785    STRLEN len, origlen;
1786    char *p;
1787    bool bad_proto = FALSE;
1788    bool in_brackets = FALSE;
1789    bool after_slash = FALSE;
1790    char greedy_proto = ' ';
1791    bool proto_after_greedy_proto = FALSE;
1792    bool must_be_last = FALSE;
1793    bool underscore = FALSE;
1794    bool bad_proto_after_underscore = FALSE;
1795
1796    PERL_ARGS_ASSERT_VALIDATE_PROTO;
1797
1798    if (!proto)
1799        return TRUE;
1800
1801    p = SvPV(proto, len);
1802    origlen = len;
1803    for (; len--; p++) {
1804        if (!isSPACE(*p)) {
1805            if (must_be_last)
1806                proto_after_greedy_proto = TRUE;
1807            if (underscore) {
1808                if (!memCHRs(";@%", *p))
1809                    bad_proto_after_underscore = TRUE;
1810                underscore = FALSE;
1811            }
1812            if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1813                bad_proto = TRUE;
1814            }
1815            else {
1816                if (*p == '[')
1817                    in_brackets = TRUE;
1818                else if (*p == ']')
1819                    in_brackets = FALSE;
1820                else if ((*p == '@' || *p == '%')
1821                         && !after_slash
1822                         && !in_brackets )
1823                {
1824                    must_be_last = TRUE;
1825                    greedy_proto = *p;
1826                }
1827                else if (*p == '_')
1828                    underscore = TRUE;
1829            }
1830            if (*p == '\\')
1831                after_slash = TRUE;
1832            else
1833                after_slash = FALSE;
1834        }
1835    }
1836
1837    if (warn) {
1838        SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1839        p -= origlen;
1840        p = SvUTF8(proto)
1841            ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1842                             origlen, UNI_DISPLAY_ISPRINT)
1843            : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1844
1845        if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1846            SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1847            sv_catpvs(name2, "::");
1848            sv_catsv(name2, (SV *)name);
1849            name = name2;
1850        }
1851
1852        if (proto_after_greedy_proto)
1853            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1854                        "Prototype after '%c' for %" SVf " : %s",
1855                        greedy_proto, SVfARG(name), p);
1856        if (in_brackets)
1857            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1858                        "Missing ']' in prototype for %" SVf " : %s",
1859                        SVfARG(name), p);
1860        if (bad_proto)
1861            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1862                        "Illegal character in prototype for %" SVf " : %s",
1863                        SVfARG(name), p);
1864        if (bad_proto_after_underscore)
1865            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1866                        "Illegal character after '_' in prototype for %" SVf " : %s",
1867                        SVfARG(name), p);
1868    }
1869
1870    return (! (proto_after_greedy_proto || bad_proto) );
1871}
1872
1873/*
1874 * S_incline
1875 * This subroutine has nothing to do with tilting, whether at windmills
1876 * or pinball tables.  Its name is short for "increment line".  It
1877 * increments the current line number in CopLINE(PL_curcop) and checks
1878 * to see whether the line starts with a comment of the form
1879 *    # line 500 "foo.pm"
1880 * If so, it sets the current line number and file to the values in the comment.
1881 */
1882
1883STATIC void
1884S_incline(pTHX_ const char *s, const char *end)
1885{
1886    const char *t;
1887    const char *n;
1888    const char *e;
1889    line_t line_num;
1890    UV uv;
1891
1892    PERL_ARGS_ASSERT_INCLINE;
1893
1894    assert(end >= s);
1895
1896    COPLINE_INC_WITH_HERELINES;
1897    if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1898     && s+1 == PL_bufend && *s == ';') {
1899        /* fake newline in string eval */
1900        CopLINE_dec(PL_curcop);
1901        return;
1902    }
1903    if (*s++ != '#')
1904        return;
1905    while (SPACE_OR_TAB(*s))
1906        s++;
1907    if (memBEGINs(s, (STRLEN) (end - s), "line"))
1908        s += sizeof("line") - 1;
1909    else
1910        return;
1911    if (SPACE_OR_TAB(*s))
1912        s++;
1913    else
1914        return;
1915    while (SPACE_OR_TAB(*s))
1916        s++;
1917    if (!isDIGIT(*s))
1918        return;
1919
1920    n = s;
1921    while (isDIGIT(*s))
1922        s++;
1923    if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1924        return;
1925    while (SPACE_OR_TAB(*s))
1926        s++;
1927    if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1928        s++;
1929        e = t + 1;
1930    }
1931    else {
1932        t = s;
1933        while (*t && !isSPACE(*t))
1934            t++;
1935        e = t;
1936    }
1937    while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1938        e++;
1939    if (*e != '\n' && *e != '\0')
1940        return;		/* false alarm */
1941
1942    if (!grok_atoUV(n, &uv, &e))
1943        return;
1944    line_num = ((line_t)uv) - 1;
1945
1946    if (t - s > 0) {
1947        const STRLEN len = t - s;
1948
1949        if (!PL_rsfp && !PL_parser->filtered) {
1950            /* must copy *{"::_<(eval N)[oldfilename:L]"}
1951             * to *{"::_<newfilename"} */
1952            /* However, the long form of evals is only turned on by the
1953               debugger - usually they're "(eval %lu)" */
1954            GV * const cfgv = CopFILEGV(PL_curcop);
1955            if (cfgv) {
1956                char smallbuf[128];
1957                STRLEN tmplen2 = len;
1958                char *tmpbuf2;
1959                GV *gv2;
1960
1961                if (tmplen2 + 2 <= sizeof smallbuf)
1962                    tmpbuf2 = smallbuf;
1963                else
1964                    Newx(tmpbuf2, tmplen2 + 2, char);
1965
1966                tmpbuf2[0] = '_';
1967                tmpbuf2[1] = '<';
1968
1969                memcpy(tmpbuf2 + 2, s, tmplen2);
1970                tmplen2 += 2;
1971
1972                gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1973                if (!isGV(gv2)) {
1974                    gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1975                    /* adjust ${"::_<newfilename"} to store the new file name */
1976                    GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1977                    /* The line number may differ. If that is the case,
1978                       alias the saved lines that are in the array.
1979                       Otherwise alias the whole array. */
1980                    if (CopLINE(PL_curcop) == line_num) {
1981                        GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1982                        GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1983                    }
1984                    else if (GvAV(cfgv)) {
1985                        AV * const av = GvAV(cfgv);
1986                        const line_t start = CopLINE(PL_curcop)+1;
1987                        SSize_t items = AvFILLp(av) - start;
1988                        if (items > 0) {
1989                            AV * const av2 = GvAVn(gv2);
1990                            SV **svp = AvARRAY(av) + start;
1991                            Size_t l = line_num+1;
1992                            while (items-- && l < SSize_t_MAX && l == (line_t)l)
1993                                av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1994                        }
1995                    }
1996                }
1997
1998                if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1999            }
2000        }
2001        CopFILE_free(PL_curcop);
2002        CopFILE_setn(PL_curcop, s, len);
2003    }
2004    CopLINE_set(PL_curcop, line_num);
2005}
2006
2007STATIC void
2008S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
2009{
2010    AV *av = CopFILEAVx(PL_curcop);
2011    if (av) {
2012        SV * sv;
2013        if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
2014        else {
2015            sv = *av_fetch(av, 0, 1);
2016            SvUPGRADE(sv, SVt_PVMG);
2017        }
2018        if (!SvPOK(sv)) SvPVCLEAR(sv);
2019        if (orig_sv)
2020            sv_catsv(sv, orig_sv);
2021        else
2022            sv_catpvn(sv, buf, len);
2023        if (!SvIOK(sv)) {
2024            (void)SvIOK_on(sv);
2025            SvIV_set(sv, 0);
2026        }
2027        if (PL_parser->preambling == NOLINE)
2028            av_store(av, CopLINE(PL_curcop), sv);
2029    }
2030}
2031
2032/*
2033 * skipspace
2034 * Called to gobble the appropriate amount and type of whitespace.
2035 * Skips comments as well.
2036 * Returns the next character after the whitespace that is skipped.
2037 *
2038 * peekspace
2039 * Same thing, but look ahead without incrementing line numbers or
2040 * adjusting PL_linestart.
2041 */
2042
2043#define skipspace(s) skipspace_flags(s, 0)
2044#define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
2045
2046char *
2047Perl_skipspace_flags(pTHX_ char *s, U32 flags)
2048{
2049    PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2050    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2051        while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2052            s++;
2053    } else {
2054        STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2055        PL_bufptr = s;
2056        lex_read_space(flags | LEX_KEEP_PREVIOUS |
2057                (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2058                    LEX_NO_NEXT_CHUNK : 0));
2059        s = PL_bufptr;
2060        PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2061        if (PL_linestart > PL_bufptr)
2062            PL_bufptr = PL_linestart;
2063        return s;
2064    }
2065    return s;
2066}
2067
2068/*
2069 * S_check_uni
2070 * Check the unary operators to ensure there's no ambiguity in how they're
2071 * used.  An ambiguous piece of code would be:
2072 *     rand + 5
2073 * This doesn't mean rand() + 5.  Because rand() is a unary operator,
2074 * the +5 is its argument.
2075 */
2076
2077STATIC void
2078S_check_uni(pTHX)
2079{
2080    const char *s;
2081
2082    if (PL_oldoldbufptr != PL_last_uni)
2083        return;
2084    while (isSPACE(*PL_last_uni))
2085        PL_last_uni++;
2086    s = PL_last_uni;
2087    while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2088        s += UTF ? UTF8SKIP(s) : 1;
2089    if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2090        return;
2091
2092    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2093                     "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2094                     UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2095}
2096
2097/*
2098 * LOP : macro to build a list operator.  Its behaviour has been replaced
2099 * with a subroutine, S_lop() for which LOP is just another name.
2100 */
2101
2102#define LOP(f,x) return lop(f,x,s)
2103
2104/*
2105 * S_lop
2106 * Build a list operator (or something that might be one).  The rules:
2107 *  - if we have a next token, then it's a list operator (no parens) for
2108 *    which the next token has already been parsed; e.g.,
2109 *       sort foo @args
2110 *       sort foo (@args)
2111 *  - if the next thing is an opening paren, then it's a function
2112 *  - else it's a list operator
2113 */
2114
2115STATIC I32
2116S_lop(pTHX_ I32 f, U8 x, char *s)
2117{
2118    PERL_ARGS_ASSERT_LOP;
2119
2120    pl_yylval.ival = f;
2121    CLINE;
2122    PL_bufptr = s;
2123    PL_last_lop = PL_oldbufptr;
2124    PL_last_lop_op = (OPCODE)f;
2125    if (PL_nexttoke)
2126        goto lstop;
2127    PL_expect = x;
2128    if (*s == '(')
2129        return REPORT(FUNC);
2130    s = skipspace(s);
2131    if (*s == '(')
2132        return REPORT(FUNC);
2133    else {
2134        lstop:
2135        if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2136            PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2137        return REPORT(LSTOP);
2138    }
2139}
2140
2141/*
2142 * S_force_next
2143 * When the lexer realizes it knows the next token (for instance,
2144 * it is reordering tokens for the parser) then it can call S_force_next
2145 * to know what token to return the next time the lexer is called.  Caller
2146 * will need to set PL_nextval[] and possibly PL_expect to ensure
2147 * the lexer handles the token correctly.
2148 */
2149
2150STATIC void
2151S_force_next(pTHX_ I32 type)
2152{
2153#ifdef DEBUGGING
2154    if (DEBUG_T_TEST) {
2155        PerlIO_printf(Perl_debug_log, "### forced token:\n");
2156        tokereport(type, &NEXTVAL_NEXTTOKE);
2157    }
2158#endif
2159    assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2160    PL_nexttype[PL_nexttoke] = type;
2161    PL_nexttoke++;
2162}
2163
2164/*
2165 * S_postderef
2166 *
2167 * This subroutine handles postfix deref syntax after the arrow has already
2168 * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2169 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2170 * only the first, leaving yylex to find the next.
2171 */
2172
2173static int
2174S_postderef(pTHX_ int const funny, char const next)
2175{
2176    assert(funny == DOLSHARP
2177        || funny == PERLY_DOLLAR
2178        || funny == PERLY_SNAIL
2179        || funny == PERLY_PERCENT_SIGN
2180        || funny == PERLY_AMPERSAND
2181        || funny == PERLY_STAR
2182    );
2183    if (next == '*') {
2184        PL_expect = XOPERATOR;
2185        if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2186            assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2187            PL_lex_state = LEX_INTERPEND;
2188            if (PERLY_SNAIL == funny)
2189                force_next(POSTJOIN);
2190        }
2191        force_next(PERLY_STAR);
2192        PL_bufptr+=2;
2193    }
2194    else {
2195        if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2196         && !PL_lex_brackets)
2197            PL_lex_dojoin = 2;
2198        PL_expect = XOPERATOR;
2199        PL_bufptr++;
2200    }
2201    return funny;
2202}
2203
2204void
2205Perl_yyunlex(pTHX)
2206{
2207    int yyc = PL_parser->yychar;
2208    if (yyc != YYEMPTY) {
2209        if (yyc) {
2210            NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211            if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2212                PL_lex_allbrackets--;
2213                PL_lex_brackets--;
2214                yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215            } else if (yyc == PERLY_PAREN_OPEN) {
2216                PL_lex_allbrackets--;
2217                yyc |= (2<<24);
2218            }
2219            force_next(yyc);
2220        }
2221        PL_parser->yychar = YYEMPTY;
2222    }
2223}
2224
2225STATIC SV *
2226S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2227{
2228    SV * const sv = newSVpvn_utf8(start, len,
2229                    ! IN_BYTES
2230                  &&  UTF
2231                  &&  len != 0
2232                  &&  is_utf8_non_invariant_string((const U8*)start, len));
2233    return sv;
2234}
2235
2236/*
2237 * S_force_word
2238 * When the lexer knows the next thing is a word (for instance, it has
2239 * just seen -> and it knows that the next char is a word char, then
2240 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2241 * lookahead.
2242 *
2243 * Arguments:
2244 *   char *start : buffer position (must be within PL_linestr)
2245 *   int token   : PL_next* will be this type of bare word
2246 *                 (e.g., METHCALL0,BAREWORD)
2247 *   int check_keyword : if true, Perl checks to make sure the word isn't
2248 *       a keyword (do this if the word is a label, e.g. goto FOO)
2249 *   int allow_pack : if true, : characters will also be allowed (require,
2250 *       use, etc. do this)
2251 */
2252
2253STATIC char *
2254S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2255{
2256    char *s;
2257    STRLEN len;
2258
2259    PERL_ARGS_ASSERT_FORCE_WORD;
2260
2261    start = skipspace(start);
2262    s = start;
2263    if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2264        || (allow_pack && *s == ':' && s[1] == ':') )
2265    {
2266        s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len, allow_pack);
2267        if (check_keyword) {
2268          char *s2 = PL_tokenbuf;
2269          STRLEN len2 = len;
2270          if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2271            s2 += sizeof("CORE::") - 1;
2272            len2 -= sizeof("CORE::") - 1;
2273          }
2274          if (keyword(s2, len2, 0))
2275            return start;
2276        }
2277        if (token == METHCALL0) {
2278            s = skipspace(s);
2279            if (*s == '(')
2280                PL_expect = XTERM;
2281            else {
2282                PL_expect = XOPERATOR;
2283            }
2284        }
2285        NEXTVAL_NEXTTOKE.opval
2286            = newSVOP(OP_CONST,0,
2287                           S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2288        NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2289        force_next(token);
2290    }
2291    return s;
2292}
2293
2294/*
2295 * S_force_ident
2296 * Called when the lexer wants $foo *foo &foo etc, but the program
2297 * text only contains the "foo" portion.  The first argument is a pointer
2298 * to the "foo", and the second argument is the type symbol to prefix.
2299 * Forces the next token to be a "BAREWORD".
2300 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2301 */
2302
2303STATIC void
2304S_force_ident(pTHX_ const char *s, int kind)
2305{
2306    PERL_ARGS_ASSERT_FORCE_IDENT;
2307
2308    if (s[0]) {
2309        const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2310        OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2311                                                                UTF ? SVf_UTF8 : 0));
2312        NEXTVAL_NEXTTOKE.opval = o;
2313        force_next(BAREWORD);
2314        if (kind) {
2315            o->op_private = OPpCONST_ENTERED;
2316            /* XXX see note in pp_entereval() for why we forgo typo
2317               warnings if the symbol must be introduced in an eval.
2318               GSAR 96-10-12 */
2319            gv_fetchpvn_flags(s, len,
2320                              (PL_in_eval ? GV_ADDMULTI
2321                              : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2322                              kind == PERLY_DOLLAR ? SVt_PV :
2323                              kind == PERLY_SNAIL ? SVt_PVAV :
2324                              kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2325                              SVt_PVGV
2326                              );
2327        }
2328    }
2329}
2330
2331static void
2332S_force_ident_maybe_lex(pTHX_ char pit)
2333{
2334    NEXTVAL_NEXTTOKE.ival = pit;
2335    force_next('p');
2336}
2337
2338NV
2339Perl_str_to_version(pTHX_ SV *sv)
2340{
2341    NV retval = 0.0;
2342    NV nshift = 1.0;
2343    STRLEN len;
2344    const char *start = SvPV_const(sv,len);
2345    const char * const end = start + len;
2346    const bool utf = cBOOL(SvUTF8(sv));
2347
2348    PERL_ARGS_ASSERT_STR_TO_VERSION;
2349
2350    while (start < end) {
2351        STRLEN skip;
2352        UV n;
2353        if (utf)
2354            n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2355        else {
2356            n = *(U8*)start;
2357            skip = 1;
2358        }
2359        retval += ((NV)n)/nshift;
2360        start += skip;
2361        nshift *= 1000;
2362    }
2363    return retval;
2364}
2365
2366/*
2367 * S_force_version
2368 * Forces the next token to be a version number.
2369 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2370 * and if "guessing" is TRUE, then no new token is created (and the caller
2371 * must use an alternative parsing method).
2372 */
2373
2374STATIC char *
2375S_force_version(pTHX_ char *s, int guessing)
2376{
2377    OP *version = NULL;
2378    char *d;
2379
2380    PERL_ARGS_ASSERT_FORCE_VERSION;
2381
2382    s = skipspace(s);
2383
2384    d = s;
2385    if (*d == 'v')
2386        d++;
2387    if (isDIGIT(*d)) {
2388        while (isDIGIT(*d) || *d == '_' || *d == '.')
2389            d++;
2390        if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2391            SV *ver;
2392            s = scan_num(s, &pl_yylval);
2393            version = pl_yylval.opval;
2394            ver = cSVOPx(version)->op_sv;
2395            if (SvPOK(ver) && !SvNIOK(ver)) {
2396                SvUPGRADE(ver, SVt_PVNV);
2397                SvNV_set(ver, str_to_version(ver));
2398                SvNOK_on(ver);		/* hint that it is a version */
2399            }
2400        }
2401        else if (guessing) {
2402            return s;
2403        }
2404    }
2405
2406    /* NOTE: The parser sees the package name and the VERSION swapped */
2407    NEXTVAL_NEXTTOKE.opval = version;
2408    force_next(BAREWORD);
2409
2410    return s;
2411}
2412
2413/*
2414 * S_force_strict_version
2415 * Forces the next token to be a version number using strict syntax rules.
2416 */
2417
2418STATIC char *
2419S_force_strict_version(pTHX_ char *s)
2420{
2421    OP *version = NULL;
2422    const char *errstr = NULL;
2423
2424    PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2425
2426    while (isSPACE(*s)) /* leading whitespace */
2427        s++;
2428
2429    if (is_STRICT_VERSION(s,&errstr)) {
2430        SV *ver = newSV_type(SVt_NULL);
2431        s = (char *)scan_version(s, ver, 0);
2432        version = newSVOP(OP_CONST, 0, ver);
2433    }
2434    else if ((*s != ';' && *s != ':' && *s != '{' && *s != '}' )
2435             && (s = skipspace(s), (*s != ';' && *s != ':' && *s != '{' && *s != '}' )))
2436    {
2437        PL_bufptr = s;
2438        if (errstr)
2439            yyerror(errstr); /* version required */
2440        return s;
2441    }
2442
2443    /* NOTE: The parser sees the package name and the VERSION swapped */
2444    NEXTVAL_NEXTTOKE.opval = version;
2445    force_next(BAREWORD);
2446
2447    return s;
2448}
2449
2450/*
2451 * S_tokeq
2452 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2453 * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2454 * unchanged, and a new SV containing the modified input is returned.
2455 */
2456
2457STATIC SV *
2458S_tokeq(pTHX_ SV *sv)
2459{
2460    char *s;
2461    char *send;
2462    char *d;
2463    SV *pv = sv;
2464
2465    PERL_ARGS_ASSERT_TOKEQ;
2466
2467    assert (SvPOK(sv));
2468    assert (SvLEN(sv));
2469    assert (!SvIsCOW(sv));
2470    if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2471        goto finish;
2472    s = SvPVX(sv);
2473    send = SvEND(sv);
2474    /* This is relying on the SV being "well formed" with a trailing '\0'  */
2475    while (s < send && !(*s == '\\' && s[1] == '\\'))
2476        s++;
2477    if (s == send)
2478        goto finish;
2479    d = s;
2480    if ( PL_hints & HINT_NEW_STRING ) {
2481        pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2482                            SVs_TEMP | SvUTF8(sv));
2483    }
2484    while (s < send) {
2485        if (*s == '\\') {
2486            if (s + 1 < send && (s[1] == '\\'))
2487                s++;		/* all that, just for this */
2488        }
2489        *d++ = *s++;
2490    }
2491    *d = '\0';
2492    SvCUR_set(sv, d - SvPVX_const(sv));
2493  finish:
2494    if ( PL_hints & HINT_NEW_STRING )
2495       return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2496    return sv;
2497}
2498
2499/*
2500 * Now come three functions related to double-quote context,
2501 * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2502 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2503 * interact with PL_lex_state, and create fake ( ... ) argument lists
2504 * to handle functions and concatenation.
2505 * For example,
2506 *   "foo\lbar"
2507 * is tokenised as
2508 *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2509 */
2510
2511/*
2512 * S_sublex_start
2513 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2514 *
2515 * Pattern matching will set PL_lex_op to the pattern-matching op to
2516 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2517 *
2518 * OP_CONST is easy--just make the new op and return.
2519 *
2520 * Everything else becomes a FUNC.
2521 *
2522 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2523 * had an OP_CONST.  This just sets us up for a
2524 * call to S_sublex_push().
2525 */
2526
2527STATIC I32
2528S_sublex_start(pTHX)
2529{
2530    const I32 op_type = pl_yylval.ival;
2531
2532    if (op_type == OP_NULL) {
2533        pl_yylval.opval = PL_lex_op;
2534        PL_lex_op = NULL;
2535        return THING;
2536    }
2537    if (op_type == OP_CONST) {
2538        SV *sv = PL_lex_stuff;
2539        PL_lex_stuff = NULL;
2540        sv = tokeq(sv);
2541
2542        if (SvTYPE(sv) == SVt_PVIV) {
2543            /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2544            STRLEN len;
2545            const char * const p = SvPV_const(sv, len);
2546            SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2547            SvREFCNT_dec(sv);
2548            sv = nsv;
2549        }
2550        pl_yylval.opval = newSVOP(op_type, 0, sv);
2551        return THING;
2552    }
2553
2554    PL_parser->lex_super_state = PL_lex_state;
2555    PL_parser->lex_sub_inwhat = (U16)op_type;
2556    PL_parser->lex_sub_op = PL_lex_op;
2557    PL_parser->sub_no_recover = FALSE;
2558    PL_parser->sub_error_count = PL_error_count;
2559    PL_lex_state = LEX_INTERPPUSH;
2560
2561    PL_expect = XTERM;
2562    if (PL_lex_op) {
2563        pl_yylval.opval = PL_lex_op;
2564        PL_lex_op = NULL;
2565        return PMFUNC;
2566    }
2567    else
2568        return FUNC;
2569}
2570
2571/*
2572 * S_sublex_push
2573 * Create a new scope to save the lexing state.  The scope will be
2574 * ended in S_sublex_done.  Returns a '(', starting the function arguments
2575 * to the uc, lc, etc. found before.
2576 * Sets PL_lex_state to LEX_INTERPCONCAT.
2577 */
2578
2579STATIC I32
2580S_sublex_push(pTHX)
2581{
2582    LEXSHARED *shared;
2583    const bool is_heredoc = PL_multi_close == '<';
2584    ENTER;
2585
2586    PL_lex_state = PL_parser->lex_super_state;
2587    SAVEI8(PL_lex_dojoin);
2588    SAVEI32(PL_lex_brackets);
2589    SAVEI32(PL_lex_allbrackets);
2590    SAVEI32(PL_lex_formbrack);
2591    SAVEI8(PL_lex_fakeeof);
2592    SAVEI32(PL_lex_casemods);
2593    SAVEI32(PL_lex_starts);
2594    SAVEI8(PL_lex_state);
2595    SAVESPTR(PL_lex_repl);
2596    SAVEVPTR(PL_lex_inpat);
2597    SAVEI16(PL_lex_inwhat);
2598    if (is_heredoc)
2599    {
2600        SAVECOPLINE(PL_curcop);
2601        SAVEI32(PL_multi_end);
2602        SAVEI32(PL_parser->herelines);
2603        PL_parser->herelines = 0;
2604    }
2605    SAVEIV(PL_multi_close);
2606    SAVEPPTR(PL_bufptr);
2607    SAVEPPTR(PL_bufend);
2608    SAVEPPTR(PL_oldbufptr);
2609    SAVEPPTR(PL_oldoldbufptr);
2610    SAVEPPTR(PL_last_lop);
2611    SAVEPPTR(PL_last_uni);
2612    SAVEPPTR(PL_linestart);
2613    SAVESPTR(PL_linestr);
2614    SAVEGENERICPV(PL_lex_brackstack);
2615    SAVEGENERICPV(PL_lex_casestack);
2616    SAVEGENERICPV(PL_parser->lex_shared);
2617    SAVEBOOL(PL_parser->lex_re_reparsing);
2618    SAVEI32(PL_copline);
2619
2620    /* The here-doc parser needs to be able to peek into outer lexing
2621       scopes to find the body of the here-doc.  So we put PL_linestr and
2622       PL_bufptr into lex_shared, to 'share' those values.
2623     */
2624    PL_parser->lex_shared->ls_linestr = PL_linestr;
2625    PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2626
2627    PL_linestr = PL_lex_stuff;
2628    PL_lex_repl = PL_parser->lex_sub_repl;
2629    PL_lex_stuff = NULL;
2630    PL_parser->lex_sub_repl = NULL;
2631
2632    /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2633       set for an inner quote-like operator and then an error causes scope-
2634       popping.  We must not have a PL_lex_stuff value left dangling, as
2635       that breaks assumptions elsewhere.  See bug #123617.  */
2636    SAVEGENERICSV(PL_lex_stuff);
2637    SAVEGENERICSV(PL_parser->lex_sub_repl);
2638
2639    PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2640        = SvPVX(PL_linestr);
2641    PL_bufend += SvCUR(PL_linestr);
2642    PL_last_lop = PL_last_uni = NULL;
2643    SAVEFREESV(PL_linestr);
2644    if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2645
2646    PL_lex_dojoin = FALSE;
2647    PL_lex_brackets = PL_lex_formbrack = 0;
2648    PL_lex_allbrackets = 0;
2649    PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2650    Newx(PL_lex_brackstack, 120, char);
2651    Newx(PL_lex_casestack, 12, char);
2652    PL_lex_casemods = 0;
2653    *PL_lex_casestack = '\0';
2654    PL_lex_starts = 0;
2655    PL_lex_state = LEX_INTERPCONCAT;
2656    if (is_heredoc)
2657        CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2658    PL_copline = NOLINE;
2659
2660    Newxz(shared, 1, LEXSHARED);
2661    shared->ls_prev = PL_parser->lex_shared;
2662    PL_parser->lex_shared = shared;
2663
2664    PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2665    if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2666    if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2667        PL_lex_inpat = PL_parser->lex_sub_op;
2668    else
2669        PL_lex_inpat = NULL;
2670
2671    PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2672    PL_in_eval &= ~EVAL_RE_REPARSING;
2673
2674    return SUBLEXSTART;
2675}
2676
2677/*
2678 * S_sublex_done
2679 * Restores lexer state after a S_sublex_push.
2680 */
2681
2682STATIC I32
2683S_sublex_done(pTHX)
2684{
2685    if (!PL_lex_starts++) {
2686        SV * const sv = newSVpvs("");
2687        if (SvUTF8(PL_linestr))
2688            SvUTF8_on(sv);
2689        PL_expect = XOPERATOR;
2690        pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2691        return THING;
2692    }
2693
2694    if (PL_lex_casemods) {		/* oops, we've got some unbalanced parens */
2695        PL_lex_state = LEX_INTERPCASEMOD;
2696        return yylex();
2697    }
2698
2699    /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2700    assert(PL_lex_inwhat != OP_TRANSR);
2701    if (PL_lex_repl) {
2702        assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2703        PL_linestr = PL_lex_repl;
2704        PL_lex_inpat = 0;
2705        PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2706        PL_bufend += SvCUR(PL_linestr);
2707        PL_last_lop = PL_last_uni = NULL;
2708        PL_lex_dojoin = FALSE;
2709        PL_lex_brackets = 0;
2710        PL_lex_allbrackets = 0;
2711        PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2712        PL_lex_casemods = 0;
2713        *PL_lex_casestack = '\0';
2714        PL_lex_starts = 0;
2715        if (SvEVALED(PL_lex_repl)) {
2716            PL_lex_state = LEX_INTERPNORMAL;
2717            PL_lex_starts++;
2718            /*	we don't clear PL_lex_repl here, so that we can check later
2719                whether this is an evalled subst; that means we rely on the
2720                logic to ensure sublex_done() is called again only via the
2721                branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2722        }
2723        else {
2724            PL_lex_state = LEX_INTERPCONCAT;
2725            PL_lex_repl = NULL;
2726        }
2727        if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2728            CopLINE(PL_curcop) +=
2729                ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2730                 + PL_parser->herelines;
2731            PL_parser->herelines = 0;
2732        }
2733        return PERLY_SLASH;
2734    }
2735    else {
2736        const line_t l = CopLINE(PL_curcop);
2737        LEAVE;
2738        if (PL_parser->sub_error_count != PL_error_count) {
2739            if (PL_parser->sub_no_recover) {
2740                yyquit();
2741                NOT_REACHED;
2742            }
2743        }
2744        if (PL_multi_close == '<')
2745            PL_parser->herelines += l - PL_multi_end;
2746        PL_bufend = SvPVX(PL_linestr);
2747        PL_bufend += SvCUR(PL_linestr);
2748        PL_expect = XOPERATOR;
2749        return SUBLEXEND;
2750    }
2751}
2752
2753HV *
2754Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2755                          const STRLEN context_len, const char ** error_msg)
2756{
2757    /* Load the official _charnames module if not already there.  The
2758     * parameters are just to give info for any error messages generated:
2759     *  char_name   a name to look up which is the reason for loading this
2760     *  context     'char_name' in the context in the input in which it appears
2761     *  context_len how many bytes 'context' occupies
2762     *  error_msg   *error_msg will be set to any error
2763     *
2764     *  Returns the ^H table if success; otherwise NULL */
2765
2766    unsigned int i;
2767    HV * table;
2768    SV **cvp;
2769    SV * res;
2770
2771    PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2772
2773    /* This loop is executed 1 1/2 times.  On the first time through, if it
2774     * isn't already loaded, try loading it, and iterate just once to see if it
2775     * worked.  */
2776    for (i = 0; i < 2; i++) {
2777        table = GvHV(PL_hintgv);		 /* ^H */
2778
2779        if (    table
2780            && (PL_hints & HINT_LOCALIZE_HH)
2781            && (cvp = hv_fetchs(table, "charnames", FALSE))
2782            &&  SvOK(*cvp))
2783        {
2784            return table;   /* Quit if already loaded */
2785        }
2786
2787        if (i == 0) {
2788            Perl_load_module(aTHX_
2789                0,
2790                newSVpvs("_charnames"),
2791
2792                /* version parameter; no need to specify it, as if we get too early
2793                * a version, will fail anyway, not being able to find 'charnames'
2794                * */
2795                NULL,
2796                newSVpvs(":full"),
2797                newSVpvs(":short"),
2798                NULL);
2799        }
2800    }
2801
2802    /* Here, it failed; new_constant will give appropriate error messages */
2803    *error_msg = NULL;
2804    res = new_constant( NULL, 0, "charnames", char_name, NULL,
2805                        context, context_len, error_msg);
2806    SvREFCNT_dec(res);
2807
2808    return NULL;
2809}
2810
2811STATIC SV*
2812S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2813{
2814    /* This justs wraps get_and_check_backslash_N_name() to output any error
2815     * message it returns. */
2816
2817    const char * error_msg = NULL;
2818    SV * result;
2819
2820    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2821
2822    /* charnames doesn't work well if there have been errors found */
2823    if (PL_error_count > 0) {
2824        return NULL;
2825    }
2826
2827    result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2828
2829    if (error_msg) {
2830        yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2831    }
2832
2833    return result;
2834}
2835
2836SV*
2837Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2838                                          const char* e,
2839                                          const bool is_utf8,
2840                                          const char ** error_msg)
2841{
2842    /* <s> points to first character of interior of \N{}, <e> to one beyond the
2843     * interior, hence to the "}".  Finds what the name resolves to, returning
2844     * an SV* containing it; NULL if no valid one found.
2845     *
2846     * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2847     * doesn't have to be. */
2848
2849    SV* char_name;
2850    SV* res;
2851    HV * table;
2852    SV **cvp;
2853    SV *cv;
2854    SV *rv;
2855    HV *stash;
2856
2857    /* Points to the beginning of the \N{... so that any messages include the
2858     * context of what's failing*/
2859    const char* context = s - 3;
2860    STRLEN context_len = e - context + 1; /* include all of \N{...} */
2861
2862
2863    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2864
2865    assert(e >= s);
2866    assert(s > (char *) 3);
2867
2868    while (s < e && isBLANK(*s)) {
2869        s++;
2870    }
2871
2872    while (s < e && isBLANK(*(e - 1))) {
2873        e--;
2874    }
2875
2876    char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2877
2878    if (!SvCUR(char_name)) {
2879        SvREFCNT_dec_NN(char_name);
2880        /* diag_listed_as: Unknown charname '%s' */
2881        *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2882        return NULL;
2883    }
2884
2885    /* Autoload the charnames module */
2886
2887    table = load_charnames(char_name, context, context_len, error_msg);
2888    if (table == NULL) {
2889        return NULL;
2890    }
2891
2892    *error_msg = NULL;
2893    res = new_constant( NULL, 0, "charnames", char_name, NULL,
2894                        context, context_len, error_msg);
2895    if (*error_msg) {
2896        *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2897
2898        SvREFCNT_dec(res);
2899        return NULL;
2900    }
2901
2902    /* See if the charnames handler is the Perl core's, and if so, we can skip
2903     * the validation needed for a user-supplied one, as Perl's does its own
2904     * validation. */
2905    cvp = hv_fetchs(table, "charnames", FALSE);
2906    if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2907        SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2908    {
2909        const char * const name = HvNAME(stash);
2910         if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2911           return res;
2912       }
2913    }
2914
2915    /* Here, it isn't Perl's charname handler.  We can't rely on a
2916     * user-supplied handler to validate the input name.  For non-ut8 input,
2917     * look to see that the first character is legal.  Then loop through the
2918     * rest checking that each is a continuation */
2919
2920    /* This code makes the reasonable assumption that the only Latin1-range
2921     * characters that begin a character name alias are alphabetic, otherwise
2922     * would have to create a isCHARNAME_BEGIN macro */
2923
2924    if (! is_utf8) {
2925        if (! isALPHAU(*s)) {
2926            goto bad_charname;
2927        }
2928        s++;
2929        while (s < e) {
2930            if (! isCHARNAME_CONT(*s)) {
2931                goto bad_charname;
2932            }
2933            if (*s == ' ' && *(s-1) == ' ') {
2934                goto multi_spaces;
2935            }
2936            s++;
2937        }
2938    }
2939    else {
2940        /* Similarly for utf8.  For invariants can check directly; for other
2941         * Latin1, can calculate their code point and check; otherwise  use an
2942         * inversion list */
2943        if (UTF8_IS_INVARIANT(*s)) {
2944            if (! isALPHAU(*s)) {
2945                goto bad_charname;
2946            }
2947            s++;
2948        } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2949            if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2950                goto bad_charname;
2951            }
2952            s += 2;
2953        }
2954        else {
2955            if (! _invlist_contains_cp(PL_utf8_charname_begin,
2956                                       utf8_to_uvchr_buf((U8 *) s,
2957                                                         (U8 *) e,
2958                                                         NULL)))
2959            {
2960                goto bad_charname;
2961            }
2962            s += UTF8SKIP(s);
2963        }
2964
2965        while (s < e) {
2966            if (UTF8_IS_INVARIANT(*s)) {
2967                if (! isCHARNAME_CONT(*s)) {
2968                    goto bad_charname;
2969                }
2970                if (*s == ' ' && *(s-1) == ' ') {
2971                    goto multi_spaces;
2972                }
2973                s++;
2974            }
2975            else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2976                if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2977                {
2978                    goto bad_charname;
2979                }
2980                s += 2;
2981            }
2982            else {
2983                if (! _invlist_contains_cp(PL_utf8_charname_continue,
2984                                           utf8_to_uvchr_buf((U8 *) s,
2985                                                             (U8 *) e,
2986                                                             NULL)))
2987                {
2988                    goto bad_charname;
2989                }
2990                s += UTF8SKIP(s);
2991            }
2992        }
2993    }
2994    if (*(s-1) == ' ') {
2995        /* diag_listed_as: charnames alias definitions may not contain
2996                           trailing white-space; marked by <-- HERE in %s
2997         */
2998        *error_msg = Perl_form(aTHX_
2999            "charnames alias definitions may not contain trailing "
3000            "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
3001            (int)(s - context + 1), context,
3002            (int)(e - s + 1), s + 1);
3003        return NULL;
3004    }
3005
3006    if (SvUTF8(res)) { /* Don't accept malformed charname value */
3007        const U8* first_bad_char_loc;
3008        STRLEN len;
3009        const char* const str = SvPV_const(res, len);
3010        if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
3011                                          &first_bad_char_loc)))
3012        {
3013            _force_out_malformed_utf8_message(first_bad_char_loc,
3014                                              (U8 *) PL_parser->bufend,
3015                                              0,
3016                                              0 /* 0 means don't die */ );
3017            /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
3018                               immediately after '%s' */
3019            *error_msg = Perl_form(aTHX_
3020                "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
3021                 (int) context_len, context,
3022                 (int) ((char *) first_bad_char_loc - str), str);
3023            return NULL;
3024        }
3025    }
3026
3027    return res;
3028
3029  bad_charname: {
3030
3031        /* The final %.*s makes sure that should the trailing NUL be missing
3032         * that this print won't run off the end of the string */
3033        /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
3034                           in \N{%s} */
3035        *error_msg = Perl_form(aTHX_
3036            "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3037            (int)(s - context + 1), context,
3038            (int)(e - s + 1), s + 1);
3039        return NULL;
3040    }
3041
3042  multi_spaces:
3043        /* diag_listed_as: charnames alias definitions may not contain a
3044                           sequence of multiple spaces; marked by <-- HERE
3045                           in %s */
3046        *error_msg = Perl_form(aTHX_
3047            "charnames alias definitions may not contain a sequence of "
3048            "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3049            (int)(s - context + 1), context,
3050            (int)(e - s + 1), s + 1);
3051        return NULL;
3052}
3053
3054/*
3055  scan_const
3056
3057  Extracts the next constant part of a pattern, double-quoted string,
3058  or transliteration.  This is terrifying code.
3059
3060  For example, in parsing the double-quoted string "ab\x63$d", it would
3061  stop at the '$' and return an OP_CONST containing 'abc'.
3062
3063  It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3064  processing a pattern (PL_lex_inpat is true), a transliteration
3065  (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3066
3067  Returns a pointer to the character scanned up to. If this is
3068  advanced from the start pointer supplied (i.e. if anything was
3069  successfully parsed), will leave an OP_CONST for the substring scanned
3070  in pl_yylval. Caller must intuit reason for not parsing further
3071  by looking at the next characters herself.
3072
3073  In patterns:
3074    expand:
3075      \N{FOO}  => \N{U+hex_for_character_FOO}
3076      (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3077
3078    pass through:
3079        all other \-char, including \N and \N{ apart from \N{ABC}
3080
3081    stops on:
3082        @ and $ where it appears to be a var, but not for $ as tail anchor
3083        \l \L \u \U \Q \E
3084        (?{  or  (??{ or (*{
3085
3086  In transliterations:
3087    characters are VERY literal, except for - not at the start or end
3088    of the string, which indicates a range.  However some backslash sequences
3089    are recognized: \r, \n, and the like
3090                    \007 \o{}, \x{}, \N{}
3091    If all elements in the transliteration are below 256,
3092    scan_const expands the range to the full set of intermediate
3093    characters. If the range is in utf8, the hyphen is replaced with
3094    a certain range mark which will be handled by pmtrans() in op.c.
3095
3096  In double-quoted strings:
3097    backslashes:
3098      all those recognized in transliterations
3099      deprecated backrefs: \1 (in substitution replacements)
3100      case and quoting: \U \Q \E
3101    stops on @ and $
3102
3103  scan_const does *not* construct ops to handle interpolated strings.
3104  It stops processing as soon as it finds an embedded $ or @ variable
3105  and leaves it to the caller to work out what's going on.
3106
3107  embedded arrays (whether in pattern or not) could be:
3108      @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3109
3110  $ in double-quoted strings must be the symbol of an embedded scalar.
3111
3112  $ in pattern could be $foo or could be tail anchor.  Assumption:
3113  it's a tail anchor if $ is the last thing in the string, or if it's
3114  followed by one of "()| \r\n\t"
3115
3116  \1 (backreferences) are turned into $1 in substitutions
3117
3118  The structure of the code is
3119      while (there's a character to process) {
3120          handle transliteration ranges
3121          skip regexp comments /(?#comment)/ and codes /(?{code})/ ((*{code})/
3122          skip #-initiated comments in //x patterns
3123          check for embedded arrays
3124          check for embedded scalars
3125          if (backslash) {
3126              deprecate \1 in substitution replacements
3127              handle string-changing backslashes \l \U \Q \E, etc.
3128              switch (what was escaped) {
3129                  handle \- in a transliteration (becomes a literal -)
3130                  if a pattern and not \N{, go treat as regular character
3131                  handle \132 (octal characters)
3132                  handle \x15 and \x{1234} (hex characters)
3133                  handle \N{name} (named characters, also \N{3,5} in a pattern)
3134                  handle \cV (control characters)
3135                  handle printf-style backslashes (\f, \r, \n, etc)
3136              } (end switch)
3137              continue
3138          } (end if backslash)
3139          handle regular character
3140    } (end while character to read)
3141
3142*/
3143
3144STATIC char *
3145S_scan_const(pTHX_ char *start)
3146{
3147    const char * const send = PL_bufend;/* end of the constant */
3148    SV *sv = newSV(send - start);       /* sv for the constant.  See note below
3149                                           on sizing. */
3150    char *s = start;			/* start of the constant */
3151    char *d = SvPVX(sv);		/* destination for copies */
3152    bool dorange = FALSE;               /* are we in a translit range? */
3153    bool didrange = FALSE;              /* did we just finish a range? */
3154    bool in_charclass = FALSE;          /* within /[...]/ */
3155    const bool s_is_utf8 = cBOOL(UTF);  /* Is the source string assumed to be
3156                                           UTF8?  But, this can show as true
3157                                           when the source isn't utf8, as for
3158                                           example when it is entirely composed
3159                                           of hex constants */
3160    bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3161    STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3162                                           number of characters found so far
3163                                           that will expand (into 2 bytes)
3164                                           should we have to convert to
3165                                           UTF-8) */
3166    SV *res;		                /* result from charnames */
3167    STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3168                                   high-end character is temporarily placed */
3169
3170    /* Does something require special handling in tr/// ?  This avoids extra
3171     * work in a less likely case.  As such, khw didn't feel it was worth
3172     * adding any branches to the more mainline code to handle this, which
3173     * means that this doesn't get set in some circumstances when things like
3174     * \x{100} get expanded out.  As a result there needs to be extra testing
3175     * done in the tr code */
3176    bool has_above_latin1 = FALSE;
3177
3178    /* Note on sizing:  The scanned constant is placed into sv, which is
3179     * initialized by newSV() assuming one byte of output for every byte of
3180     * input.  This routine expects newSV() to allocate an extra byte for a
3181     * trailing NUL, which this routine will append if it gets to the end of
3182     * the input.  There may be more bytes of input than output (eg., \N{LATIN
3183     * CAPITAL LETTER A}), or more output than input if the constant ends up
3184     * recoded to utf8, but each time a construct is found that might increase
3185     * the needed size, SvGROW() is called.  Its size parameter each time is
3186     * based on the best guess estimate at the time, namely the length used so
3187     * far, plus the length the current construct will occupy, plus room for
3188     * the trailing NUL, plus one byte for every input byte still unscanned */
3189
3190    UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3191                       before set */
3192#ifdef EBCDIC
3193    int backslash_N = 0;            /* ? was the character from \N{} */
3194    int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3195                                       platform-specific like \x65 */
3196#endif
3197
3198    PERL_ARGS_ASSERT_SCAN_CONST;
3199
3200    assert(PL_lex_inwhat != OP_TRANSR);
3201
3202    /* Protect sv from errors and fatal warnings. */
3203    ENTER_with_name("scan_const");
3204    SAVEFREESV(sv);
3205
3206    /* A bunch of code in the loop below assumes that if s[n] exists and is not
3207     * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3208     * valid */
3209    assert(*send == '\0');
3210
3211    while (s < send
3212           || dorange   /* Handle tr/// range at right edge of input */
3213    ) {
3214
3215        /* get transliterations out of the way (they're most literal) */
3216        if (PL_lex_inwhat == OP_TRANS) {
3217
3218            /* But there isn't any special handling necessary unless there is a
3219             * range, so for most cases we just drop down and handle the value
3220             * as any other.  There are two exceptions.
3221             *
3222             * 1.  A hyphen indicates that we are actually going to have a
3223             *     range.  In this case, skip the '-', set a flag, then drop
3224             *     down to handle what should be the end range value.
3225             * 2.  After we've handled that value, the next time through, that
3226             *     flag is set and we fix up the range.
3227             *
3228             * Ranges entirely within Latin1 are expanded out entirely, in
3229             * order to make the transliteration a simple table look-up.
3230             * Ranges that extend above Latin1 have to be done differently, so
3231             * there is no advantage to expanding them here, so they are
3232             * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3233             * a byte that can't occur in legal UTF-8, and hence can signify a
3234             * hyphen without any possible ambiguity.  On EBCDIC machines, if
3235             * the range is expressed as Unicode, the Latin1 portion is
3236             * expanded out even if the range extends above Latin1.  This is
3237             * because each code point in it has to be processed here
3238             * individually to get its native translation */
3239
3240            if (! dorange) {
3241
3242                /* Here, we don't think we're in a range.  If the new character
3243                 * is not a hyphen; or if it is a hyphen, but it's too close to
3244                 * either edge to indicate a range, or if we haven't output any
3245                 * characters yet then it's a regular character. */
3246                if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3247                {
3248
3249                    /* A regular character.  Process like any other, but first
3250                     * clear any flags */
3251                    didrange = FALSE;
3252                    dorange = FALSE;
3253#ifdef EBCDIC
3254                    non_portable_endpoint = 0;
3255                    backslash_N = 0;
3256#endif
3257                    /* The tests here for being above Latin1 and similar ones
3258                     * in the following 'else' suffice to find all such
3259                     * occurences in the constant, except those added by a
3260                     * backslash escape sequence, like \x{100}.  Mostly, those
3261                     * set 'has_above_latin1' as appropriate */
3262                    if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3263                        has_above_latin1 = TRUE;
3264                    }
3265
3266                    /* Drops down to generic code to process current byte */
3267                }
3268                else {  /* Is a '-' in the context where it means a range */
3269                    if (didrange) { /* Something like y/A-C-Z// */
3270                        Perl_croak(aTHX_ "Ambiguous range in transliteration"
3271                                         " operator");
3272                    }
3273
3274                    dorange = TRUE;
3275
3276                    s++;    /* Skip past the hyphen */
3277
3278                    /* d now points to where the end-range character will be
3279                     * placed.  Drop down to get that character.  We'll finish
3280                     * processing the range the next time through the loop */
3281
3282                    if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3283                        has_above_latin1 = TRUE;
3284                    }
3285
3286                    /* Drops down to generic code to process current byte */
3287                }
3288            }  /* End of not a range */
3289            else {
3290                /* Here we have parsed a range.  Now must handle it.  At this
3291                 * point:
3292                 * 'sv' is a SV* that contains the output string we are
3293                 *      constructing.  The final two characters in that string
3294                 *      are the range start and range end, in order.
3295                 * 'd'  points to just beyond the range end in the 'sv' string,
3296                 *      where we would next place something
3297                 */
3298                char * max_ptr;
3299                char * min_ptr;
3300                IV range_min;
3301                IV range_max;	/* last character in range */
3302                STRLEN grow;
3303                Size_t offset_to_min = 0;
3304                Size_t extras = 0;
3305#ifdef EBCDIC
3306                bool convert_unicode;
3307                IV real_range_max = 0;
3308#endif
3309                /* Get the code point values of the range ends. */
3310                max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3311                offset_to_max = max_ptr - SvPVX_const(sv);
3312                if (d_is_utf8) {
3313                    /* We know the utf8 is valid, because we just constructed
3314                     * it ourselves in previous loop iterations */
3315                    min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3316                    range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3317                    range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3318
3319                    /* This compensates for not all code setting
3320                     * 'has_above_latin1', so that we don't skip stuff that
3321                     * should be executed */
3322                    if (range_max > 255) {
3323                        has_above_latin1 = TRUE;
3324                    }
3325                }
3326                else {
3327                    min_ptr = max_ptr - 1;
3328                    range_min = * (U8*) min_ptr;
3329                    range_max = * (U8*) max_ptr;
3330                }
3331
3332                /* If the range is just a single code point, like tr/a-a/.../,
3333                 * that code point is already in the output, twice.  We can
3334                 * just back up over the second instance and avoid all the rest
3335                 * of the work.  But if it is a variant character, it's been
3336                 * counted twice, so decrement.  (This unlikely scenario is
3337                 * special cased, like the one for a range of 2 code points
3338                 * below, only because the main-line code below needs a range
3339                 * of 3 or more to work without special casing.  Might as well
3340                 * get it out of the way now.) */
3341                if (UNLIKELY(range_max == range_min)) {
3342                    d = max_ptr;
3343                    if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3344                        utf8_variant_count--;
3345                    }
3346                    goto range_done;
3347                }
3348
3349#ifdef EBCDIC
3350                /* On EBCDIC platforms, we may have to deal with portable
3351                 * ranges.  These happen if at least one range endpoint is a
3352                 * Unicode value (\N{...}), or if the range is a subset of
3353                 * [A-Z] or [a-z], and both ends are literal characters,
3354                 * like 'A', and not like \x{C1} */
3355                convert_unicode =
3356                               cBOOL(backslash_N)   /* \N{} forces Unicode,
3357                                                       hence portable range */
3358                    || (     ! non_portable_endpoint
3359                        && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3360                           || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3361                if (convert_unicode) {
3362
3363                    /* Special handling is needed for these portable ranges.
3364                     * They are defined to be in Unicode terms, which includes
3365                     * all the Unicode code points between the end points.
3366                     * Convert to Unicode to get the Unicode range.  Later we
3367                     * will convert each code point in the range back to
3368                     * native.  */
3369                    range_min = NATIVE_TO_UNI(range_min);
3370                    range_max = NATIVE_TO_UNI(range_max);
3371                }
3372#endif
3373
3374                if (range_min > range_max) {
3375#ifdef EBCDIC
3376                    if (convert_unicode) {
3377                        /* Need to convert back to native for meaningful
3378                         * messages for this platform */
3379                        range_min = UNI_TO_NATIVE(range_min);
3380                        range_max = UNI_TO_NATIVE(range_max);
3381                    }
3382#endif
3383                    /* Use the characters themselves for the error message if
3384                     * ASCII printables; otherwise some visible representation
3385                     * of them */
3386                    if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3387                        Perl_croak(aTHX_
3388                         "Invalid range \"%c-%c\" in transliteration operator",
3389                         (char)range_min, (char)range_max);
3390                    }
3391#ifdef EBCDIC
3392                    else if (convert_unicode) {
3393        /* diag_listed_as: Invalid range "%s" in transliteration operator */
3394                        Perl_croak(aTHX_
3395                           "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3396                           UVXf "}\" in transliteration operator",
3397                           range_min, range_max);
3398                    }
3399#endif
3400                    else {
3401        /* diag_listed_as: Invalid range "%s" in transliteration operator */
3402                        Perl_croak(aTHX_
3403                           "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3404                           " in transliteration operator",
3405                           range_min, range_max);
3406                    }
3407                }
3408
3409                /* If the range is exactly two code points long, they are
3410                 * already both in the output */
3411                if (UNLIKELY(range_min + 1 == range_max)) {
3412                    goto range_done;
3413                }
3414
3415                /* Here the range contains at least 3 code points */
3416
3417                if (d_is_utf8) {
3418
3419                    /* If everything in the transliteration is below 256, we
3420                     * can avoid special handling later.  A translation table
3421                     * for each of those bytes is created by op.c.  So we
3422                     * expand out all ranges to their constituent code points.
3423                     * But if we've encountered something above 255, the
3424                     * expanding won't help, so skip doing that.  But if it's
3425                     * EBCDIC, we may have to look at each character below 256
3426                     * if we have to convert to/from Unicode values */
3427                    if (   has_above_latin1
3428#ifdef EBCDIC
3429                        && (range_min > 255 || ! convert_unicode)
3430#endif
3431                    ) {
3432                        const STRLEN off = d - SvPVX(sv);
3433                        const STRLEN extra = 1 + (send - s) + 1;
3434                        char *e;
3435
3436                        /* Move the high character one byte to the right; then
3437                         * insert between it and the range begin, an illegal
3438                         * byte which serves to indicate this is a range (using
3439                         * a '-' would be ambiguous). */
3440
3441                        if (off + extra > SvLEN(sv)) {
3442                            d = off + SvGROW(sv, off + extra);
3443                            max_ptr = d - off + offset_to_max;
3444                        }
3445
3446                        e = d++;
3447                        while (e-- > max_ptr) {
3448                            *(e + 1) = *e;
3449                        }
3450                        *(e + 1) = (char) RANGE_INDICATOR;
3451                        goto range_done;
3452                    }
3453
3454                    /* Here, we're going to expand out the range.  For EBCDIC
3455                     * the range can extend above 255 (not so in ASCII), so
3456                     * for EBCDIC, split it into the parts above and below
3457                     * 255/256 */
3458#ifdef EBCDIC
3459                    if (range_max > 255) {
3460                        real_range_max = range_max;
3461                        range_max = 255;
3462                    }
3463#endif
3464                }
3465
3466                /* Here we need to expand out the string to contain each
3467                 * character in the range.  Grow the output to handle this.
3468                 * For non-UTF8, we need a byte for each code point in the
3469                 * range, minus the three that we've already allocated for: the
3470                 * hyphen, the min, and the max.  For UTF-8, we need this
3471                 * plus an extra byte for each code point that occupies two
3472                 * bytes (is variant) when in UTF-8 (except we've already
3473                 * allocated for the end points, including if they are
3474                 * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3475                 * platforms, it's easy to calculate a precise number.  To
3476                 * start, we count the variants in the range, which we need
3477                 * elsewhere in this function anyway.  (For the case where it
3478                 * isn't easy to calculate, 'extras' has been initialized to 0,
3479                 * and the calculation is done in a loop further down.) */
3480#ifdef EBCDIC
3481                if (convert_unicode)
3482#endif
3483                {
3484                    /* This is executed unconditionally on ASCII, and for
3485                     * Unicode ranges on EBCDIC.  Under these conditions, all
3486                     * code points above a certain value are variant; and none
3487                     * under that value are.  We just need to find out how much
3488                     * of the range is above that value.  We don't count the
3489                     * end points here, as they will already have been counted
3490                     * as they were parsed. */
3491                    if (range_min >= UTF_CONTINUATION_MARK) {
3492
3493                        /* The whole range is made up of variants */
3494                        extras = (range_max - 1) - (range_min + 1) + 1;
3495                    }
3496                    else if (range_max >= UTF_CONTINUATION_MARK) {
3497
3498                        /* Only the higher portion of the range is variants */
3499                        extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3500                    }
3501
3502                    utf8_variant_count += extras;
3503                }
3504
3505                /* The base growth is the number of code points in the range,
3506                 * not including the endpoints, which have already been sized
3507                 * for (and output).  We don't subtract for the hyphen, as it
3508                 * has been parsed but not output, and the SvGROW below is
3509                 * based only on what's been output plus what's left to parse.
3510                 * */
3511                grow = (range_max - 1) - (range_min + 1) + 1;
3512
3513                if (d_is_utf8) {
3514#ifdef EBCDIC
3515                    /* In some cases in EBCDIC, we haven't yet calculated a
3516                     * precise amount needed for the UTF-8 variants.  Just
3517                     * assume the worst case, that everything will expand by a
3518                     * byte */
3519                    if (! convert_unicode) {
3520                        grow *= 2;
3521                    }
3522                    else
3523#endif
3524                    {
3525                        /* Otherwise we know exactly how many variants there
3526                         * are in the range. */
3527                        grow += extras;
3528                    }
3529                }
3530
3531                /* Grow, but position the output to overwrite the range min end
3532                 * point, because in some cases we overwrite that */
3533                SvCUR_set(sv, d - SvPVX_const(sv));
3534                offset_to_min = min_ptr - SvPVX_const(sv);
3535
3536                /* See Note on sizing above. */
3537                d = offset_to_min + SvGROW(sv, SvCUR(sv)
3538                                             + (send - s)
3539                                             + grow
3540                                             + 1 /* Trailing NUL */ );
3541
3542                /* Now, we can expand out the range. */
3543#ifdef EBCDIC
3544                if (convert_unicode) {
3545                    SSize_t i;
3546
3547                    /* Recall that the min and max are now in Unicode terms, so
3548                     * we have to convert each character to its native
3549                     * equivalent */
3550                    if (d_is_utf8) {
3551                        for (i = range_min; i <= range_max; i++) {
3552                            append_utf8_from_native_byte(
3553                                                    LATIN1_TO_NATIVE((U8) i),
3554                                                    (U8 **) &d);
3555                        }
3556                    }
3557                    else {
3558                        for (i = range_min; i <= range_max; i++) {
3559                            *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3560                        }
3561                    }
3562                }
3563                else
3564#endif
3565                /* Always gets run for ASCII, and sometimes for EBCDIC. */
3566                {
3567                    /* Here, no conversions are necessary, which means that the
3568                     * first character in the range is already in 'd' and
3569                     * valid, so we can skip overwriting it */
3570                    if (d_is_utf8) {
3571                        SSize_t i;
3572                        d += UTF8SKIP(d);
3573                        for (i = range_min + 1; i <= range_max; i++) {
3574                            append_utf8_from_native_byte((U8) i, (U8 **) &d);
3575                        }
3576                    }
3577                    else {
3578                        SSize_t i;
3579                        d++;
3580                        assert(range_min + 1 <= range_max);
3581                        for (i = range_min + 1; i < range_max; i++) {
3582#ifdef EBCDIC
3583                            /* In this case on EBCDIC, we haven't calculated
3584                             * the variants.  Do it here, as we go along */
3585                            if (! UVCHR_IS_INVARIANT(i)) {
3586                                utf8_variant_count++;
3587                            }
3588#endif
3589                            *d++ = (char)i;
3590                        }
3591
3592                        /* The range_max is done outside the loop so as to
3593                         * avoid having to special case not incrementing
3594                         * 'utf8_variant_count' on EBCDIC (it's already been
3595                         * counted when originally parsed) */
3596                        *d++ = (char) range_max;
3597                    }
3598                }
3599
3600#ifdef EBCDIC
3601                /* If the original range extended above 255, add in that
3602                 * portion. */
3603                if (real_range_max) {
3604                    *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3605                    *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3606                    if (real_range_max > 0x100) {
3607                        if (real_range_max > 0x101) {
3608                            *d++ = (char) RANGE_INDICATOR;
3609                        }
3610                        d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3611                    }
3612                }
3613#endif
3614
3615              range_done:
3616                /* mark the range as done, and continue */
3617                didrange = TRUE;
3618                dorange = FALSE;
3619#ifdef EBCDIC
3620                non_portable_endpoint = 0;
3621                backslash_N = 0;
3622#endif
3623                continue;
3624            } /* End of is a range */
3625        } /* End of transliteration.  Joins main code after these else's */
3626        else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3627            char *s1 = s-1;
3628            int esc = 0;
3629            while (s1 >= start && *s1-- == '\\')
3630                esc = !esc;
3631            if (!esc)
3632                in_charclass = TRUE;
3633        }
3634        else if (*s == ']' && PL_lex_inpat && in_charclass) {
3635            char *s1 = s-1;
3636            int esc = 0;
3637            while (s1 >= start && *s1-- == '\\')
3638                esc = !esc;
3639            if (!esc)
3640                in_charclass = FALSE;
3641        }
3642            /* skip for regexp comments /(?#comment)/, except for the last
3643             * char, which will be done separately.  Stop on (?{..}) and
3644             * friends (??{ ... }) or (*{ ... }) */
3645        else if (*s == '(' && PL_lex_inpat && (s[1] == '?' || s[1] == '*') && !in_charclass) {
3646            if (s[1] == '?' && s[2] == '#') {
3647                if (s_is_utf8) {
3648                    PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3649
3650                    while (s + len < send && *s != ')') {
3651                        Copy(s, d, len, U8);
3652                        d += len;
3653                        s += len;
3654                        len = UTF8_SAFE_SKIP(s, send);
3655                    }
3656                }
3657                else while (s+1 < send && *s != ')') {
3658                    *d++ = *s++;
3659                }
3660            }
3661            else
3662            if (!PL_lex_casemods &&
3663                /* The following should match regcomp.c */
3664                ((s[1] == '?' && (s[2] == '{'                        /* (?{ ... })  */
3665                              || (s[2] == '?' && s[3] == '{'))) ||   /* (??{ ... }) */
3666                 (s[1] == '*' && (s[2] == '{' )))                    /* (*{ ... })  */
3667            ){
3668                break;
3669            }
3670        }
3671            /* likewise skip #-initiated comments in //x patterns */
3672        else if (*s == '#'
3673                 && PL_lex_inpat
3674                 && !in_charclass
3675                 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3676        {
3677            while (s < send && *s != '\n')
3678                *d++ = *s++;
3679        }
3680            /* no further processing of single-quoted regex */
3681        else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3682            goto default_action;
3683
3684            /* check for embedded arrays
3685             * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3686             */
3687        else if (*s == '@' && s[1]) {
3688            if (UTF
3689               ? isIDFIRST_utf8_safe(s+1, send)
3690               : isWORDCHAR_A(s[1]))
3691            {
3692                break;
3693            }
3694            if (memCHRs(":'{$", s[1]))
3695                break;
3696            if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3697                break; /* in regexp, neither @+ nor @- are interpolated */
3698        }
3699            /* check for embedded scalars.  only stop if we're sure it's a
3700             * variable.  */
3701        else if (*s == '$') {
3702            if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
3703                break;
3704            if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3705                if (s[1] == '\\') {
3706                    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3707                                   "Possible unintended interpolation of $\\ in regex");
3708                }
3709                break;		/* in regexp, $ might be tail anchor */
3710            }
3711        }
3712
3713        /* End of else if chain - OP_TRANS rejoin rest */
3714
3715        if (UNLIKELY(s >= send)) {
3716            assert(s == send);
3717            break;
3718        }
3719
3720        /* backslashes */
3721        if (*s == '\\' && s+1 < send) {
3722            char* bslash = s;   /* point to beginning \ */
3723            char* rbrace;	/* point to ending '}' */
3724            char* e;	        /* 1 past the meat (non-blanks) before the
3725                                   brace */
3726            s++;
3727
3728            /* warn on \1 - \9 in substitution replacements, but note that \11
3729             * is an octal; and \19 is \1 followed by '9' */
3730            if (PL_lex_inwhat == OP_SUBST
3731                && !PL_lex_inpat
3732                && isDIGIT(*s)
3733                && *s != '0'
3734                && !isDIGIT(s[1]))
3735            {
3736                /* diag_listed_as: \%d better written as $%d */
3737                Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3738                s = bslash;
3739                *s = '$';
3740                break;
3741            }
3742
3743            /* string-change backslash escapes */
3744            if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3745                s = bslash;
3746                break;
3747            }
3748            /* In a pattern, process \N, but skip any other backslash escapes.
3749             * This is because we don't want to translate an escape sequence
3750             * into a meta symbol and have the regex compiler use the meta
3751             * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3752             * in spite of this, we do have to process \N here while the proper
3753             * charnames handler is in scope.  See bugs #56444 and #62056.
3754             *
3755             * There is a complication because \N in a pattern may also stand
3756             * for 'match a non-nl', and not mean a charname, in which case its
3757             * processing should be deferred to the regex compiler.  To be a
3758             * charname it must be followed immediately by a '{', and not look
3759             * like \N followed by a curly quantifier, i.e., not something like
3760             * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3761             * quantifier */
3762            else if (PL_lex_inpat
3763                    && (*s != 'N'
3764                        || s[1] != '{'
3765                        || regcurly(s + 1, send, NULL)))
3766            {
3767                *d++ = '\\';
3768                goto default_action;
3769            }
3770
3771            switch (*s) {
3772            default:
3773                {
3774                    if ((isALPHANUMERIC(*s)))
3775                        Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3776                                       "Unrecognized escape \\%c passed through",
3777                                       *s);
3778                    /* default action is to copy the quoted character */
3779                    goto default_action;
3780                }
3781
3782            /* eg. \132 indicates the octal constant 0132 */
3783            case '0': case '1': case '2': case '3':
3784            case '4': case '5': case '6': case '7':
3785                {
3786                    I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3787                              | PERL_SCAN_NOTIFY_ILLDIGIT;
3788                    STRLEN len = 3;
3789                    uv = grok_oct(s, &len, &flags, NULL);
3790                    s += len;
3791                    if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3792                        && s < send
3793                        && isDIGIT(*s)  /* like \08, \178 */
3794                        && ckWARN(WARN_MISC))
3795                    {
3796                        Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3797                            form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3798                    }
3799                }
3800                goto NUM_ESCAPE_INSERT;
3801
3802            /* eg. \o{24} indicates the octal constant \024 */
3803            case 'o':
3804                {
3805                    const char* error;
3806
3807                    if (! grok_bslash_o(&s, send,
3808                                               &uv, &error,
3809                                               NULL,
3810                                               FALSE, /* Not strict */
3811                                               FALSE, /* No illegal cp's */
3812                                               UTF))
3813                    {
3814                        yyerror(error);
3815                        uv = 0; /* drop through to ensure range ends are set */
3816                    }
3817                    goto NUM_ESCAPE_INSERT;
3818                }
3819
3820            /* eg. \x24 indicates the hex constant 0x24 */
3821            case 'x':
3822                {
3823                    const char* error;
3824
3825                    if (! grok_bslash_x(&s, send,
3826                                               &uv, &error,
3827                                               NULL,
3828                                               FALSE, /* Not strict */
3829                                               FALSE, /* No illegal cp's */
3830                                               UTF))
3831                    {
3832                        yyerror(error);
3833                        uv = 0; /* drop through to ensure range ends are set */
3834                    }
3835                }
3836
3837              NUM_ESCAPE_INSERT:
3838                /* Insert oct or hex escaped character. */
3839
3840                /* Here uv is the ordinal of the next character being added */
3841                if (UVCHR_IS_INVARIANT(uv)) {
3842                    *d++ = (char) uv;
3843                }
3844                else {
3845                    if (!d_is_utf8 && uv > 255) {
3846
3847                        /* Here, 'uv' won't fit unless we convert to UTF-8.
3848                         * If we've only seen invariants so far, all we have to
3849                         * do is turn on the flag */
3850                        if (utf8_variant_count == 0) {
3851                            SvUTF8_on(sv);
3852                        }
3853                        else {
3854                            SvCUR_set(sv, d - SvPVX_const(sv));
3855                            SvPOK_on(sv);
3856                            *d = '\0';
3857
3858                            sv_utf8_upgrade_flags_grow(
3859                                           sv,
3860                                           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3861
3862                                           /* Since we're having to grow here,
3863                                            * make sure we have enough room for
3864                                            * this escape and a NUL, so the
3865                                            * code immediately below won't have
3866                                            * to actually grow again */
3867                                          UVCHR_SKIP(uv)
3868                                        + (STRLEN)(send - s) + 1);
3869                            d = SvPVX(sv) + SvCUR(sv);
3870                        }
3871
3872                        has_above_latin1 = TRUE;
3873                        d_is_utf8 = TRUE;
3874                    }
3875
3876                    if (! d_is_utf8) {
3877                        *d++ = (char)uv;
3878                        utf8_variant_count++;
3879                    }
3880                    else {
3881                       /* Usually, there will already be enough room in 'sv'
3882                        * since such escapes are likely longer than any UTF-8
3883                        * sequence they can end up as.  This isn't the case on
3884                        * EBCDIC where \x{40000000} contains 12 bytes, and the
3885                        * UTF-8 for it contains 14.  And, we have to allow for
3886                        * a trailing NUL.  It probably can't happen on ASCII
3887                        * platforms, but be safe.  See Note on sizing above. */
3888                        const STRLEN needed = d - SvPVX(sv)
3889                                            + UVCHR_SKIP(uv)
3890                                            + (send - s)
3891                                            + 1;
3892                        if (UNLIKELY(needed > SvLEN(sv))) {
3893                            SvCUR_set(sv, d - SvPVX_const(sv));
3894                            d = SvCUR(sv) + SvGROW(sv, needed);
3895                        }
3896
3897                        d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3898                                                   (ckWARN(WARN_PORTABLE))
3899                                                   ? UNICODE_WARN_PERL_EXTENDED
3900                                                   : 0);
3901                    }
3902                }
3903#ifdef EBCDIC
3904                non_portable_endpoint++;
3905#endif
3906                continue;
3907
3908            case 'N':
3909                /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3910                 * named character, like \N{LATIN SMALL LETTER A}, or a named
3911                 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3912                 * GRAVE} (except y/// can't handle the latter, croaking).  For
3913                 * convenience all three forms are referred to as "named
3914                 * characters" below.
3915                 *
3916                 * For patterns, \N also can mean to match a non-newline.  Code
3917                 * before this 'switch' statement should already have handled
3918                 * this situation, and hence this code only has to deal with
3919                 * the named character cases.
3920                 *
3921                 * For non-patterns, the named characters are converted to
3922                 * their string equivalents.  In patterns, named characters are
3923                 * not converted to their ultimate forms for the same reasons
3924                 * that other escapes aren't (mainly that the ultimate
3925                 * character could be considered a meta-symbol by the regex
3926                 * compiler).  Instead, they are converted to the \N{U+...}
3927                 * form to get the value from the charnames that is in effect
3928                 * right now, while preserving the fact that it was a named
3929                 * character, so that the regex compiler knows this.
3930                 *
3931                 * The structure of this section of code (besides checking for
3932                 * errors and upgrading to utf8) is:
3933                 *    If the named character is of the form \N{U+...}, pass it
3934                 *      through if a pattern; otherwise convert the code point
3935                 *      to utf8
3936                 *    Otherwise must be some \N{NAME}: convert to
3937                 *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3938                 *
3939                 * Transliteration is an exception.  The conversion to utf8 is
3940                 * only done if the code point requires it to be representable.
3941                 *
3942                 * Here, 's' points to the 'N'; the test below is guaranteed to
3943                 * succeed if we are being called on a pattern, as we already
3944                 * know from a test above that the next character is a '{'.  A
3945                 * non-pattern \N must mean 'named character', which requires
3946                 * braces */
3947                s++;
3948                if (*s != '{') {
3949                    yyerror("Missing braces on \\N{}");
3950                    *d++ = '\0';
3951                    continue;
3952                }
3953                s++;
3954
3955                /* If there is no matching '}', it is an error. */
3956                if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3957                    if (! PL_lex_inpat) {
3958                        yyerror("Missing right brace on \\N{}");
3959                    } else {
3960                        yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3961                    }
3962                    yyquit(); /* Have exhausted the input. */
3963                }
3964
3965                /* Here it looks like a named character */
3966                while (s < rbrace && isBLANK(*s)) {
3967                    s++;
3968                }
3969
3970                e = rbrace;
3971                while (s < e && isBLANK(*(e - 1))) {
3972                    e--;
3973                }
3974
3975                if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3976                    s += 2;	    /* Skip to next char after the 'U+' */
3977                    if (PL_lex_inpat) {
3978
3979                        /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3980                        /* Check the syntax.  */
3981                        if (!isXDIGIT(*s)) {
3982                          bad_NU:
3983                            yyerror(
3984                                "Invalid hexadecimal number in \\N{U+...}"
3985                            );
3986                            s = rbrace + 1;
3987                            *d++ = '\0';
3988                            continue;
3989                        }
3990                        while (++s < e) {
3991                            if (isXDIGIT(*s))
3992                                continue;
3993                            else if ((*s == '.' || *s == '_')
3994                                  && isXDIGIT(s[1]))
3995                                continue;
3996                            goto bad_NU;
3997                        }
3998
3999                        /* Pass everything through unchanged.
4000                         * +1 is to include the '}' */
4001                        Copy(bslash, d, rbrace - bslash + 1, char);
4002                        d += rbrace - bslash + 1;
4003                    }
4004                    else {  /* Not a pattern: convert the hex to string */
4005                        I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
4006                                  | PERL_SCAN_SILENT_ILLDIGIT
4007                                  | PERL_SCAN_SILENT_OVERFLOW
4008                                  | PERL_SCAN_DISALLOW_PREFIX;
4009                        STRLEN len = e - s;
4010
4011                        uv = grok_hex(s, &len, &flags, NULL);
4012                        if (len == 0 || (len != (STRLEN)(e - s)))
4013                            goto bad_NU;
4014
4015                        if (    uv > MAX_LEGAL_CP
4016                            || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
4017                        {
4018                            yyerror(form_cp_too_large_msg(16, s, len, 0));
4019                            uv = 0; /* drop through to ensure range ends are
4020                                       set */
4021                        }
4022
4023                         /* For non-tr///, if the destination is not in utf8,
4024                          * unconditionally recode it to be so.  This is
4025                          * because \N{} implies Unicode semantics, and scalars
4026                          * have to be in utf8 to guarantee those semantics.
4027                          * tr/// doesn't care about Unicode rules, so no need
4028                          * there to upgrade to UTF-8 for small enough code
4029                          * points */
4030                        if (! d_is_utf8 && (   uv > 0xFF
4031                                           || PL_lex_inwhat != OP_TRANS))
4032                        {
4033                            /* See Note on sizing above.  */
4034                            const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
4035
4036                            SvCUR_set(sv, d - SvPVX_const(sv));
4037                            SvPOK_on(sv);
4038                            *d = '\0';
4039
4040                            if (utf8_variant_count == 0) {
4041                                SvUTF8_on(sv);
4042                                d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4043                            }
4044                            else {
4045                                sv_utf8_upgrade_flags_grow(
4046                                               sv,
4047                                               SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4048                                               extra);
4049                                d = SvPVX(sv) + SvCUR(sv);
4050                            }
4051
4052                            d_is_utf8 = TRUE;
4053                            has_above_latin1 = TRUE;
4054                        }
4055
4056                        /* Add the (Unicode) code point to the output. */
4057                        if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
4058                            *d++ = (char) LATIN1_TO_NATIVE(uv);
4059                        }
4060                        else {
4061                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4062                                                   (ckWARN(WARN_PORTABLE))
4063                                                   ? UNICODE_WARN_PERL_EXTENDED
4064                                                   : 0);
4065                        }
4066                    }
4067                }
4068                else     /* Here is \N{NAME} but not \N{U+...}. */
4069                     if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4070                {   /* Failed.  We should die eventually, but for now use a NUL
4071                       to keep parsing */
4072                    *d++ = '\0';
4073                }
4074                else {  /* Successfully evaluated the name */
4075                    STRLEN len;
4076                    const char *str = SvPV_const(res, len);
4077                    if (PL_lex_inpat) {
4078
4079                        if (! len) { /* The name resolved to an empty string */
4080                            const char empty_N[] = "\\N{_}";
4081                            Copy(empty_N, d, sizeof(empty_N) - 1, char);
4082                            d += sizeof(empty_N) - 1;
4083                        }
4084                        else {
4085                            /* In order to not lose information for the regex
4086                            * compiler, pass the result in the specially made
4087                            * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4088                            * the code points in hex of each character
4089                            * returned by charnames */
4090
4091                            const char *str_end = str + len;
4092                            const STRLEN off = d - SvPVX_const(sv);
4093
4094                            if (! SvUTF8(res)) {
4095                                /* For the non-UTF-8 case, we can determine the
4096                                 * exact length needed without having to parse
4097                                 * through the string.  Each character takes up
4098                                 * 2 hex digits plus either a trailing dot or
4099                                 * the "}" */
4100                                const char initial_text[] = "\\N{U+";
4101                                const STRLEN initial_len = sizeof(initial_text)
4102                                                           - 1;
4103                                d = off + SvGROW(sv, off
4104                                                    + 3 * len
4105
4106                                                    /* +1 for trailing NUL */
4107                                                    + initial_len + 1
4108
4109                                                    + (STRLEN)(send - rbrace));
4110                                Copy(initial_text, d, initial_len, char);
4111                                d += initial_len;
4112                                while (str < str_end) {
4113                                    char hex_string[4];
4114                                    int len =
4115                                        my_snprintf(hex_string,
4116                                                  sizeof(hex_string),
4117                                                  "%02X.",
4118
4119                                                  /* The regex compiler is
4120                                                   * expecting Unicode, not
4121                                                   * native */
4122                                                  NATIVE_TO_LATIN1(*str));
4123                                    PERL_MY_SNPRINTF_POST_GUARD(len,
4124                                                           sizeof(hex_string));
4125                                    Copy(hex_string, d, 3, char);
4126                                    d += 3;
4127                                    str++;
4128                                }
4129                                d--;    /* Below, we will overwrite the final
4130                                           dot with a right brace */
4131                            }
4132                            else {
4133                                STRLEN char_length; /* cur char's byte length */
4134
4135                                /* and the number of bytes after this is
4136                                 * translated into hex digits */
4137                                STRLEN output_length;
4138
4139                                /* 2 hex per byte; 2 chars for '\N'; 2 chars
4140                                 * for max('U+', '.'); and 1 for NUL */
4141                                char hex_string[2 * UTF8_MAXBYTES + 5];
4142
4143                                /* Get the first character of the result. */
4144                                U32 uv = utf8n_to_uvchr((U8 *) str,
4145                                                        len,
4146                                                        &char_length,
4147                                                        UTF8_ALLOW_ANYUV);
4148                                /* Convert first code point to Unicode hex,
4149                                 * including the boiler plate before it. */
4150                                output_length =
4151                                    my_snprintf(hex_string, sizeof(hex_string),
4152                                             "\\N{U+%X",
4153                                             (unsigned int) NATIVE_TO_UNI(uv));
4154
4155                                /* Make sure there is enough space to hold it */
4156                                d = off + SvGROW(sv, off
4157                                                    + output_length
4158                                                    + (STRLEN)(send - rbrace)
4159                                                    + 2);	/* '}' + NUL */
4160                                /* And output it */
4161                                Copy(hex_string, d, output_length, char);
4162                                d += output_length;
4163
4164                                /* For each subsequent character, append dot and
4165                                * its Unicode code point in hex */
4166                                while ((str += char_length) < str_end) {
4167                                    const STRLEN off = d - SvPVX_const(sv);
4168                                    U32 uv = utf8n_to_uvchr((U8 *) str,
4169                                                            str_end - str,
4170                                                            &char_length,
4171                                                            UTF8_ALLOW_ANYUV);
4172                                    output_length =
4173                                        my_snprintf(hex_string,
4174                                             sizeof(hex_string),
4175                                             ".%X",
4176                                             (unsigned int) NATIVE_TO_UNI(uv));
4177
4178                                    d = off + SvGROW(sv, off
4179                                                        + output_length
4180                                                        + (STRLEN)(send - rbrace)
4181                                                        + 2);	/* '}' +  NUL */
4182                                    Copy(hex_string, d, output_length, char);
4183                                    d += output_length;
4184                                }
4185                            }
4186
4187                            *d++ = '}';	/* Done.  Add the trailing brace */
4188                        }
4189                    }
4190                    else { /* Here, not in a pattern.  Convert the name to a
4191                            * string. */
4192
4193                        if (PL_lex_inwhat == OP_TRANS) {
4194                            str = SvPV_const(res, len);
4195                            if (len > ((SvUTF8(res))
4196                                       ? UTF8SKIP(str)
4197                                       : 1U))
4198                            {
4199                                yyerror(Perl_form(aTHX_
4200                                    "%.*s must not be a named sequence"
4201                                    " in transliteration operator",
4202                                        /*  +1 to include the "}" */
4203                                    (int) (rbrace + 1 - start), start));
4204                                *d++ = '\0';
4205                                goto end_backslash_N;
4206                            }
4207
4208                            if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4209                                has_above_latin1 = TRUE;
4210                            }
4211
4212                        }
4213                        else if (! SvUTF8(res)) {
4214                            /* Make sure \N{} return is UTF-8.  This is because
4215                             * \N{} implies Unicode semantics, and scalars have
4216                             * to be in utf8 to guarantee those semantics; but
4217                             * not needed in tr/// */
4218                            sv_utf8_upgrade_flags(res, 0);
4219                            str = SvPV_const(res, len);
4220                        }
4221
4222                         /* Upgrade destination to be utf8 if this new
4223                          * component is */
4224                        if (! d_is_utf8 && SvUTF8(res)) {
4225                            /* See Note on sizing above.  */
4226                            const STRLEN extra = len + (send - s) + 1;
4227
4228                            SvCUR_set(sv, d - SvPVX_const(sv));
4229                            SvPOK_on(sv);
4230                            *d = '\0';
4231
4232                            if (utf8_variant_count == 0) {
4233                                SvUTF8_on(sv);
4234                                d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4235                            }
4236                            else {
4237                                sv_utf8_upgrade_flags_grow(sv,
4238                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4239                                                extra);
4240                                d = SvPVX(sv) + SvCUR(sv);
4241                            }
4242                            d_is_utf8 = TRUE;
4243                        } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4244
4245                            /* See Note on sizing above.  (NOTE: SvCUR() is not
4246                             * set correctly here). */
4247                            const STRLEN extra = len + (send - rbrace) + 1;
4248                            const STRLEN off = d - SvPVX_const(sv);
4249                            d = off + SvGROW(sv, off + extra);
4250                        }
4251                        Copy(str, d, len, char);
4252                        d += len;
4253                    }
4254
4255                    SvREFCNT_dec(res);
4256
4257                } /* End \N{NAME} */
4258
4259              end_backslash_N:
4260#ifdef EBCDIC
4261                backslash_N++; /* \N{} is defined to be Unicode */
4262#endif
4263                s = rbrace + 1;  /* Point to just after the '}' */
4264                continue;
4265
4266            /* \c is a control character */
4267            case 'c':
4268                s++;
4269                if (s < send) {
4270                    const char * message;
4271
4272                    if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4273                        yyerror(message);
4274                        yyquit();   /* Have always immediately croaked on
4275                                       errors in this */
4276                    }
4277                    d++;
4278                }
4279                else {
4280                    yyerror("Missing control char name in \\c");
4281                    yyquit();   /* Are at end of input, no sense continuing */
4282                }
4283#ifdef EBCDIC
4284                non_portable_endpoint++;
4285#endif
4286                break;
4287
4288            /* printf-style backslashes, formfeeds, newlines, etc */
4289            case 'b':
4290                *d++ = '\b';
4291                break;
4292            case 'n':
4293                *d++ = '\n';
4294                break;
4295            case 'r':
4296                *d++ = '\r';
4297                break;
4298            case 'f':
4299                *d++ = '\f';
4300                break;
4301            case 't':
4302                *d++ = '\t';
4303                break;
4304            case 'e':
4305                *d++ = ESC_NATIVE;
4306                break;
4307            case 'a':
4308                *d++ = '\a';
4309                break;
4310            } /* end switch */
4311
4312            s++;
4313            continue;
4314        } /* end if (backslash) */
4315
4316    default_action:
4317        /* Just copy the input to the output, though we may have to convert
4318         * to/from UTF-8.
4319         *
4320         * If the input has the same representation in UTF-8 as not, it will be
4321         * a single byte, and we don't care about UTF8ness; just copy the byte */
4322        if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4323            *d++ = *s++;
4324        }
4325        else if (! s_is_utf8 && ! d_is_utf8) {
4326            /* If neither source nor output is UTF-8, is also a single byte,
4327             * just copy it; but this byte counts should we later have to
4328             * convert to UTF-8 */
4329            *d++ = *s++;
4330            utf8_variant_count++;
4331        }
4332        else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4333            const STRLEN len = UTF8SKIP(s);
4334
4335            /* We expect the source to have already been checked for
4336             * malformedness */
4337            assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4338
4339            Copy(s, d, len, U8);
4340            d += len;
4341            s += len;
4342        }
4343        else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4344            STRLEN need = send - s + 1; /* See Note on sizing above. */
4345
4346            SvCUR_set(sv, d - SvPVX_const(sv));
4347            SvPOK_on(sv);
4348            *d = '\0';
4349
4350            if (utf8_variant_count == 0) {
4351                SvUTF8_on(sv);
4352                d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4353            }
4354            else {
4355                sv_utf8_upgrade_flags_grow(sv,
4356                                           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4357                                           need);
4358                d = SvPVX(sv) + SvCUR(sv);
4359            }
4360            d_is_utf8 = TRUE;
4361            goto default_action; /* Redo, having upgraded so both are UTF-8 */
4362        }
4363        else {  /* UTF8ness matters: convert this non-UTF8 source char to
4364                   UTF-8 for output.  It will occupy 2 bytes, but don't include
4365                   the input byte since we haven't incremented 's' yet. See
4366                   Note on sizing above. */
4367            const STRLEN off = d - SvPVX(sv);
4368            const STRLEN extra = 2 + (send - s - 1) + 1;
4369            if (off + extra > SvLEN(sv)) {
4370                d = off + SvGROW(sv, off + extra);
4371            }
4372            *d++ = UTF8_EIGHT_BIT_HI(*s);
4373            *d++ = UTF8_EIGHT_BIT_LO(*s);
4374            s++;
4375        }
4376    } /* while loop to process each character */
4377
4378    {
4379        const STRLEN off = d - SvPVX(sv);
4380
4381        /* See if room for the terminating NUL */
4382        if (UNLIKELY(off >= SvLEN(sv))) {
4383
4384#ifndef DEBUGGING
4385
4386            if (off > SvLEN(sv))
4387#endif
4388                Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4389                        " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4390
4391            /* Whew!  Here we don't have room for the terminating NUL, but
4392             * everything else so far has fit.  It's not too late to grow
4393             * to fit the NUL and continue on.  But it is a bug, as the code
4394             * above was supposed to have made room for this, so under
4395             * DEBUGGING builds, we panic anyway.  */
4396            d = off + SvGROW(sv, off + 1);
4397        }
4398    }
4399
4400    /* terminate the string and set up the sv */
4401    *d = '\0';
4402    SvCUR_set(sv, d - SvPVX_const(sv));
4403
4404    SvPOK_on(sv);
4405    if (d_is_utf8) {
4406        SvUTF8_on(sv);
4407    }
4408
4409    /* shrink the sv if we allocated more than we used */
4410    if (SvCUR(sv) + 5 < SvLEN(sv)) {
4411        SvPV_shrink_to_cur(sv);
4412    }
4413
4414    /* return the substring (via pl_yylval) only if we parsed anything */
4415    if (s > start) {
4416        char *s2 = start;
4417        for (; s2 < s; s2++) {
4418            if (*s2 == '\n')
4419                COPLINE_INC_WITH_HERELINES;
4420        }
4421        SvREFCNT_inc_simple_void_NN(sv);
4422        if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4423            && ! PL_parser->lex_re_reparsing)
4424        {
4425            const char *const key = PL_lex_inpat ? "qr" : "q";
4426            const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4427            const char *type;
4428            STRLEN typelen;
4429
4430            if (PL_lex_inwhat == OP_TRANS) {
4431                type = "tr";
4432                typelen = 2;
4433            } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4434                type = "s";
4435                typelen = 1;
4436            } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4437                type = "q";
4438                typelen = 1;
4439            } else {
4440                type = "qq";
4441                typelen = 2;
4442            }
4443
4444            sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4445                                type, typelen, NULL);
4446        }
4447        pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4448    }
4449    LEAVE_with_name("scan_const");
4450    return s;
4451}
4452
4453/* S_intuit_more
4454 * Returns TRUE if there's more to the expression (e.g., a subscript),
4455 * FALSE otherwise.
4456 *
4457 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4458 *
4459 * ->[ and ->{ return TRUE
4460 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4461 * { and [ outside a pattern are always subscripts, so return TRUE
4462 * if we're outside a pattern and it's not { or [, then return FALSE
4463 * if we're in a pattern and the first char is a {
4464 *   {4,5} (any digits around the comma) returns FALSE
4465 * if we're in a pattern and the first char is a [
4466 *   [] returns FALSE
4467 *   [SOMETHING] has a funky algorithm to decide whether it's a
4468 *      character class or not.  It has to deal with things like
4469 *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4470 * anything else returns TRUE
4471 */
4472
4473/* This is the one truly awful dwimmer necessary to conflate C and sed. */
4474
4475STATIC int
4476S_intuit_more(pTHX_ char *s, char *e)
4477{
4478    PERL_ARGS_ASSERT_INTUIT_MORE;
4479
4480    if (PL_lex_brackets)
4481        return TRUE;
4482    if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4483        return TRUE;
4484    if (*s == '-' && s[1] == '>'
4485     && FEATURE_POSTDEREF_QQ_IS_ENABLED
4486     && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4487        ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4488        return TRUE;
4489    if (*s != '{' && *s != '[')
4490        return FALSE;
4491    PL_parser->sub_no_recover = TRUE;
4492    if (!PL_lex_inpat)
4493        return TRUE;
4494
4495    /* In a pattern, so maybe we have {n,m}. */
4496    if (*s == '{') {
4497        if (regcurly(s, e, NULL)) {
4498            return FALSE;
4499        }
4500        return TRUE;
4501    }
4502
4503    /* On the other hand, maybe we have a character class */
4504
4505    s++;
4506    if (*s == ']' || *s == '^')
4507        return FALSE;
4508    else {
4509        /* this is terrifying, and it works */
4510        int weight;
4511        char seen[256];
4512        const char * const send = (char *) memchr(s, ']', e - s);
4513        unsigned char un_char, last_un_char;
4514        char tmpbuf[sizeof PL_tokenbuf * 4];
4515
4516        if (!send)		/* has to be an expression */
4517            return TRUE;
4518        weight = 2;		/* let's weigh the evidence */
4519
4520        if (*s == '$')
4521            weight -= 3;
4522        else if (isDIGIT(*s)) {
4523            if (s[1] != ']') {
4524                if (isDIGIT(s[1]) && s[2] == ']')
4525                    weight -= 10;
4526            }
4527            else
4528                weight -= 100;
4529        }
4530        Zero(seen,256,char);
4531        un_char = 255;
4532        for (; s < send; s++) {
4533            last_un_char = un_char;
4534            un_char = (unsigned char)*s;
4535            switch (*s) {
4536            case '@':
4537            case '&':
4538            case '$':
4539                weight -= seen[un_char] * 10;
4540                if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4541                    int len;
4542                    scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4543                    len = (int)strlen(tmpbuf);
4544                    if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4545                                                    UTF ? SVf_UTF8 : 0, SVt_PV))
4546                        weight -= 100;
4547                    else
4548                        weight -= 10;
4549                }
4550                else if (*s == '$'
4551                         && s[1]
4552                         && memCHRs("[#!%*<>()-=",s[1]))
4553                {
4554                    if (/*{*/ memCHRs("])} =",s[2]))
4555                        weight -= 10;
4556                    else
4557                        weight -= 1;
4558                }
4559                break;
4560            case '\\':
4561                un_char = 254;
4562                if (s[1]) {
4563                    if (memCHRs("wds]",s[1]))
4564                        weight += 100;
4565                    else if (seen[(U8)'\''] || seen[(U8)'"'])
4566                        weight += 1;
4567                    else if (memCHRs("rnftbxcav",s[1]))
4568                        weight += 40;
4569                    else if (isDIGIT(s[1])) {
4570                        weight += 40;
4571                        while (s[1] && isDIGIT(s[1]))
4572                            s++;
4573                    }
4574                }
4575                else
4576                    weight += 100;
4577                break;
4578            case '-':
4579                if (s[1] == '\\')
4580                    weight += 50;
4581                if (memCHRs("aA01! ",last_un_char))
4582                    weight += 30;
4583                if (memCHRs("zZ79~",s[1]))
4584                    weight += 30;
4585                if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4586                    weight -= 5;	/* cope with negative subscript */
4587                break;
4588            default:
4589                if (!isWORDCHAR(last_un_char)
4590                    && !(last_un_char == '$' || last_un_char == '@'
4591                         || last_un_char == '&')
4592                    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4593                    char *d = s;
4594                    while (isALPHA(*s))
4595                        s++;
4596                    if (keyword(d, s - d, 0))
4597                        weight -= 150;
4598                }
4599                if (un_char == last_un_char + 1)
4600                    weight += 5;
4601                weight -= seen[un_char];
4602                break;
4603            }
4604            seen[un_char]++;
4605        }
4606        if (weight >= 0)	/* probably a character class */
4607            return FALSE;
4608    }
4609
4610    return TRUE;
4611}
4612
4613/*
4614 * S_intuit_method
4615 *
4616 * Does all the checking to disambiguate
4617 *   foo bar
4618 * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4619 * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args).
4620 *
4621 * First argument is the stuff after the first token, e.g. "bar".
4622 *
4623 * Not a method if foo is a filehandle.
4624 * Not a method if foo is a subroutine prototyped to take a filehandle.
4625 * Not a method if it's really "Foo $bar"
4626 * Method if it's "foo $bar"
4627 * Not a method if it's really "print foo $bar"
4628 * Method if it's really "foo package::" (interpreted as package->foo)
4629 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4630 * Not a method if bar is a filehandle or package, but is quoted with
4631 *   =>
4632 */
4633
4634STATIC int
4635S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4636{
4637    char *s = start + (*start == '$');
4638    char tmpbuf[sizeof PL_tokenbuf];
4639    STRLEN len;
4640    GV* indirgv;
4641        /* Mustn't actually add anything to a symbol table.
4642           But also don't want to "initialise" any placeholder
4643           constants that might already be there into full
4644           blown PVGVs with attached PVCV.  */
4645    GV * const gv =
4646        ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4647
4648    PERL_ARGS_ASSERT_INTUIT_METHOD;
4649
4650    if (!FEATURE_INDIRECT_IS_ENABLED)
4651        return 0;
4652
4653    if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4654            return 0;
4655    if (cv && SvPOK(cv)) {
4656        const char *proto = CvPROTO(cv);
4657        if (proto) {
4658            while (*proto && (isSPACE(*proto) || *proto == ';'))
4659                proto++;
4660            if (*proto == '*')
4661                return 0;
4662        }
4663    }
4664
4665    if (*start == '$') {
4666        SSize_t start_off = start - SvPVX(PL_linestr);
4667        if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4668            || isUPPER(*PL_tokenbuf))
4669            return 0;
4670        /* this could be $# */
4671        if (isSPACE(*s))
4672            s = skipspace(s);
4673        PL_bufptr = SvPVX(PL_linestr) + start_off;
4674        PL_expect = XREF;
4675        return *s == '(' ? METHCALL : METHCALL0;
4676    }
4677
4678    s = scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
4679    /* start is the beginning of the possible filehandle/object,
4680     * and s is the end of it
4681     * tmpbuf is a copy of it (but with single quotes as double colons)
4682     */
4683
4684    if (!keyword(tmpbuf, len, 0)) {
4685        if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4686            len -= 2;
4687            tmpbuf[len] = '\0';
4688            goto bare_package;
4689        }
4690        indirgv = gv_fetchpvn_flags(tmpbuf, len,
4691                                    GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4692                                    SVt_PVCV);
4693        if (indirgv && SvTYPE(indirgv) != SVt_NULL
4694         && (!isGV(indirgv) || GvCVu(indirgv)))
4695            return 0;
4696        /* filehandle or package name makes it a method */
4697        if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4698            s = skipspace(s);
4699            if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4700                return 0;	/* no assumptions -- "=>" quotes bareword */
4701      bare_package:
4702            NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4703                                                  S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4704            NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4705            PL_expect = XTERM;
4706            force_next(BAREWORD);
4707            PL_bufptr = s;
4708            return *s == '(' ? METHCALL : METHCALL0;
4709        }
4710    }
4711    return 0;
4712}
4713
4714/* Encoded script support. filter_add() effectively inserts a
4715 * 'pre-processing' function into the current source input stream.
4716 * Note that the filter function only applies to the current source file
4717 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4718 *
4719 * The datasv parameter (which may be NULL) can be used to pass
4720 * private data to this instance of the filter. The filter function
4721 * can recover the SV using the FILTER_DATA macro and use it to
4722 * store private buffers and state information.
4723 *
4724 * The supplied datasv parameter is upgraded to a PVIO type
4725 * and the IoDIRP/IoANY field is used to store the function pointer,
4726 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4727 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4728 * private use must be set using malloc'd pointers.
4729 */
4730
4731SV *
4732Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4733{
4734    if (!funcp)
4735        return NULL;
4736
4737    if (!PL_parser)
4738        return NULL;
4739
4740    if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4741        Perl_croak(aTHX_ "Source filters apply only to byte streams");
4742
4743    if (!PL_rsfp_filters)
4744        PL_rsfp_filters = newAV();
4745    if (!datasv)
4746        datasv = newSV(0);
4747    SvUPGRADE(datasv, SVt_PVIO);
4748    IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4749    IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4750    DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4751                          FPTR2DPTR(void *, IoANY(datasv)),
4752                          SvPV_nolen(datasv)));
4753    av_unshift(PL_rsfp_filters, 1);
4754    av_store(PL_rsfp_filters, 0, datasv) ;
4755    if (
4756        !PL_parser->filtered
4757     && PL_parser->lex_flags & LEX_EVALBYTES
4758     && PL_bufptr < PL_bufend
4759    ) {
4760        const char *s = PL_bufptr;
4761        while (s < PL_bufend) {
4762            if (*s == '\n') {
4763                SV *linestr = PL_parser->linestr;
4764                char *buf = SvPVX(linestr);
4765                STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4766                STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4767                STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4768                STRLEN const linestart_pos = PL_parser->linestart - buf;
4769                STRLEN const last_uni_pos =
4770                    PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4771                STRLEN const last_lop_pos =
4772                    PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4773                av_push(PL_rsfp_filters, linestr);
4774                PL_parser->linestr =
4775                    newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4776                buf = SvPVX(PL_parser->linestr);
4777                PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4778                PL_parser->bufptr = buf + bufptr_pos;
4779                PL_parser->oldbufptr = buf + oldbufptr_pos;
4780                PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4781                PL_parser->linestart = buf + linestart_pos;
4782                if (PL_parser->last_uni)
4783                    PL_parser->last_uni = buf + last_uni_pos;
4784                if (PL_parser->last_lop)
4785                    PL_parser->last_lop = buf + last_lop_pos;
4786                SvLEN_set(linestr, SvCUR(linestr));
4787                SvCUR_set(linestr, s - SvPVX(linestr));
4788                PL_parser->filtered = 1;
4789                break;
4790            }
4791            s++;
4792        }
4793    }
4794    return(datasv);
4795}
4796
4797/*
4798=for apidoc_section $filters
4799=for apidoc filter_del
4800
4801Delete most recently added instance of the filter function argument
4802
4803=cut
4804*/
4805
4806void
4807Perl_filter_del(pTHX_ filter_t funcp)
4808{
4809    SV *datasv;
4810
4811    PERL_ARGS_ASSERT_FILTER_DEL;
4812
4813#ifdef DEBUGGING
4814    DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4815                          FPTR2DPTR(void*, funcp)));
4816#endif
4817    if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4818        return;
4819    /* if filter is on top of stack (usual case) just pop it off */
4820    datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4821    if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4822        SvREFCNT_dec(av_pop(PL_rsfp_filters));
4823
4824        return;
4825    }
4826    /* we need to search for the correct entry and clear it	*/
4827    Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4828}
4829
4830
4831/* Invoke the idxth filter function for the current rsfp.	 */
4832/* maxlen 0 = read one text line */
4833I32
4834Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4835{
4836    filter_t funcp;
4837    I32 ret;
4838    SV *datasv = NULL;
4839    /* This API is bad. It should have been using unsigned int for maxlen.
4840       Not sure if we want to change the API, but if not we should sanity
4841       check the value here.  */
4842    unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4843
4844    PERL_ARGS_ASSERT_FILTER_READ;
4845
4846    if (!PL_parser || !PL_rsfp_filters)
4847        return -1;
4848    if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?	*/
4849        /* Provide a default input filter to make life easy.	*/
4850        /* Note that we append to the line. This is handy.	*/
4851        DEBUG_P(PerlIO_printf(Perl_debug_log,
4852                              "filter_read %d: from rsfp\n", idx));
4853        if (correct_length) {
4854            /* Want a block */
4855            int len ;
4856            const int old_len = SvCUR(buf_sv);
4857
4858            /* ensure buf_sv is large enough */
4859            SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4860            if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4861                                   correct_length)) <= 0) {
4862                if (PerlIO_error(PL_rsfp))
4863                    return -1;		/* error */
4864                else
4865                    return 0 ;		/* end of file */
4866            }
4867            SvCUR_set(buf_sv, old_len + len) ;
4868            SvPVX(buf_sv)[old_len + len] = '\0';
4869        } else {
4870            /* Want a line */
4871            if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4872                if (PerlIO_error(PL_rsfp))
4873                    return -1;		/* error */
4874                else
4875                    return 0 ;		/* end of file */
4876            }
4877        }
4878        return SvCUR(buf_sv);
4879    }
4880    /* Skip this filter slot if filter has been deleted	*/
4881    if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4882        DEBUG_P(PerlIO_printf(Perl_debug_log,
4883                              "filter_read %d: skipped (filter deleted)\n",
4884                              idx));
4885        return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4886    }
4887    if (SvTYPE(datasv) != SVt_PVIO) {
4888        if (correct_length) {
4889            /* Want a block */
4890            const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4891            if (!remainder) return 0; /* eof */
4892            if (correct_length > remainder) correct_length = remainder;
4893            sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4894            SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4895        } else {
4896            /* Want a line */
4897            const char *s = SvEND(datasv);
4898            const char *send = SvPVX(datasv) + SvLEN(datasv);
4899            while (s < send) {
4900                if (*s == '\n') {
4901                    s++;
4902                    break;
4903                }
4904                s++;
4905            }
4906            if (s == send) return 0; /* eof */
4907            sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4908            SvCUR_set(datasv, s-SvPVX(datasv));
4909        }
4910        return SvCUR(buf_sv);
4911    }
4912    /* Get function pointer hidden within datasv	*/
4913    funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4914    DEBUG_P(PerlIO_printf(Perl_debug_log,
4915                          "filter_read %d: via function %p (%s)\n",
4916                          idx, (void*)datasv, SvPV_nolen_const(datasv)));
4917    /* Call function. The function is expected to 	*/
4918    /* call "FILTER_READ(idx+1, buf_sv)" first.		*/
4919    /* Return: <0:error, =0:eof, >0:not eof 		*/
4920    ENTER;
4921    save_scalar(PL_errgv);
4922    ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4923    LEAVE;
4924    return ret;
4925}
4926
4927STATIC char *
4928S_filter_gets(pTHX_ SV *sv, STRLEN append)
4929{
4930    PERL_ARGS_ASSERT_FILTER_GETS;
4931
4932#ifdef PERL_CR_FILTER
4933    if (!PL_rsfp_filters) {
4934        filter_add(S_cr_textfilter,NULL);
4935    }
4936#endif
4937    if (PL_rsfp_filters) {
4938        if (!append)
4939            SvCUR_set(sv, 0);	/* start with empty line	*/
4940        if (FILTER_READ(0, sv, 0) > 0)
4941            return ( SvPVX(sv) ) ;
4942        else
4943            return NULL ;
4944    }
4945    else
4946        return (sv_gets(sv, PL_rsfp, append));
4947}
4948
4949STATIC HV *
4950S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4951{
4952    GV *gv;
4953
4954    PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4955
4956    if (memEQs(pkgname, len, "__PACKAGE__"))
4957        return PL_curstash;
4958
4959    if (len > 2
4960        && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4961        && (gv = gv_fetchpvn_flags(pkgname,
4962                                   len,
4963                                   ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4964    {
4965        return GvHV(gv);			/* Foo:: */
4966    }
4967
4968    /* use constant CLASS => 'MyClass' */
4969    gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4970    if (gv && GvCV(gv)) {
4971        SV * const sv = cv_const_sv(GvCV(gv));
4972        if (sv)
4973            return gv_stashsv(sv, 0);
4974    }
4975
4976    return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4977}
4978
4979
4980STATIC char *
4981S_tokenize_use(pTHX_ int is_use, char *s) {
4982    PERL_ARGS_ASSERT_TOKENIZE_USE;
4983
4984    if (PL_expect != XSTATE)
4985        /* diag_listed_as: "use" not allowed in expression */
4986        yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4987                    is_use ? "use" : "no"));
4988    PL_expect = XTERM;
4989    s = skipspace(s);
4990    if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4991        s = force_version(s, TRUE);
4992        if (*s == ';' || *s == '}'
4993                || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4994            NEXTVAL_NEXTTOKE.opval = NULL;
4995            force_next(BAREWORD);
4996        }
4997        else if (*s == 'v') {
4998            s = force_word(s,BAREWORD,FALSE,TRUE);
4999            s = force_version(s, FALSE);
5000        }
5001    }
5002    else {
5003        s = force_word(s,BAREWORD,FALSE,TRUE);
5004        s = force_version(s, FALSE);
5005    }
5006    pl_yylval.ival = is_use;
5007    return s;
5008}
5009#ifdef DEBUGGING
5010    static const char* const exp_name[] =
5011        { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
5012          "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
5013          "SIGVAR", "TERMORDORDOR"
5014        };
5015#endif
5016
5017#define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
5018STATIC bool
5019S_word_takes_any_delimiter(char *p, STRLEN len)
5020{
5021    return (len == 1 && memCHRs("msyq", p[0]))
5022            || (len == 2
5023                && ((p[0] == 't' && p[1] == 'r')
5024                    || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
5025}
5026
5027static void
5028S_check_scalar_slice(pTHX_ char *s)
5029{
5030    s++;
5031    while (SPACE_OR_TAB(*s)) s++;
5032    if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
5033                                                             PL_bufend,
5034                                                             UTF))
5035    {
5036        return;
5037    }
5038    while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
5039           || (*s && memCHRs(" \t$#+-'\"", *s)))
5040    {
5041        s += UTF ? UTF8SKIP(s) : 1;
5042    }
5043    if (*s == '}' || *s == ']')
5044        pl_yylval.ival = OPpSLICEWARNING;
5045}
5046
5047#define lex_token_boundary() S_lex_token_boundary(aTHX)
5048static void
5049S_lex_token_boundary(pTHX)
5050{
5051    PL_oldoldbufptr = PL_oldbufptr;
5052    PL_oldbufptr = PL_bufptr;
5053}
5054
5055#define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5056static char *
5057S_vcs_conflict_marker(pTHX_ char *s)
5058{
5059    lex_token_boundary();
5060    PL_bufptr = s;
5061    yyerror("Version control conflict marker");
5062    while (s < PL_bufend && *s != '\n')
5063        s++;
5064    return s;
5065}
5066
5067static int
5068yyl_sigvar(pTHX_ char *s)
5069{
5070    /* we expect the sigil and optional var name part of a
5071     * signature element here. Since a '$' is not necessarily
5072     * followed by a var name, handle it specially here; the general
5073     * yylex code would otherwise try to interpret whatever follows
5074     * as a var; e.g. ($, ...) would be seen as the var '$,'
5075     */
5076
5077    U8 sigil;
5078
5079    s = skipspace(s);
5080    sigil = *s++;
5081    PL_bufptr = s; /* for error reporting */
5082    switch (sigil) {
5083    case '$':
5084    case '@':
5085    case '%':
5086        /* spot stuff that looks like an prototype */
5087        if (memCHRs("$:@%&*;\\[]", *s)) {
5088            yyerror("Illegal character following sigil in a subroutine signature");
5089            break;
5090        }
5091        /* '$#' is banned, while '$ # comment' isn't */
5092        if (*s == '#') {
5093            yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5094            break;
5095        }
5096        s = skipspace(s);
5097        if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5098            char *dest = PL_tokenbuf + 1;
5099            /* read var name, including sigil, into PL_tokenbuf */
5100            PL_tokenbuf[0] = sigil;
5101            parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5102                0, cBOOL(UTF), FALSE, FALSE);
5103            *dest = '\0';
5104            assert(PL_tokenbuf[1]); /* we have a variable name */
5105        }
5106        else {
5107            *PL_tokenbuf = 0;
5108            PL_in_my = 0;
5109        }
5110
5111        s = skipspace(s);
5112        /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5113         * as the ASSIGNOP, and exclude other tokens that start with =
5114         */
5115        if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5116            /* save now to report with the same context as we did when
5117             * all ASSIGNOPS were accepted */
5118            PL_oldbufptr = s;
5119
5120            ++s;
5121            NEXTVAL_NEXTTOKE.ival = OP_SASSIGN;
5122            force_next(ASSIGNOP);
5123            PL_expect = XTERM;
5124        }
5125        else if(*s == '/' && s[1] == '/' && s[2] == '=') {
5126            PL_oldbufptr = s;
5127
5128            s += 3;
5129            NEXTVAL_NEXTTOKE.ival = OP_DORASSIGN;
5130            force_next(ASSIGNOP);
5131            PL_expect = XTERM;
5132        }
5133        else if(*s == '|' && s[1] == '|' && s[2] == '=') {
5134            PL_oldbufptr = s;
5135
5136            s += 3;
5137            NEXTVAL_NEXTTOKE.ival = OP_ORASSIGN;
5138            force_next(ASSIGNOP);
5139            PL_expect = XTERM;
5140        }
5141        else if (*s == ',' || *s == ')') {
5142            PL_expect = XOPERATOR;
5143        }
5144        else {
5145            /* make sure the context shows the unexpected character and
5146             * hopefully a bit more */
5147            if (*s) ++s;
5148            while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5149                s++;
5150            PL_bufptr = s; /* for error reporting */
5151            yyerror("Illegal operator following parameter in a subroutine signature");
5152            PL_in_my = 0;
5153        }
5154        if (*PL_tokenbuf) {
5155            NEXTVAL_NEXTTOKE.ival = sigil;
5156            force_next('p'); /* force a signature pending identifier */
5157        }
5158        break;
5159
5160    case ')':
5161        PL_expect = XBLOCK;
5162        break;
5163    case ',': /* handle ($a,,$b) */
5164        break;
5165
5166    default:
5167        PL_in_my = 0;
5168        yyerror("A signature parameter must start with '$', '@' or '%'");
5169        /* very crude error recovery: skip to likely next signature
5170         * element */
5171        while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5172            s++;
5173        break;
5174    }
5175
5176    switch (sigil) {
5177        case ',': TOKEN (PERLY_COMMA);
5178        case '$': TOKEN (PERLY_DOLLAR);
5179        case '@': TOKEN (PERLY_SNAIL);
5180        case '%': TOKEN (PERLY_PERCENT_SIGN);
5181        case ')': TOKEN (PERLY_PAREN_CLOSE);
5182        default:  TOKEN (sigil);
5183    }
5184}
5185
5186static int
5187yyl_dollar(pTHX_ char *s)
5188{
5189    CLINE;
5190
5191    if (PL_expect == XPOSTDEREF) {
5192        if (s[1] == '#') {
5193            s++;
5194            POSTDEREF(DOLSHARP);
5195        }
5196        POSTDEREF(PERLY_DOLLAR);
5197    }
5198
5199    if (   s[1] == '#'
5200        && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5201            || memCHRs("{$:+-@", s[2])))
5202    {
5203        PL_tokenbuf[0] = '@';
5204        s = scan_ident(s + 1, PL_tokenbuf + 1,
5205                       sizeof PL_tokenbuf - 1, FALSE);
5206        if (PL_expect == XOPERATOR) {
5207            char *d = s;
5208            if (PL_bufptr > s) {
5209                d = PL_bufptr-1;
5210                PL_bufptr = PL_oldbufptr;
5211            }
5212            no_op("Array length", d);
5213        }
5214        if (!PL_tokenbuf[1])
5215            PREREF(DOLSHARP);
5216        PL_expect = XOPERATOR;
5217        force_ident_maybe_lex('#');
5218        TOKEN(DOLSHARP);
5219    }
5220
5221    PL_tokenbuf[0] = '$';
5222    s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5223    if (PL_expect == XOPERATOR) {
5224        char *d = s;
5225        if (PL_bufptr > s) {
5226            d = PL_bufptr-1;
5227            PL_bufptr = PL_oldbufptr;
5228        }
5229        no_op("Scalar", d);
5230    }
5231    if (!PL_tokenbuf[1]) {
5232        if (s == PL_bufend)
5233            yyerror("Final $ should be \\$ or $name");
5234        PREREF(PERLY_DOLLAR);
5235    }
5236
5237    {
5238        const char tmp = *s;
5239        if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5240            s = skipspace(s);
5241
5242        if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5243            && intuit_more(s, PL_bufend)) {
5244            if (*s == '[') {
5245                PL_tokenbuf[0] = '@';
5246                if (ckWARN(WARN_SYNTAX)) {
5247                    char *t = s+1;
5248
5249                    while ( t < PL_bufend ) {
5250                        if (isSPACE(*t)) {
5251                            do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5252                            /* consumed one or more space chars */
5253                        } else if (*t == '$' || *t == '@') {
5254                            /* could be more than one '$' like $$ref or @$ref */
5255                            do { t++; } while (t < PL_bufend && *t == '$');
5256
5257                            /* could be an abigail style identifier like $ foo */
5258                            while (t < PL_bufend && *t == ' ') t++;
5259
5260                            /* strip off the name of the var */
5261                            while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5262                                t += UTF ? UTF8SKIP(t) : 1;
5263                            /* consumed a varname */
5264                        } else if (isDIGIT(*t)) {
5265                            /* deal with hex constants like 0x11 */
5266                            if (t[0] == '0' && t[1] == 'x') {
5267                                t += 2;
5268                                while (t < PL_bufend && isXDIGIT(*t)) t++;
5269                            } else {
5270                                /* deal with decimal/octal constants like 1 and 0123 */
5271                                do { t++; } while (isDIGIT(*t));
5272                                if (t<PL_bufend && *t == '.') {
5273                                    do { t++; } while (isDIGIT(*t));
5274                                }
5275                            }
5276                            /* consumed a number */
5277                        } else {
5278                            /* not a var nor a space nor a number */
5279                            break;
5280                        }
5281                    }
5282                    if (t < PL_bufend && *t++ == ',') {
5283                        PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5284                        while (t < PL_bufend && *t != ']')
5285                            t++;
5286                        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5287                                    "Multidimensional syntax %" UTF8f " not supported",
5288                                    UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5289                    }
5290                }
5291            }
5292            else if (*s == '{') {
5293                char *t;
5294                PL_tokenbuf[0] = '%';
5295                if (    strEQ(PL_tokenbuf+1, "SIG")
5296                    && ckWARN(WARN_SYNTAX)
5297                    && (t = (char *) memchr(s, '}', PL_bufend - s))
5298                    && (t = (char *) memchr(t, '=', PL_bufend - t)))
5299                {
5300                    char tmpbuf[sizeof PL_tokenbuf];
5301                    do {
5302                        t++;
5303                    } while (isSPACE(*t));
5304                    if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5305                        STRLEN len;
5306                        t = scan_word6(t, tmpbuf, sizeof tmpbuf, TRUE,
5307                                      &len, TRUE);
5308                        while (isSPACE(*t))
5309                            t++;
5310                        if (  *t == ';'
5311                            && get_cvn_flags(tmpbuf, len, UTF
5312                                                            ? SVf_UTF8
5313                                                            : 0))
5314                        {
5315                            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5316                                "You need to quote \"%" UTF8f "\"",
5317                                    UTF8fARG(UTF, len, tmpbuf));
5318                        }
5319                    }
5320                }
5321            }
5322        }
5323
5324        PL_expect = XOPERATOR;
5325        if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5326            const bool islop = (PL_last_lop == PL_oldoldbufptr);
5327            if (!islop || PL_last_lop_op == OP_GREPSTART)
5328                PL_expect = XOPERATOR;
5329            else if (memCHRs("$@\"'`q", *s))
5330                PL_expect = XTERM;		/* e.g. print $fh "foo" */
5331            else if (   memCHRs("&*<%", *s)
5332                     && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5333            {
5334                PL_expect = XTERM;		/* e.g. print $fh &sub */
5335            }
5336            else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5337                char tmpbuf[sizeof PL_tokenbuf];
5338                int t2;
5339                STRLEN len;
5340                scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
5341                if ((t2 = keyword(tmpbuf, len, 0))) {
5342                    /* binary operators exclude handle interpretations */
5343                    switch (t2) {
5344                    case -KEY_x:
5345                    case -KEY_eq:
5346                    case -KEY_ne:
5347                    case -KEY_gt:
5348                    case -KEY_lt:
5349                    case -KEY_ge:
5350                    case -KEY_le:
5351                    case -KEY_cmp:
5352                        break;
5353                    default:
5354                        PL_expect = XTERM;	/* e.g. print $fh length() */
5355                        break;
5356                    }
5357                }
5358                else {
5359                    PL_expect = XTERM;	/* e.g. print $fh subr() */
5360                }
5361            }
5362            else if (isDIGIT(*s))
5363                PL_expect = XTERM;		/* e.g. print $fh 3 */
5364            else if (*s == '.' && isDIGIT(s[1]))
5365                PL_expect = XTERM;		/* e.g. print $fh .3 */
5366            else if ((*s == '?' || *s == '-' || *s == '+')
5367                && !isSPACE(s[1]) && s[1] != '=')
5368                PL_expect = XTERM;		/* e.g. print $fh -1 */
5369            else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5370                     && s[1] != '/')
5371                PL_expect = XTERM;		/* e.g. print $fh /.../
5372                                               XXX except DORDOR operator
5373                                            */
5374            else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5375                     && s[2] != '=')
5376                PL_expect = XTERM;		/* print $fh <<"EOF" */
5377        }
5378    }
5379    force_ident_maybe_lex('$');
5380    TOKEN(PERLY_DOLLAR);
5381}
5382
5383static int
5384yyl_sub(pTHX_ char *s, const int key)
5385{
5386    char * const tmpbuf = PL_tokenbuf + 1;
5387    bool have_name, have_proto;
5388    STRLEN len;
5389    SV *format_name = NULL;
5390    bool is_method = (key == KEY_method);
5391
5392    /* method always implies signatures */
5393    bool is_sigsub = is_method || FEATURE_SIGNATURES_IS_ENABLED;
5394
5395    SSize_t off = s-SvPVX(PL_linestr);
5396    char *d;
5397
5398    s = skipspace(s); /* can move PL_linestr */
5399
5400    d = SvPVX(PL_linestr)+off;
5401
5402    SAVEBOOL(PL_parser->sig_seen);
5403    PL_parser->sig_seen = FALSE;
5404
5405    if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5406        || *s == '\''
5407        || (*s == ':' && s[1] == ':'))
5408    {
5409
5410        PL_expect = XATTRBLOCK;
5411        d = scan_word6(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5412                      &len, TRUE);
5413        if (key == KEY_format)
5414            format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5415        *PL_tokenbuf = '&';
5416        if (memchr(tmpbuf, ':', len) || key != KEY_sub
5417         || pad_findmy_pvn(
5418                PL_tokenbuf, len + 1, 0
5419            ) != NOT_IN_PAD)
5420            sv_setpvn(PL_subname, tmpbuf, len);
5421        else {
5422            sv_setsv(PL_subname,PL_curstname);
5423            sv_catpvs(PL_subname,"::");
5424            sv_catpvn(PL_subname,tmpbuf,len);
5425        }
5426        if (SvUTF8(PL_linestr))
5427            SvUTF8_on(PL_subname);
5428        have_name = TRUE;
5429
5430        s = skipspace(d);
5431    }
5432    else {
5433        if (key == KEY_my || key == KEY_our || key==KEY_state) {
5434            *d = '\0';
5435            /* diag_listed_as: Missing name in "%s sub" */
5436            Perl_croak(aTHX_
5437                      "Missing name in \"%s\"", PL_bufptr);
5438        }
5439        PL_expect = XATTRTERM;
5440        sv_setpvs(PL_subname,"?");
5441        have_name = FALSE;
5442    }
5443
5444    if (key == KEY_format) {
5445        if (format_name) {
5446            NEXTVAL_NEXTTOKE.opval
5447                = newSVOP(OP_CONST,0, format_name);
5448            NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5449            force_next(BAREWORD);
5450        }
5451        PREBLOCK(KW_FORMAT);
5452    }
5453
5454    /* Look for a prototype */
5455    if (*s == '(' && !is_sigsub) {
5456        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5457        if (!s)
5458            Perl_croak(aTHX_ "Prototype not terminated");
5459        COPLINE_SET_FROM_MULTI_END;
5460        (void)validate_proto(PL_subname, PL_lex_stuff,
5461                             ckWARN(WARN_ILLEGALPROTO), 0);
5462        have_proto = TRUE;
5463
5464        s = skipspace(s);
5465    }
5466    else
5467        have_proto = FALSE;
5468
5469    if (  !(*s == ':' && s[1] != ':')
5470        && (*s != '{' && *s != '(') && key != KEY_format)
5471    {
5472        assert(key == KEY_sub || key == KEY_method ||
5473               key == KEY_AUTOLOAD || key == KEY_DESTROY ||
5474               key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK ||
5475               key == KEY_INIT || key == KEY_END ||
5476               key == KEY_my || key == KEY_state ||
5477               key == KEY_our);
5478        if (!have_name)
5479            Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5480        else if (*s != ';' && *s != '}')
5481            Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5482    }
5483
5484    if (have_proto) {
5485        NEXTVAL_NEXTTOKE.opval =
5486            newSVOP(OP_CONST, 0, PL_lex_stuff);
5487        PL_lex_stuff = NULL;
5488        force_next(THING);
5489    }
5490
5491    if (!have_name) {
5492        if (PL_curstash)
5493            sv_setpvs(PL_subname, "__ANON__");
5494        else
5495            sv_setpvs(PL_subname, "__ANON__::__ANON__");
5496        if (is_method)
5497            TOKEN(KW_METHOD_anon);
5498        else if (is_sigsub)
5499            TOKEN(KW_SUB_anon_sig);
5500        else
5501            TOKEN(KW_SUB_anon);
5502    }
5503    force_ident_maybe_lex('&');
5504    if (is_method)
5505        TOKEN(KW_METHOD_named);
5506    else if (is_sigsub)
5507        TOKEN(KW_SUB_named_sig);
5508    else
5509        TOKEN(KW_SUB_named);
5510}
5511
5512static int
5513yyl_interpcasemod(pTHX_ char *s)
5514{
5515#ifdef DEBUGGING
5516    if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5517        Perl_croak(aTHX_
5518                   "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5519                   PL_bufptr, PL_bufend, *PL_bufptr);
5520#endif
5521
5522    if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5523        /* if at a \E */
5524        if (PL_lex_casemods) {
5525            const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5526            PL_lex_casestack[PL_lex_casemods] = '\0';
5527
5528            if (PL_bufptr != PL_bufend
5529                && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5530                    || oldmod == 'F')) {
5531                PL_bufptr += 2;
5532                PL_lex_state = LEX_INTERPCONCAT;
5533            }
5534            PL_lex_allbrackets--;
5535            return REPORT(PERLY_PAREN_CLOSE);
5536        }
5537        else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5538           /* Got an unpaired \E */
5539           Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5540                    "Useless use of \\E");
5541        }
5542        if (PL_bufptr != PL_bufend)
5543            PL_bufptr += 2;
5544        PL_lex_state = LEX_INTERPCONCAT;
5545        return yylex();
5546    }
5547    else {
5548        DEBUG_T({
5549            PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5550        });
5551        s = PL_bufptr + 1;
5552        if (s[1] == '\\' && s[2] == 'E') {
5553            PL_bufptr = s + 3;
5554            PL_lex_state = LEX_INTERPCONCAT;
5555            return yylex();
5556        }
5557        else {
5558            I32 tmp;
5559            if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5560                || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5561            {
5562                tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
5563            }
5564            if ((*s == 'L' || *s == 'U' || *s == 'F')
5565                && (strpbrk(PL_lex_casestack, "LUF")))
5566            {
5567                PL_lex_casestack[--PL_lex_casemods] = '\0';
5568                PL_lex_allbrackets--;
5569                return REPORT(PERLY_PAREN_CLOSE);
5570            }
5571            if (PL_lex_casemods > 10)
5572                Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5573            PL_lex_casestack[PL_lex_casemods++] = *s;
5574            PL_lex_casestack[PL_lex_casemods] = '\0';
5575            PL_lex_state = LEX_INTERPCONCAT;
5576            NEXTVAL_NEXTTOKE.ival = 0;
5577            force_next((2<<24)|PERLY_PAREN_OPEN);
5578            if (*s == 'l')
5579                NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5580            else if (*s == 'u')
5581                NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5582            else if (*s == 'L')
5583                NEXTVAL_NEXTTOKE.ival = OP_LC;
5584            else if (*s == 'U')
5585                NEXTVAL_NEXTTOKE.ival = OP_UC;
5586            else if (*s == 'Q')
5587                NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5588            else if (*s == 'F')
5589                NEXTVAL_NEXTTOKE.ival = OP_FC;
5590            else
5591                Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5592            PL_bufptr = s + 1;
5593        }
5594        force_next(FUNC);
5595        if (PL_lex_starts) {
5596            s = PL_bufptr;
5597            PL_lex_starts = 0;
5598            /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5599            if (PL_lex_casemods == 1 && PL_lex_inpat)
5600                TOKEN(PERLY_COMMA);
5601            else
5602                AopNOASSIGN(OP_CONCAT);
5603        }
5604        else
5605            return yylex();
5606    }
5607}
5608
5609static int
5610yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5611                        GV **pgv, GV ***pgvp)
5612{
5613    GV *ogv = NULL;	/* override (winner) */
5614    GV *hgv = NULL;	/* hidden (loser) */
5615    GV *gv = *pgv;
5616
5617    if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5618        CV *cv;
5619        if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5620                                    (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5621                                    SVt_PVCV))
5622            && (cv = GvCVu(gv)))
5623        {
5624            if (GvIMPORTED_CV(gv))
5625                ogv = gv;
5626            else if (! CvNOWARN_AMBIGUOUS(cv))
5627                hgv = gv;
5628        }
5629        if (!ogv
5630            && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5631            && (gv = **pgvp)
5632            && (isGV_with_GP(gv)
5633                ? GvCVu(gv) && GvIMPORTED_CV(gv)
5634                :   SvPCS_IMPORTED(gv)
5635                && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5636                                                         len, 0), 1)))
5637        {
5638            ogv = gv;
5639        }
5640    }
5641
5642    *pgv = gv;
5643
5644    if (ogv) {
5645        *orig_keyword = key;
5646        return 0;		/* overridden by import or by GLOBAL */
5647    }
5648    else if (gv && !*pgvp
5649             && -key==KEY_lock	/* XXX generalizable kludge */
5650             && GvCVu(gv))
5651    {
5652        return 0;		/* any sub overrides "weak" keyword */
5653    }
5654    else {			/* no override */
5655        key = -key;
5656        if (key == KEY_dump) {
5657            Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5658        }
5659        *pgv = NULL;
5660        *pgvp = 0;
5661        if (hgv && key != KEY_x)	/* never ambiguous */
5662            Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5663                           "Ambiguous call resolved as CORE::%s(), "
5664                           "qualify as such or use &",
5665                           GvENAME(hgv));
5666        return key;
5667    }
5668}
5669
5670static int
5671yyl_qw(pTHX_ char *s, STRLEN len)
5672{
5673    OP *words = NULL;
5674
5675    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5676    if (!s)
5677        missingterm(NULL, 0);
5678
5679    COPLINE_SET_FROM_MULTI_END;
5680    PL_expect = XOPERATOR;
5681    if (SvCUR(PL_lex_stuff)) {
5682        int warned_comma = !ckWARN(WARN_QW);
5683        int warned_comment = warned_comma;
5684        char *d = SvPV_force(PL_lex_stuff, len);
5685        while (len) {
5686            for (; isSPACE(*d) && len; --len, ++d)
5687                /**/;
5688            if (len) {
5689                SV *sv;
5690                const char *b = d;
5691                if (!warned_comma || !warned_comment) {
5692                    for (; !isSPACE(*d) && len; --len, ++d) {
5693                        if (!warned_comma && *d == ',') {
5694                            Perl_warner(aTHX_ packWARN(WARN_QW),
5695                                "Possible attempt to separate words with commas");
5696                            ++warned_comma;
5697                        }
5698                        else if (!warned_comment && *d == '#') {
5699                            Perl_warner(aTHX_ packWARN(WARN_QW),
5700                                "Possible attempt to put comments in qw() list");
5701                            ++warned_comment;
5702                        }
5703                    }
5704                }
5705                else {
5706                    for (; !isSPACE(*d) && len; --len, ++d)
5707                        /**/;
5708                }
5709                sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5710                words = op_append_elem(OP_LIST, words,
5711                                       newSVOP(OP_CONST, 0, tokeq(sv)));
5712            }
5713        }
5714    }
5715    if (!words)
5716        words = newNULLLIST();
5717    SvREFCNT_dec_NN(PL_lex_stuff);
5718    PL_lex_stuff = NULL;
5719    PL_expect = XOPERATOR;
5720    pl_yylval.opval = sawparens(words);
5721    TOKEN(QWLIST);
5722}
5723
5724static int
5725yyl_hyphen(pTHX_ char *s)
5726{
5727    if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5728        I32 ftst = 0;
5729        char tmp;
5730
5731        s++;
5732        PL_bufptr = s;
5733        tmp = *s++;
5734
5735        while (s < PL_bufend && SPACE_OR_TAB(*s))
5736            s++;
5737
5738        if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5739            s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5740            DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5741            OPERATOR(PERLY_MINUS);              /* unary minus */
5742        }
5743        switch (tmp) {
5744        case 'r': ftst = OP_FTEREAD;    break;
5745        case 'w': ftst = OP_FTEWRITE;   break;
5746        case 'x': ftst = OP_FTEEXEC;    break;
5747        case 'o': ftst = OP_FTEOWNED;   break;
5748        case 'R': ftst = OP_FTRREAD;    break;
5749        case 'W': ftst = OP_FTRWRITE;   break;
5750        case 'X': ftst = OP_FTREXEC;    break;
5751        case 'O': ftst = OP_FTROWNED;   break;
5752        case 'e': ftst = OP_FTIS;       break;
5753        case 'z': ftst = OP_FTZERO;     break;
5754        case 's': ftst = OP_FTSIZE;     break;
5755        case 'f': ftst = OP_FTFILE;     break;
5756        case 'd': ftst = OP_FTDIR;      break;
5757        case 'l': ftst = OP_FTLINK;     break;
5758        case 'p': ftst = OP_FTPIPE;     break;
5759        case 'S': ftst = OP_FTSOCK;     break;
5760        case 'u': ftst = OP_FTSUID;     break;
5761        case 'g': ftst = OP_FTSGID;     break;
5762        case 'k': ftst = OP_FTSVTX;     break;
5763        case 'b': ftst = OP_FTBLK;      break;
5764        case 'c': ftst = OP_FTCHR;      break;
5765        case 't': ftst = OP_FTTTY;      break;
5766        case 'T': ftst = OP_FTTEXT;     break;
5767        case 'B': ftst = OP_FTBINARY;   break;
5768        case 'M': case 'A': case 'C':
5769            gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5770            switch (tmp) {
5771            case 'M': ftst = OP_FTMTIME; break;
5772            case 'A': ftst = OP_FTATIME; break;
5773            case 'C': ftst = OP_FTCTIME; break;
5774            default:                     break;
5775            }
5776            break;
5777        default:
5778            break;
5779        }
5780        if (ftst) {
5781            PL_last_uni = PL_oldbufptr;
5782            PL_last_lop_op = (OPCODE)ftst;
5783            DEBUG_T( {
5784                PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5785            } );
5786            FTST(ftst);
5787        }
5788        else {
5789            /* Assume it was a minus followed by a one-letter named
5790             * subroutine call (or a -bareword), then. */
5791            DEBUG_T( {
5792                PerlIO_printf(Perl_debug_log,
5793                    "### '-%c' looked like a file test but was not\n",
5794                    (int) tmp);
5795            } );
5796            s = --PL_bufptr;
5797        }
5798    }
5799    {
5800        const char tmp = *s++;
5801        if (*s == tmp) {
5802            s++;
5803            if (PL_expect == XOPERATOR)
5804                TERM(POSTDEC);
5805            else
5806                OPERATOR(PREDEC);
5807        }
5808        else if (*s == '>') {
5809            s++;
5810            s = skipspace(s);
5811            if (((*s == '$' || *s == '&') && s[1] == '*')
5812              ||(*s == '$' && s[1] == '#' && s[2] == '*')
5813              ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5814              ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5815             )
5816            {
5817                PL_expect = XPOSTDEREF;
5818                TOKEN(ARROW);
5819            }
5820            if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5821                s = force_word(s,METHCALL0,FALSE,TRUE);
5822                TOKEN(ARROW);
5823            }
5824            else if (*s == '$')
5825                OPERATOR(ARROW);
5826            else
5827                TERM(ARROW);
5828        }
5829        if (PL_expect == XOPERATOR) {
5830            if (*s == '='
5831                && !PL_lex_allbrackets
5832                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5833            {
5834                s--;
5835                TOKEN(0);
5836            }
5837            Aop(OP_SUBTRACT);
5838        }
5839        else {
5840            if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5841                check_uni();
5842            OPERATOR(PERLY_MINUS);              /* unary minus */
5843        }
5844    }
5845}
5846
5847static int
5848yyl_plus(pTHX_ char *s)
5849{
5850    const char tmp = *s++;
5851    if (*s == tmp) {
5852        s++;
5853        if (PL_expect == XOPERATOR)
5854            TERM(POSTINC);
5855        else
5856            OPERATOR(PREINC);
5857    }
5858    if (PL_expect == XOPERATOR) {
5859        if (*s == '='
5860            && !PL_lex_allbrackets
5861            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5862        {
5863            s--;
5864            TOKEN(0);
5865        }
5866        Aop(OP_ADD);
5867    }
5868    else {
5869        if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5870            check_uni();
5871        OPERATOR(PERLY_PLUS);
5872    }
5873}
5874
5875static int
5876yyl_star(pTHX_ char *s)
5877{
5878    if (PL_expect == XPOSTDEREF)
5879        POSTDEREF(PERLY_STAR);
5880
5881    if (PL_expect != XOPERATOR) {
5882        s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5883        PL_expect = XOPERATOR;
5884        force_ident(PL_tokenbuf, PERLY_STAR);
5885        if (!*PL_tokenbuf)
5886            PREREF(PERLY_STAR);
5887        TERM(PERLY_STAR);
5888    }
5889
5890    s++;
5891    if (*s == '*') {
5892        s++;
5893        if (*s == '=' && !PL_lex_allbrackets
5894            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5895        {
5896            s -= 2;
5897            TOKEN(0);
5898        }
5899        PWop(OP_POW);
5900    }
5901
5902    if (*s == '='
5903        && !PL_lex_allbrackets
5904        && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5905    {
5906        s--;
5907        TOKEN(0);
5908    }
5909
5910    Mop(OP_MULTIPLY);
5911}
5912
5913static int
5914yyl_percent(pTHX_ char *s)
5915{
5916    if (PL_expect == XOPERATOR) {
5917        if (s[1] == '='
5918            && !PL_lex_allbrackets
5919            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5920        {
5921            TOKEN(0);
5922        }
5923        ++s;
5924        Mop(OP_MODULO);
5925    }
5926    else if (PL_expect == XPOSTDEREF)
5927        POSTDEREF(PERLY_PERCENT_SIGN);
5928
5929    PL_tokenbuf[0] = '%';
5930    s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5931    pl_yylval.ival = 0;
5932    if (!PL_tokenbuf[1]) {
5933        PREREF(PERLY_PERCENT_SIGN);
5934    }
5935    if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5936        && intuit_more(s, PL_bufend)) {
5937        if (*s == '[')
5938            PL_tokenbuf[0] = '@';
5939    }
5940    PL_expect = XOPERATOR;
5941    force_ident_maybe_lex('%');
5942    TERM(PERLY_PERCENT_SIGN);
5943}
5944
5945static int
5946yyl_caret(pTHX_ char *s)
5947{
5948    char *d = s;
5949    const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5950    if (bof && s[1] == '.')
5951        s++;
5952    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5953            (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5954    {
5955        s = d;
5956        TOKEN(0);
5957    }
5958    s++;
5959    BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5960}
5961
5962static int
5963yyl_colon(pTHX_ char *s)
5964{
5965    OP *attrs;
5966
5967    switch (PL_expect) {
5968    case XOPERATOR:
5969        if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5970            break;
5971        PL_bufptr = s;	/* update in case we back off */
5972        if (*s == '=') {
5973            Perl_croak(aTHX_
5974                       "Use of := for an empty attribute list is not allowed");
5975        }
5976        goto grabattrs;
5977    case XATTRBLOCK:
5978        PL_expect = XBLOCK;
5979        goto grabattrs;
5980    case XATTRTERM:
5981        PL_expect = XTERMBLOCK;
5982     grabattrs:
5983        /* NB: as well as parsing normal attributes, we also end up
5984         * here if there is something looking like attributes
5985         * following a signature (which is illegal, but used to be
5986         * legal in 5.20..5.26). If the latter, we still parse the
5987         * attributes so that error messages(s) are less confusing,
5988         * but ignore them (parser->sig_seen).
5989         */
5990        s = skipspace(s);
5991        attrs = NULL;
5992        while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5993            I32 tmp;
5994            SV *sv;
5995            STRLEN len;
5996            char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
5997            if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5998                if (tmp < 0) tmp = -tmp;
5999                switch (tmp) {
6000                case KEY_or:
6001                case KEY_and:
6002                case KEY_for:
6003                case KEY_foreach:
6004                case KEY_unless:
6005                case KEY_if:
6006                case KEY_while:
6007                case KEY_until:
6008                    goto got_attrs;
6009                default:
6010                    break;
6011                }
6012            }
6013            sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
6014            if (*d == '(') {
6015                d = scan_str(d,TRUE,TRUE,FALSE,NULL);
6016                if (!d) {
6017                    if (attrs)
6018                        op_free(attrs);
6019                    ASSUME(sv && SvREFCNT(sv) == 1);
6020                    SvREFCNT_dec(sv);
6021                    Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
6022                }
6023                COPLINE_SET_FROM_MULTI_END;
6024            }
6025            if (PL_lex_stuff) {
6026                sv_catsv(sv, PL_lex_stuff);
6027                attrs = op_append_elem(OP_LIST, attrs,
6028                                    newSVOP(OP_CONST, 0, sv));
6029                SvREFCNT_dec_NN(PL_lex_stuff);
6030                PL_lex_stuff = NULL;
6031            }
6032            else {
6033                attrs = op_append_elem(OP_LIST, attrs,
6034                                    newSVOP(OP_CONST, 0, sv));
6035            }
6036            s = skipspace(d);
6037            if (*s == ':' && s[1] != ':')
6038                s = skipspace(s+1);
6039            else if (s == d)
6040                break;	/* require real whitespace or :'s */
6041            /* XXX losing whitespace on sequential attributes here */
6042        }
6043
6044        if (*s != ';'
6045            && *s != '}'
6046            && !(PL_expect == XOPERATOR
6047                   /* if an operator is expected, permit =, //= and ||= or ) to end */
6048                 ? (*s == '=' || *s == ')' || *s == '/' || *s == '|')
6049                 : (*s == '{' || *s == '(')))
6050        {
6051            const char q = ((*s == '\'') ? '"' : '\'');
6052            /* If here for an expression, and parsed no attrs, back off. */
6053            if (PL_expect == XOPERATOR && !attrs) {
6054                s = PL_bufptr;
6055                break;
6056            }
6057            /* MUST advance bufptr here to avoid bogus "at end of line"
6058               context messages from yyerror().
6059            */
6060            PL_bufptr = s;
6061            yyerror( (const char *)
6062                     (*s
6063                      ? Perl_form(aTHX_ "Invalid separator character "
6064                                  "%c%c%c in attribute list", q, *s, q)
6065                      : "Unterminated attribute list" ) );
6066            if (attrs)
6067                op_free(attrs);
6068            OPERATOR(PERLY_COLON);
6069        }
6070
6071    got_attrs:
6072        if (PL_parser->sig_seen) {
6073            /* see comment about about sig_seen and parser error
6074             * handling */
6075            if (attrs)
6076                op_free(attrs);
6077            Perl_croak(aTHX_ "Subroutine attributes must come "
6078                             "before the signature");
6079        }
6080        if (attrs) {
6081            NEXTVAL_NEXTTOKE.opval = attrs;
6082            force_next(THING);
6083        }
6084        TOKEN(COLONATTR);
6085    }
6086
6087    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6088        s--;
6089        TOKEN(0);
6090    }
6091
6092    PL_lex_allbrackets--;
6093    OPERATOR(PERLY_COLON);
6094}
6095
6096static int
6097yyl_subproto(pTHX_ char *s, CV *cv)
6098{
6099    STRLEN protolen = CvPROTOLEN(cv);
6100    const char *proto = CvPROTO(cv);
6101    bool optional;
6102
6103    proto = S_strip_spaces(aTHX_ proto, &protolen);
6104    if (!protolen)
6105        TERM(FUNC0SUB);
6106    if ((optional = *proto == ';')) {
6107        do {
6108            proto++;
6109        } while (*proto == ';');
6110    }
6111
6112    if (
6113        (
6114            (
6115                *proto == '$' || *proto == '_'
6116             || *proto == '*' || *proto == '+'
6117            )
6118         && proto[1] == '\0'
6119        )
6120     || (
6121         *proto == '\\' && proto[1] && proto[2] == '\0'
6122        )
6123    ) {
6124        UNIPROTO(UNIOPSUB,optional);
6125    }
6126
6127    if (*proto == '\\' && proto[1] == '[') {
6128        const char *p = proto + 2;
6129        while(*p && *p != ']')
6130            ++p;
6131        if(*p == ']' && !p[1])
6132            UNIPROTO(UNIOPSUB,optional);
6133    }
6134
6135    if (*proto == '&' && *s == '{') {
6136        if (PL_curstash)
6137            sv_setpvs(PL_subname, "__ANON__");
6138        else
6139            sv_setpvs(PL_subname, "__ANON__::__ANON__");
6140        if (!PL_lex_allbrackets
6141            && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6142        {
6143            PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6144        }
6145        PREBLOCK(LSTOPSUB);
6146    }
6147
6148    return KEY_NULL;
6149}
6150
6151static int
6152yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6153{
6154    char *d;
6155    if (PL_lex_brackets > 100) {
6156        Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6157    }
6158
6159    switch (PL_expect) {
6160    case XTERM:
6161    case XTERMORDORDOR:
6162        PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6163        PL_lex_allbrackets++;
6164        OPERATOR(HASHBRACK);
6165    case XOPERATOR:
6166        while (s < PL_bufend && SPACE_OR_TAB(*s))
6167            s++;
6168        d = s;
6169        PL_tokenbuf[0] = '\0';
6170        if (d < PL_bufend && *d == '-') {
6171            PL_tokenbuf[0] = '-';
6172            d++;
6173            while (d < PL_bufend && SPACE_OR_TAB(*d))
6174                d++;
6175        }
6176        if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6177            STRLEN len;
6178            d = scan_word6(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6179                          FALSE, &len, FALSE);
6180            while (d < PL_bufend && SPACE_OR_TAB(*d))
6181                d++;
6182            if (*d == '}') {
6183                const char minus = (PL_tokenbuf[0] == '-');
6184                s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6185                if (minus)
6186                    force_next(PERLY_MINUS);
6187            }
6188        }
6189        /* FALLTHROUGH */
6190    case XATTRTERM:
6191    case XTERMBLOCK:
6192        PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6193        PL_lex_allbrackets++;
6194        PL_expect = XSTATE;
6195        break;
6196    case XATTRBLOCK:
6197    case XBLOCK:
6198        PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6199        PL_lex_allbrackets++;
6200        PL_expect = XSTATE;
6201        break;
6202    case XBLOCKTERM:
6203        PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6204        PL_lex_allbrackets++;
6205        PL_expect = XSTATE;
6206        break;
6207    default: {
6208            const char *t;
6209            if (PL_oldoldbufptr == PL_last_lop)
6210                PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6211            else
6212                PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6213            PL_lex_allbrackets++;
6214            s = skipspace(s);
6215            if (*s == '}') {
6216                if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6217                    PL_expect = XTERM;
6218                    /* This hack is to get the ${} in the message. */
6219                    PL_bufptr = s+1;
6220                    yyerror("syntax error");
6221                    yyquit();
6222                    break;
6223                }
6224                OPERATOR(HASHBRACK);
6225            }
6226            if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6227                /* ${...} or @{...} etc., but not print {...}
6228                 * Skip the disambiguation and treat this as a block.
6229                 */
6230                goto block_expectation;
6231            }
6232            /* This hack serves to disambiguate a pair of curlies
6233             * as being a block or an anon hash.  Normally, expectation
6234             * determines that, but in cases where we're not in a
6235             * position to expect anything in particular (like inside
6236             * eval"") we have to resolve the ambiguity.  This code
6237             * covers the case where the first term in the curlies is a
6238             * quoted string.  Most other cases need to be explicitly
6239             * disambiguated by prepending a "+" before the opening
6240             * curly in order to force resolution as an anon hash.
6241             *
6242             * XXX should probably propagate the outer expectation
6243             * into eval"" to rely less on this hack, but that could
6244             * potentially break current behavior of eval"".
6245             * GSAR 97-07-21
6246             */
6247            t = s;
6248            if (*s == '\'' || *s == '"' || *s == '`') {
6249                /* common case: get past first string, handling escapes */
6250                for (t++; t < PL_bufend && *t != *s;)
6251                    if (*t++ == '\\')
6252                        t++;
6253                t++;
6254            }
6255            else if (*s == 'q') {
6256                if (++t < PL_bufend
6257                    && (!isWORDCHAR(*t)
6258                        || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6259                            && !isWORDCHAR(*t))))
6260                {
6261                    /* skip q//-like construct */
6262                    const char *tmps;
6263                    char open, close, term;
6264                    I32 brackets = 1;
6265
6266                    while (t < PL_bufend && isSPACE(*t))
6267                        t++;
6268                    /* check for q => */
6269                    if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6270                        OPERATOR(HASHBRACK);
6271                    }
6272                    term = *t;
6273                    open = term;
6274                    if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6275                        term = tmps[5];
6276                    close = term;
6277                    if (open == close)
6278                        for (t++; t < PL_bufend; t++) {
6279                            if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6280                                t++;
6281                            else if (*t == open)
6282                                break;
6283                        }
6284                    else {
6285                        for (t++; t < PL_bufend; t++) {
6286                            if (*t == '\\' && t+1 < PL_bufend)
6287                                t++;
6288                            else if (*t == close && --brackets <= 0)
6289                                break;
6290                            else if (*t == open)
6291                                brackets++;
6292                        }
6293                    }
6294                    t++;
6295                }
6296                else
6297                    /* skip plain q word */
6298                    while (   t < PL_bufend
6299                           && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6300                    {
6301                        t += UTF ? UTF8SKIP(t) : 1;
6302                    }
6303            }
6304            else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6305                t += UTF ? UTF8SKIP(t) : 1;
6306                while (   t < PL_bufend
6307                       && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6308                {
6309                    t += UTF ? UTF8SKIP(t) : 1;
6310                }
6311            }
6312            while (t < PL_bufend && isSPACE(*t))
6313                t++;
6314            /* if comma follows first term, call it an anon hash */
6315            /* XXX it could be a comma expression with loop modifiers */
6316            if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6317                               || (*t == '=' && t[1] == '>')))
6318                OPERATOR(HASHBRACK);
6319            if (PL_expect == XREF) {
6320              block_expectation:
6321                /* If there is an opening brace or 'sub:', treat it
6322                   as a term to make ${{...}}{k} and &{sub:attr...}
6323                   dwim.  Otherwise, treat it as a statement, so
6324                   map {no strict; ...} works.
6325                 */
6326                s = skipspace(s);
6327                if (*s == '{') {
6328                    PL_expect = XTERM;
6329                    break;
6330                }
6331                if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6332                    PL_bufptr = s;
6333                    d = s + 3;
6334                    d = skipspace(d);
6335                    s = PL_bufptr;
6336                    if (*d == ':') {
6337                        PL_expect = XTERM;
6338                        break;
6339                    }
6340                }
6341                PL_expect = XSTATE;
6342            }
6343            else {
6344                PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6345                PL_expect = XSTATE;
6346            }
6347        }
6348        break;
6349    }
6350
6351    pl_yylval.ival = CopLINE(PL_curcop);
6352    PL_copline = NOLINE;   /* invalidate current command line number */
6353    TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6354}
6355
6356static int
6357yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6358{
6359    assert(s != PL_bufend);
6360    s++;
6361
6362    if (PL_lex_brackets <= 0)
6363        /* diag_listed_as: Unmatched right %s bracket */
6364        yyerror("Unmatched right curly bracket");
6365    else
6366        PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6367
6368    PL_lex_allbrackets--;
6369
6370    if (PL_lex_state == LEX_INTERPNORMAL) {
6371        if (PL_lex_brackets == 0) {
6372            if (PL_expect & XFAKEBRACK) {
6373                PL_expect &= XENUMMASK;
6374                PL_lex_state = LEX_INTERPEND;
6375                PL_bufptr = s;
6376                return yylex();	/* ignore fake brackets */
6377            }
6378            if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6379             && SvEVALED(PL_lex_repl))
6380                PL_lex_state = LEX_INTERPEND;
6381            else if (*s == '-' && s[1] == '>')
6382                PL_lex_state = LEX_INTERPENDMAYBE;
6383            else if (*s != '[' && *s != '{')
6384                PL_lex_state = LEX_INTERPEND;
6385        }
6386    }
6387
6388    if (PL_expect & XFAKEBRACK) {
6389        PL_expect &= XENUMMASK;
6390        PL_bufptr = s;
6391        return yylex();		/* ignore fake brackets */
6392    }
6393
6394    force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6395    if (formbrack) LEAVE_with_name("lex_format");
6396    if (formbrack == 2) { /* means . where arguments were expected */
6397        force_next(PERLY_SEMICOLON);
6398        TOKEN(FORMRBRACK);
6399    }
6400
6401    TOKEN(PERLY_SEMICOLON);
6402}
6403
6404static int
6405yyl_ampersand(pTHX_ char *s)
6406{
6407    if (PL_expect == XPOSTDEREF)
6408        POSTDEREF(PERLY_AMPERSAND);
6409
6410    s++;
6411    if (*s++ == '&') {
6412        if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6413                (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6414            s -= 2;
6415            TOKEN(0);
6416        }
6417        AOPERATOR(ANDAND);
6418    }
6419    s--;
6420
6421    if (PL_expect == XOPERATOR) {
6422        char *d;
6423        bool bof;
6424        if (   PL_bufptr == PL_linestart
6425            && ckWARN(WARN_SEMICOLON)
6426            && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6427        {
6428            CopLINE_dec(PL_curcop);
6429            Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6430            CopLINE_inc(PL_curcop);
6431        }
6432        d = s;
6433        if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6434            s++;
6435        if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6436                (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6437            s = d;
6438            s--;
6439            TOKEN(0);
6440        }
6441        if (d == s)
6442            BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6443        else
6444            BAop(OP_SBIT_AND);
6445    }
6446
6447    PL_tokenbuf[0] = '&';
6448    s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6449    pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6450
6451    if (PL_tokenbuf[1])
6452        force_ident_maybe_lex('&');
6453    else
6454        PREREF(PERLY_AMPERSAND);
6455
6456    TERM(PERLY_AMPERSAND);
6457}
6458
6459static int
6460yyl_verticalbar(pTHX_ char *s)
6461{
6462    char *d;
6463    bool bof;
6464
6465    s++;
6466    if (*s++ == '|') {
6467        if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6468                (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6469            s -= 2;
6470            TOKEN(0);
6471        }
6472        AOPERATOR(OROR);
6473    }
6474
6475    s--;
6476    d = s;
6477    if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6478        s++;
6479
6480    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6481            (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6482        s = d - 1;
6483        TOKEN(0);
6484    }
6485
6486    BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6487}
6488
6489static int
6490yyl_bang(pTHX_ char *s)
6491{
6492    const char tmp = *s++;
6493    if (tmp == '=') {
6494        /* was this !=~ where !~ was meant?
6495         * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6496
6497        if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6498            const char *t = s+1;
6499
6500            while (t < PL_bufend && isSPACE(*t))
6501                ++t;
6502
6503            if (*t == '/' || *t == '?'
6504                || ((*t == 'm' || *t == 's' || *t == 'y')
6505                    && !isWORDCHAR(t[1]))
6506                || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6507                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6508                            "!=~ should be !~");
6509        }
6510
6511        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6512            s -= 2;
6513            TOKEN(0);
6514        }
6515
6516        ChEop(OP_NE);
6517    }
6518
6519    if (tmp == '~')
6520        PMop(OP_NOT);
6521
6522    s--;
6523    OPERATOR(PERLY_EXCLAMATION_MARK);
6524}
6525
6526static int
6527yyl_snail(pTHX_ char *s)
6528{
6529    if (PL_expect == XPOSTDEREF)
6530        POSTDEREF(PERLY_SNAIL);
6531    PL_tokenbuf[0] = '@';
6532    s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6533    if (PL_expect == XOPERATOR) {
6534        char *d = s;
6535        if (PL_bufptr > s) {
6536            d = PL_bufptr-1;
6537            PL_bufptr = PL_oldbufptr;
6538        }
6539        no_op("Array", d);
6540    }
6541    pl_yylval.ival = 0;
6542    if (!PL_tokenbuf[1]) {
6543        PREREF(PERLY_SNAIL);
6544    }
6545    if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6546        s = skipspace(s);
6547    if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6548        && intuit_more(s, PL_bufend))
6549    {
6550        if (*s == '{')
6551            PL_tokenbuf[0] = '%';
6552
6553        /* Warn about @ where they meant $. */
6554        if (*s == '[' || *s == '{') {
6555            if (ckWARN(WARN_SYNTAX)) {
6556                S_check_scalar_slice(aTHX_ s);
6557            }
6558        }
6559    }
6560    PL_expect = XOPERATOR;
6561    force_ident_maybe_lex('@');
6562    TERM(PERLY_SNAIL);
6563}
6564
6565static int
6566yyl_slash(pTHX_ char *s)
6567{
6568    if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6569        if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6570                (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6571            TOKEN(0);
6572        s += 2;
6573        AOPERATOR(DORDOR);
6574    }
6575    else if (PL_expect == XOPERATOR) {
6576        s++;
6577        if (*s == '=' && !PL_lex_allbrackets
6578            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6579        {
6580            s--;
6581            TOKEN(0);
6582        }
6583        Mop(OP_DIVIDE);
6584    }
6585    else {
6586        /* Disable warning on "study /blah/" */
6587        if (    PL_oldoldbufptr == PL_last_uni
6588            && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6589                || memNE(PL_last_uni, "study", 5)
6590                || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6591         ))
6592            check_uni();
6593        s = scan_pat(s,OP_MATCH);
6594        TERM(sublex_start());
6595    }
6596}
6597
6598static int
6599yyl_leftsquare(pTHX_ char *s)
6600{
6601    if (PL_lex_brackets > 100)
6602        Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6603    PL_lex_brackstack[PL_lex_brackets++] = 0;
6604    PL_lex_allbrackets++;
6605    s++;
6606    OPERATOR(PERLY_BRACKET_OPEN);
6607}
6608
6609static int
6610yyl_rightsquare(pTHX_ char *s)
6611{
6612    if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6613        TOKEN(0);
6614    s++;
6615    if (PL_lex_brackets <= 0)
6616        /* diag_listed_as: Unmatched right %s bracket */
6617        yyerror("Unmatched right square bracket");
6618    else
6619        --PL_lex_brackets;
6620    PL_lex_allbrackets--;
6621    if (PL_lex_state == LEX_INTERPNORMAL) {
6622        if (PL_lex_brackets == 0) {
6623            if (*s == '-' && s[1] == '>')
6624                PL_lex_state = LEX_INTERPENDMAYBE;
6625            else if (*s != '[' && *s != '{')
6626                PL_lex_state = LEX_INTERPEND;
6627        }
6628    }
6629    TERM(PERLY_BRACKET_CLOSE);
6630}
6631
6632static int
6633yyl_tilde(pTHX_ char *s)
6634{
6635    bool bof;
6636    if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6637        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6638            TOKEN(0);
6639        s += 2;
6640        Perl_ck_warner_d(aTHX_
6641            packWARN(WARN_DEPRECATED__SMARTMATCH),
6642            "Smartmatch is deprecated");
6643        NCEop(OP_SMARTMATCH);
6644    }
6645    s++;
6646    if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6647        s++;
6648        BCop(OP_SCOMPLEMENT);
6649    }
6650    BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6651}
6652
6653static int
6654yyl_leftparen(pTHX_ char *s)
6655{
6656    if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6657        PL_oldbufptr = PL_oldoldbufptr;		/* allow print(STDOUT 123) */
6658    else
6659        PL_expect = XTERM;
6660    s = skipspace(s);
6661    PL_lex_allbrackets++;
6662    TOKEN(PERLY_PAREN_OPEN);
6663}
6664
6665static int
6666yyl_rightparen(pTHX_ char *s)
6667{
6668    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6669        TOKEN(0);
6670    s++;
6671    PL_lex_allbrackets--;
6672    s = skipspace(s);
6673    if (*s == '{')
6674        PREBLOCK(PERLY_PAREN_CLOSE);
6675    TERM(PERLY_PAREN_CLOSE);
6676}
6677
6678static int
6679yyl_leftpointy(pTHX_ char *s)
6680{
6681    char tmp;
6682
6683    if (PL_expect != XOPERATOR) {
6684        if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6685            check_uni();
6686        if (s[1] == '<' && s[2] != '>')
6687            s = scan_heredoc(s);
6688        else
6689            s = scan_inputsymbol(s);
6690        PL_expect = XOPERATOR;
6691        TOKEN(sublex_start());
6692    }
6693
6694    s++;
6695
6696    tmp = *s++;
6697    if (tmp == '<') {
6698        if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6699            s -= 2;
6700            TOKEN(0);
6701        }
6702        SHop(OP_LEFT_SHIFT);
6703    }
6704    if (tmp == '=') {
6705        tmp = *s++;
6706        if (tmp == '>') {
6707            if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6708                s -= 3;
6709                TOKEN(0);
6710            }
6711            NCEop(OP_NCMP);
6712        }
6713        s--;
6714        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6715            s -= 2;
6716            TOKEN(0);
6717        }
6718        ChRop(OP_LE);
6719    }
6720
6721    s--;
6722    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6723        s--;
6724        TOKEN(0);
6725    }
6726
6727    ChRop(OP_LT);
6728}
6729
6730static int
6731yyl_rightpointy(pTHX_ char *s)
6732{
6733    const char tmp = *s++;
6734
6735    if (tmp == '>') {
6736        if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6737            s -= 2;
6738            TOKEN(0);
6739        }
6740        SHop(OP_RIGHT_SHIFT);
6741    }
6742    else if (tmp == '=') {
6743        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6744            s -= 2;
6745            TOKEN(0);
6746        }
6747        ChRop(OP_GE);
6748    }
6749
6750    s--;
6751    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6752        s--;
6753        TOKEN(0);
6754    }
6755
6756    ChRop(OP_GT);
6757}
6758
6759static int
6760yyl_sglquote(pTHX_ char *s)
6761{
6762    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6763    if (!s)
6764        missingterm(NULL, 0);
6765    COPLINE_SET_FROM_MULTI_END;
6766    DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6767    if (PL_expect == XOPERATOR) {
6768        no_op("String",s);
6769    }
6770    pl_yylval.ival = OP_CONST;
6771    TERM(sublex_start());
6772}
6773
6774static int
6775yyl_dblquote(pTHX_ char *s)
6776{
6777    char *d;
6778    STRLEN len;
6779    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6780    DEBUG_T( {
6781        if (s)
6782            printbuf("### Saw string before %s\n", s);
6783        else
6784            PerlIO_printf(Perl_debug_log,
6785                         "### Saw unterminated string\n");
6786    } );
6787    if (PL_expect == XOPERATOR) {
6788            no_op("String",s);
6789    }
6790    if (!s)
6791        missingterm(NULL, 0);
6792    pl_yylval.ival = OP_CONST;
6793    /* FIXME. I think that this can be const if char *d is replaced by
6794       more localised variables.  */
6795    for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6796        if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6797            pl_yylval.ival = OP_STRINGIFY;
6798            break;
6799        }
6800    }
6801    if (pl_yylval.ival == OP_CONST)
6802        COPLINE_SET_FROM_MULTI_END;
6803    TERM(sublex_start());
6804}
6805
6806static int
6807yyl_backtick(pTHX_ char *s)
6808{
6809    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6810    DEBUG_T( {
6811        if (s)
6812            printbuf("### Saw backtick string before %s\n", s);
6813        else
6814            PerlIO_printf(Perl_debug_log,
6815                         "### Saw unterminated backtick string\n");
6816    } );
6817    if (PL_expect == XOPERATOR)
6818        no_op("Backticks",s);
6819    if (!s)
6820        missingterm(NULL, 0);
6821    pl_yylval.ival = OP_BACKTICK;
6822    TERM(sublex_start());
6823}
6824
6825static int
6826yyl_backslash(pTHX_ char *s)
6827{
6828    if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6829        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6830                       *s, *s);
6831    if (PL_expect == XOPERATOR)
6832        no_op("Backslash",s);
6833    OPERATOR(REFGEN);
6834}
6835
6836static void
6837yyl_data_handle(pTHX)
6838{
6839    HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6840                            ? PL_curstash
6841                            : PL_defstash;
6842    GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6843
6844    if (!isGV(gv))
6845        gv_init(gv,stash,"DATA",4,0);
6846
6847    GvMULTI_on(gv);
6848    if (!GvIO(gv))
6849        GvIOp(gv) = newIO();
6850    IoIFP(GvIOp(gv)) = PL_rsfp;
6851
6852    /* Mark this internal pseudo-handle as clean */
6853    IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6854    if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6855        IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6856    else
6857        IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6858
6859#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6860    /* if the script was opened in binmode, we need to revert
6861     * it to text mode for compatibility; but only iff it has CRs
6862     * XXX this is a questionable hack at best. */
6863    if (PL_bufend-PL_bufptr > 2
6864        && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6865    {
6866        Off_t loc = 0;
6867        if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6868            loc = PerlIO_tell(PL_rsfp);
6869            (void)PerlIO_seek(PL_rsfp, 0L, 0);
6870        }
6871        if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6872            if (loc > 0)
6873                PerlIO_seek(PL_rsfp, loc, 0);
6874        }
6875    }
6876#endif
6877
6878#ifdef PERLIO_LAYERS
6879    if (!IN_BYTES) {
6880        if (UTF)
6881            PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6882    }
6883#endif
6884
6885    PL_rsfp = NULL;
6886}
6887
6888PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6889    __attribute__noreturn__;
6890
6891PERL_STATIC_NO_RET void
6892yyl_croak_unrecognised(pTHX_ char *s)
6893{
6894    SV *dsv = newSVpvs_flags("", SVs_TEMP);
6895    const char *c;
6896    char *d;
6897    STRLEN len;
6898
6899    if (UTF) {
6900        STRLEN skiplen = UTF8SKIP(s);
6901        STRLEN stravail = PL_bufend - s;
6902        c = sv_uni_display(dsv, newSVpvn_flags(s,
6903                                               skiplen > stravail ? stravail : skiplen,
6904                                               SVs_TEMP | SVf_UTF8),
6905                           10, UNI_DISPLAY_ISPRINT);
6906    }
6907    else {
6908        c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6909    }
6910
6911    if (s >= PL_linestart) {
6912        d = PL_linestart;
6913    }
6914    else {
6915        /* somehow (probably due to a parse failure), PL_linestart has advanced
6916         * pass PL_bufptr, get a reasonable beginning of line
6917         */
6918        d = s;
6919        while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6920            --d;
6921    }
6922    len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6923    if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6924        d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6925    }
6926
6927    Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6928                      UTF8fARG(UTF, (s - d), d),
6929                     (int) len + 1);
6930}
6931
6932static int
6933yyl_require(pTHX_ char *s, I32 orig_keyword)
6934{
6935    s = skipspace(s);
6936    if (isDIGIT(*s)) {
6937        s = force_version(s, FALSE);
6938    }
6939    else if (*s != 'v' || !isDIGIT(s[1])
6940            || (s = force_version(s, TRUE), *s == 'v'))
6941    {
6942        *PL_tokenbuf = '\0';
6943        s = force_word(s,BAREWORD,TRUE,TRUE);
6944        if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6945                                   PL_tokenbuf + sizeof(PL_tokenbuf),
6946                                   UTF))
6947        {
6948            gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6949                        GV_ADD | (UTF ? SVf_UTF8 : 0));
6950        }
6951        else if (*s == '<')
6952            yyerror("<> at require-statement should be quotes");
6953    }
6954
6955    if (orig_keyword == KEY_require)
6956        pl_yylval.ival = 1;
6957    else
6958        pl_yylval.ival = 0;
6959
6960    PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6961    PL_bufptr = s;
6962    PL_last_uni = PL_oldbufptr;
6963    PL_last_lop_op = OP_REQUIRE;
6964    s = skipspace(s);
6965    return REPORT( (int)KW_REQUIRE );
6966}
6967
6968static int
6969yyl_foreach(pTHX_ char *s)
6970{
6971    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6972        return REPORT(0);
6973    pl_yylval.ival = CopLINE(PL_curcop);
6974    s = skipspace(s);
6975    if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6976        char *p = s;
6977        SSize_t s_off = s - SvPVX(PL_linestr);
6978        bool paren_is_valid = FALSE;
6979        bool maybe_package = FALSE;
6980        bool saw_core = FALSE;
6981        bool core_valid = FALSE;
6982
6983        if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
6984            saw_core = TRUE;
6985            p += 6;
6986        }
6987        if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
6988            core_valid = TRUE;
6989            paren_is_valid = TRUE;
6990            if (isSPACE(p[2])) {
6991                p = skipspace(p + 3);
6992                maybe_package = TRUE;
6993            }
6994            else {
6995                p += 2;
6996            }
6997        }
6998        else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
6999            core_valid = TRUE;
7000            if (isSPACE(p[3])) {
7001                p = skipspace(p + 4);
7002                maybe_package = TRUE;
7003            }
7004            else {
7005                p += 3;
7006            }
7007        }
7008        else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
7009            core_valid = TRUE;
7010            if (isSPACE(p[5])) {
7011                p = skipspace(p + 6);
7012            }
7013            else {
7014                p += 5;
7015            }
7016        }
7017        if (saw_core && !core_valid) {
7018            Perl_croak(aTHX_ "Missing $ on loop variable");
7019        }
7020
7021        if (maybe_package && !saw_core) {
7022            /* skip optional package name, as in "for my abc $x (..)" */
7023            if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
7024                STRLEN len;
7025                p = scan_word6(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7026                p = skipspace(p);
7027                paren_is_valid = FALSE;
7028            }
7029        }
7030
7031        if (UNLIKELY(paren_is_valid && *p == '(')) {
7032            Perl_ck_warner_d(aTHX_
7033                             packWARN(WARN_EXPERIMENTAL__FOR_LIST),
7034                             "for my (...) is experimental");
7035        }
7036        else if (UNLIKELY(*p != '$' && *p != '\\')) {
7037            /* "for myfoo (" will end up here, but with p pointing at the 'f' */
7038            Perl_croak(aTHX_ "Missing $ on loop variable");
7039        }
7040        /* The buffer may have been reallocated, update s */
7041        s = SvPVX(PL_linestr) + s_off;
7042    }
7043    OPERATOR(KW_FOR);
7044}
7045
7046static int
7047yyl_do(pTHX_ char *s, I32 orig_keyword)
7048{
7049    s = skipspace(s);
7050    if (*s == '{')
7051        PRETERMBLOCK(KW_DO);
7052    if (*s != '\'') {
7053        char *d;
7054        STRLEN len;
7055        *PL_tokenbuf = '&';
7056        d = scan_word6(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7057                      1, &len, TRUE);
7058        if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7059         && !keyword(PL_tokenbuf + 1, len, 0)) {
7060            SSize_t off = s-SvPVX(PL_linestr);
7061            d = skipspace(d);
7062            s = SvPVX(PL_linestr)+off;
7063            if (*d == '(') {
7064                force_ident_maybe_lex('&');
7065                s = d;
7066            }
7067        }
7068    }
7069    if (orig_keyword == KEY_do)
7070        pl_yylval.ival = 1;
7071    else
7072        pl_yylval.ival = 0;
7073    OPERATOR(KW_DO);
7074}
7075
7076static int
7077yyl_my(pTHX_ char *s, I32 my)
7078{
7079    if (PL_in_my) {
7080        PL_bufptr = s;
7081        yyerror(Perl_form(aTHX_
7082                          "Can't redeclare \"%s\" in \"%s\"",
7083                           my       == KEY_my    ? "my" :
7084                           my       == KEY_state ? "state" : "our",
7085                           PL_in_my == KEY_my    ? "my" :
7086                           PL_in_my == KEY_state ? "state" : "our"));
7087    }
7088    PL_in_my = (U16)my;
7089    s = skipspace(s);
7090    if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7091        STRLEN len;
7092        s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7093        if (memEQs(PL_tokenbuf, len, "sub"))
7094            return yyl_sub(aTHX_ s, my);
7095        PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7096        if (!PL_in_my_stash) {
7097            char tmpbuf[1024];
7098            int i;
7099            PL_bufptr = s;
7100            i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7101            PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7102            yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7103        }
7104    }
7105    else if (*s == '\\') {
7106        if (!FEATURE_MYREF_IS_ENABLED)
7107            Perl_croak(aTHX_ "The experimental declared_refs "
7108                             "feature is not enabled");
7109        Perl_ck_warner_d(aTHX_
7110             packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7111            "Declaring references is experimental");
7112    }
7113    OPERATOR(KW_MY);
7114}
7115
7116static int yyl_try(pTHX_ char*);
7117
7118static bool
7119yyl_eol_needs_semicolon(pTHX_ char **ps)
7120{
7121    char *s = *ps;
7122    if (PL_lex_state != LEX_NORMAL
7123        || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7124    {
7125        const bool in_comment = *s == '#';
7126        char *d;
7127        if (*s == '#' && s == PL_linestart && PL_in_eval
7128         && !PL_rsfp && !PL_parser->filtered) {
7129            /* handle eval qq[#line 1 "foo"\n ...] */
7130            CopLINE_dec(PL_curcop);
7131            incline(s, PL_bufend);
7132        }
7133        d = s;
7134        while (d < PL_bufend && *d != '\n')
7135            d++;
7136        if (d < PL_bufend)
7137            d++;
7138        s = d;
7139        if (in_comment && d == PL_bufend
7140            && PL_lex_state == LEX_INTERPNORMAL
7141            && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7142            && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7143        else
7144            incline(s, PL_bufend);
7145        if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7146            PL_lex_state = LEX_FORMLINE;
7147            force_next(FORMRBRACK);
7148            *ps = s;
7149            return TRUE;
7150        }
7151    }
7152    else {
7153        while (s < PL_bufend && *s != '\n')
7154            s++;
7155        if (s < PL_bufend) {
7156            s++;
7157            if (s < PL_bufend)
7158                incline(s, PL_bufend);
7159        }
7160    }
7161    *ps = s;
7162    return FALSE;
7163}
7164
7165static int
7166yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7167{
7168    char *d;
7169
7170    goto start;
7171
7172    do {
7173        fake_eof = 0;
7174        bof = cBOOL(PL_rsfp);
7175      start:
7176
7177        PL_bufptr = PL_bufend;
7178        COPLINE_INC_WITH_HERELINES;
7179        if (!lex_next_chunk(fake_eof)) {
7180            CopLINE_dec(PL_curcop);
7181            s = PL_bufptr;
7182            TOKEN(PERLY_SEMICOLON);	/* not infinite loop because rsfp is NULL now */
7183        }
7184        CopLINE_dec(PL_curcop);
7185        s = PL_bufptr;
7186        /* If it looks like the start of a BOM or raw UTF-16,
7187         * check if it in fact is. */
7188        if (bof && PL_rsfp
7189            && (   *s == 0
7190                || *(U8*)s == BOM_UTF8_FIRST_BYTE
7191                || *(U8*)s >= 0xFE
7192                || s[1] == 0))
7193        {
7194            Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7195            bof = (offset == (Off_t)SvCUR(PL_linestr));
7196#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7197            /* offset may include swallowed CR */
7198            if (!bof)
7199                bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7200#endif
7201            if (bof) {
7202                PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7203                s = swallow_bom((U8*)s);
7204            }
7205        }
7206        if (PL_parser->in_pod) {
7207            /* Incest with pod. */
7208            if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7209                && !isALPHA(s[4]))
7210            {
7211                SvPVCLEAR(PL_linestr);
7212                PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7213                PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7214                PL_last_lop = PL_last_uni = NULL;
7215                PL_parser->in_pod = 0;
7216            }
7217        }
7218        if (PL_rsfp || PL_parser->filtered)
7219            incline(s, PL_bufend);
7220    } while (PL_parser->in_pod);
7221
7222    PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7223    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7224    PL_last_lop = PL_last_uni = NULL;
7225    if (CopLINE(PL_curcop) == 1) {
7226        while (s < PL_bufend && isSPACE(*s))
7227            s++;
7228        if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7229            s++;
7230        d = NULL;
7231        if (!PL_in_eval) {
7232            if (*s == '#' && *(s+1) == '!')
7233                d = s + 2;
7234#ifdef ALTERNATE_SHEBANG
7235            else {
7236                static char const as[] = ALTERNATE_SHEBANG;
7237                if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7238                    d = s + (sizeof(as) - 1);
7239            }
7240#endif /* ALTERNATE_SHEBANG */
7241        }
7242        if (d) {
7243            char *ipath;
7244            char *ipathend;
7245
7246            while (isSPACE(*d))
7247                d++;
7248            ipath = d;
7249            while (*d && !isSPACE(*d))
7250                d++;
7251            ipathend = d;
7252
7253#ifdef ARG_ZERO_IS_SCRIPT
7254            if (ipathend > ipath) {
7255                /*
7256                 * HP-UX (at least) sets argv[0] to the script name,
7257                 * which makes $^X incorrect.  And Digital UNIX and Linux,
7258                 * at least, set argv[0] to the basename of the Perl
7259                 * interpreter. So, having found "#!", we'll set it right.
7260                 */
7261                SV* copfilesv = CopFILESV(PL_curcop);
7262                if (copfilesv) {
7263                    SV * const x =
7264                        GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7265                                         SVt_PV)); /* $^X */
7266                    assert(SvPOK(x) || SvGMAGICAL(x));
7267                    if (sv_eq(x, copfilesv)) {
7268                        sv_setpvn(x, ipath, ipathend - ipath);
7269                        SvSETMAGIC(x);
7270                    }
7271                    else {
7272                        STRLEN blen;
7273                        STRLEN llen;
7274                        const char *bstart = SvPV_const(copfilesv, blen);
7275                        const char * const lstart = SvPV_const(x, llen);
7276                        if (llen < blen) {
7277                            bstart += blen - llen;
7278                            if (strnEQ(bstart, lstart, llen) &&	bstart[-1] == '/') {
7279                                sv_setpvn(x, ipath, ipathend - ipath);
7280                                SvSETMAGIC(x);
7281                            }
7282                        }
7283                    }
7284                }
7285                else {
7286                    /* Anything to do if no copfilesv? */
7287                }
7288                TAINT_NOT;	/* $^X is always tainted, but that's OK */
7289            }
7290#endif /* ARG_ZERO_IS_SCRIPT */
7291
7292            /*
7293             * Look for options.
7294             */
7295            d = instr(s,"perl -");
7296            if (!d) {
7297                d = instr(s,"perl");
7298#if defined(DOSISH)
7299                /* avoid getting into infinite loops when shebang
7300                 * line contains "Perl" rather than "perl" */
7301                if (!d) {
7302                    for (d = ipathend-4; d >= ipath; --d) {
7303                        if (isALPHA_FOLD_EQ(*d, 'p')
7304                            && !ibcmp(d, "perl", 4))
7305                        {
7306                            break;
7307                        }
7308                    }
7309                    if (d < ipath)
7310                        d = NULL;
7311                }
7312#endif
7313            }
7314#ifdef ALTERNATE_SHEBANG
7315            /*
7316             * If the ALTERNATE_SHEBANG on this system starts with a
7317             * character that can be part of a Perl expression, then if
7318             * we see it but not "perl", we're probably looking at the
7319             * start of Perl code, not a request to hand off to some
7320             * other interpreter.  Similarly, if "perl" is there, but
7321             * not in the first 'word' of the line, we assume the line
7322             * contains the start of the Perl program.
7323             */
7324            if (d && *s != '#') {
7325                const char *c = ipath;
7326                while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7327                    c++;
7328                if (c < d)
7329                    d = NULL;	/* "perl" not in first word; ignore */
7330                else
7331                    *s = '#';	/* Don't try to parse shebang line */
7332            }
7333#endif /* ALTERNATE_SHEBANG */
7334            if (!d
7335                && *s == '#'
7336                && ipathend > ipath
7337                && !PL_minus_c
7338                && !instr(s,"indir")
7339                && instr(PL_origargv[0],"perl"))
7340            {
7341                char **newargv;
7342
7343                *ipathend = '\0';
7344                s = ipathend + 1;
7345                while (s < PL_bufend && isSPACE(*s))
7346                    s++;
7347                if (s < PL_bufend) {
7348                    Newx(newargv,PL_origargc+3,char*);
7349                    newargv[1] = s;
7350                    while (s < PL_bufend && !isSPACE(*s))
7351                        s++;
7352                    *s = '\0';
7353                    Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7354                }
7355                else
7356                    newargv = PL_origargv;
7357                newargv[0] = ipath;
7358                PERL_FPU_PRE_EXEC
7359                PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7360                PERL_FPU_POST_EXEC
7361                Perl_croak(aTHX_ "Can't exec %s", ipath);
7362            }
7363            if (d) {
7364                while (*d && !isSPACE(*d))
7365                    d++;
7366                while (SPACE_OR_TAB(*d))
7367                    d++;
7368
7369                if (*d++ == '-') {
7370                    const bool switches_done = PL_doswitches;
7371                    const U32 oldpdb = PL_perldb;
7372                    const bool oldn = PL_minus_n;
7373                    const bool oldp = PL_minus_p;
7374                    const char *d1 = d;
7375
7376                    do {
7377                        bool baduni = FALSE;
7378                        if (*d1 == 'C') {
7379                            const char *d2 = d1 + 1;
7380                            if (parse_unicode_opts((const char **)&d2)
7381                                != PL_unicode)
7382                                baduni = TRUE;
7383                        }
7384                        if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7385                            const char * const m = d1;
7386                            while (*d1 && !isSPACE(*d1))
7387                                d1++;
7388                            Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7389                                  (int)(d1 - m), m);
7390                        }
7391                        d1 = moreswitches(d1);
7392                    } while (d1);
7393                    if (PL_doswitches && !switches_done) {
7394                        int argc = PL_origargc;
7395                        char **argv = PL_origargv;
7396                        do {
7397                            argc--,argv++;
7398                        } while (argc && argv[0][0] == '-' && argv[0][1]);
7399                        init_argv_symbols(argc,argv);
7400                    }
7401                    if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7402                        || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7403                          /* if we have already added "LINE: while (<>) {",
7404                             we must not do it again */
7405                    {
7406                        SvPVCLEAR(PL_linestr);
7407                        PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7408                        PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7409                        PL_last_lop = PL_last_uni = NULL;
7410                        PL_preambled = FALSE;
7411                        if (PERLDB_LINE_OR_SAVESRC)
7412                            (void)gv_fetchfile(PL_origfilename);
7413                        return YYL_RETRY;
7414                    }
7415                }
7416            }
7417        }
7418    }
7419
7420    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7421        PL_lex_state = LEX_FORMLINE;
7422        force_next(FORMRBRACK);
7423        TOKEN(PERLY_SEMICOLON);
7424    }
7425
7426    PL_bufptr = s;
7427    return YYL_RETRY;
7428}
7429
7430static int
7431yyl_fatcomma(pTHX_ char *s, STRLEN len)
7432{
7433    CLINE;
7434    pl_yylval.opval
7435        = newSVOP(OP_CONST, 0,
7436                       S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7437    pl_yylval.opval->op_private = OPpCONST_BARE;
7438    TERM(BAREWORD);
7439}
7440
7441static int
7442yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7443{
7444    if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7445        && PL_parser->saw_infix_sigil)
7446    {
7447        Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7448                         "Operator or semicolon missing before %c%" UTF8f,
7449                         lastchar,
7450                         UTF8fARG(UTF, strlen(PL_tokenbuf),
7451                                  PL_tokenbuf));
7452        Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7453                         "Ambiguous use of %c resolved as operator %c",
7454                         lastchar, lastchar);
7455    }
7456    TOKEN(BAREWORD);
7457}
7458
7459static int
7460yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7461{
7462    if (sv) {
7463        op_free(rv2cv_op);
7464        SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7465        ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7466        if (SvTYPE(sv) == SVt_PVAV)
7467            pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7468                                      pl_yylval.opval);
7469        else {
7470            pl_yylval.opval->op_private = 0;
7471            pl_yylval.opval->op_folded = 1;
7472            pl_yylval.opval->op_flags |= OPf_SPECIAL;
7473        }
7474        TOKEN(BAREWORD);
7475    }
7476
7477    op_free(pl_yylval.opval);
7478    pl_yylval.opval =
7479        off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7480    pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7481    PL_last_lop = PL_oldbufptr;
7482    PL_last_lop_op = OP_ENTERSUB;
7483
7484    /* Is there a prototype? */
7485    if (SvPOK(cv)) {
7486        int k = yyl_subproto(aTHX_ s, cv);
7487        if (k != KEY_NULL)
7488            return k;
7489    }
7490
7491    NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7492    PL_expect = XTERM;
7493    force_next(off ? PRIVATEREF : BAREWORD);
7494    if (!PL_lex_allbrackets
7495        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7496    {
7497        PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7498    }
7499
7500    TOKEN(NOAMP);
7501}
7502
7503/* Honour "reserved word" warnings, and enforce strict subs */
7504static void
7505yyl_strictwarn_bareword(pTHX_ const char lastchar)
7506{
7507    /* after "print" and similar functions (corresponding to
7508     * "F? L" in opcode.pl), whatever wasn't already parsed as
7509     * a filehandle should be subject to "strict subs".
7510     * Likewise for the optional indirect-object argument to system
7511     * or exec, which can't be a bareword */
7512    if ((PL_last_lop_op == OP_PRINT
7513            || PL_last_lop_op == OP_PRTF
7514            || PL_last_lop_op == OP_SAY
7515            || PL_last_lop_op == OP_SYSTEM
7516            || PL_last_lop_op == OP_EXEC)
7517        && (PL_hints & HINT_STRICT_SUBS))
7518    {
7519        pl_yylval.opval->op_private |= OPpCONST_STRICT;
7520    }
7521
7522    if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7523        char *d = PL_tokenbuf;
7524        while (isLOWER(*d))
7525            d++;
7526        if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7527            /* PL_warn_reserved is constant */
7528            GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7529            Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7530                        PL_tokenbuf);
7531            GCC_DIAG_RESTORE_STMT;
7532        }
7533    }
7534}
7535
7536static int
7537yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7538{
7539    int pkgname = 0;
7540    const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7541    bool safebw;
7542    bool no_op_error = FALSE;
7543    /* Use this var to track whether intuit_method has been
7544       called.  intuit_method returns 0 or > 255.  */
7545    int key = 1;
7546
7547    if (PL_expect == XOPERATOR) {
7548        if (PL_bufptr == PL_linestart) {
7549            CopLINE_dec(PL_curcop);
7550            Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7551            CopLINE_inc(PL_curcop);
7552        }
7553        else
7554            /* We want to call no_op with s pointing after the
7555               bareword, so defer it.  But we want it to come
7556               before the Bad name croak.  */
7557            no_op_error = TRUE;
7558    }
7559
7560    /* Get the rest if it looks like a package qualifier */
7561
7562    if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7563        STRLEN morelen;
7564        s = scan_word6(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7565                      TRUE, &morelen, TRUE);
7566        if (no_op_error) {
7567            no_op("Bareword",s);
7568            no_op_error = FALSE;
7569        }
7570        if (!morelen)
7571            Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7572                    UTF8fARG(UTF, len, PL_tokenbuf),
7573                    *s == '\'' ? "'" : "::");
7574        len += morelen;
7575        pkgname = 1;
7576    }
7577
7578    if (no_op_error)
7579        no_op("Bareword",s);
7580
7581    /* See if the name is "Foo::",
7582       in which case Foo is a bareword
7583       (and a package name). */
7584
7585    if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7586        if (ckWARN(WARN_BAREWORD)
7587            && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7588            Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7589                        "Bareword \"%" UTF8f
7590                        "\" refers to nonexistent package",
7591                        UTF8fARG(UTF, len, PL_tokenbuf));
7592        len -= 2;
7593        PL_tokenbuf[len] = '\0';
7594        c.gv = NULL;
7595        c.gvp = 0;
7596        safebw = TRUE;
7597    }
7598    else {
7599        safebw = FALSE;
7600    }
7601
7602    /* if we saw a global override before, get the right name */
7603
7604    if (!c.sv)
7605        c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7606    if (c.gvp) {
7607        SV *sv = newSVpvs("CORE::GLOBAL::");
7608        sv_catsv(sv, c.sv);
7609        SvREFCNT_dec(c.sv);
7610        c.sv = sv;
7611    }
7612
7613    /* Presume this is going to be a bareword of some sort. */
7614    CLINE;
7615    pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7616    pl_yylval.opval->op_private = OPpCONST_BARE;
7617
7618    /* And if "Foo::", then that's what it certainly is. */
7619    if (safebw)
7620        return yyl_safe_bareword(aTHX_ s, lastchar);
7621
7622    if (!c.off) {
7623        OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7624        const_op->op_private = OPpCONST_BARE;
7625        c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7626        c.cv = c.lex
7627            ? isGV(c.gv)
7628                ? GvCV(c.gv)
7629                : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7630                    ? (CV *)SvRV(c.gv)
7631                    : ((CV *)c.gv)
7632            : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7633    }
7634
7635    /* See if it's the indirect object for a list operator. */
7636
7637    if (PL_oldoldbufptr
7638        && PL_oldoldbufptr < PL_bufptr
7639        && (PL_oldoldbufptr == PL_last_lop
7640            || PL_oldoldbufptr == PL_last_uni)
7641        && /* NO SKIPSPACE BEFORE HERE! */
7642           (PL_expect == XREF
7643            || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7644                                                   == OA_FILEREF))
7645    {
7646        bool immediate_paren = *s == '(';
7647        SSize_t s_off;
7648
7649        /* (Now we can afford to cross potential line boundary.) */
7650        s = skipspace(s);
7651
7652        /* intuit_method() can indirectly call lex_next_chunk(),
7653         * invalidating s
7654         */
7655        s_off = s - SvPVX(PL_linestr);
7656        /* Two barewords in a row may indicate method call. */
7657        if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7658                || *s == '$')
7659            && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7660        {
7661            /* the code at method: doesn't use s */
7662            goto method;
7663        }
7664        s = SvPVX(PL_linestr) + s_off;
7665
7666        /* If not a declared subroutine, it's an indirect object. */
7667        /* (But it's an indir obj regardless for sort.) */
7668        /* Also, if "_" follows a filetest operator, it's a bareword */
7669
7670        if (
7671            ( !immediate_paren && (PL_last_lop_op == OP_SORT
7672             || (!c.cv
7673                 && (PL_last_lop_op != OP_MAPSTART
7674                     && PL_last_lop_op != OP_GREPSTART))))
7675           || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7676                && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7677                                                == OA_FILESTATOP))
7678           )
7679        {
7680            PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7681            yyl_strictwarn_bareword(aTHX_ lastchar);
7682            op_free(c.rv2cv_op);
7683            return yyl_safe_bareword(aTHX_ s, lastchar);
7684        }
7685    }
7686
7687    PL_expect = XOPERATOR;
7688    s = skipspace(s);
7689
7690    /* Is this a word before a => operator? */
7691    if (*s == '=' && s[1] == '>' && !pkgname) {
7692        op_free(c.rv2cv_op);
7693        CLINE;
7694        if (c.gvp || (c.lex && !c.off)) {
7695            assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7696            /* This is our own scalar, created a few lines
7697               above, so this is safe. */
7698            SvREADONLY_off(c.sv);
7699            sv_setpv(c.sv, PL_tokenbuf);
7700            if (UTF && !IN_BYTES
7701             && is_utf8_string((U8*)PL_tokenbuf, len))
7702                  SvUTF8_on(c.sv);
7703            SvREADONLY_on(c.sv);
7704        }
7705        TERM(BAREWORD);
7706    }
7707
7708    /* If followed by a paren, it's certainly a subroutine. */
7709    if (*s == '(') {
7710        CLINE;
7711        if (c.cv) {
7712            char *d = s + 1;
7713            while (SPACE_OR_TAB(*d))
7714                d++;
7715            if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7716                return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7717        }
7718        NEXTVAL_NEXTTOKE.opval =
7719            c.off ? c.rv2cv_op : pl_yylval.opval;
7720        if (c.off)
7721             op_free(pl_yylval.opval), force_next(PRIVATEREF);
7722        else op_free(c.rv2cv_op),      force_next(BAREWORD);
7723        pl_yylval.ival = 0;
7724        TOKEN(PERLY_AMPERSAND);
7725    }
7726
7727    /* If followed by var or block, call it a method (unless sub) */
7728
7729    if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7730        op_free(c.rv2cv_op);
7731        PL_last_lop = PL_oldbufptr;
7732        PL_last_lop_op = OP_METHOD;
7733        if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7734            PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7735        PL_expect = XBLOCKTERM;
7736        PL_bufptr = s;
7737        return REPORT(METHCALL0);
7738    }
7739
7740    /* If followed by a bareword, see if it looks like indir obj. */
7741
7742    if (   key == 1
7743        && !orig_keyword
7744        && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7745        && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7746    {
7747      method:
7748        if (c.lex && !c.off) {
7749            assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7750            SvREADONLY_off(c.sv);
7751            sv_setpvn(c.sv, PL_tokenbuf, len);
7752            if (UTF && !IN_BYTES
7753             && is_utf8_string((U8*)PL_tokenbuf, len))
7754                SvUTF8_on(c.sv);
7755            else SvUTF8_off(c.sv);
7756        }
7757        op_free(c.rv2cv_op);
7758        if (key == METHCALL0 && !PL_lex_allbrackets
7759            && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7760        {
7761            PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7762        }
7763        return REPORT(key);
7764    }
7765
7766    /* Not a method, so call it a subroutine (if defined) */
7767
7768    if (c.cv) {
7769        /* Check for a constant sub */
7770        c.sv = cv_const_sv_or_av(c.cv);
7771        return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7772    }
7773
7774    /* Call it a bare word */
7775
7776    if (PL_hints & HINT_STRICT_SUBS)
7777        pl_yylval.opval->op_private |= OPpCONST_STRICT;
7778    else
7779        yyl_strictwarn_bareword(aTHX_ lastchar);
7780
7781    op_free(c.rv2cv_op);
7782
7783    return yyl_safe_bareword(aTHX_ s, lastchar);
7784}
7785
7786static int
7787yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7788{
7789    switch (key) {
7790    default:			/* not a keyword */
7791        return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7792
7793    case KEY___FILE__:
7794        FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7795
7796    case KEY___LINE__:
7797        FUN0OP(
7798            newSVOP(OP_CONST, 0,
7799                Perl_newSVpvf(aTHX_ "%" LINE_Tf, CopLINE(PL_curcop)))
7800        );
7801
7802    case KEY___PACKAGE__:
7803        FUN0OP(
7804            newSVOP(OP_CONST, 0, (PL_curstash
7805                                     ? newSVhek(HvNAME_HEK(PL_curstash))
7806                                     : &PL_sv_undef))
7807        );
7808
7809    case KEY___DATA__:
7810    case KEY___END__:
7811        if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7812            yyl_data_handle(aTHX);
7813        return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7814
7815    case KEY___SUB__:
7816        /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an
7817         * OP_CONST. We need to make it big enough to allow room for that if
7818         * so */
7819        FUN0OP(CvCLONE(PL_compcv)
7820                    ? newOP(OP_RUNCV, 0)
7821                    : newSVOP(OP_RUNCV, 0, &PL_sv_undef));
7822
7823    case KEY_AUTOLOAD:
7824    case KEY_DESTROY:
7825    case KEY_BEGIN:
7826    case KEY_UNITCHECK:
7827    case KEY_CHECK:
7828    case KEY_INIT:
7829    case KEY_END:
7830        if (PL_expect == XSTATE)
7831            return yyl_sub(aTHX_ PL_bufptr, key);
7832        return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7833
7834    case KEY_ADJUST:
7835        Perl_ck_warner_d(aTHX_
7836            packWARN(WARN_EXPERIMENTAL__CLASS), "ADJUST is experimental");
7837
7838        /* The way that KEY_CHECK et.al. are handled currently are nothing
7839         * short of crazy. We won't copy that model for new phasers, but use
7840         * this as an experiment to test if this will work
7841         */
7842        PHASERBLOCK(KEY_ADJUST);
7843
7844    case KEY_abs:
7845        UNI(OP_ABS);
7846
7847    case KEY_alarm:
7848        UNI(OP_ALARM);
7849
7850    case KEY_accept:
7851        LOP(OP_ACCEPT,XTERM);
7852
7853    case KEY_and:
7854        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7855            return REPORT(0);
7856        OPERATOR(ANDOP);
7857
7858    case KEY_atan2:
7859        LOP(OP_ATAN2,XTERM);
7860
7861    case KEY_bind:
7862        LOP(OP_BIND,XTERM);
7863
7864    case KEY_binmode:
7865        LOP(OP_BINMODE,XTERM);
7866
7867    case KEY_bless:
7868        LOP(OP_BLESS,XTERM);
7869
7870    case KEY_break:
7871        FUN0(OP_BREAK);
7872
7873    case KEY_catch:
7874        Perl_ck_warner_d(aTHX_
7875            packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
7876        PREBLOCK(KW_CATCH);
7877
7878    case KEY_chop:
7879        UNI(OP_CHOP);
7880
7881    case KEY_class:
7882        Perl_ck_warner_d(aTHX_
7883            packWARN(WARN_EXPERIMENTAL__CLASS), "class is experimental");
7884
7885        s = force_word(s,BAREWORD,FALSE,TRUE);
7886        s = skipspace(s);
7887        s = force_strict_version(s);
7888        PL_expect = XATTRBLOCK;
7889        TOKEN(KW_CLASS);
7890
7891    case KEY_continue:
7892        /* We have to disambiguate the two senses of
7893          "continue". If the next token is a '{' then
7894          treat it as the start of a continue block;
7895          otherwise treat it as a control operator.
7896         */
7897        s = skipspace(s);
7898        if (*s == '{')
7899            PREBLOCK(KW_CONTINUE);
7900        else
7901            FUN0(OP_CONTINUE);
7902
7903    case KEY_chdir:
7904        /* may use HOME */
7905        (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7906        UNI(OP_CHDIR);
7907
7908    case KEY_close:
7909        UNI(OP_CLOSE);
7910
7911    case KEY_closedir:
7912        UNI(OP_CLOSEDIR);
7913
7914    case KEY_cmp:
7915        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7916            return REPORT(0);
7917        NCEop(OP_SCMP);
7918
7919    case KEY_caller:
7920        UNI(OP_CALLER);
7921
7922    case KEY_crypt:
7923
7924        LOP(OP_CRYPT,XTERM);
7925
7926    case KEY_chmod:
7927        LOP(OP_CHMOD,XTERM);
7928
7929    case KEY_chown:
7930        LOP(OP_CHOWN,XTERM);
7931
7932    case KEY_connect:
7933        LOP(OP_CONNECT,XTERM);
7934
7935    case KEY_chr:
7936        UNI(OP_CHR);
7937
7938    case KEY_cos:
7939        UNI(OP_COS);
7940
7941    case KEY_chroot:
7942        UNI(OP_CHROOT);
7943
7944    case KEY_default:
7945        PREBLOCK(KW_DEFAULT);
7946
7947    case KEY_defer:
7948        Perl_ck_warner_d(aTHX_
7949            packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
7950        PREBLOCK(KW_DEFER);
7951
7952    case KEY_do:
7953        return yyl_do(aTHX_ s, orig_keyword);
7954
7955    case KEY_die:
7956        PL_hints |= HINT_BLOCK_SCOPE;
7957        LOP(OP_DIE,XTERM);
7958
7959    case KEY_defined:
7960        UNI(OP_DEFINED);
7961
7962    case KEY_delete:
7963        UNI(OP_DELETE);
7964
7965    case KEY_dbmopen:
7966        Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7967                          STR_WITH_LEN("NDBM_File::"),
7968                          STR_WITH_LEN("DB_File::"),
7969                          STR_WITH_LEN("GDBM_File::"),
7970                          STR_WITH_LEN("SDBM_File::"),
7971                          STR_WITH_LEN("ODBM_File::"),
7972                          NULL);
7973        LOP(OP_DBMOPEN,XTERM);
7974
7975    case KEY_dbmclose:
7976        UNI(OP_DBMCLOSE);
7977
7978    case KEY_dump:
7979        LOOPX(OP_DUMP);
7980
7981    case KEY_else:
7982        PREBLOCK(KW_ELSE);
7983
7984    case KEY_elsif:
7985        pl_yylval.ival = CopLINE(PL_curcop);
7986        OPERATOR(KW_ELSIF);
7987
7988    case KEY_eq:
7989        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7990            return REPORT(0);
7991        ChEop(OP_SEQ);
7992
7993    case KEY_exists:
7994        UNI(OP_EXISTS);
7995
7996    case KEY_exit:
7997        UNI(OP_EXIT);
7998
7999    case KEY_eval:
8000        s = skipspace(s);
8001        if (*s == '{') { /* block eval */
8002            PL_expect = XTERMBLOCK;
8003            UNIBRACK(OP_ENTERTRY);
8004        }
8005        else { /* string eval */
8006            PL_expect = XTERM;
8007            UNIBRACK(OP_ENTEREVAL);
8008        }
8009
8010    case KEY_evalbytes:
8011        PL_expect = XTERM;
8012        UNIBRACK(-OP_ENTEREVAL);
8013
8014    case KEY_eof:
8015        UNI(OP_EOF);
8016
8017    case KEY_exp:
8018        UNI(OP_EXP);
8019
8020    case KEY_each:
8021        UNI(OP_EACH);
8022
8023    case KEY_exec:
8024        LOP(OP_EXEC,XREF);
8025
8026    case KEY_endhostent:
8027        FUN0(OP_EHOSTENT);
8028
8029    case KEY_endnetent:
8030        FUN0(OP_ENETENT);
8031
8032    case KEY_endservent:
8033        FUN0(OP_ESERVENT);
8034
8035    case KEY_endprotoent:
8036        FUN0(OP_EPROTOENT);
8037
8038    case KEY_endpwent:
8039        FUN0(OP_EPWENT);
8040
8041    case KEY_endgrent:
8042        FUN0(OP_EGRENT);
8043
8044    case KEY_field:
8045        /* TODO: maybe this should use the same parser/grammar structures as
8046         * `my`, but it's also rather messy because of the `our` conflation
8047         */
8048        Perl_ck_warner_d(aTHX_
8049            packWARN(WARN_EXPERIMENTAL__CLASS), "field is experimental");
8050
8051        croak_kw_unless_class("field");
8052
8053        PL_parser->in_my = KEY_field;
8054        OPERATOR(KW_FIELD);
8055
8056    case KEY_finally:
8057        Perl_ck_warner_d(aTHX_
8058            packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
8059        PREBLOCK(KW_FINALLY);
8060
8061    case KEY_for:
8062    case KEY_foreach:
8063        return yyl_foreach(aTHX_ s);
8064
8065    case KEY_formline:
8066        LOP(OP_FORMLINE,XTERM);
8067
8068    case KEY_fork:
8069        FUN0(OP_FORK);
8070
8071    case KEY_fc:
8072        UNI(OP_FC);
8073
8074    case KEY_fcntl:
8075        LOP(OP_FCNTL,XTERM);
8076
8077    case KEY_fileno:
8078        UNI(OP_FILENO);
8079
8080    case KEY_flock:
8081        LOP(OP_FLOCK,XTERM);
8082
8083    case KEY_gt:
8084        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8085            return REPORT(0);
8086        ChRop(OP_SGT);
8087
8088    case KEY_ge:
8089        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8090            return REPORT(0);
8091        ChRop(OP_SGE);
8092
8093    case KEY_grep:
8094        LOP(OP_GREPSTART, XREF);
8095
8096    case KEY_goto:
8097        LOOPX(OP_GOTO);
8098
8099    case KEY_gmtime:
8100        UNI(OP_GMTIME);
8101
8102    case KEY_getc:
8103        UNIDOR(OP_GETC);
8104
8105    case KEY_getppid:
8106        FUN0(OP_GETPPID);
8107
8108    case KEY_getpgrp:
8109        UNI(OP_GETPGRP);
8110
8111    case KEY_getpriority:
8112        LOP(OP_GETPRIORITY,XTERM);
8113
8114    case KEY_getprotobyname:
8115        UNI(OP_GPBYNAME);
8116
8117    case KEY_getprotobynumber:
8118        LOP(OP_GPBYNUMBER,XTERM);
8119
8120    case KEY_getprotoent:
8121        FUN0(OP_GPROTOENT);
8122
8123    case KEY_getpwent:
8124        FUN0(OP_GPWENT);
8125
8126    case KEY_getpwnam:
8127        UNI(OP_GPWNAM);
8128
8129    case KEY_getpwuid:
8130        UNI(OP_GPWUID);
8131
8132    case KEY_getpeername:
8133        UNI(OP_GETPEERNAME);
8134
8135    case KEY_gethostbyname:
8136        UNI(OP_GHBYNAME);
8137
8138    case KEY_gethostbyaddr:
8139        LOP(OP_GHBYADDR,XTERM);
8140
8141    case KEY_gethostent:
8142        FUN0(OP_GHOSTENT);
8143
8144    case KEY_getnetbyname:
8145        UNI(OP_GNBYNAME);
8146
8147    case KEY_getnetbyaddr:
8148        LOP(OP_GNBYADDR,XTERM);
8149
8150    case KEY_getnetent:
8151        FUN0(OP_GNETENT);
8152
8153    case KEY_getservbyname:
8154        LOP(OP_GSBYNAME,XTERM);
8155
8156    case KEY_getservbyport:
8157        LOP(OP_GSBYPORT,XTERM);
8158
8159    case KEY_getservent:
8160        FUN0(OP_GSERVENT);
8161
8162    case KEY_getsockname:
8163        UNI(OP_GETSOCKNAME);
8164
8165    case KEY_getsockopt:
8166        LOP(OP_GSOCKOPT,XTERM);
8167
8168    case KEY_getgrent:
8169        FUN0(OP_GGRENT);
8170
8171    case KEY_getgrnam:
8172        UNI(OP_GGRNAM);
8173
8174    case KEY_getgrgid:
8175        UNI(OP_GGRGID);
8176
8177    case KEY_getlogin:
8178        FUN0(OP_GETLOGIN);
8179
8180    case KEY_given:
8181        pl_yylval.ival = CopLINE(PL_curcop);
8182        Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__SMARTMATCH),
8183                         "given is deprecated");
8184        OPERATOR(KW_GIVEN);
8185
8186    case KEY_glob:
8187        LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8188
8189    case KEY_hex:
8190        UNI(OP_HEX);
8191
8192    case KEY_if:
8193        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8194            return REPORT(0);
8195        pl_yylval.ival = CopLINE(PL_curcop);
8196        OPERATOR(KW_IF);
8197
8198    case KEY_index:
8199        LOP(OP_INDEX,XTERM);
8200
8201    case KEY_int:
8202        UNI(OP_INT);
8203
8204    case KEY_ioctl:
8205        LOP(OP_IOCTL,XTERM);
8206
8207    case KEY_isa:
8208        NCRop(OP_ISA);
8209
8210    case KEY_join:
8211        LOP(OP_JOIN,XTERM);
8212
8213    case KEY_keys:
8214        UNI(OP_KEYS);
8215
8216    case KEY_kill:
8217        LOP(OP_KILL,XTERM);
8218
8219    case KEY_last:
8220        LOOPX(OP_LAST);
8221
8222    case KEY_lc:
8223        UNI(OP_LC);
8224
8225    case KEY_lcfirst:
8226        UNI(OP_LCFIRST);
8227
8228    case KEY_local:
8229        OPERATOR(KW_LOCAL);
8230
8231    case KEY_length:
8232        UNI(OP_LENGTH);
8233
8234    case KEY_lt:
8235        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8236            return REPORT(0);
8237        ChRop(OP_SLT);
8238
8239    case KEY_le:
8240        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8241            return REPORT(0);
8242        ChRop(OP_SLE);
8243
8244    case KEY_localtime:
8245        UNI(OP_LOCALTIME);
8246
8247    case KEY_log:
8248        UNI(OP_LOG);
8249
8250    case KEY_link:
8251        LOP(OP_LINK,XTERM);
8252
8253    case KEY_listen:
8254        LOP(OP_LISTEN,XTERM);
8255
8256    case KEY_lock:
8257        UNI(OP_LOCK);
8258
8259    case KEY_lstat:
8260        UNI(OP_LSTAT);
8261
8262    case KEY_m:
8263        s = scan_pat(s,OP_MATCH);
8264        TERM(sublex_start());
8265
8266    case KEY_map:
8267        LOP(OP_MAPSTART, XREF);
8268
8269    case KEY_mkdir:
8270        LOP(OP_MKDIR,XTERM);
8271
8272    case KEY_msgctl:
8273        LOP(OP_MSGCTL,XTERM);
8274
8275    case KEY_msgget:
8276        LOP(OP_MSGGET,XTERM);
8277
8278    case KEY_msgrcv:
8279        LOP(OP_MSGRCV,XTERM);
8280
8281    case KEY_msgsnd:
8282        LOP(OP_MSGSND,XTERM);
8283
8284    case KEY_our:
8285    case KEY_my:
8286    case KEY_state:
8287        return yyl_my(aTHX_ s, key);
8288
8289    case KEY_next:
8290        LOOPX(OP_NEXT);
8291
8292    case KEY_ne:
8293        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8294            return REPORT(0);
8295        ChEop(OP_SNE);
8296
8297    case KEY_no:
8298        s = tokenize_use(0, s);
8299        TOKEN(KW_USE_or_NO);
8300
8301    case KEY_not:
8302        if (*s == '(' || (s = skipspace(s), *s == '('))
8303            FUN1(OP_NOT);
8304        else {
8305            if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8306                PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8307            OPERATOR(NOTOP);
8308        }
8309
8310    case KEY_open:
8311        s = skipspace(s);
8312        if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8313            const char *t;
8314            char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8315            for (t=d; isSPACE(*t);)
8316                t++;
8317            if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8318                /* [perl #16184] */
8319                && !(t[0] == '=' && t[1] == '>')
8320                && !(t[0] == ':' && t[1] == ':')
8321                && !keyword(s, d-s, 0)
8322            ) {
8323                Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8324                   "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8325                    UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8326            }
8327        }
8328        LOP(OP_OPEN,XTERM);
8329
8330    case KEY_or:
8331        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8332            return REPORT(0);
8333        pl_yylval.ival = OP_OR;
8334        OPERATOR(OROP);
8335
8336    case KEY_ord:
8337        UNI(OP_ORD);
8338
8339    case KEY_oct:
8340        UNI(OP_OCT);
8341
8342    case KEY_opendir:
8343        LOP(OP_OPEN_DIR,XTERM);
8344
8345    case KEY_print:
8346        checkcomma(s,PL_tokenbuf,"filehandle");
8347        LOP(OP_PRINT,XREF);
8348
8349    case KEY_printf:
8350        checkcomma(s,PL_tokenbuf,"filehandle");
8351        LOP(OP_PRTF,XREF);
8352
8353    case KEY_prototype:
8354        UNI(OP_PROTOTYPE);
8355
8356    case KEY_push:
8357        LOP(OP_PUSH,XTERM);
8358
8359    case KEY_pop:
8360        UNIDOR(OP_POP);
8361
8362    case KEY_pos:
8363        UNIDOR(OP_POS);
8364
8365    case KEY_pack:
8366        LOP(OP_PACK,XTERM);
8367
8368    case KEY_package:
8369        s = force_word(s,BAREWORD,FALSE,TRUE);
8370        s = skipspace(s);
8371        s = force_strict_version(s);
8372        PREBLOCK(KW_PACKAGE);
8373
8374    case KEY_pipe:
8375        LOP(OP_PIPE_OP,XTERM);
8376
8377    case KEY_q:
8378        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8379        if (!s)
8380            missingterm(NULL, 0);
8381        COPLINE_SET_FROM_MULTI_END;
8382        pl_yylval.ival = OP_CONST;
8383        TERM(sublex_start());
8384
8385    case KEY_quotemeta:
8386        UNI(OP_QUOTEMETA);
8387
8388    case KEY_qw:
8389        return yyl_qw(aTHX_ s, len);
8390
8391    case KEY_qq:
8392        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8393        if (!s)
8394            missingterm(NULL, 0);
8395        pl_yylval.ival = OP_STRINGIFY;
8396        if (SvIVX(PL_lex_stuff) == '\'')
8397            SvIV_set(PL_lex_stuff, 0);	/* qq'$foo' should interpolate */
8398        TERM(sublex_start());
8399
8400    case KEY_qr:
8401        s = scan_pat(s,OP_QR);
8402        TERM(sublex_start());
8403
8404    case KEY_qx:
8405        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8406        if (!s)
8407            missingterm(NULL, 0);
8408        pl_yylval.ival = OP_BACKTICK;
8409        TERM(sublex_start());
8410
8411    case KEY_return:
8412        OLDLOP(OP_RETURN);
8413
8414    case KEY_require:
8415        return yyl_require(aTHX_ s, orig_keyword);
8416
8417    case KEY_reset:
8418        UNI(OP_RESET);
8419
8420    case KEY_redo:
8421        LOOPX(OP_REDO);
8422
8423    case KEY_rename:
8424        LOP(OP_RENAME,XTERM);
8425
8426    case KEY_rand:
8427        UNI(OP_RAND);
8428
8429    case KEY_rmdir:
8430        UNI(OP_RMDIR);
8431
8432    case KEY_rindex:
8433        LOP(OP_RINDEX,XTERM);
8434
8435    case KEY_read:
8436        LOP(OP_READ,XTERM);
8437
8438    case KEY_readdir:
8439        UNI(OP_READDIR);
8440
8441    case KEY_readline:
8442        UNIDOR(OP_READLINE);
8443
8444    case KEY_readpipe:
8445        UNIDOR(OP_BACKTICK);
8446
8447    case KEY_rewinddir:
8448        UNI(OP_REWINDDIR);
8449
8450    case KEY_recv:
8451        LOP(OP_RECV,XTERM);
8452
8453    case KEY_reverse:
8454        LOP(OP_REVERSE,XTERM);
8455
8456    case KEY_readlink:
8457        UNIDOR(OP_READLINK);
8458
8459    case KEY_ref:
8460        UNI(OP_REF);
8461
8462    case KEY_s:
8463        s = scan_subst(s);
8464        if (pl_yylval.opval)
8465            TERM(sublex_start());
8466        else
8467            TOKEN(1);	/* force error */
8468
8469    case KEY_say:
8470        checkcomma(s,PL_tokenbuf,"filehandle");
8471        LOP(OP_SAY,XREF);
8472
8473    case KEY_chomp:
8474        UNI(OP_CHOMP);
8475
8476    case KEY_scalar:
8477        UNI(OP_SCALAR);
8478
8479    case KEY_select:
8480        LOP(OP_SELECT,XTERM);
8481
8482    case KEY_seek:
8483        LOP(OP_SEEK,XTERM);
8484
8485    case KEY_semctl:
8486        LOP(OP_SEMCTL,XTERM);
8487
8488    case KEY_semget:
8489        LOP(OP_SEMGET,XTERM);
8490
8491    case KEY_semop:
8492        LOP(OP_SEMOP,XTERM);
8493
8494    case KEY_send:
8495        LOP(OP_SEND,XTERM);
8496
8497    case KEY_setpgrp:
8498        LOP(OP_SETPGRP,XTERM);
8499
8500    case KEY_setpriority:
8501        LOP(OP_SETPRIORITY,XTERM);
8502
8503    case KEY_sethostent:
8504        UNI(OP_SHOSTENT);
8505
8506    case KEY_setnetent:
8507        UNI(OP_SNETENT);
8508
8509    case KEY_setservent:
8510        UNI(OP_SSERVENT);
8511
8512    case KEY_setprotoent:
8513        UNI(OP_SPROTOENT);
8514
8515    case KEY_setpwent:
8516        FUN0(OP_SPWENT);
8517
8518    case KEY_setgrent:
8519        FUN0(OP_SGRENT);
8520
8521    case KEY_seekdir:
8522        LOP(OP_SEEKDIR,XTERM);
8523
8524    case KEY_setsockopt:
8525        LOP(OP_SSOCKOPT,XTERM);
8526
8527    case KEY_shift:
8528        UNIDOR(OP_SHIFT);
8529
8530    case KEY_shmctl:
8531        LOP(OP_SHMCTL,XTERM);
8532
8533    case KEY_shmget:
8534        LOP(OP_SHMGET,XTERM);
8535
8536    case KEY_shmread:
8537        LOP(OP_SHMREAD,XTERM);
8538
8539    case KEY_shmwrite:
8540        LOP(OP_SHMWRITE,XTERM);
8541
8542    case KEY_shutdown:
8543        LOP(OP_SHUTDOWN,XTERM);
8544
8545    case KEY_sin:
8546        UNI(OP_SIN);
8547
8548    case KEY_sleep:
8549        UNI(OP_SLEEP);
8550
8551    case KEY_socket:
8552        LOP(OP_SOCKET,XTERM);
8553
8554    case KEY_socketpair:
8555        LOP(OP_SOCKPAIR,XTERM);
8556
8557    case KEY_sort:
8558        checkcomma(s,PL_tokenbuf,"subroutine name");
8559        s = skipspace(s);
8560        PL_expect = XTERM;
8561        s = force_word(s,BAREWORD,TRUE,TRUE);
8562        LOP(OP_SORT,XREF);
8563
8564    case KEY_split:
8565        LOP(OP_SPLIT,XTERM);
8566
8567    case KEY_sprintf:
8568        LOP(OP_SPRINTF,XTERM);
8569
8570    case KEY_splice:
8571        LOP(OP_SPLICE,XTERM);
8572
8573    case KEY_sqrt:
8574        UNI(OP_SQRT);
8575
8576    case KEY_srand:
8577        UNI(OP_SRAND);
8578
8579    case KEY_stat:
8580        UNI(OP_STAT);
8581
8582    case KEY_study:
8583        UNI(OP_STUDY);
8584
8585    case KEY_substr:
8586        LOP(OP_SUBSTR,XTERM);
8587
8588    case KEY_method:
8589        /* For now we just treat 'method' identical to 'sub' plus a warning */
8590        Perl_ck_warner_d(aTHX_
8591            packWARN(WARN_EXPERIMENTAL__CLASS), "method is experimental");
8592        return yyl_sub(aTHX_ s, KEY_method);
8593
8594    case KEY_format:
8595    case KEY_sub:
8596        return yyl_sub(aTHX_ s, key);
8597
8598    case KEY_system:
8599        LOP(OP_SYSTEM,XREF);
8600
8601    case KEY_symlink:
8602        LOP(OP_SYMLINK,XTERM);
8603
8604    case KEY_syscall:
8605        LOP(OP_SYSCALL,XTERM);
8606
8607    case KEY_sysopen:
8608        LOP(OP_SYSOPEN,XTERM);
8609
8610    case KEY_sysseek:
8611        LOP(OP_SYSSEEK,XTERM);
8612
8613    case KEY_sysread:
8614        LOP(OP_SYSREAD,XTERM);
8615
8616    case KEY_syswrite:
8617        LOP(OP_SYSWRITE,XTERM);
8618
8619    case KEY_tr:
8620    case KEY_y:
8621        s = scan_trans(s);
8622        TERM(sublex_start());
8623
8624    case KEY_tell:
8625        UNI(OP_TELL);
8626
8627    case KEY_telldir:
8628        UNI(OP_TELLDIR);
8629
8630    case KEY_tie:
8631        LOP(OP_TIE,XTERM);
8632
8633    case KEY_tied:
8634        UNI(OP_TIED);
8635
8636    case KEY_time:
8637        FUN0(OP_TIME);
8638
8639    case KEY_times:
8640        FUN0(OP_TMS);
8641
8642    case KEY_truncate:
8643        LOP(OP_TRUNCATE,XTERM);
8644
8645    case KEY_try:
8646        pl_yylval.ival = CopLINE(PL_curcop);
8647        Perl_ck_warner_d(aTHX_
8648            packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8649        PREBLOCK(KW_TRY);
8650
8651    case KEY_uc:
8652        UNI(OP_UC);
8653
8654    case KEY_ucfirst:
8655        UNI(OP_UCFIRST);
8656
8657    case KEY_untie:
8658        UNI(OP_UNTIE);
8659
8660    case KEY_until:
8661        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8662            return REPORT(0);
8663        pl_yylval.ival = CopLINE(PL_curcop);
8664        OPERATOR(KW_UNTIL);
8665
8666    case KEY_unless:
8667        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8668            return REPORT(0);
8669        pl_yylval.ival = CopLINE(PL_curcop);
8670        OPERATOR(KW_UNLESS);
8671
8672    case KEY_unlink:
8673        LOP(OP_UNLINK,XTERM);
8674
8675    case KEY_undef:
8676        UNIDOR(OP_UNDEF);
8677
8678    case KEY_unpack:
8679        LOP(OP_UNPACK,XTERM);
8680
8681    case KEY_utime:
8682        LOP(OP_UTIME,XTERM);
8683
8684    case KEY_umask:
8685        UNIDOR(OP_UMASK);
8686
8687    case KEY_unshift:
8688        LOP(OP_UNSHIFT,XTERM);
8689
8690    case KEY_use:
8691        s = tokenize_use(1, s);
8692        TOKEN(KW_USE_or_NO);
8693
8694    case KEY_values:
8695        UNI(OP_VALUES);
8696
8697    case KEY_vec:
8698        LOP(OP_VEC,XTERM);
8699
8700    case KEY_when:
8701        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8702            return REPORT(0);
8703        pl_yylval.ival = CopLINE(PL_curcop);
8704        Perl_ck_warner_d(aTHX_
8705            packWARN(WARN_DEPRECATED__SMARTMATCH),
8706            "when is deprecated");
8707        OPERATOR(KW_WHEN);
8708
8709    case KEY_while:
8710        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8711            return REPORT(0);
8712        pl_yylval.ival = CopLINE(PL_curcop);
8713        OPERATOR(KW_WHILE);
8714
8715    case KEY_warn:
8716        PL_hints |= HINT_BLOCK_SCOPE;
8717        LOP(OP_WARN,XTERM);
8718
8719    case KEY_wait:
8720        FUN0(OP_WAIT);
8721
8722    case KEY_waitpid:
8723        LOP(OP_WAITPID,XTERM);
8724
8725    case KEY_wantarray:
8726        FUN0(OP_WANTARRAY);
8727
8728    case KEY_write:
8729        /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8730         * we use the same number on EBCDIC */
8731        gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8732        UNI(OP_ENTERWRITE);
8733
8734    case KEY_x:
8735        if (PL_expect == XOPERATOR) {
8736            if (*s == '=' && !PL_lex_allbrackets
8737                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8738            {
8739                return REPORT(0);
8740            }
8741            Mop(OP_REPEAT);
8742        }
8743        check_uni();
8744        return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8745
8746    case KEY_xor:
8747        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8748            return REPORT(0);
8749        pl_yylval.ival = OP_XOR;
8750        OPERATOR(OROP);
8751    }
8752}
8753
8754static int
8755yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8756{
8757    I32 key = 0;
8758    I32 orig_keyword = 0;
8759    STRLEN olen = len;
8760    char *d = s;
8761    s += 2;
8762    s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8763    if ((*s == ':' && s[1] == ':')
8764        || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8765    {
8766        Copy(PL_bufptr, PL_tokenbuf, olen, char);
8767        return yyl_just_a_word(aTHX_ d, olen, 0, c);
8768    }
8769    if (!key)
8770        Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8771                          UTF8fARG(UTF, len, PL_tokenbuf));
8772    if (key < 0)
8773        key = -key;
8774    else if (key == KEY_require || key == KEY_do
8775          || key == KEY_glob)
8776        /* that's a way to remember we saw "CORE::" */
8777        orig_keyword = key;
8778
8779    /* Known to be a reserved word at this point */
8780    return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8781}
8782
8783struct Perl_custom_infix_result {
8784    struct Perl_custom_infix *def;
8785    SV                       *parsedata;
8786};
8787
8788static enum yytokentype tokentype_for_plugop(struct Perl_custom_infix *def)
8789{
8790    enum Perl_custom_infix_precedence prec = def->prec;
8791    if(prec <= INFIX_PREC_LOW)
8792        return PLUGIN_LOW_OP;
8793    if(prec <= INFIX_PREC_LOGICAL_OR_LOW)
8794        return PLUGIN_LOGICAL_OR_LOW_OP;
8795    if(prec <= INFIX_PREC_LOGICAL_AND_LOW)
8796        return PLUGIN_LOGICAL_AND_LOW_OP;
8797    if(prec <= INFIX_PREC_ASSIGN)
8798        return PLUGIN_ASSIGN_OP;
8799    if(prec <= INFIX_PREC_LOGICAL_OR)
8800        return PLUGIN_LOGICAL_OR_OP;
8801    if(prec <= INFIX_PREC_LOGICAL_AND)
8802        return PLUGIN_LOGICAL_AND_OP;
8803    if(prec <= INFIX_PREC_REL)
8804        return PLUGIN_REL_OP;
8805    if(prec <= INFIX_PREC_ADD)
8806        return PLUGIN_ADD_OP;
8807    if(prec <= INFIX_PREC_MUL)
8808        return PLUGIN_MUL_OP;
8809    if(prec <= INFIX_PREC_POW)
8810        return PLUGIN_POW_OP;
8811    return PLUGIN_HIGH_OP;
8812}
8813
8814OP *
8815Perl_build_infix_plugin(pTHX_ OP *lhs, OP *rhs, void *tokendata)
8816{
8817    PERL_ARGS_ASSERT_BUILD_INFIX_PLUGIN;
8818
8819    struct Perl_custom_infix_result *result = (struct Perl_custom_infix_result *)tokendata;
8820    SAVEFREEPV(result);
8821    if(result->parsedata)
8822        SAVEFREESV(result->parsedata);
8823
8824    return (*result->def->build_op)(aTHX_
8825        &result->parsedata, lhs, rhs, result->def);
8826}
8827
8828static int
8829yyl_keylookup(pTHX_ char *s, GV *gv)
8830{
8831    STRLEN len;
8832    bool anydelim;
8833    I32 key;
8834    struct code c = no_code;
8835    I32 orig_keyword = 0;
8836    char *d;
8837
8838    c.gv = gv;
8839
8840    PL_bufptr = s;
8841    s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8842
8843    /* Some keywords can be followed by any delimiter, including ':' */
8844    anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8845
8846    /* x::* is just a word, unless x is "CORE" */
8847    if (!anydelim && *s == ':' && s[1] == ':') {
8848        if (memEQs(PL_tokenbuf, len, "CORE"))
8849            return yyl_key_core(aTHX_ s, len, c);
8850        return yyl_just_a_word(aTHX_ s, len, 0, c);
8851    }
8852
8853    d = s;
8854    while (d < PL_bufend && isSPACE(*d))
8855            d++;	/* no comments skipped here, or s### is misparsed */
8856
8857    /* Is this a word before a => operator? */
8858    if (*d == '=' && d[1] == '>') {
8859        return yyl_fatcomma(aTHX_ s, len);
8860    }
8861
8862    /* Check for plugged-in keyword */
8863    {
8864        OP *o;
8865        int result;
8866        char *saved_bufptr = PL_bufptr;
8867        PL_bufptr = s;
8868        result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8869        s = PL_bufptr;
8870        if (result == KEYWORD_PLUGIN_DECLINE) {
8871            /* not a plugged-in keyword */
8872            PL_bufptr = saved_bufptr;
8873        } else if (result == KEYWORD_PLUGIN_STMT) {
8874            pl_yylval.opval = o;
8875            CLINE;
8876            if (!PL_nexttoke) PL_expect = XSTATE;
8877            return REPORT(PLUGSTMT);
8878        } else if (result == KEYWORD_PLUGIN_EXPR) {
8879            pl_yylval.opval = o;
8880            CLINE;
8881            if (!PL_nexttoke) PL_expect = XOPERATOR;
8882            return REPORT(PLUGEXPR);
8883        } else {
8884            Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8885        }
8886    }
8887
8888    /* Check for plugged-in named operator */
8889    if(PLUGINFIX_IS_ENABLED) {
8890        struct Perl_custom_infix *def;
8891        STRLEN result;
8892        result = PL_infix_plugin(aTHX_ PL_tokenbuf, len, &def);
8893        if(result) {
8894            if(result != len)
8895                Perl_croak(aTHX_ "Bad infix plugin result (%zd) - did not consume entire identifier <%s>\n",
8896                    result, PL_tokenbuf);
8897            PL_bufptr = s = d;
8898            struct Perl_custom_infix_result *result;
8899            Newx(result, 1, struct Perl_custom_infix_result);
8900            result->def = def;
8901            result->parsedata = NULL;
8902            if(def->parse) {
8903                (*def->parse)(aTHX_ &result->parsedata, def);
8904                s = PL_bufptr; /* restore local s variable */
8905            }
8906            pl_yylval.pval = result;
8907            CLINE;
8908            OPERATOR(tokentype_for_plugop(def));
8909        }
8910    }
8911
8912    /* Is this a label? */
8913    if (!anydelim && PL_expect == XSTATE
8914          && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8915        s = d + 1;
8916        pl_yylval.opval =
8917            newSVOP(OP_CONST, 0,
8918                newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8919        CLINE;
8920        TOKEN(LABEL);
8921    }
8922
8923    /* Check for lexical sub */
8924    if (PL_expect != XOPERATOR) {
8925        char tmpbuf[sizeof PL_tokenbuf + 1];
8926        *tmpbuf = '&';
8927        Copy(PL_tokenbuf, tmpbuf+1, len, char);
8928        c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8929        if (c.off != NOT_IN_PAD) {
8930            assert(c.off); /* we assume this is boolean-true below */
8931            if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8932                HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
8933                HEK * const stashname = HvNAME_HEK(stash);
8934                c.sv = newSVhek(stashname);
8935                sv_catpvs(c.sv, "::");
8936                sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8937                                (UTF ? SV_CATUTF8 : SV_CATBYTES));
8938                c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8939                                  SVt_PVCV);
8940                c.off = 0;
8941                if (!c.gv) {
8942                    ASSUME(c.sv && SvREFCNT(c.sv) == 1);
8943                    SvREFCNT_dec(c.sv);
8944                    c.sv = NULL;
8945                    return yyl_just_a_word(aTHX_ s, len, 0, c);
8946                }
8947            }
8948            else {
8949                c.rv2cv_op = newOP(OP_PADANY, 0);
8950                c.rv2cv_op->op_targ = c.off;
8951                c.cv = find_lexical_cv(c.off);
8952            }
8953            c.lex = TRUE;
8954            return yyl_just_a_word(aTHX_ s, len, 0, c);
8955        }
8956        c.off = 0;
8957    }
8958
8959    /* Check for built-in keyword */
8960    key = keyword(PL_tokenbuf, len, 0);
8961
8962    if (key < 0)
8963        key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8964
8965    if (key && key != KEY___DATA__ && key != KEY___END__
8966     && (!anydelim || *s != '#')) {
8967        /* no override, and not s### either; skipspace is safe here
8968         * check for => on following line */
8969        bool arrow;
8970        STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8971        STRLEN   soff = s         - SvPVX(PL_linestr);
8972        s = peekspace(s);
8973        arrow = *s == '=' && s[1] == '>';
8974        PL_bufptr = SvPVX(PL_linestr) + bufoff;
8975        s         = SvPVX(PL_linestr) +   soff;
8976        if (arrow)
8977            return yyl_fatcomma(aTHX_ s, len);
8978    }
8979
8980    return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8981}
8982
8983static int
8984yyl_try(pTHX_ char *s)
8985{
8986    char *d;
8987    GV *gv = NULL;
8988    int tok;
8989
8990  retry:
8991    /* Check for plugged-in symbolic operator */
8992    if(PLUGINFIX_IS_ENABLED && isPLUGINFIX_FIRST(*s)) {
8993        struct Perl_custom_infix *def;
8994        char *s_end = s, *d = PL_tokenbuf;
8995        STRLEN len;
8996
8997        /* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */
8998        while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end))
8999            *d++ = *s_end++;
9000        *d = '\0';
9001
9002        if((len = (*PL_infix_plugin)(aTHX_ PL_tokenbuf, s_end - s, &def))) {
9003            s += len;
9004            struct Perl_custom_infix_result *result;
9005            Newx(result, 1, struct Perl_custom_infix_result);
9006            result->def = def;
9007            result->parsedata = NULL;
9008            if(def->parse) {
9009                PL_bufptr = s;
9010                (*def->parse)(aTHX_ &result->parsedata, def);
9011                s = PL_bufptr; /* restore local s variable */
9012            }
9013            pl_yylval.pval = result;
9014            CLINE;
9015            OPERATOR(tokentype_for_plugop(def));
9016        }
9017    }
9018
9019    switch (*s) {
9020    default:
9021        if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
9022            if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9023                return tok;
9024            goto retry_bufptr;
9025        }
9026        yyl_croak_unrecognised(aTHX_ s);
9027
9028    case 4:
9029    case 26:
9030        /* emulate EOF on ^D or ^Z */
9031        if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
9032            return tok;
9033    retry_bufptr:
9034        s = PL_bufptr;
9035        goto retry;
9036
9037    case 0:
9038        if ((!PL_rsfp || PL_lex_inwhat)
9039         && (!PL_parser->filtered || s+1 < PL_bufend)) {
9040            PL_last_uni = 0;
9041            PL_last_lop = 0;
9042            if (PL_lex_brackets
9043                && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
9044            {
9045                yyerror((const char *)
9046                        (PL_lex_formbrack
9047                         ? "Format not terminated"
9048                         : "Missing right curly or square bracket"));
9049            }
9050            DEBUG_T({
9051                PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
9052            });
9053            TOKEN(0);
9054        }
9055        if (s++ < PL_bufend)
9056            goto retry;  /* ignore stray nulls */
9057        PL_last_uni = 0;
9058        PL_last_lop = 0;
9059        if (!PL_in_eval && !PL_preambled) {
9060            PL_preambled = TRUE;
9061            if (PL_perldb) {
9062                /* Generate a string of Perl code to load the debugger.
9063                 * If PERL5DB is set, it will return the contents of that,
9064                 * otherwise a compile-time require of perl5db.pl.  */
9065
9066                const char * const pdb = PerlEnv_getenv("PERL5DB");
9067
9068                if (pdb) {
9069                    sv_setpv(PL_linestr, pdb);
9070                    sv_catpvs(PL_linestr,";");
9071                } else {
9072                    SETERRNO(0,SS_NORMAL);
9073                    sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
9074                }
9075                PL_parser->preambling = CopLINE(PL_curcop);
9076            } else
9077                SvPVCLEAR(PL_linestr);
9078            if (PL_preambleav) {
9079                SV **svp = AvARRAY(PL_preambleav);
9080                SV **const end = svp + AvFILLp(PL_preambleav);
9081                while(svp <= end) {
9082                    sv_catsv(PL_linestr, *svp);
9083                    ++svp;
9084                    sv_catpvs(PL_linestr, ";");
9085                }
9086                SvREFCNT_dec(MUTABLE_SV(PL_preambleav));
9087                PL_preambleav = NULL;
9088            }
9089            if (PL_minus_E)
9090                sv_catpvs(PL_linestr,
9091                          "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
9092            if (PL_minus_n || PL_minus_p) {
9093                sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
9094                if (PL_minus_l)
9095                    sv_catpvs(PL_linestr,"chomp;");
9096                if (PL_minus_a) {
9097                    if (PL_minus_F) {
9098                        if (   (   *PL_splitstr == '/'
9099                                || *PL_splitstr == '\''
9100                                || *PL_splitstr == '"')
9101                            && strchr(PL_splitstr + 1, *PL_splitstr))
9102                        {
9103                            /* strchr is ok, because -F pattern can't contain
9104                             * embedded NULs */
9105                            Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
9106                        }
9107                        else {
9108                            /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
9109                               bytes can be used as quoting characters.  :-) */
9110                            const char *splits = PL_splitstr;
9111                            sv_catpvs(PL_linestr, "our @F=split(q\0");
9112                            do {
9113                                /* Need to \ \s  */
9114                                if (*splits == '\\')
9115                                    sv_catpvn(PL_linestr, splits, 1);
9116                                sv_catpvn(PL_linestr, splits, 1);
9117                            } while (*splits++);
9118                            /* This loop will embed the trailing NUL of
9119                               PL_linestr as the last thing it does before
9120                               terminating.  */
9121                            sv_catpvs(PL_linestr, ");");
9122                        }
9123                    }
9124                    else
9125                        sv_catpvs(PL_linestr,"our @F=split(' ');");
9126                }
9127            }
9128            sv_catpvs(PL_linestr, "\n");
9129            PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
9130            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9131            PL_last_lop = PL_last_uni = NULL;
9132            if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
9133                update_debugger_info(PL_linestr, NULL, 0);
9134            goto retry;
9135        }
9136        if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
9137            return tok;
9138        goto retry_bufptr;
9139
9140    case '\r':
9141#ifdef PERL_STRICT_CR
9142        Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
9143        Perl_croak(aTHX_
9144      "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
9145#endif
9146    case ' ': case '\t': case '\f': case '\v':
9147        s++;
9148        goto retry;
9149
9150    case '#':
9151    case '\n': {
9152        const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
9153        if (needs_semicolon)
9154            TOKEN(PERLY_SEMICOLON);
9155        else
9156            goto retry;
9157    }
9158
9159    case '-':
9160        return yyl_hyphen(aTHX_ s);
9161
9162    case '+':
9163        return yyl_plus(aTHX_ s);
9164
9165    case '*':
9166        return yyl_star(aTHX_ s);
9167
9168    case '%':
9169        return yyl_percent(aTHX_ s);
9170
9171    case '^':
9172        return yyl_caret(aTHX_ s);
9173
9174    case '[':
9175        return yyl_leftsquare(aTHX_ s);
9176
9177    case '~':
9178        return yyl_tilde(aTHX_ s);
9179
9180    case ',':
9181        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9182            TOKEN(0);
9183        s++;
9184        OPERATOR(PERLY_COMMA);
9185    case ':':
9186        if (s[1] == ':')
9187            return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9188        return yyl_colon(aTHX_ s + 1);
9189
9190    case '(':
9191        return yyl_leftparen(aTHX_ s + 1);
9192
9193    case ';':
9194        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9195            TOKEN(0);
9196        CLINE;
9197        s++;
9198        PL_expect = XSTATE;
9199        TOKEN(PERLY_SEMICOLON);
9200
9201    case ')':
9202        return yyl_rightparen(aTHX_ s);
9203
9204    case ']':
9205        return yyl_rightsquare(aTHX_ s);
9206
9207    case '{':
9208        return yyl_leftcurly(aTHX_ s + 1, 0);
9209
9210    case '}':
9211        if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9212            TOKEN(0);
9213        return yyl_rightcurly(aTHX_ s, 0);
9214
9215    case '&':
9216        return yyl_ampersand(aTHX_ s);
9217
9218    case '|':
9219        return yyl_verticalbar(aTHX_ s);
9220
9221    case '=':
9222        if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9223            && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9224        {
9225            s = vcs_conflict_marker(s + 7);
9226            goto retry;
9227        }
9228
9229        s++;
9230        {
9231            const char tmp = *s++;
9232            if (tmp == '=') {
9233                if (!PL_lex_allbrackets
9234                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9235                {
9236                    s -= 2;
9237                    TOKEN(0);
9238                }
9239                ChEop(OP_EQ);
9240            }
9241            if (tmp == '>') {
9242                if (!PL_lex_allbrackets
9243                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9244                {
9245                    s -= 2;
9246                    TOKEN(0);
9247                }
9248                OPERATOR(PERLY_COMMA);
9249            }
9250            if (tmp == '~')
9251                PMop(OP_MATCH);
9252            if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9253                && memCHRs("+-*/%.^&|<",tmp))
9254                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9255                            "Reversed %c= operator",(int)tmp);
9256            s--;
9257            if (PL_expect == XSTATE
9258                && isALPHA(tmp)
9259                && (s == PL_linestart+1 || s[-2] == '\n') )
9260            {
9261                if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9262                    || PL_lex_state != LEX_NORMAL)
9263                {
9264                    d = PL_bufend;
9265                    while (s < d) {
9266                        if (*s++ == '\n') {
9267                            incline(s, PL_bufend);
9268                            if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9269                            {
9270                                s = (char *) memchr(s,'\n', d - s);
9271                                if (s)
9272                                    s++;
9273                                else
9274                                    s = d;
9275                                incline(s, PL_bufend);
9276                                goto retry;
9277                            }
9278                        }
9279                    }
9280                    goto retry;
9281                }
9282                s = PL_bufend;
9283                PL_parser->in_pod = 1;
9284                goto retry;
9285            }
9286        }
9287        if (PL_expect == XBLOCK) {
9288            const char *t = s;
9289#ifdef PERL_STRICT_CR
9290            while (SPACE_OR_TAB(*t))
9291#else
9292            while (SPACE_OR_TAB(*t) || *t == '\r')
9293#endif
9294                t++;
9295            if (*t == '\n' || *t == '#') {
9296                ENTER_with_name("lex_format");
9297                SAVEI8(PL_parser->form_lex_state);
9298                SAVEI32(PL_lex_formbrack);
9299                PL_parser->form_lex_state = PL_lex_state;
9300                PL_lex_formbrack = PL_lex_brackets + 1;
9301                PL_parser->sub_error_count = PL_error_count;
9302                return yyl_leftcurly(aTHX_ s, 1);
9303            }
9304        }
9305        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9306            s--;
9307            TOKEN(0);
9308        }
9309        pl_yylval.ival = 0;
9310        OPERATOR(ASSIGNOP);
9311
9312        case '!':
9313        return yyl_bang(aTHX_ s + 1);
9314
9315    case '<':
9316        if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9317            && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9318        {
9319            s = vcs_conflict_marker(s + 7);
9320            goto retry;
9321        }
9322        return yyl_leftpointy(aTHX_ s);
9323
9324    case '>':
9325        if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9326            && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9327        {
9328            s = vcs_conflict_marker(s + 7);
9329            goto retry;
9330        }
9331        return yyl_rightpointy(aTHX_ s + 1);
9332
9333    case '$':
9334        return yyl_dollar(aTHX_ s);
9335
9336    case '@':
9337        return yyl_snail(aTHX_ s);
9338
9339    case '/':			/* may be division, defined-or, or pattern */
9340        return yyl_slash(aTHX_ s);
9341
9342     case '?':			/* conditional */
9343        s++;
9344        if (!PL_lex_allbrackets
9345            && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9346        {
9347            s--;
9348            TOKEN(0);
9349        }
9350        PL_lex_allbrackets++;
9351        OPERATOR(PERLY_QUESTION_MARK);
9352
9353    case '.':
9354        if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9355#ifdef PERL_STRICT_CR
9356            && s[1] == '\n'
9357#else
9358            && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9359#endif
9360            && (s == PL_linestart || s[-1] == '\n') )
9361        {
9362            PL_expect = XSTATE;
9363            /* formbrack==2 means dot seen where arguments expected */
9364            return yyl_rightcurly(aTHX_ s, 2);
9365        }
9366        if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9367            s += 3;
9368            OPERATOR(YADAYADA);
9369        }
9370        if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9371            char tmp = *s++;
9372            if (*s == tmp) {
9373                if (!PL_lex_allbrackets
9374                    && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9375                {
9376                    s--;
9377                    TOKEN(0);
9378                }
9379                s++;
9380                if (*s == tmp) {
9381                    s++;
9382                    pl_yylval.ival = OPf_SPECIAL;
9383                }
9384                else
9385                    pl_yylval.ival = 0;
9386                OPERATOR(DOTDOT);
9387            }
9388            if (*s == '=' && !PL_lex_allbrackets
9389                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9390            {
9391                s--;
9392                TOKEN(0);
9393            }
9394            Aop(OP_CONCAT);
9395        }
9396        /* FALLTHROUGH */
9397    case '0': case '1': case '2': case '3': case '4':
9398    case '5': case '6': case '7': case '8': case '9':
9399        s = scan_num(s, &pl_yylval);
9400        DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9401        if (PL_expect == XOPERATOR)
9402            no_op("Number",s);
9403        TERM(THING);
9404
9405    case '\'':
9406        return yyl_sglquote(aTHX_ s);
9407
9408    case '"':
9409        return yyl_dblquote(aTHX_ s);
9410
9411    case '`':
9412        return yyl_backtick(aTHX_ s);
9413
9414    case '\\':
9415        return yyl_backslash(aTHX_ s + 1);
9416
9417    case 'v':
9418        if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9419            char *start = s + 2;
9420            while (isDIGIT(*start) || *start == '_')
9421                start++;
9422            if (*start == '.' && isDIGIT(start[1])) {
9423                s = scan_num(s, &pl_yylval);
9424                TERM(THING);
9425            }
9426            else if ((*start == ':' && start[1] == ':')
9427                     || (PL_expect == XSTATE && *start == ':')) {
9428                if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9429                    return tok;
9430                goto retry_bufptr;
9431            }
9432            else if (PL_expect == XSTATE) {
9433                d = start;
9434                while (d < PL_bufend && isSPACE(*d)) d++;
9435                if (*d == ':') {
9436                    if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9437                        return tok;
9438                    goto retry_bufptr;
9439                }
9440            }
9441            /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9442            if (!isALPHA(*start) && (PL_expect == XTERM
9443                        || PL_expect == XREF || PL_expect == XSTATE
9444                        || PL_expect == XTERMORDORDOR)) {
9445                GV *const gv = gv_fetchpvn_flags(s, start - s,
9446                                                    UTF ? SVf_UTF8 : 0, SVt_PVCV);
9447                if (!gv) {
9448                    s = scan_num(s, &pl_yylval);
9449                    TERM(THING);
9450                }
9451            }
9452        }
9453        if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9454            return tok;
9455        goto retry_bufptr;
9456
9457    case 'x':
9458        if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9459            s++;
9460            Mop(OP_REPEAT);
9461        }
9462        if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9463            return tok;
9464        goto retry_bufptr;
9465
9466    case '_':
9467    case 'a': case 'A':
9468    case 'b': case 'B':
9469    case 'c': case 'C':
9470    case 'd': case 'D':
9471    case 'e': case 'E':
9472    case 'f': case 'F':
9473    case 'g': case 'G':
9474    case 'h': case 'H':
9475    case 'i': case 'I':
9476    case 'j': case 'J':
9477    case 'k': case 'K':
9478    case 'l': case 'L':
9479    case 'm': case 'M':
9480    case 'n': case 'N':
9481    case 'o': case 'O':
9482    case 'p': case 'P':
9483    case 'q': case 'Q':
9484    case 'r': case 'R':
9485    case 's': case 'S':
9486    case 't': case 'T':
9487    case 'u': case 'U':
9488              case 'V':
9489    case 'w': case 'W':
9490              case 'X':
9491    case 'y': case 'Y':
9492    case 'z': case 'Z':
9493        if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9494            return tok;
9495        goto retry_bufptr;
9496    }
9497}
9498
9499
9500/*
9501  yylex
9502
9503  Works out what to call the token just pulled out of the input
9504  stream.  The yacc parser takes care of taking the ops we return and
9505  stitching them into a tree.
9506
9507  Returns:
9508    The type of the next token
9509
9510  Structure:
9511      Check if we have already built the token; if so, use it.
9512      Switch based on the current state:
9513          - if we have a case modifier in a string, deal with that
9514          - handle other cases of interpolation inside a string
9515          - scan the next line if we are inside a format
9516      In the normal state, switch on the next character:
9517          - default:
9518            if alphabetic, go to key lookup
9519            unrecognized character - croak
9520          - 0/4/26: handle end-of-line or EOF
9521          - cases for whitespace
9522          - \n and #: handle comments and line numbers
9523          - various operators, brackets and sigils
9524          - numbers
9525          - quotes
9526          - 'v': vstrings (or go to key lookup)
9527          - 'x' repetition operator (or go to key lookup)
9528          - other ASCII alphanumerics (key lookup begins here):
9529              word before => ?
9530              keyword plugin
9531              scan built-in keyword (but do nothing with it yet)
9532              check for statement label
9533              check for lexical subs
9534                  return yyl_just_a_word if there is one
9535              see whether built-in keyword is overridden
9536              switch on keyword number:
9537                  - default: return yyl_just_a_word:
9538                      not a built-in keyword; handle bareword lookup
9539                      disambiguate between method and sub call
9540                      fall back to bareword
9541                  - cases for built-in keywords
9542*/
9543
9544int
9545Perl_yylex(pTHX)
9546{
9547    char *s = PL_bufptr;
9548
9549    if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9550        const U8* first_bad_char_loc;
9551        if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9552                                                        PL_bufend - PL_bufptr,
9553                                                        &first_bad_char_loc)))
9554        {
9555            _force_out_malformed_utf8_message(first_bad_char_loc,
9556                                              (U8 *) PL_bufend,
9557                                              0,
9558                                              1 /* 1 means die */ );
9559            NOT_REACHED; /* NOTREACHED */
9560        }
9561        PL_parser->recheck_utf8_validity = FALSE;
9562    }
9563    DEBUG_T( {
9564        SV* tmp = newSVpvs("");
9565        PerlIO_printf(Perl_debug_log, "### %" LINE_Tf ":LEX_%s/X%s %s\n",
9566            CopLINE(PL_curcop),
9567            lex_state_names[PL_lex_state],
9568            exp_name[PL_expect],
9569            pv_display(tmp, s, strlen(s), 0, 60));
9570        SvREFCNT_dec(tmp);
9571    } );
9572
9573    /* when we've already built the next token, just pull it out of the queue */
9574    if (PL_nexttoke) {
9575        PL_nexttoke--;
9576        pl_yylval = PL_nextval[PL_nexttoke];
9577        {
9578            I32 next_type;
9579            next_type = PL_nexttype[PL_nexttoke];
9580            if (next_type & (7<<24)) {
9581                if (next_type & (1<<24)) {
9582                    if (PL_lex_brackets > 100)
9583                        Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9584                    PL_lex_brackstack[PL_lex_brackets++] =
9585                        (char) ((U8) (next_type >> 16));
9586                }
9587                if (next_type & (2<<24))
9588                    PL_lex_allbrackets++;
9589                if (next_type & (4<<24))
9590                    PL_lex_allbrackets--;
9591                next_type &= 0xffff;
9592            }
9593            return REPORT(next_type == 'p' ? pending_ident() : next_type);
9594        }
9595    }
9596
9597    switch (PL_lex_state) {
9598    case LEX_NORMAL:
9599    case LEX_INTERPNORMAL:
9600        break;
9601
9602    /* interpolated case modifiers like \L \U, including \Q and \E.
9603       when we get here, PL_bufptr is at the \
9604    */
9605    case LEX_INTERPCASEMOD:
9606        /* handle \E or end of string */
9607        return yyl_interpcasemod(aTHX_ s);
9608
9609    case LEX_INTERPPUSH:
9610        return REPORT(sublex_push());
9611
9612    case LEX_INTERPSTART:
9613        if (PL_bufptr == PL_bufend)
9614            return REPORT(sublex_done());
9615        DEBUG_T({
9616            if(*PL_bufptr != '(')
9617                PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9618        });
9619        PL_expect = XTERM;
9620        /* for /@a/, we leave the joining for the regex engine to do
9621         * (unless we're within \Q etc) */
9622        PL_lex_dojoin = (*PL_bufptr == '@'
9623                            && (!PL_lex_inpat || PL_lex_casemods));
9624        PL_lex_state = LEX_INTERPNORMAL;
9625        if (PL_lex_dojoin) {
9626            NEXTVAL_NEXTTOKE.ival = 0;
9627            force_next(PERLY_COMMA);
9628            force_ident("\"", PERLY_DOLLAR);
9629            NEXTVAL_NEXTTOKE.ival = 0;
9630            force_next(PERLY_DOLLAR);
9631            NEXTVAL_NEXTTOKE.ival = 0;
9632            force_next((2<<24)|PERLY_PAREN_OPEN);
9633            NEXTVAL_NEXTTOKE.ival = OP_JOIN;	/* emulate join($", ...) */
9634            force_next(FUNC);
9635        }
9636        /* Convert (?{...}) or (*{...}) and friends to 'do {...}' */
9637        if (PL_lex_inpat && *PL_bufptr == '(') {
9638            PL_parser->lex_shared->re_eval_start = PL_bufptr;
9639            PL_bufptr += 2;
9640            if (*PL_bufptr != '{')
9641                PL_bufptr++;
9642            PL_expect = XTERMBLOCK;
9643            force_next(KW_DO);
9644        }
9645
9646        if (PL_lex_starts++) {
9647            s = PL_bufptr;
9648            /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9649            if (!PL_lex_casemods && PL_lex_inpat)
9650                TOKEN(PERLY_COMMA);
9651            else
9652                AopNOASSIGN(OP_CONCAT);
9653        }
9654        return yylex();
9655
9656    case LEX_INTERPENDMAYBE:
9657        if (intuit_more(PL_bufptr, PL_bufend)) {
9658            PL_lex_state = LEX_INTERPNORMAL;	/* false alarm, more expr */
9659            break;
9660        }
9661        /* FALLTHROUGH */
9662
9663    case LEX_INTERPEND:
9664        if (PL_lex_dojoin) {
9665            const U8 dojoin_was = PL_lex_dojoin;
9666            PL_lex_dojoin = FALSE;
9667            PL_lex_state = LEX_INTERPCONCAT;
9668            PL_lex_allbrackets--;
9669            return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9670        }
9671        if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9672            && SvEVALED(PL_lex_repl))
9673        {
9674            if (PL_bufptr != PL_bufend)
9675                Perl_croak(aTHX_ "Bad evalled substitution pattern");
9676            PL_lex_repl = NULL;
9677        }
9678        /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9679           re_eval_str.  If the here-doc body's length equals the previous
9680           value of re_eval_start, re_eval_start will now be null.  So
9681           check re_eval_str as well. */
9682        if (PL_parser->lex_shared->re_eval_start
9683         || PL_parser->lex_shared->re_eval_str) {
9684            SV *sv;
9685            if (*PL_bufptr != ')')
9686                Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9687            PL_bufptr++;
9688            /* having compiled a (?{..}) expression, return the original
9689             * text too, as a const */
9690            if (PL_parser->lex_shared->re_eval_str) {
9691                sv = PL_parser->lex_shared->re_eval_str;
9692                PL_parser->lex_shared->re_eval_str = NULL;
9693                SvCUR_set(sv,
9694                         PL_bufptr - PL_parser->lex_shared->re_eval_start);
9695                SvPV_shrink_to_cur(sv);
9696            }
9697            else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9698                         PL_bufptr - PL_parser->lex_shared->re_eval_start);
9699            NEXTVAL_NEXTTOKE.opval =
9700                    newSVOP(OP_CONST, 0,
9701                                 sv);
9702            force_next(THING);
9703            PL_parser->lex_shared->re_eval_start = NULL;
9704            PL_expect = XTERM;
9705            return REPORT(PERLY_COMMA);
9706        }
9707
9708        /* FALLTHROUGH */
9709    case LEX_INTERPCONCAT:
9710#ifdef DEBUGGING
9711        if (PL_lex_brackets)
9712            Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9713                       (long) PL_lex_brackets);
9714#endif
9715        if (PL_bufptr == PL_bufend)
9716            return REPORT(sublex_done());
9717
9718        /* m'foo' still needs to be parsed for possible (?{...}) */
9719        if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9720            SV *sv = newSVsv(PL_linestr);
9721            sv = tokeq(sv);
9722            pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9723            s = PL_bufend;
9724        }
9725        else {
9726            int save_error_count = PL_error_count;
9727
9728            s = scan_const(PL_bufptr);
9729
9730            /* Set flag if this was a pattern and there were errors.  op.c will
9731             * refuse to compile a pattern with this flag set.  Otherwise, we
9732             * could get segfaults, etc. */
9733            if (PL_lex_inpat && PL_error_count > save_error_count) {
9734                ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9735            }
9736            if (*s == '\\')
9737                PL_lex_state = LEX_INTERPCASEMOD;
9738            else
9739                PL_lex_state = LEX_INTERPSTART;
9740        }
9741
9742        if (s != PL_bufptr) {
9743            NEXTVAL_NEXTTOKE = pl_yylval;
9744            PL_expect = XTERM;
9745            force_next(THING);
9746            if (PL_lex_starts++) {
9747                /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9748                if (!PL_lex_casemods && PL_lex_inpat)
9749                    TOKEN(PERLY_COMMA);
9750                else
9751                    AopNOASSIGN(OP_CONCAT);
9752            }
9753            else {
9754                PL_bufptr = s;
9755                return yylex();
9756            }
9757        }
9758
9759        return yylex();
9760    case LEX_FORMLINE:
9761        if (PL_parser->sub_error_count != PL_error_count) {
9762            /* There was an error parsing a formline, which tends to
9763               mess up the parser.
9764               Unlike interpolated sub-parsing, we can't treat any of
9765               these as recoverable, so no need to check sub_no_recover.
9766            */
9767            yyquit();
9768        }
9769        assert(PL_lex_formbrack);
9770        s = scan_formline(PL_bufptr);
9771        if (!PL_lex_formbrack)
9772            return yyl_rightcurly(aTHX_ s, 1);
9773        PL_bufptr = s;
9774        return yylex();
9775    }
9776
9777    /* We really do *not* want PL_linestr ever becoming a COW. */
9778    assert (!SvIsCOW(PL_linestr));
9779    s = PL_bufptr;
9780    PL_oldoldbufptr = PL_oldbufptr;
9781    PL_oldbufptr = s;
9782
9783    if (PL_in_my == KEY_sigvar) {
9784        PL_parser->saw_infix_sigil = 0;
9785        return yyl_sigvar(aTHX_ s);
9786    }
9787
9788    {
9789        /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9790           On its return, we then need to set it to indicate whether the token
9791           we just encountered was an infix operator that (if we hadn't been
9792           expecting an operator) have been a sigil.
9793        */
9794        bool expected_operator = (PL_expect == XOPERATOR);
9795        int ret = yyl_try(aTHX_ s);
9796        switch (pl_yylval.ival) {
9797        case OP_BIT_AND:
9798        case OP_MODULO:
9799        case OP_MULTIPLY:
9800        case OP_NBIT_AND:
9801            if (expected_operator) {
9802                PL_parser->saw_infix_sigil = 1;
9803                break;
9804            }
9805            /* FALLTHROUGH */
9806        default:
9807            PL_parser->saw_infix_sigil = 0;
9808        }
9809        return ret;
9810    }
9811}
9812
9813
9814/*
9815  S_pending_ident
9816
9817  Looks up an identifier in the pad or in a package
9818
9819  PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9820  rather than a plain pad var.
9821
9822  Returns:
9823    PRIVATEREF if this is a lexical name.
9824    BAREWORD   if this belongs to a package.
9825
9826  Structure:
9827      if we're in a my declaration
9828          croak if they tried to say my($foo::bar)
9829          build the ops for a my() declaration
9830      if it's an access to a my() variable
9831          build ops for access to a my() variable
9832      if in a dq string, and they've said @foo and we can't find @foo
9833          warn
9834      build ops for a bareword
9835*/
9836
9837static int
9838S_pending_ident(pTHX)
9839{
9840    PADOFFSET tmp = 0;
9841    const char pit = (char)pl_yylval.ival;
9842    const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9843    /* All routes through this function want to know if there is a colon.  */
9844    const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9845
9846    DEBUG_T({ PerlIO_printf(Perl_debug_log,
9847          "### Pending identifier '%s'\n", PL_tokenbuf); });
9848    assert(tokenbuf_len >= 2);
9849
9850    /* if we're in a my(), we can't allow dynamics here.
9851       $foo'bar has already been turned into $foo::bar, so
9852       just check for colons.
9853
9854       if it's a legal name, the OP is a PADANY.
9855    */
9856    if (PL_in_my) {
9857        if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
9858            if (has_colon)
9859                /* diag_listed_as: No package name allowed for variable %s
9860                                   in "our" */
9861                yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9862                                  "%s %s in \"our\"",
9863                                  *PL_tokenbuf=='&' ? "subroutine" : "variable",
9864                                  PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9865            tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9866        }
9867        else {
9868            OP *o;
9869            if (has_colon) {
9870                /* "my" variable %s can't be in a package */
9871                /* PL_no_myglob is constant */
9872                GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9873                yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9874                            PL_in_my == KEY_my ? "my" :
9875                            PL_in_my == KEY_field ? "field" : "state",
9876                            *PL_tokenbuf == '&' ? "subroutine" : "variable",
9877                            PL_tokenbuf),
9878                            UTF ? SVf_UTF8 : 0);
9879                GCC_DIAG_RESTORE_STMT;
9880            }
9881
9882            if (PL_in_my == KEY_sigvar) {
9883                /* A signature 'padop' needs in addition, an op_first to
9884                 * point to a child sigdefelem, and an extra field to hold
9885                 * the signature index. We can achieve both by using an
9886                 * UNOP_AUX and (ab)using the op_aux field to hold the
9887                 * index. If we ever need more fields, use a real malloced
9888                 * aux strut instead.
9889                 */
9890                o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9891                                    INT2PTR(UNOP_AUX_item *,
9892                                        (PL_parser->sig_elems)));
9893                o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9894                                  : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9895                                  :                         OPpARGELEM_HV);
9896            }
9897            else
9898                o = newOP(OP_PADANY, 0);
9899            o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9900                                                        UTF ? SVf_UTF8 : 0);
9901            if (PL_in_my == KEY_sigvar)
9902                PL_in_my = 0;
9903
9904            pl_yylval.opval = o;
9905            return PRIVATEREF;
9906        }
9907    }
9908
9909    /*
9910       build the ops for accesses to a my() variable.
9911    */
9912
9913    if (!has_colon) {
9914        if (!PL_in_my)
9915            tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9916                                 0);
9917        if (tmp != NOT_IN_PAD) {
9918            /* might be an "our" variable" */
9919            if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9920                /* build ops for a bareword */
9921                HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9922                HEK * const stashname = HvNAME_HEK(stash);
9923                SV *  const sym = newSVhek(stashname);
9924                sv_catpvs(sym, "::");
9925                sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9926                pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9927                pl_yylval.opval->op_private = OPpCONST_ENTERED;
9928                if (pit != '&')
9929                  gv_fetchsv(sym,
9930                    GV_ADDMULTI,
9931                    ((PL_tokenbuf[0] == '$') ? SVt_PV
9932                     : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9933                     : SVt_PVHV));
9934                return BAREWORD;
9935            }
9936
9937            pl_yylval.opval = newOP(OP_PADANY, 0);
9938            pl_yylval.opval->op_targ = tmp;
9939            return PRIVATEREF;
9940        }
9941    }
9942
9943    /*
9944       Whine if they've said @foo or @foo{key} in a doublequoted string,
9945       and @foo (or %foo) isn't a variable we can find in the symbol
9946       table.
9947    */
9948    if (ckWARN(WARN_AMBIGUOUS)
9949        && pit == '@'
9950        && PL_lex_state != LEX_NORMAL
9951        && !PL_lex_brackets)
9952    {
9953        GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9954                                         ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9955                                         SVt_PVAV);
9956        if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9957           )
9958        {
9959            /* Downgraded from fatal to warning 20000522 mjd */
9960            Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9961                        "Possible unintended interpolation of %" UTF8f
9962                        " in string",
9963                        UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9964        }
9965    }
9966
9967    /* build ops for a bareword */
9968    pl_yylval.opval = newSVOP(OP_CONST, 0,
9969                                   newSVpvn_flags(PL_tokenbuf + 1,
9970                                                      tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9971                                                      UTF ? SVf_UTF8 : 0 ));
9972    pl_yylval.opval->op_private = OPpCONST_ENTERED;
9973    if (pit != '&')
9974        gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9975                     (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9976                     | ( UTF ? SVf_UTF8 : 0 ),
9977                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9978                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9979                      : SVt_PVHV));
9980    return BAREWORD;
9981}
9982
9983STATIC void
9984S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9985{
9986    PERL_ARGS_ASSERT_CHECKCOMMA;
9987
9988    if (*s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
9989        if (ckWARN(WARN_SYNTAX)) {
9990            int level = 1;
9991            const char *w;
9992            for (w = s+2; *w && level; w++) {
9993                if (*w == '(')
9994                    ++level;
9995                else if (*w == ')')
9996                    --level;
9997            }
9998            while (isSPACE(*w))
9999                ++w;
10000            /* the list of chars below is for end of statements or
10001             * block / parens, boolean operators (&&, ||, //) and branch
10002             * constructs (or, and, if, until, unless, while, err, for).
10003             * Not a very solid hack... */
10004            if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
10005                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10006                            "%s (...) interpreted as function",name);
10007        }
10008    }
10009    while (s < PL_bufend && isSPACE(*s))
10010        s++;
10011    if (*s == '(')
10012        s++;
10013    while (s < PL_bufend && isSPACE(*s))
10014        s++;
10015    if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
10016        const char * const w = s;
10017        s += UTF ? UTF8SKIP(s) : 1;
10018        while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10019            s += UTF ? UTF8SKIP(s) : 1;
10020        while (s < PL_bufend && isSPACE(*s))
10021            s++;
10022        if (*s == ',') {
10023            GV* gv;
10024            if (keyword(w, s - w, 0))
10025                return;
10026
10027            gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
10028            if (gv && GvCVu(gv))
10029                return;
10030            if (s - w <= 254) {
10031                PADOFFSET off;
10032                char tmpbuf[256];
10033                Copy(w, tmpbuf+1, s - w, char);
10034                *tmpbuf = '&';
10035                off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
10036                if (off != NOT_IN_PAD) return;
10037            }
10038            Perl_croak(aTHX_ "No comma allowed after %s", what);
10039        }
10040    }
10041}
10042
10043/* S_new_constant(): do any overload::constant lookup.
10044
10045   Either returns sv, or mortalizes/frees sv and returns a new SV*.
10046   Best used as sv=new_constant(..., sv, ...).
10047   If s, pv are NULL, calls subroutine with one argument,
10048   and <type> is used with error messages only.
10049   <type> is assumed to be well formed UTF-8.
10050
10051   If error_msg is not NULL, *error_msg will be set to any error encountered.
10052   Otherwise yyerror() will be used to output it */
10053
10054STATIC SV *
10055S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10056               SV *sv, SV *pv, const char *type, STRLEN typelen,
10057               const char ** error_msg)
10058{
10059    dSP;
10060    HV * table = GvHV(PL_hintgv);		 /* ^H */
10061    SV *res;
10062    SV *errsv = NULL;
10063    SV **cvp;
10064    SV *cv, *typesv;
10065    const char *why1 = "", *why2 = "", *why3 = "";
10066    const char * optional_colon = ":";  /* Only some messages have a colon */
10067    char *msg;
10068
10069    PERL_ARGS_ASSERT_NEW_CONSTANT;
10070    /* We assume that this is true: */
10071    assert(type || s);
10072
10073    sv_2mortal(sv);			/* Parent created it permanently */
10074
10075    if (   ! table
10076        || ! (PL_hints & HINT_LOCALIZE_HH))
10077    {
10078        why1 = "unknown";
10079        optional_colon = "";
10080        goto report;
10081    }
10082
10083    cvp = hv_fetch(table, key, keylen, FALSE);
10084    if (!cvp || !SvOK(*cvp)) {
10085        why1 = "$^H{";
10086        why2 = key;
10087        why3 = "} is not defined";
10088        goto report;
10089    }
10090
10091    cv = *cvp;
10092    if (!pv && s)
10093        pv = newSVpvn_flags(s, len, SVs_TEMP);
10094    if (type && pv)
10095        typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10096    else
10097        typesv = &PL_sv_undef;
10098
10099    PUSHSTACKi(PERLSI_OVERLOAD);
10100    ENTER ;
10101    SAVETMPS;
10102
10103    PUSHMARK(SP) ;
10104    EXTEND(sp, 3);
10105    if (pv)
10106        PUSHs(pv);
10107    PUSHs(sv);
10108    if (pv)
10109        PUSHs(typesv);
10110    PUTBACK;
10111    call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10112
10113    SPAGAIN ;
10114
10115    /* Check the eval first */
10116    if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
10117        STRLEN errlen;
10118        const char * errstr;
10119        sv_catpvs(errsv, "Propagated");
10120        errstr = SvPV_const(errsv, errlen);
10121        yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
10122        (void)POPs;
10123        res = SvREFCNT_inc_simple_NN(sv);
10124    }
10125    else {
10126        res = POPs;
10127        SvREFCNT_inc_simple_void_NN(res);
10128    }
10129
10130    PUTBACK ;
10131    FREETMPS ;
10132    LEAVE ;
10133    POPSTACK;
10134
10135    if (SvOK(res)) {
10136        return res;
10137    }
10138
10139    sv = res;
10140    (void)sv_2mortal(sv);
10141
10142    why1 = "Call to &{$^H{";
10143    why2 = key;
10144    why3 = "}} did not return a defined value";
10145
10146  report:
10147
10148    msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
10149                        (int)(type ? typelen : len),
10150                        (type ? type: s),
10151                        optional_colon,
10152                        why1, why2, why3);
10153    if (error_msg) {
10154        *error_msg = msg;
10155    }
10156    else {
10157        yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
10158    }
10159    return SvREFCNT_inc_simple_NN(sv);
10160}
10161
10162PERL_STATIC_INLINE void
10163S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
10164                    bool is_utf8, bool check_dollar, bool tick_warn)
10165{
10166    int saw_tick = 0;
10167    const char *olds = *s;
10168    PERL_ARGS_ASSERT_PARSE_IDENT;
10169
10170    while (*s < PL_bufend) {
10171        if (*d >= e)
10172            Perl_croak(aTHX_ "%s", ident_too_long);
10173        if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
10174             /* The UTF-8 case must come first, otherwise things
10175             * like c\N{COMBINING TILDE} would start failing, as the
10176             * isWORDCHAR_A case below would gobble the 'c' up.
10177             */
10178
10179            char *t = *s + UTF8SKIP(*s);
10180            while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10181                t += UTF8SKIP(t);
10182            }
10183            if (*d + (t - *s) > e)
10184                Perl_croak(aTHX_ "%s", ident_too_long);
10185            Copy(*s, *d, t - *s, char);
10186            *d += t - *s;
10187            *s = t;
10188        }
10189        else if ( isWORDCHAR_A(**s) ) {
10190            do {
10191                *(*d)++ = *(*s)++;
10192            } while (isWORDCHAR_A(**s) && *d < e);
10193        }
10194        else if (   allow_package
10195                 && **s == '\''
10196                 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10197        {
10198            *(*d)++ = ':';
10199            *(*d)++ = ':';
10200            (*s)++;
10201            saw_tick++;
10202        }
10203        else if (allow_package && **s == ':' && (*s)[1] == ':'
10204           /* Disallow things like Foo::$bar. For the curious, this is
10205            * the code path that triggers the "Bad name after" warning
10206            * when looking for barewords.
10207            */
10208           && !(check_dollar && (*s)[2] == '$')) {
10209            *(*d)++ = *(*s)++;
10210            *(*d)++ = *(*s)++;
10211        }
10212        else
10213            break;
10214    }
10215    if (UNLIKELY(saw_tick && tick_warn && ckWARN2_d(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR))) {
10216        if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10217            char *this_d;
10218            char *d2;
10219            Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10220            d2 = this_d;
10221            SAVEFREEPV(this_d);
10222
10223            Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10224                        "Old package separator used in string");
10225            if (olds[-1] == '#')
10226                *d2++ = olds[-2];
10227            *d2++ = olds[-1];
10228            while (olds < *s) {
10229                if (*olds == '\'') {
10230                    *d2++ = '\\';
10231                    *d2++ = *olds++;
10232                }
10233                else
10234                    *d2++ = *olds++;
10235            }
10236            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10237                        "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10238                        UTF8fARG(is_utf8, d2-this_d, this_d));
10239        }
10240        else {
10241            Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10242                        "Old package separator \"'\" deprecated");
10243        }
10244    }
10245    return;
10246}
10247
10248/* Returns a NUL terminated string, with the length of the string written to
10249   *slp
10250
10251   scan_word6() may be removed once ' in names is removed.
10252   */
10253char *
10254Perl_scan_word6(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp, bool warn_tick)
10255{
10256    char *d = dest;
10257    char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10258    bool is_utf8 = cBOOL(UTF);
10259
10260    PERL_ARGS_ASSERT_SCAN_WORD6;
10261
10262    parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, warn_tick);
10263    *d = '\0';
10264    *slp = d - dest;
10265    return s;
10266}
10267
10268char *
10269Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10270{
10271    PERL_ARGS_ASSERT_SCAN_WORD;
10272    return scan_word6(s, dest, destlen, allow_package, slp, FALSE);
10273}
10274
10275/* scan s and extract an identifier ($var) from it if possible
10276 * into dest.
10277 * XXX: This function has subtle implications on parsing, and
10278 * changing how it behaves can cause a variable to change from
10279 * being a run time rv2sv call or a compile time binding to a
10280 * specific variable name.
10281 */
10282STATIC char *
10283S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10284{
10285    I32 herelines = PL_parser->herelines;
10286    SSize_t bracket = -1;
10287    char funny = *s++;
10288    char *d = dest;
10289    char * const e = d + destlen - 3;    /* two-character token, ending NUL */
10290    bool is_utf8 = cBOOL(UTF);
10291    line_t orig_copline = 0, tmp_copline = 0;
10292
10293    PERL_ARGS_ASSERT_SCAN_IDENT;
10294
10295    if (isSPACE(*s) || !*s)
10296        s = skipspace(s);
10297    if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10298        bool is_zero= *s == '0' ? TRUE : FALSE;
10299        char *digit_start= d;
10300        *d++ = *s++;
10301        while (s < PL_bufend && isDIGIT(*s)) {
10302            if (d >= e)
10303                Perl_croak(aTHX_ "%s", ident_too_long);
10304            *d++ = *s++;
10305        }
10306        if (is_zero && d - digit_start > 1)
10307            Perl_croak(aTHX_ ident_var_zero_multi_digit);
10308    }
10309    else {  /* See if it is a "normal" identifier */
10310        parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10311    }
10312    *d = '\0';
10313    d = dest;
10314    if (*d) {
10315        /* Either a digit variable, or parse_ident() found an identifier
10316           (anything valid as a bareword), so job done and return.  */
10317        if (PL_lex_state != LEX_NORMAL)
10318            PL_lex_state = LEX_INTERPENDMAYBE;
10319        return s;
10320    }
10321
10322    /* Here, it is not a run-of-the-mill identifier name */
10323
10324    if (*s == '$' && s[1]
10325        && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10326            || isDIGIT_A((U8)s[1])
10327            || s[1] == '$'
10328            || s[1] == '{'
10329            || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10330    {
10331        /* Dereferencing a value in a scalar variable.
10332           The alternatives are different syntaxes for a scalar variable.
10333           Using ' as a leading package separator isn't allowed. :: is.   */
10334        return s;
10335    }
10336    /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
10337    if (*s == '{') {
10338        bracket = s - SvPVX(PL_linestr);
10339        s++;
10340        orig_copline = CopLINE(PL_curcop);
10341        if (s < PL_bufend && isSPACE(*s)) {
10342            s = skipspace(s);
10343        }
10344    }
10345
10346
10347    /* Extract the first character of the variable name from 's' and
10348     * copy it, null terminated into 'd'. Note that this does not
10349     * involve checking for just IDFIRST characters, as it allows the
10350     * '^' for ${^FOO} type variable names, and it allows all the
10351     * characters that are legal in a single character variable name.
10352     *
10353     * The legal ones are any of:
10354     *  a) all ASCII characters except:
10355     *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10356     *          2) '{'
10357     *     The final case currently doesn't get this far in the program, so we
10358     *     don't test for it.  If that were to change, it would be ok to allow it.
10359     *  b) When not under Unicode rules, any upper Latin1 character
10360     *  c) Otherwise, when unicode rules are used, all XIDS characters.
10361     *
10362     *      Because all ASCII characters have the same representation whether
10363     *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10364     *      '{' without knowing if is UTF-8 or not. */
10365
10366    if ((s <= PL_bufend - ((is_utf8)
10367                          ? UTF8SKIP(s)
10368                          : 1))
10369        && (
10370            isGRAPH_A(*s)
10371            ||
10372            ( is_utf8
10373              ? isIDFIRST_utf8_safe(s, PL_bufend)
10374              : (isGRAPH_L1(*s)
10375                 && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
10376                )
10377            )
10378        )
10379    ){
10380        if (is_utf8) {
10381            const STRLEN skip = UTF8SKIP(s);
10382            STRLEN i;
10383            d[skip] = '\0';
10384            for ( i = 0; i < skip; i++ )
10385                d[i] = *s++;
10386        }
10387        else {
10388            *d = *s++;
10389            d[1] = '\0';
10390        }
10391    }
10392
10393    /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10394    if (isDIGIT(*d)) {
10395        bool is_zero= *d == '0' ? TRUE : FALSE;
10396        char *digit_start= d;
10397        while (s < PL_bufend && isDIGIT(*s)) {
10398            d++;
10399            if (d >= e)
10400                Perl_croak(aTHX_ "%s", ident_too_long);
10401            *d= *s++;
10402        }
10403        if (is_zero && d - digit_start >= 1) /* d points at the last digit */
10404            Perl_croak(aTHX_ ident_var_zero_multi_digit);
10405        d[1] = '\0';
10406    }
10407
10408    /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10409    else if (*d == '^' && *s && isCONTROLVAR(*s)) {
10410        *d = toCTRL(*s);
10411        s++;
10412    }
10413    /* Warn about ambiguous code after unary operators if {...} notation isn't
10414       used.  There's no difference in ambiguity; it's merely a heuristic
10415       about when not to warn.  */
10416    else if (ck_uni && bracket == -1)
10417        check_uni();
10418
10419    if (bracket != -1) {
10420        bool skip;
10421        char *s2;
10422        /* If we were processing {...} notation then...  */
10423        if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10424            || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10425                 && isWORDCHAR(*s))
10426        ) {
10427            /* note we have to check for a normal identifier first,
10428             * as it handles utf8 symbols, and only after that has
10429             * been ruled out can we look at the caret words */
10430            if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10431                /* if it starts as a valid identifier, assume that it is one.
10432                   (the later check for } being at the expected point will trap
10433                   cases where this doesn't pan out.)  */
10434                d += is_utf8 ? UTF8SKIP(d) : 1;
10435                parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10436                *d = '\0';
10437            }
10438            else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10439                d++;
10440                while (isWORDCHAR(*s) && d < e) {
10441                    *d++ = *s++;
10442                }
10443                if (d >= e)
10444                    Perl_croak(aTHX_ "%s", ident_too_long);
10445                *d = '\0';
10446            }
10447            tmp_copline = CopLINE(PL_curcop);
10448            if (s < PL_bufend && isSPACE(*s)) {
10449                s = skipspace(s);
10450            }
10451            if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10452                /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10453                if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10454                    const char * const brack =
10455                        (const char *)
10456                        ((*s == '[') ? "[...]" : "{...}");
10457                    orig_copline = CopLINE(PL_curcop);
10458                    CopLINE_set(PL_curcop, tmp_copline);
10459   /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10460                    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10461                        "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10462                        funny, dest, brack, funny, dest, brack);
10463                    CopLINE_set(PL_curcop, orig_copline);
10464                }
10465                bracket++;
10466                PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10467                PL_lex_allbrackets++;
10468                return s;
10469            }
10470        }
10471
10472        if ( !tmp_copline )
10473            tmp_copline = CopLINE(PL_curcop);
10474        if ((skip = s < PL_bufend && isSPACE(*s))) {
10475            /* Avoid incrementing line numbers or resetting PL_linestart,
10476               in case we have to back up.  */
10477            STRLEN s_off = s - SvPVX(PL_linestr);
10478            s2 = peekspace(s);
10479            s = SvPVX(PL_linestr) + s_off;
10480        }
10481        else
10482            s2 = s;
10483
10484        /* Expect to find a closing } after consuming any trailing whitespace.
10485         */
10486        if (*s2 == '}') {
10487            /* Now increment line numbers if applicable.  */
10488            if (skip)
10489                s = skipspace(s);
10490            s++;
10491            if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10492                PL_lex_state = LEX_INTERPEND;
10493                PL_expect = XREF;
10494            }
10495            if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10496                if (ckWARN(WARN_AMBIGUOUS)
10497                    && (keyword(dest, d - dest, 0)
10498                        || get_cvn_flags(dest, d - dest, is_utf8
10499                           ? SVf_UTF8
10500                           : 0)))
10501                {
10502                    SV *tmp = newSVpvn_flags( dest, d - dest,
10503                                        SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10504                    if (funny == '#')
10505                        funny = '@';
10506                    orig_copline = CopLINE(PL_curcop);
10507                    CopLINE_set(PL_curcop, tmp_copline);
10508                    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10509                        "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10510                        funny, SVfARG(tmp), funny, SVfARG(tmp));
10511                    CopLINE_set(PL_curcop, orig_copline);
10512                }
10513            }
10514        }
10515        else {
10516            /* Didn't find the closing } at the point we expected, so restore
10517               state such that the next thing to process is the opening { and */
10518            s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10519            CopLINE_set(PL_curcop, orig_copline);
10520            PL_parser->herelines = herelines;
10521            *dest = '\0';
10522            PL_parser->sub_no_recover = TRUE;
10523        }
10524    }
10525    else if (   PL_lex_state == LEX_INTERPNORMAL
10526             && !PL_lex_brackets
10527             && !intuit_more(s, PL_bufend))
10528        PL_lex_state = LEX_INTERPEND;
10529    return s;
10530}
10531
10532static bool
10533S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10534
10535    /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10536     * found in the parse starting at 's', based on the subset that are valid
10537     * in this context input to this routine in 'valid_flags'. Advances s.
10538     * Returns TRUE if the input should be treated as a valid flag, so the next
10539     * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10540     * upon first call on the current regex.  This routine will set it to any
10541     * charset modifier found.  The caller shouldn't change it.  This way,
10542     * another charset modifier encountered in the parse can be detected as an
10543     * error, as we have decided to allow only one */
10544
10545    const char c = **s;
10546    STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10547
10548    if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10549        if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10550            yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10551                       UTF ? SVf_UTF8 : 0);
10552            (*s) += charlen;
10553            /* Pretend that it worked, so will continue processing before
10554             * dieing */
10555            return TRUE;
10556        }
10557        return FALSE;
10558    }
10559
10560    switch (c) {
10561
10562        CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10563        case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10564        case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10565        case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10566        case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10567        case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10568        case LOCALE_PAT_MOD:
10569            if (*charset) {
10570                goto multiple_charsets;
10571            }
10572            set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10573            *charset = c;
10574            break;
10575        case UNICODE_PAT_MOD:
10576            if (*charset) {
10577                goto multiple_charsets;
10578            }
10579            set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10580            *charset = c;
10581            break;
10582        case ASCII_RESTRICT_PAT_MOD:
10583            if (! *charset) {
10584                set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10585            }
10586            else {
10587
10588                /* Error if previous modifier wasn't an 'a', but if it was, see
10589                 * if, and accept, a second occurrence (only) */
10590                if (*charset != 'a'
10591                    || get_regex_charset(*pmfl)
10592                        != REGEX_ASCII_RESTRICTED_CHARSET)
10593                {
10594                        goto multiple_charsets;
10595                }
10596                set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10597            }
10598            *charset = c;
10599            break;
10600        case DEPENDS_PAT_MOD:
10601            if (*charset) {
10602                goto multiple_charsets;
10603            }
10604            set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10605            *charset = c;
10606            break;
10607    }
10608
10609    (*s)++;
10610    return TRUE;
10611
10612    multiple_charsets:
10613        if (*charset != c) {
10614            yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10615        }
10616        else if (c == 'a') {
10617  /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10618            yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10619        }
10620        else {
10621            yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10622        }
10623
10624        /* Pretend that it worked, so will continue processing before dieing */
10625        (*s)++;
10626        return TRUE;
10627}
10628
10629STATIC char *
10630S_scan_pat(pTHX_ char *start, I32 type)
10631{
10632    PMOP *pm;
10633    char *s;
10634    const char * const valid_flags =
10635        (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10636    char charset = '\0';    /* character set modifier */
10637    unsigned int x_mod_count = 0;
10638
10639    PERL_ARGS_ASSERT_SCAN_PAT;
10640
10641    s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10642    if (!s)
10643        Perl_croak(aTHX_ "Search pattern not terminated");
10644
10645    pm = (PMOP*)newPMOP(type, 0);
10646    if (PL_multi_open == '?') {
10647        /* This is the only point in the code that sets PMf_ONCE:  */
10648        pm->op_pmflags |= PMf_ONCE;
10649
10650        /* Hence it's safe to do this bit of PMOP book-keeping here, which
10651           allows us to restrict the list needed by reset to just the ??
10652           matches.  */
10653        assert(type != OP_TRANS);
10654        if (PL_curstash) {
10655            MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10656            U32 elements;
10657            if (!mg) {
10658                mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10659                                 0);
10660            }
10661            elements = mg->mg_len / sizeof(PMOP**);
10662            Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10663            ((PMOP**)mg->mg_ptr) [elements++] = pm;
10664            mg->mg_len = elements * sizeof(PMOP**);
10665            PmopSTASH_set(pm,PL_curstash);
10666        }
10667    }
10668
10669    /* if qr/...(?{..}).../, then need to parse the pattern within a new
10670     * anon CV. False positives like qr/[(?{]/ are harmless */
10671
10672    if (type == OP_QR) {
10673        STRLEN len;
10674        char *e, *p = SvPV(PL_lex_stuff, len);
10675        e = p + len;
10676        for (; p < e; p++) {
10677            if (p[0] == '(' && (
10678                (p[1] == '?' && (p[2] == '{' ||
10679                                (p[2] == '?' && p[3] == '{'))) ||
10680                (p[1] == '*' && (p[2] == '{' ||
10681                                (p[2] == '*' && p[3] == '{')))
10682            )){
10683                pm->op_pmflags |= PMf_HAS_CV;
10684                break;
10685            }
10686        }
10687        pm->op_pmflags |= PMf_IS_QR;
10688    }
10689
10690    while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10691                                &s, &charset, &x_mod_count))
10692    {};
10693    /* issue a warning if /c is specified,but /g is not */
10694    if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10695    {
10696        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10697                       "Use of /c modifier is meaningless without /g" );
10698    }
10699
10700    PL_lex_op = (OP*)pm;
10701    pl_yylval.ival = OP_MATCH;
10702    return s;
10703}
10704
10705STATIC char *
10706S_scan_subst(pTHX_ char *start)
10707{
10708    char *s;
10709    PMOP *pm;
10710    I32 first_start;
10711    line_t first_line;
10712    line_t linediff = 0;
10713    I32 es = 0;
10714    char charset = '\0';    /* character set modifier */
10715    unsigned int x_mod_count = 0;
10716    char *t;
10717
10718    PERL_ARGS_ASSERT_SCAN_SUBST;
10719
10720    pl_yylval.ival = OP_NULL;
10721
10722    s = scan_str(start, TRUE, FALSE, FALSE, &t);
10723
10724    if (!s)
10725        Perl_croak(aTHX_ "Substitution pattern not terminated");
10726
10727    s = t;
10728
10729    first_start = PL_multi_start;
10730    first_line = CopLINE(PL_curcop);
10731    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10732    if (!s) {
10733        SvREFCNT_dec_NN(PL_lex_stuff);
10734        PL_lex_stuff = NULL;
10735        Perl_croak(aTHX_ "Substitution replacement not terminated");
10736    }
10737    PL_multi_start = first_start;	/* so whole substitution is taken together */
10738
10739    pm = (PMOP*)newPMOP(OP_SUBST, 0);
10740
10741
10742    while (*s) {
10743        if (*s == EXEC_PAT_MOD) {
10744            s++;
10745            es++;
10746        }
10747        else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10748                                  &s, &charset, &x_mod_count))
10749        {
10750            break;
10751        }
10752    }
10753
10754    if ((pm->op_pmflags & PMf_CONTINUE)) {
10755        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10756    }
10757
10758    if (es) {
10759        SV * const repl = newSVpvs("");
10760
10761        PL_multi_end = 0;
10762        pm->op_pmflags |= PMf_EVAL;
10763        for (; es > 1; es--) {
10764            sv_catpvs(repl, "eval ");
10765        }
10766        sv_catpvs(repl, "do {");
10767        sv_catsv(repl, PL_parser->lex_sub_repl);
10768        sv_catpvs(repl, "}");
10769        SvREFCNT_dec(PL_parser->lex_sub_repl);
10770        PL_parser->lex_sub_repl = repl;
10771    }
10772
10773
10774    linediff = CopLINE(PL_curcop) - first_line;
10775    if (linediff)
10776        CopLINE_set(PL_curcop, first_line);
10777
10778    if (linediff || es) {
10779        /* the IVX field indicates that the replacement string is a s///e;
10780         * the NVX field indicates how many src code lines the replacement
10781         * spreads over */
10782        sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10783        ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10784        ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10785                                                                    cBOOL(es);
10786    }
10787
10788    PL_lex_op = (OP*)pm;
10789    pl_yylval.ival = OP_SUBST;
10790    return s;
10791}
10792
10793STATIC char *
10794S_scan_trans(pTHX_ char *start)
10795{
10796    char* s;
10797    OP *o;
10798    U8 squash;
10799    U8 del;
10800    U8 complement;
10801    bool nondestruct = 0;
10802    char *t;
10803
10804    PERL_ARGS_ASSERT_SCAN_TRANS;
10805
10806    pl_yylval.ival = OP_NULL;
10807
10808    s = scan_str(start,FALSE,FALSE,FALSE,&t);
10809    if (!s)
10810        Perl_croak(aTHX_ "Transliteration pattern not terminated");
10811
10812    s = t;
10813
10814    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10815    if (!s) {
10816        SvREFCNT_dec_NN(PL_lex_stuff);
10817        PL_lex_stuff = NULL;
10818        Perl_croak(aTHX_ "Transliteration replacement not terminated");
10819    }
10820
10821    complement = del = squash = 0;
10822    while (1) {
10823        switch (*s) {
10824        case 'c':
10825            complement = OPpTRANS_COMPLEMENT;
10826            break;
10827        case 'd':
10828            del = OPpTRANS_DELETE;
10829            break;
10830        case 's':
10831            squash = OPpTRANS_SQUASH;
10832            break;
10833        case 'r':
10834            nondestruct = 1;
10835            break;
10836        default:
10837            goto no_more;
10838        }
10839        s++;
10840    }
10841  no_more:
10842
10843    o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10844    o->op_private &= ~OPpTRANS_ALL;
10845    o->op_private |= del|squash|complement;
10846
10847    PL_lex_op = o;
10848    pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10849
10850
10851    return s;
10852}
10853
10854/* scan_heredoc
10855   Takes a pointer to the first < in <<FOO.
10856   Returns a pointer to the byte following <<FOO.
10857
10858   This function scans a heredoc, which involves different methods
10859   depending on whether we are in a string eval, quoted construct, etc.
10860   This is because PL_linestr could containing a single line of input, or
10861   a whole string being evalled, or the contents of the current quote-
10862   like operator.
10863
10864   The two basic methods are:
10865    - Steal lines from the input stream
10866    - Scan the heredoc in PL_linestr and remove it therefrom
10867
10868   In a file scope or filtered eval, the first method is used; in a
10869   string eval, the second.
10870
10871   In a quote-like operator, we have to choose between the two,
10872   depending on where we can find a newline.  We peek into outer lex-
10873   ing scopes until we find one with a newline in it.  If we reach the
10874   outermost lexing scope and it is a file, we use the stream method.
10875   Otherwise it is treated as an eval.
10876*/
10877
10878STATIC char *
10879S_scan_heredoc(pTHX_ char *s)
10880{
10881    I32 op_type = OP_SCALAR;
10882    I32 len;
10883    SV *tmpstr;
10884    char term;
10885    char *d;
10886    char *e;
10887    char *peek;
10888    char *indent = 0;
10889    I32 indent_len = 0;
10890    bool indented = FALSE;
10891    const bool infile = PL_rsfp || PL_parser->filtered;
10892    const line_t origline = CopLINE(PL_curcop);
10893    LEXSHARED *shared = PL_parser->lex_shared;
10894
10895    PERL_ARGS_ASSERT_SCAN_HEREDOC;
10896
10897    s += 2;
10898    d = PL_tokenbuf + 1;
10899    e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10900    *PL_tokenbuf = '\n';
10901    peek = s;
10902
10903    if (*peek == '~') {
10904        indented = TRUE;
10905        peek++; s++;
10906    }
10907
10908    while (SPACE_OR_TAB(*peek))
10909        peek++;
10910
10911    if (*peek == '`' || *peek == '\'' || *peek =='"') {
10912        s = peek;
10913        term = *s++;
10914        s = delimcpy(d, e, s, PL_bufend, term, &len);
10915        if (s == PL_bufend)
10916            Perl_croak(aTHX_ "Unterminated delimiter for here document");
10917        d += len;
10918        s++;
10919    }
10920    else {
10921        if (*s == '\\')
10922            /* <<\FOO is equivalent to <<'FOO' */
10923            s++, term = '\'';
10924        else
10925            term = '"';
10926
10927        if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10928            Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10929
10930        peek = s;
10931
10932        while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10933            peek += UTF ? UTF8SKIP(peek) : 1;
10934        }
10935
10936        len = (peek - s >= e - d) ? (e - d) : (peek - s);
10937        Copy(s, d, len, char);
10938        s += len;
10939        d += len;
10940    }
10941
10942    if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10943        Perl_croak(aTHX_ "Delimiter for here document is too long");
10944
10945    *d++ = '\n';
10946    *d = '\0';
10947    len = d - PL_tokenbuf;
10948
10949#ifndef PERL_STRICT_CR
10950    d = (char *) memchr(s, '\r', PL_bufend - s);
10951    if (d) {
10952        char * const olds = s;
10953        s = d;
10954        while (s < PL_bufend) {
10955            if (*s == '\r') {
10956                *d++ = '\n';
10957                if (*++s == '\n')
10958                    s++;
10959            }
10960            else if (*s == '\n' && s[1] == '\r') {	/* \015\013 on a mac? */
10961                *d++ = *s++;
10962                s++;
10963            }
10964            else
10965                *d++ = *s++;
10966        }
10967        *d = '\0';
10968        PL_bufend = d;
10969        SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10970        s = olds;
10971    }
10972#endif
10973
10974    tmpstr = newSV_type(SVt_PVIV);
10975    if (term == '\'') {
10976        op_type = OP_CONST;
10977        SvIV_set(tmpstr, -1);
10978    }
10979    else if (term == '`') {
10980        op_type = OP_BACKTICK;
10981        SvIV_set(tmpstr, '\\');
10982    }
10983
10984    PL_multi_start = origline + 1 + PL_parser->herelines;
10985    PL_multi_open = PL_multi_close = '<';
10986
10987    /* inside a string eval or quote-like operator */
10988    if (!infile || PL_lex_inwhat) {
10989        SV *linestr;
10990        char *bufend;
10991        char * const olds = s;
10992        PERL_CONTEXT * const cx = CX_CUR();
10993        /* These two fields are not set until an inner lexing scope is
10994           entered.  But we need them set here. */
10995        shared->ls_bufptr  = s;
10996        shared->ls_linestr = PL_linestr;
10997
10998        if (PL_lex_inwhat) {
10999            /* Look for a newline.  If the current buffer does not have one,
11000             peek into the line buffer of the parent lexing scope, going
11001             up as many levels as necessary to find one with a newline
11002             after bufptr.
11003            */
11004            while (!(s = (char *)memchr(
11005                                (void *)shared->ls_bufptr, '\n',
11006                                SvEND(shared->ls_linestr)-shared->ls_bufptr
11007                )))
11008            {
11009                shared = shared->ls_prev;
11010                /* shared is only null if we have gone beyond the outermost
11011                   lexing scope.  In a file, we will have broken out of the
11012                   loop in the previous iteration.  In an eval, the string buf-
11013                   fer ends with "\n;", so the while condition above will have
11014                   evaluated to false.  So shared can never be null.  Or so you
11015                   might think.  Odd syntax errors like s;@{<<; can gobble up
11016                   the implicit semicolon at the end of a flie, causing the
11017                   file handle to be closed even when we are not in a string
11018                   eval.  So shared may be null in that case.
11019                   (Closing '>>}' here to balance the earlier open brace for
11020                   editors that look for matched pairs.) */
11021                if (UNLIKELY(!shared))
11022                    goto interminable;
11023                /* A LEXSHARED struct with a null ls_prev pointer is the outer-
11024                   most lexing scope.  In a file, shared->ls_linestr at that
11025                   level is just one line, so there is no body to steal. */
11026                if (infile && !shared->ls_prev) {
11027                    s = olds;
11028                    goto streaming;
11029                }
11030            }
11031        }
11032        else {	/* eval or we've already hit EOF */
11033            s = (char*)memchr((void*)s, '\n', PL_bufend - s);
11034            if (!s)
11035                goto interminable;
11036        }
11037
11038        linestr = shared->ls_linestr;
11039        bufend = SvEND(linestr);
11040        d = s;
11041        if (indented) {
11042            char *myolds = s;
11043
11044            while (s < bufend - len + 1) {
11045                if (*s++ == '\n')
11046                    ++PL_parser->herelines;
11047
11048                if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
11049                    char *backup = s;
11050                    indent_len = 0;
11051
11052                    /* Only valid if it's preceded by whitespace only */
11053                    while (backup != myolds && --backup >= myolds) {
11054                        if (! SPACE_OR_TAB(*backup)) {
11055                            break;
11056                        }
11057                        indent_len++;
11058                    }
11059
11060                    /* No whitespace or all! */
11061                    if (backup == s || *backup == '\n') {
11062                        Newx(indent, indent_len + 1, char);
11063                        memcpy(indent, backup + 1, indent_len);
11064                        indent[indent_len] = 0;
11065                        s--; /* before our delimiter */
11066                        PL_parser->herelines--; /* this line doesn't count */
11067                        break;
11068                    }
11069                }
11070            }
11071        }
11072        else {
11073            while (s < bufend - len + 1
11074                   && memNE(s,PL_tokenbuf,len) )
11075            {
11076                if (*s++ == '\n')
11077                    ++PL_parser->herelines;
11078            }
11079        }
11080
11081        if (s >= bufend - len + 1) {
11082            goto interminable;
11083        }
11084
11085        sv_setpvn_fresh(tmpstr,d+1,s-d);
11086        s += len - 1;
11087        /* the preceding stmt passes a newline */
11088        PL_parser->herelines++;
11089
11090        /* s now points to the newline after the heredoc terminator.
11091           d points to the newline before the body of the heredoc.
11092         */
11093
11094        /* We are going to modify linestr in place here, so set
11095           aside copies of the string if necessary for re-evals or
11096           (caller $n)[6]. */
11097        /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
11098           check shared->re_eval_str. */
11099        if (shared->re_eval_start || shared->re_eval_str) {
11100            /* Set aside the rest of the regexp */
11101            if (!shared->re_eval_str)
11102                shared->re_eval_str =
11103                       newSVpvn(shared->re_eval_start,
11104                                bufend - shared->re_eval_start);
11105            shared->re_eval_start -= s-d;
11106        }
11107
11108        if (cxstack_ix >= 0
11109            && CxTYPE(cx) == CXt_EVAL
11110            && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
11111            && cx->blk_eval.cur_text == linestr)
11112        {
11113            cx->blk_eval.cur_text = newSVsv(linestr);
11114            cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
11115        }
11116
11117        /* Copy everything from s onwards back to d. */
11118        Move(s,d,bufend-s + 1,char);
11119        SvCUR_set(linestr, SvCUR(linestr) - (s-d));
11120        /* Setting PL_bufend only applies when we have not dug deeper
11121           into other scopes, because sublex_done sets PL_bufend to
11122           SvEND(PL_linestr). */
11123        if (shared == PL_parser->lex_shared)
11124            PL_bufend = SvEND(linestr);
11125        s = olds;
11126    }
11127    else {
11128        SV *linestr_save;
11129        char *oldbufptr_save;
11130        char *oldoldbufptr_save;
11131      streaming:
11132        sv_grow_fresh(tmpstr, 80);
11133        SvPVCLEAR_FRESH(tmpstr);   /* avoid "uninitialized" warning */
11134        term = PL_tokenbuf[1];
11135        len--;
11136        linestr_save = PL_linestr; /* must restore this afterwards */
11137        d = s;			 /* and this */
11138        oldbufptr_save = PL_oldbufptr;
11139        oldoldbufptr_save = PL_oldoldbufptr;
11140        PL_linestr = newSVpvs("");
11141        PL_bufend = SvPVX(PL_linestr);
11142
11143        while (1) {
11144            PL_bufptr = PL_bufend;
11145            CopLINE_set(PL_curcop,
11146                        origline + 1 + PL_parser->herelines);
11147
11148            if (   !lex_next_chunk(LEX_NO_TERM)
11149                && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
11150            {
11151                /* Simply freeing linestr_save might seem simpler here, as it
11152                   does not matter what PL_linestr points to, since we are
11153                   about to croak; but in a quote-like op, linestr_save
11154                   will have been prospectively freed already, via
11155                   SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
11156                   restore PL_linestr. */
11157                SvREFCNT_dec_NN(PL_linestr);
11158                PL_linestr = linestr_save;
11159                PL_oldbufptr = oldbufptr_save;
11160                PL_oldoldbufptr = oldoldbufptr_save;
11161                goto interminable;
11162            }
11163
11164            CopLINE_set(PL_curcop, origline);
11165
11166            if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
11167                s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
11168                /* ^That should be enough to avoid this needing to grow:  */
11169                sv_catpvs(PL_linestr, "\n\0");
11170                assert(s == SvPVX(PL_linestr));
11171                PL_bufend = SvEND(PL_linestr);
11172            }
11173
11174            s = PL_bufptr;
11175            PL_parser->herelines++;
11176            PL_last_lop = PL_last_uni = NULL;
11177
11178#ifndef PERL_STRICT_CR
11179            if (PL_bufend - PL_linestart >= 2) {
11180                if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
11181                    || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11182                {
11183                    PL_bufend[-2] = '\n';
11184                    PL_bufend--;
11185                    SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11186                }
11187                else if (PL_bufend[-1] == '\r')
11188                    PL_bufend[-1] = '\n';
11189            }
11190            else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11191                PL_bufend[-1] = '\n';
11192#endif
11193
11194            if (indented && (PL_bufend-s) >= len) {
11195                char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
11196
11197                if (found) {
11198                    char *backup = found;
11199                    indent_len = 0;
11200
11201                    /* Only valid if it's preceded by whitespace only */
11202                    while (backup != s && --backup >= s) {
11203                        if (! SPACE_OR_TAB(*backup)) {
11204                            break;
11205                        }
11206                        indent_len++;
11207                    }
11208
11209                    /* All whitespace or none! */
11210                    if (backup == found || SPACE_OR_TAB(*backup)) {
11211                        Newx(indent, indent_len + 1, char);
11212                        memcpy(indent, backup, indent_len);
11213                        indent[indent_len] = 0;
11214                        SvREFCNT_dec(PL_linestr);
11215                        PL_linestr = linestr_save;
11216                        PL_linestart = SvPVX(linestr_save);
11217                        PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11218                        PL_oldbufptr = oldbufptr_save;
11219                        PL_oldoldbufptr = oldoldbufptr_save;
11220                        s = d;
11221                        break;
11222                    }
11223                }
11224
11225                /* Didn't find it */
11226                sv_catsv(tmpstr,PL_linestr);
11227            }
11228            else {
11229                if (*s == term && PL_bufend-s >= len
11230                    && memEQ(s,PL_tokenbuf + 1,len))
11231                {
11232                    SvREFCNT_dec(PL_linestr);
11233                    PL_linestr = linestr_save;
11234                    PL_linestart = SvPVX(linestr_save);
11235                    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11236                    PL_oldbufptr = oldbufptr_save;
11237                    PL_oldoldbufptr = oldoldbufptr_save;
11238                    s = d;
11239                    break;
11240                }
11241                else {
11242                    sv_catsv(tmpstr,PL_linestr);
11243                }
11244            }
11245        } /* while (1) */
11246    }
11247
11248    PL_multi_end = origline + PL_parser->herelines;
11249
11250    if (indented && indent) {
11251        STRLEN linecount = 1;
11252        STRLEN herelen = SvCUR(tmpstr);
11253        char *ss = SvPVX(tmpstr);
11254        char *se = ss + herelen;
11255        SV *newstr = newSV(herelen+1);
11256        SvPOK_on(newstr);
11257
11258        /* Trim leading whitespace */
11259        while (ss < se) {
11260            /* newline only? Copy and move on */
11261            if (*ss == '\n') {
11262                sv_catpvs(newstr,"\n");
11263                ss++;
11264                linecount++;
11265
11266            /* Found our indentation? Strip it */
11267            }
11268            else if (se - ss >= indent_len
11269                       && memEQ(ss, indent, indent_len))
11270            {
11271                STRLEN le = 0;
11272                ss += indent_len;
11273
11274                while ((ss + le) < se && *(ss + le) != '\n')
11275                    le++;
11276
11277                sv_catpvn(newstr, ss, le);
11278                ss += le;
11279
11280            /* Line doesn't begin with our indentation? Croak */
11281            }
11282            else {
11283                Safefree(indent);
11284                Perl_croak(aTHX_
11285                    "Indentation on line %d of here-doc doesn't match delimiter",
11286                    (int)linecount
11287                );
11288            }
11289        } /* while */
11290
11291        /* avoid sv_setsv() as we don't want to COW here */
11292        sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11293        Safefree(indent);
11294        SvREFCNT_dec_NN(newstr);
11295    }
11296
11297    if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11298        SvPV_shrink_to_cur(tmpstr);
11299    }
11300
11301    if (!IN_BYTES) {
11302        if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11303            SvUTF8_on(tmpstr);
11304    }
11305
11306    PL_lex_stuff = tmpstr;
11307    pl_yylval.ival = op_type;
11308    return s;
11309
11310  interminable:
11311    if (indent)
11312        Safefree(indent);
11313    SvREFCNT_dec(tmpstr);
11314    CopLINE_set(PL_curcop, origline);
11315    missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11316}
11317
11318
11319/* scan_inputsymbol
11320   takes: position of first '<' in input buffer
11321   returns: position of first char following the matching '>' in
11322            input buffer
11323   side-effects: pl_yylval and lex_op are set.
11324
11325   This code handles:
11326
11327   <>		read from ARGV
11328   <<>>		read from ARGV without magic open
11329   <FH> 	read from filehandle
11330   <pkg::FH>	read from package qualified filehandle
11331   <pkg'FH>	read from package qualified filehandle
11332   <$fh>	read from filehandle in $fh
11333   <*.h>	filename glob
11334
11335*/
11336
11337STATIC char *
11338S_scan_inputsymbol(pTHX_ char *start)
11339{
11340    char *s = start;		/* current position in buffer */
11341    char *end;
11342    I32 len;
11343    bool nomagicopen = FALSE;
11344    char *d = PL_tokenbuf;					/* start of temp holding space */
11345    const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;	/* end of temp holding space */
11346
11347    PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11348
11349    end = (char *) memchr(s, '\n', PL_bufend - s);
11350    if (!end)
11351        end = PL_bufend;
11352    if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11353        nomagicopen = TRUE;
11354        *d = '\0';
11355        len = 0;
11356        s += 3;
11357    }
11358    else
11359        s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
11360
11361    /* die if we didn't have space for the contents of the <>,
11362       or if it didn't end, or if we see a newline
11363    */
11364
11365    if (len >= (I32)sizeof PL_tokenbuf)
11366        Perl_croak(aTHX_ "Excessively long <> operator");
11367    if (s >= end)
11368        Perl_croak(aTHX_ "Unterminated <> operator");
11369
11370    s++;
11371
11372    /* check for <$fh>
11373       Remember, only scalar variables are interpreted as filehandles by
11374       this code.  Anything more complex (e.g., <$fh{$num}>) will be
11375       treated as a glob() call.
11376       This code makes use of the fact that except for the $ at the front,
11377       a scalar variable and a filehandle look the same.
11378    */
11379    if (*d == '$' && d[1]) d++;
11380
11381    /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11382    while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11383        d += UTF ? UTF8SKIP(d) : 1;
11384    }
11385
11386    /* If we've tried to read what we allow filehandles to look like, and
11387       there's still text left, then it must be a glob() and not a getline.
11388       Use scan_str to pull out the stuff between the <> and treat it
11389       as nothing more than a string.
11390    */
11391
11392    if (d - PL_tokenbuf != len) {
11393        pl_yylval.ival = OP_GLOB;
11394        s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11395        if (!s)
11396           Perl_croak(aTHX_ "Glob not terminated");
11397        return s;
11398    }
11399    else {
11400        bool readline_overridden = FALSE;
11401        GV *gv_readline;
11402        /* we're in a filehandle read situation */
11403        d = PL_tokenbuf;
11404
11405        /* turn <> into <ARGV> */
11406        if (!len)
11407            Copy("ARGV",d,5,char);
11408
11409        /* Check whether readline() is overridden */
11410        if ((gv_readline = gv_override("readline",8)))
11411            readline_overridden = TRUE;
11412
11413        /* if <$fh>, create the ops to turn the variable into a
11414           filehandle
11415        */
11416        if (*d == '$') {
11417            /* try to find it in the pad for this block, otherwise find
11418               add symbol table ops
11419            */
11420            const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11421            if (tmp != NOT_IN_PAD) {
11422                if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11423                    HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11424                    HEK * const stashname = HvNAME_HEK(stash);
11425                    SV * const sym = newSVhek_mortal(stashname);
11426                    sv_catpvs(sym, "::");
11427                    sv_catpv(sym, d+1);
11428                    d = SvPVX(sym);
11429                    goto intro_sym;
11430                }
11431                else {
11432                    OP * const o = newPADxVOP(OP_PADSV, 0, tmp);
11433                    PL_lex_op = readline_overridden
11434                        ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11435                                op_append_elem(OP_LIST, o,
11436                                    newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11437                        : newUNOP(OP_READLINE, 0, o);
11438                }
11439            }
11440            else {
11441                GV *gv;
11442                ++d;
11443              intro_sym:
11444                gv = gv_fetchpv(d,
11445                                GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11446                                SVt_PV);
11447                PL_lex_op = readline_overridden
11448                    ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11449                            op_append_elem(OP_LIST,
11450                                newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11451                                newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11452                    : newUNOP(OP_READLINE, 0,
11453                            newUNOP(OP_RV2SV, 0,
11454                                newGVOP(OP_GV, 0, gv)));
11455            }
11456            /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11457            pl_yylval.ival = OP_NULL;
11458        }
11459
11460        /* If it's none of the above, it must be a literal filehandle
11461           (<Foo::BAR> or <FOO>) so build a simple readline OP */
11462        else {
11463            GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11464            PL_lex_op = readline_overridden
11465                ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11466                        op_append_elem(OP_LIST,
11467                            newGVOP(OP_GV, 0, gv),
11468                            newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11469                : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11470            pl_yylval.ival = OP_NULL;
11471
11472            /* leave the token generation above to avoid confusing the parser */
11473            if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11474                no_bareword_filehandle(d);
11475            }
11476        }
11477    }
11478
11479    return s;
11480}
11481
11482
11483/* scan_str
11484   takes:
11485        start			position in buffer
11486        keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11487                                only if they are of the open/close form
11488        keep_delims		preserve the delimiters around the string
11489        re_reparse		compiling a run-time /(?{})/:
11490                                   collapse // to /,  and skip encoding src
11491        delimp			if non-null, this is set to the position of
11492                                the closing delimiter, or just after it if
11493                                the closing and opening delimiters differ
11494                                (i.e., the opening delimiter of a substitu-
11495                                tion replacement)
11496   returns: position to continue reading from buffer
11497   side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11498        updates the read buffer.
11499
11500   This subroutine pulls a string out of the input.  It is called for:
11501        q		single quotes		q(literal text)
11502        '		single quotes		'literal text'
11503        qq		double quotes		qq(interpolate $here please)
11504        "		double quotes		"interpolate $here please"
11505        qx		backticks		qx(/bin/ls -l)
11506        `		backticks		`/bin/ls -l`
11507        qw		quote words		@EXPORT_OK = qw( func() $spam )
11508        m//		regexp match		m/this/
11509        s///		regexp substitute	s/this/that/
11510        tr///		string transliterate	tr/this/that/
11511        y///		string transliterate	y/this/that/
11512        ($*@)		sub prototypes		sub foo ($)
11513        (stuff)		sub attr parameters	sub foo : attr(stuff)
11514        <>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
11515
11516   In most of these cases (all but <>, patterns and transliterate)
11517   yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11518   calls scan_str().  s/// makes yylex() call scan_subst() which calls
11519   scan_str().  tr/// and y/// make yylex() call scan_trans() which
11520   calls scan_str().
11521
11522   It skips whitespace before the string starts, and treats the first
11523   character as the delimiter.  If the delimiter is one of ([{< then
11524   the corresponding "close" character )]}> is used as the closing
11525   delimiter.  It allows quoting of delimiters, and if the string has
11526   balanced delimiters ([{<>}]) it allows nesting.
11527
11528   On success, the SV with the resulting string is put into lex_stuff or,
11529   if that is already non-NULL, into lex_repl. The second case occurs only
11530   when parsing the RHS of the special constructs s/// and tr/// (y///).
11531   For convenience, the terminating delimiter character is stuffed into
11532   SvIVX of the SV.
11533*/
11534
11535char *
11536Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11537                 char **delimp
11538    )
11539{
11540    SV *sv;			/* scalar value: string */
11541    char *s = start;		/* current position in the buffer */
11542    char *to;			/* current position in the sv's data */
11543    int brackets = 1;		/* bracket nesting level */
11544    bool d_is_utf8 = FALSE;	/* is there any utf8 content? */
11545    UV open_delim_code;         /* code point */
11546    char open_delim_str[UTF8_MAXBYTES+1];
11547    STRLEN delim_byte_len;      /* each delimiter currently is the same number
11548                                   of bytes */
11549    line_t herelines;
11550
11551    /* The only non-UTF character that isn't a stand alone grapheme is
11552     * white-space, hence can't be a delimiter. */
11553    const char * non_grapheme_msg = "Use of unassigned code point or"
11554                                    " non-standalone grapheme for a delimiter"
11555                                    " is not allowed";
11556    PERL_ARGS_ASSERT_SCAN_STR;
11557
11558    /* skip space before the delimiter */
11559    if (isSPACE(*s)) {  /* skipspace can change the buffer 's' is in, so
11560                           'start' also has to change */
11561        s = start = skipspace(s);
11562    }
11563
11564    /* mark where we are, in case we need to report errors */
11565    CLINE;
11566
11567    /* after skipping whitespace, the next character is the delimiter */
11568    if (! UTF || UTF8_IS_INVARIANT(*s)) {
11569        open_delim_code   = (U8) *s;
11570        open_delim_str[0] =      *s;
11571        delim_byte_len = 1;
11572    }
11573    else {
11574        open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
11575                                            &delim_byte_len);
11576        if (UNLIKELY(! is_grapheme((U8 *) start,
11577                                   (U8 *) s,
11578                                   (U8 *) PL_bufend,
11579                                   open_delim_code)))
11580        {
11581            yyerror(non_grapheme_msg);
11582        }
11583
11584        Copy(s, open_delim_str, delim_byte_len, char);
11585    }
11586    open_delim_str[delim_byte_len] = '\0';  /* Only for safety */
11587
11588
11589    /* mark where we are */
11590    PL_multi_start = CopLINE(PL_curcop);
11591    PL_multi_open = open_delim_code;
11592    herelines = PL_parser->herelines;
11593
11594    const char * legal_paired_opening_delims;
11595    const char * legal_paired_closing_delims;
11596    const char * deprecated_opening_delims;
11597    if (FEATURE_MORE_DELIMS_IS_ENABLED) {
11598        if (UTF) {
11599            legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
11600            legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
11601
11602            /* We are deprecating using a closing delimiter as the opening, in
11603             * case we want in the future to accept them reversed.  The string
11604             * may include ones that are legal, but the code below won't look
11605             * at this string unless it didn't find a legal opening one */
11606            deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
11607        }
11608        else {
11609            legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
11610            legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
11611            deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11612        }
11613    }
11614    else {
11615        legal_paired_opening_delims = "([{<";
11616        legal_paired_closing_delims = ")]}>";
11617        deprecated_opening_delims = (UTF)
11618                                    ? DEPRECATED_OPENING_UTF8_BRACKETS
11619                                    : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11620    }
11621
11622    const char * legal_paired_opening_delims_end = legal_paired_opening_delims
11623                                          + strlen(legal_paired_opening_delims);
11624    const char * deprecated_delims_end = deprecated_opening_delims
11625                                + strlen(deprecated_opening_delims);
11626
11627    const char * close_delim_str = open_delim_str;
11628    UV close_delim_code = open_delim_code;
11629
11630    /* If the delimiter has a mirror-image closing one, get it */
11631    const char *tmps = ninstr(legal_paired_opening_delims,
11632                              legal_paired_opening_delims_end,
11633                              open_delim_str, open_delim_str + delim_byte_len);
11634    if (tmps) {
11635        /* Here, there is a paired delimiter, and tmps points to its position
11636           in the string of the accepted opening paired delimiters.  The
11637           corresponding position in the string of closing ones is the
11638           beginning of the paired mate.  Both contain the same number of
11639           bytes. */
11640        close_delim_str = legal_paired_closing_delims
11641                        + (tmps - legal_paired_opening_delims);
11642
11643        /* The list of paired delimiters contains all the ASCII ones that have
11644         * always been legal, and no other ASCIIs.  Don't raise a message if
11645         * using one of these */
11646        if (! isASCII(open_delim_code)) {
11647            Perl_ck_warner_d(aTHX_
11648                             packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
11649                             "Use of '%" UTF8f "' is experimental as a string delimiter",
11650                             UTF8fARG(UTF, delim_byte_len, open_delim_str));
11651        }
11652
11653        close_delim_code = (UTF)
11654                           ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
11655                           : * (U8 *) close_delim_str;
11656    }
11657    else {  /* Here, the delimiter isn't paired, hence the close is the same as
11658               the open; and has already been set up.  But make sure it isn't
11659               deprecated to use this particular delimiter, as we plan
11660               eventually to make it paired. */
11661        if (ninstr(deprecated_opening_delims, deprecated_delims_end,
11662                   open_delim_str, open_delim_str + delim_byte_len))
11663        {
11664            Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__DELIMITER_WILL_BE_PAIRED),
11665                             "Use of '%" UTF8f "' is deprecated as a string delimiter",
11666                             UTF8fARG(UTF, delim_byte_len, open_delim_str));
11667        }
11668
11669        /* Note that a NUL may be used as a delimiter, and this happens when
11670         * delimiting an empty string, and no special handling for it is
11671         * needed, as ninstr() calls are used */
11672    }
11673
11674    PL_multi_close = close_delim_code;
11675
11676    if (PL_multi_open == PL_multi_close) {
11677        keep_bracketed_quoted = FALSE;
11678    }
11679
11680    /* create a new SV to hold the contents.  79 is the SV's initial length.
11681       What a random number. */
11682    sv = newSV_type(SVt_PVIV);
11683    sv_grow_fresh(sv, 79);
11684    SvIV_set(sv, close_delim_code);
11685    (void)SvPOK_only(sv);		/* validate pointer */
11686
11687    /* move past delimiter and try to read a complete string */
11688    if (keep_delims)
11689        sv_catpvn(sv, s, delim_byte_len);
11690    s += delim_byte_len;
11691    for (;;) {
11692        /* extend sv if need be */
11693        SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11694        /* set 'to' to the next character in the sv's string */
11695        to = SvPVX(sv)+SvCUR(sv);
11696
11697        /* read until we run out of string, or we find the closing delimiter */
11698        while (s < PL_bufend) {
11699            /* embedded newlines increment the line count */
11700            if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11701                COPLINE_INC_WITH_HERELINES;
11702
11703            /* backslashes can escape the closing delimiter */
11704            if (   *s == '\\' && s < PL_bufend - delim_byte_len
11705
11706                   /* ... but not if the delimiter itself is a backslash */
11707                && close_delim_code != '\\')
11708            {
11709                /* Here, we have an escaping backslash.  If we're supposed to
11710                 * discard those that escape the closing delimiter, just
11711                 * discard this one */
11712                if (   !  keep_bracketed_quoted
11713                    &&   (    memEQ(s + 1,  open_delim_str, delim_byte_len)
11714                          ||  (   PL_multi_open == PL_multi_close
11715                               && re_reparse && s[1] == '\\')
11716                          ||  memEQ(s + 1, close_delim_str, delim_byte_len)))
11717                {
11718                    s++;
11719                }
11720                else /* any other escapes are simply copied straight through */
11721                    *to++ = *s++;
11722            }
11723            else if (   s < PL_bufend - (delim_byte_len - 1)
11724                     && memEQ(s, close_delim_str, delim_byte_len)
11725                     && --brackets <= 0)
11726            {
11727                /* Found unescaped closing delimiter, unnested if we care about
11728                 * that; so are done.
11729                 *
11730                 * In the case of the opening and closing delimiters being
11731                 * different, we have to deal with nesting; the conditional
11732                 * above makes sure we don't get here until the nesting level,
11733                 * 'brackets', is back down to zero.  In the other case,
11734                 * nesting isn't an issue, and 'brackets' never can get
11735                 * incremented above 0, so will come here at the first closing
11736                 * delimiter.
11737                 *
11738                 * Only grapheme delimiters are legal. */
11739                if (   UTF  /* All Non-UTF-8's are graphemes */
11740                    && UNLIKELY(! is_grapheme((U8 *) start,
11741                                              (U8 *) s,
11742                                              (U8 *) PL_bufend,
11743                                              close_delim_code)))
11744                {
11745                    yyerror(non_grapheme_msg);
11746                }
11747
11748                break;
11749            }
11750                        /* No nesting if open eq close */
11751            else if (   PL_multi_open != PL_multi_close
11752                     && s < PL_bufend - (delim_byte_len - 1)
11753                     && memEQ(s, open_delim_str, delim_byte_len))
11754            {
11755                brackets++;
11756            }
11757
11758            /* Here, still in the middle of the string; copy this character */
11759            if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
11760                *to++ = *s++;
11761            }
11762            else {
11763                size_t this_char_len = UTF8SKIP(s);
11764                Copy(s, to, this_char_len, char);
11765                s  += this_char_len;
11766                to += this_char_len;
11767
11768                d_is_utf8 = TRUE;
11769            }
11770        } /* End of loop through buffer */
11771
11772        /* Here, found end of the string, OR ran out of buffer: terminate the
11773         * copied string and update the sv's end-of-string */
11774        *to = '\0';
11775        SvCUR_set(sv, to - SvPVX_const(sv));
11776
11777        /*
11778         * this next chunk reads more into the buffer if we're not done yet
11779         */
11780
11781        if (s < PL_bufend)
11782            break;		/* handle case where we are done yet :-) */
11783
11784#ifndef PERL_STRICT_CR
11785        if (to - SvPVX_const(sv) >= 2) {
11786            if (   (to[-2] == '\r' && to[-1] == '\n')
11787                || (to[-2] == '\n' && to[-1] == '\r'))
11788            {
11789                to[-2] = '\n';
11790                to--;
11791                SvCUR_set(sv, to - SvPVX_const(sv));
11792            }
11793            else if (to[-1] == '\r')
11794                to[-1] = '\n';
11795        }
11796        else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11797            to[-1] = '\n';
11798#endif
11799
11800        /* if we're out of file, or a read fails, bail and reset the current
11801           line marker so we can report where the unterminated string began
11802        */
11803        COPLINE_INC_WITH_HERELINES;
11804        PL_bufptr = PL_bufend;
11805        if (!lex_next_chunk(0)) {
11806            ASSUME(sv);
11807            SvREFCNT_dec(sv);
11808            CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11809            return NULL;
11810        }
11811        s = start = PL_bufptr;
11812    } /* End of infinite loop */
11813
11814    /* at this point, we have successfully read the delimited string */
11815
11816    if (keep_delims)
11817            sv_catpvn(sv, s, delim_byte_len);
11818    s += delim_byte_len;
11819
11820    if (d_is_utf8)
11821        SvUTF8_on(sv);
11822
11823    PL_multi_end = CopLINE(PL_curcop);
11824    CopLINE_set(PL_curcop, PL_multi_start);
11825    PL_parser->herelines = herelines;
11826
11827    /* if we allocated too much space, give some back */
11828    if (SvCUR(sv) + 5 < SvLEN(sv)) {
11829        SvLEN_set(sv, SvCUR(sv) + 1);
11830        SvPV_shrink_to_cur(sv);
11831    }
11832
11833    /* decide whether this is the first or second quoted string we've read
11834       for this op
11835    */
11836
11837    if (PL_lex_stuff)
11838        PL_parser->lex_sub_repl = sv;
11839    else
11840        PL_lex_stuff = sv;
11841    if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
11842    return s;
11843}
11844
11845/*
11846  scan_num
11847  takes: pointer to position in buffer
11848  returns: pointer to new position in buffer
11849  side-effects: builds ops for the constant in pl_yylval.op
11850
11851  Read a number in any of the formats that Perl accepts:
11852
11853  \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)	12 12.34 12.
11854  \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)			.34
11855  0b[01](_?[01])*                                       binary integers
11856  0o?[0-7](_?[0-7])*                                    octal integers
11857  0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
11858  0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
11859
11860  Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11861  thing it reads.
11862
11863  If it reads a number without a decimal point or an exponent, it will
11864  try converting the number to an integer and see if it can do so
11865  without loss of precision.
11866*/
11867
11868char *
11869Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11870{
11871    const char *s = start;	/* current position in buffer */
11872    char *d;			/* destination in temp buffer */
11873    char *e;			/* end of temp buffer */
11874    NV nv;				/* number read, as a double */
11875    SV *sv = NULL;			/* place to put the converted number */
11876    bool floatit;			/* boolean: int or float? */
11877    const char *lastub = NULL;		/* position of last underbar */
11878    static const char* const number_too_long = "Number too long";
11879    bool warned_about_underscore = 0;
11880    I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11881#define WARN_ABOUT_UNDERSCORE() \
11882        do { \
11883            if (!warned_about_underscore) { \
11884                warned_about_underscore = 1; \
11885                Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11886                               "Misplaced _ in number"); \
11887            } \
11888        } while(0)
11889    /* Hexadecimal floating point.
11890     *
11891     * In many places (where we have quads and NV is IEEE 754 double)
11892     * we can fit the mantissa bits of a NV into an unsigned quad.
11893     * (Note that UVs might not be quads even when we have quads.)
11894     * This will not work everywhere, though (either no quads, or
11895     * using long doubles), in which case we have to resort to NV,
11896     * which will probably mean horrible loss of precision due to
11897     * multiple fp operations. */
11898    bool hexfp = FALSE;
11899    int total_bits = 0;
11900    int significant_bits = 0;
11901#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11902#  define HEXFP_UQUAD
11903    Uquad_t hexfp_uquad = 0;
11904    int hexfp_frac_bits = 0;
11905#else
11906#  define HEXFP_NV
11907    NV hexfp_nv = 0.0;
11908#endif
11909    NV hexfp_mult = 1.0;
11910    UV high_non_zero = 0; /* highest digit */
11911    int non_zero_integer_digits = 0;
11912    bool new_octal = FALSE;     /* octal with "0o" prefix */
11913
11914    PERL_ARGS_ASSERT_SCAN_NUM;
11915
11916    /* We use the first character to decide what type of number this is */
11917
11918    switch (*s) {
11919    default:
11920        Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11921
11922    /* if it starts with a 0, it could be an octal number, a decimal in
11923       0.13 disguise, or a hexadecimal number, or a binary number. */
11924    case '0':
11925        {
11926          /* variables:
11927             u		holds the "number so far"
11928             overflowed	was the number more than we can hold?
11929
11930             Shift is used when we add a digit.  It also serves as an "are
11931             we in octal/hex/binary?" indicator to disallow hex characters
11932             when in octal mode.
11933           */
11934            NV n = 0.0;
11935            UV u = 0;
11936            bool overflowed = FALSE;
11937            bool just_zero  = TRUE;	/* just plain 0 or binary number? */
11938            bool has_digs = FALSE;
11939            static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11940            static const char* const bases[5] =
11941              { "", "binary", "", "octal", "hexadecimal" };
11942            static const char* const Bases[5] =
11943              { "", "Binary", "", "Octal", "Hexadecimal" };
11944            static const char* const maxima[5] =
11945              { "",
11946                "0b11111111111111111111111111111111",
11947                "",
11948                "037777777777",
11949                "0xffffffff" };
11950
11951            /* check for hex */
11952            if (isALPHA_FOLD_EQ(s[1], 'x')) {
11953                shift = 4;
11954                s += 2;
11955                just_zero = FALSE;
11956            } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11957                shift = 1;
11958                s += 2;
11959                just_zero = FALSE;
11960            }
11961            /* check for a decimal in disguise */
11962            else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11963                goto decimal;
11964            /* so it must be octal */
11965            else {
11966                shift = 3;
11967                s++;
11968                if (isALPHA_FOLD_EQ(*s, 'o')) {
11969                    s++;
11970                    just_zero = FALSE;
11971                    new_octal = TRUE;
11972                }
11973            }
11974
11975            if (*s == '_') {
11976                WARN_ABOUT_UNDERSCORE();
11977               lastub = s++;
11978            }
11979
11980            /* read the rest of the number */
11981            for (;;) {
11982                /* x is used in the overflow test,
11983                   b is the digit we're adding on. */
11984                UV x, b;
11985
11986                switch (*s) {
11987
11988                /* if we don't mention it, we're done */
11989                default:
11990                    goto out;
11991
11992                /* _ are ignored -- but warned about if consecutive */
11993                case '_':
11994                    if (lastub && s == lastub + 1)
11995                        WARN_ABOUT_UNDERSCORE();
11996                    lastub = s++;
11997                    break;
11998
11999                /* 8 and 9 are not octal */
12000                case '8': case '9':
12001                    if (shift == 3)
12002                        yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12003                    /* FALLTHROUGH */
12004
12005                /* octal digits */
12006                case '2': case '3': case '4':
12007                case '5': case '6': case '7':
12008                    if (shift == 1)
12009                        yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12010                    /* FALLTHROUGH */
12011
12012                case '0': case '1':
12013                    b = *s++ & 15;		/* ASCII digit -> value of digit */
12014                    goto digit;
12015
12016                /* hex digits */
12017                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12018                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12019                    /* make sure they said 0x */
12020                    if (shift != 4)
12021                        goto out;
12022                    b = (*s++ & 7) + 9;
12023
12024                    /* Prepare to put the digit we have onto the end
12025                       of the number so far.  We check for overflows.
12026                    */
12027
12028                  digit:
12029                    just_zero = FALSE;
12030                    has_digs = TRUE;
12031                    if (!overflowed) {
12032                        assert(shift >= 0);
12033                        x = u << shift;	/* make room for the digit */
12034
12035                        total_bits += shift;
12036
12037                        if ((x >> shift) != u
12038                            && !(PL_hints & HINT_NEW_BINARY)) {
12039                            overflowed = TRUE;
12040                            n = (NV) u;
12041                            Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12042                                             "Integer overflow in %s number",
12043                                             bases[shift]);
12044                        } else
12045                            u = x | b;		/* add the digit to the end */
12046                    }
12047                    if (overflowed) {
12048                        n *= nvshift[shift];
12049                        /* If an NV has not enough bits in its
12050                         * mantissa to represent an UV this summing of
12051                         * small low-order numbers is a waste of time
12052                         * (because the NV cannot preserve the
12053                         * low-order bits anyway): we could just
12054                         * remember when did we overflow and in the
12055                         * end just multiply n by the right
12056                         * amount. */
12057                        n += (NV) b;
12058                    }
12059
12060                    if (high_non_zero == 0 && b > 0)
12061                        high_non_zero = b;
12062
12063                    if (high_non_zero)
12064                        non_zero_integer_digits++;
12065
12066                    /* this could be hexfp, but peek ahead
12067                     * to avoid matching ".." */
12068                    if (UNLIKELY(HEXFP_PEEK(s))) {
12069                        goto out;
12070                    }
12071
12072                    break;
12073                }
12074            }
12075
12076          /* if we get here, we had success: make a scalar value from
12077             the number.
12078          */
12079          out:
12080
12081            /* final misplaced underbar check */
12082            if (s[-1] == '_')
12083                WARN_ABOUT_UNDERSCORE();
12084
12085            if (UNLIKELY(HEXFP_PEEK(s))) {
12086                /* Do sloppy (on the underbars) but quick detection
12087                 * (and value construction) for hexfp, the decimal
12088                 * detection will shortly be more thorough with the
12089                 * underbar checks. */
12090                const char* h = s;
12091                significant_bits = non_zero_integer_digits * shift;
12092#ifdef HEXFP_UQUAD
12093                hexfp_uquad = u;
12094#else /* HEXFP_NV */
12095                hexfp_nv = u;
12096#endif
12097                /* Ignore the leading zero bits of
12098                 * the high (first) non-zero digit. */
12099                if (high_non_zero) {
12100                    if (high_non_zero < 0x8)
12101                        significant_bits--;
12102                    if (high_non_zero < 0x4)
12103                        significant_bits--;
12104                    if (high_non_zero < 0x2)
12105                        significant_bits--;
12106                }
12107
12108                if (*h == '.') {
12109#ifdef HEXFP_NV
12110                    NV nv_mult = 1.0;
12111#endif
12112                    bool accumulate = TRUE;
12113                    U8 b = 0; /* silence compiler warning */
12114                    int lim = 1 << shift;
12115                    for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
12116                               *h == '_'); h++) {
12117                        if (isXDIGIT(*h)) {
12118                            significant_bits += shift;
12119#ifdef HEXFP_UQUAD
12120                            if (accumulate) {
12121                                if (significant_bits < NV_MANT_DIG) {
12122                                    /* We are in the long "run" of xdigits,
12123                                     * accumulate the full four bits. */
12124                                    assert(shift >= 0);
12125                                    hexfp_uquad <<= shift;
12126                                    hexfp_uquad |= b;
12127                                    hexfp_frac_bits += shift;
12128                                } else if (significant_bits - shift < NV_MANT_DIG) {
12129                                    /* We are at a hexdigit either at,
12130                                     * or straddling, the edge of mantissa.
12131                                     * We will try grabbing as many as
12132                                     * possible bits. */
12133                                    int tail =
12134                                      significant_bits - NV_MANT_DIG;
12135                                    if (tail <= 0)
12136                                       tail += shift;
12137                                    assert(tail >= 0);
12138                                    hexfp_uquad <<= tail;
12139                                    assert((shift - tail) >= 0);
12140                                    hexfp_uquad |= b >> (shift - tail);
12141                                    hexfp_frac_bits += tail;
12142
12143                                    /* Ignore the trailing zero bits
12144                                     * of the last non-zero xdigit.
12145                                     *
12146                                     * The assumption here is that if
12147                                     * one has input of e.g. the xdigit
12148                                     * eight (0x8), there is only one
12149                                     * bit being input, not the full
12150                                     * four bits.  Conversely, if one
12151                                     * specifies a zero xdigit, the
12152                                     * assumption is that one really
12153                                     * wants all those bits to be zero. */
12154                                    if (b) {
12155                                        if ((b & 0x1) == 0x0) {
12156                                            significant_bits--;
12157                                            if ((b & 0x2) == 0x0) {
12158                                                significant_bits--;
12159                                                if ((b & 0x4) == 0x0) {
12160                                                    significant_bits--;
12161                                                }
12162                                            }
12163                                        }
12164                                    }
12165
12166                                    accumulate = FALSE;
12167                                }
12168                            } else {
12169                                /* Keep skipping the xdigits, and
12170                                 * accumulating the significant bits,
12171                                 * but do not shift the uquad
12172                                 * (which would catastrophically drop
12173                                 * high-order bits) or accumulate the
12174                                 * xdigits anymore. */
12175                            }
12176#else /* HEXFP_NV */
12177                            if (accumulate) {
12178                                nv_mult /= nvshift[shift];
12179                                if (nv_mult > 0.0)
12180                                    hexfp_nv += b * nv_mult;
12181                                else
12182                                    accumulate = FALSE;
12183                            }
12184#endif
12185                        }
12186                        if (significant_bits >= NV_MANT_DIG)
12187                            accumulate = FALSE;
12188                    }
12189                }
12190
12191                if ((total_bits > 0 || significant_bits > 0) &&
12192                    isALPHA_FOLD_EQ(*h, 'p')) {
12193                    bool negexp = FALSE;
12194                    h++;
12195                    if (*h == '+')
12196                        h++;
12197                    else if (*h == '-') {
12198                        negexp = TRUE;
12199                        h++;
12200                    }
12201                    if (isDIGIT(*h)) {
12202                        I32 hexfp_exp = 0;
12203                        while (isDIGIT(*h) || *h == '_') {
12204                            if (isDIGIT(*h)) {
12205                                hexfp_exp *= 10;
12206                                hexfp_exp += *h - '0';
12207#ifdef NV_MIN_EXP
12208                                if (negexp
12209                                    && -hexfp_exp < NV_MIN_EXP - 1) {
12210                                    /* NOTE: this means that the exponent
12211                                     * underflow warning happens for
12212                                     * the IEEE 754 subnormals (denormals),
12213                                     * because DBL_MIN_EXP etc are the lowest
12214                                     * possible binary (or, rather, DBL_RADIX-base)
12215                                     * exponent for normals, not subnormals.
12216                                     *
12217                                     * This may or may not be a good thing. */
12218                                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12219                                                   "Hexadecimal float: exponent underflow");
12220                                    break;
12221                                }
12222#endif
12223#ifdef NV_MAX_EXP
12224                                if (!negexp
12225                                    && hexfp_exp > NV_MAX_EXP - 1) {
12226                                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12227                                                   "Hexadecimal float: exponent overflow");
12228                                    break;
12229                                }
12230#endif
12231                            }
12232                            h++;
12233                        }
12234                        if (negexp)
12235                            hexfp_exp = -hexfp_exp;
12236#ifdef HEXFP_UQUAD
12237                        hexfp_exp -= hexfp_frac_bits;
12238#endif
12239                        hexfp_mult = Perl_pow(2.0, hexfp_exp);
12240                        hexfp = TRUE;
12241                        goto decimal;
12242                    }
12243                }
12244            }
12245
12246            if (!just_zero && !has_digs) {
12247                /* 0x, 0o or 0b with no digits, treat it as an error.
12248                   Originally this backed up the parse before the b or
12249                   x, but that has the potential for silent changes in
12250                   behaviour, like for: "0x.3" and "0x+$foo".
12251                */
12252                const char *d = s;
12253                char *oldbp = PL_bufptr;
12254                if (*d) ++d; /* so the user sees the bad non-digit */
12255                PL_bufptr = (char *)d; /* so yyerror reports the context */
12256                yyerror(Perl_form(aTHX_ "No digits found for %s literal",
12257                                  bases[shift]));
12258                PL_bufptr = oldbp;
12259            }
12260
12261            if (overflowed) {
12262                if (n > 4294967295.0)
12263                    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12264                                   "%s number > %s non-portable",
12265                                   Bases[shift],
12266                                   new_octal ? "0o37777777777" : maxima[shift]);
12267                sv = newSVnv(n);
12268            }
12269            else {
12270#if UVSIZE > 4
12271                if (u > 0xffffffff)
12272                    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12273                                   "%s number > %s non-portable",
12274                                   Bases[shift],
12275                                   new_octal ? "0o37777777777" : maxima[shift]);
12276#endif
12277                sv = newSVuv(u);
12278            }
12279            if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12280                sv = new_constant(start, s - start, "integer",
12281                                  sv, NULL, NULL, 0, NULL);
12282            else if (PL_hints & HINT_NEW_BINARY)
12283                sv = new_constant(start, s - start, "binary",
12284                                  sv, NULL, NULL, 0, NULL);
12285        }
12286        break;
12287
12288    /*
12289      handle decimal numbers.
12290      we're also sent here when we read a 0 as the first digit
12291    */
12292    case '1': case '2': case '3': case '4': case '5':
12293    case '6': case '7': case '8': case '9': case '.':
12294      decimal:
12295        d = PL_tokenbuf;
12296        e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12297        floatit = FALSE;
12298        if (hexfp) {
12299            floatit = TRUE;
12300            *d++ = '0';
12301            switch (shift) {
12302            case 4:
12303                *d++ = 'x';
12304                s = start + 2;
12305                break;
12306            case 3:
12307                if (new_octal) {
12308                    *d++ = 'o';
12309                    s = start + 2;
12310                    break;
12311                }
12312                s = start + 1;
12313                break;
12314            case 1:
12315                *d++ = 'b';
12316                s = start + 2;
12317                break;
12318            default:
12319                NOT_REACHED; /* NOTREACHED */
12320            }
12321        }
12322
12323        /* read next group of digits and _ and copy into d */
12324        while (isDIGIT(*s)
12325               || *s == '_'
12326               || UNLIKELY(hexfp && isXDIGIT(*s)))
12327        {
12328            /* skip underscores, checking for misplaced ones
12329               if -w is on
12330            */
12331            if (*s == '_') {
12332                if (lastub && s == lastub + 1)
12333                    WARN_ABOUT_UNDERSCORE();
12334                lastub = s++;
12335            }
12336            else {
12337                /* check for end of fixed-length buffer */
12338                if (d >= e)
12339                    Perl_croak(aTHX_ "%s", number_too_long);
12340                /* if we're ok, copy the character */
12341                *d++ = *s++;
12342            }
12343        }
12344
12345        /* final misplaced underbar check */
12346        if (lastub && s == lastub + 1)
12347            WARN_ABOUT_UNDERSCORE();
12348
12349        /* read a decimal portion if there is one.  avoid
12350           3..5 being interpreted as the number 3. followed
12351           by .5
12352        */
12353        if (*s == '.' && s[1] != '.') {
12354            floatit = TRUE;
12355            *d++ = *s++;
12356
12357            if (*s == '_') {
12358                WARN_ABOUT_UNDERSCORE();
12359                lastub = s;
12360            }
12361
12362            /* copy, ignoring underbars, until we run out of digits.
12363            */
12364            for (; isDIGIT(*s)
12365                   || *s == '_'
12366                   || UNLIKELY(hexfp && isXDIGIT(*s));
12367                 s++)
12368            {
12369                /* fixed length buffer check */
12370                if (d >= e)
12371                    Perl_croak(aTHX_ "%s", number_too_long);
12372                if (*s == '_') {
12373                   if (lastub && s == lastub + 1)
12374                        WARN_ABOUT_UNDERSCORE();
12375                   lastub = s;
12376                }
12377                else
12378                    *d++ = *s;
12379            }
12380            /* fractional part ending in underbar? */
12381            if (s[-1] == '_')
12382                WARN_ABOUT_UNDERSCORE();
12383            if (*s == '.' && isDIGIT(s[1])) {
12384                /* oops, it's really a v-string, but without the "v" */
12385                s = start;
12386                goto vstring;
12387            }
12388        }
12389
12390        /* read exponent part, if present */
12391        if ((isALPHA_FOLD_EQ(*s, 'e')
12392              || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12393            && memCHRs("+-0123456789_", s[1]))
12394        {
12395            int exp_digits = 0;
12396            const char *save_s = s;
12397            char * save_d = d;
12398
12399            /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12400               ditto for p (hexfloats) */
12401            if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12402                /* At least some Mach atof()s don't grok 'E' */
12403                *d++ = 'e';
12404            }
12405            else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12406                *d++ = 'p';
12407            }
12408
12409            s++;
12410
12411
12412            /* stray preinitial _ */
12413            if (*s == '_') {
12414                WARN_ABOUT_UNDERSCORE();
12415                lastub = s++;
12416            }
12417
12418            /* allow positive or negative exponent */
12419            if (*s == '+' || *s == '-')
12420                *d++ = *s++;
12421
12422            /* stray initial _ */
12423            if (*s == '_') {
12424                WARN_ABOUT_UNDERSCORE();
12425                lastub = s++;
12426            }
12427
12428            /* read digits of exponent */
12429            while (isDIGIT(*s) || *s == '_') {
12430                if (isDIGIT(*s)) {
12431                    ++exp_digits;
12432                    if (d >= e)
12433                        Perl_croak(aTHX_ "%s", number_too_long);
12434                    *d++ = *s++;
12435                }
12436                else {
12437                   if (((lastub && s == lastub + 1)
12438                        || (!isDIGIT(s[1]) && s[1] != '_')))
12439                        WARN_ABOUT_UNDERSCORE();
12440                   lastub = s++;
12441                }
12442            }
12443
12444            if (!exp_digits) {
12445                /* no exponent digits, the [eEpP] could be for something else,
12446                 * though in practice we don't get here for p since that's preparsed
12447                 * earlier, and results in only the 0xX being consumed, so behave similarly
12448                 * for decimal floats and consume only the D.DD, leaving the [eE] to the
12449                 * next token.
12450                 */
12451                s = save_s;
12452                d = save_d;
12453            }
12454            else {
12455                floatit = TRUE;
12456            }
12457        }
12458
12459
12460        /*
12461           We try to do an integer conversion first if no characters
12462           indicating "float" have been found.
12463         */
12464
12465        if (!floatit) {
12466            UV uv;
12467            const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12468
12469            if (flags == IS_NUMBER_IN_UV) {
12470              if (uv <= IV_MAX)
12471                sv = newSViv(uv); /* Prefer IVs over UVs. */
12472              else
12473                sv = newSVuv(uv);
12474            } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12475              if (uv <= (UV) IV_MIN)
12476                sv = newSViv(-(IV)uv);
12477              else
12478                floatit = TRUE;
12479            } else
12480              floatit = TRUE;
12481        }
12482        if (floatit) {
12483            /* terminate the string */
12484            *d = '\0';
12485            if (UNLIKELY(hexfp)) {
12486#  ifdef NV_MANT_DIG
12487                if (significant_bits > NV_MANT_DIG)
12488                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12489                                   "Hexadecimal float: mantissa overflow");
12490#  endif
12491#ifdef HEXFP_UQUAD
12492                nv = hexfp_uquad * hexfp_mult;
12493#else /* HEXFP_NV */
12494                nv = hexfp_nv * hexfp_mult;
12495#endif
12496            } else {
12497                nv = Atof(PL_tokenbuf);
12498            }
12499            sv = newSVnv(nv);
12500        }
12501
12502        if ( floatit
12503             ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12504            const char *const key = floatit ? "float" : "integer";
12505            const STRLEN keylen = floatit ? 5 : 7;
12506            sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12507                                key, keylen, sv, NULL, NULL, 0, NULL);
12508        }
12509        break;
12510
12511    /* if it starts with a v, it could be a v-string */
12512    case 'v':
12513    vstring:
12514                sv = newSV(5); /* preallocate storage space */
12515                ENTER_with_name("scan_vstring");
12516                SAVEFREESV(sv);
12517                s = scan_vstring(s, PL_bufend, sv);
12518                SvREFCNT_inc_simple_void_NN(sv);
12519                LEAVE_with_name("scan_vstring");
12520        break;
12521    }
12522
12523    /* make the op for the constant and return */
12524
12525    if (sv)
12526        lvalp->opval = newSVOP(OP_CONST, 0, sv);
12527    else
12528        lvalp->opval = NULL;
12529
12530    return (char *)s;
12531}
12532
12533STATIC char *
12534S_scan_formline(pTHX_ char *s)
12535{
12536    SV * const stuff = newSVpvs("");
12537    bool needargs = FALSE;
12538    bool eofmt = FALSE;
12539
12540    PERL_ARGS_ASSERT_SCAN_FORMLINE;
12541
12542    while (!needargs) {
12543        char *eol;
12544        if (*s == '.') {
12545            char *t = s+1;
12546#ifdef PERL_STRICT_CR
12547            while (SPACE_OR_TAB(*t))
12548                t++;
12549#else
12550            while (SPACE_OR_TAB(*t) || *t == '\r')
12551                t++;
12552#endif
12553            if (*t == '\n' || t == PL_bufend) {
12554                eofmt = TRUE;
12555                break;
12556            }
12557        }
12558        eol = (char *) memchr(s,'\n',PL_bufend-s);
12559        if (! eol) {
12560            eol = PL_bufend;
12561        }
12562        else {
12563            eol++;
12564        }
12565        if (*s != '#') {
12566            char *t;
12567            for (t = s; t < eol; t++) {
12568                if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12569                    needargs = FALSE;
12570                    goto enough;	/* ~~ must be first line in formline */
12571                }
12572                if (*t == '@' || *t == '^')
12573                    needargs = TRUE;
12574            }
12575            if (eol > s) {
12576                sv_catpvn(stuff, s, eol-s);
12577#ifndef PERL_STRICT_CR
12578                if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12579                    char *end = SvPVX(stuff) + SvCUR(stuff);
12580                    end[-2] = '\n';
12581                    end[-1] = '\0';
12582                    SvCUR_set(stuff, SvCUR(stuff) - 1);
12583                }
12584#endif
12585            }
12586            else
12587              break;
12588        }
12589        s = (char*)eol;
12590        if ((PL_rsfp || PL_parser->filtered)
12591         && PL_parser->form_lex_state == LEX_NORMAL) {
12592            bool got_some;
12593            PL_bufptr = PL_bufend;
12594            COPLINE_INC_WITH_HERELINES;
12595            got_some = lex_next_chunk(0);
12596            CopLINE_dec(PL_curcop);
12597            s = PL_bufptr;
12598            if (!got_some)
12599                break;
12600        }
12601        incline(s, PL_bufend);
12602    }
12603  enough:
12604    if (!SvCUR(stuff) || needargs)
12605        PL_lex_state = PL_parser->form_lex_state;
12606    if (SvCUR(stuff)) {
12607        PL_expect = XSTATE;
12608        if (needargs) {
12609            const char *s2 = s;
12610            while (isSPACE(*s2) && *s2 != '\n')
12611                s2++;
12612            if (*s2 == '{') {
12613                PL_expect = XTERMBLOCK;
12614                NEXTVAL_NEXTTOKE.ival = 0;
12615                force_next(KW_DO);
12616            }
12617            NEXTVAL_NEXTTOKE.ival = 0;
12618            force_next(FORMLBRACK);
12619        }
12620        if (!IN_BYTES) {
12621            if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12622                SvUTF8_on(stuff);
12623        }
12624        NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12625        force_next(THING);
12626    }
12627    else {
12628        SvREFCNT_dec(stuff);
12629        if (eofmt)
12630            PL_lex_formbrack = 0;
12631    }
12632    return s;
12633}
12634
12635/*
12636=for apidoc start_subparse
12637
12638Set things up for parsing a subroutine.
12639
12640If C<is_format> is non-zero, the input is to be considered a format sub
12641(a specialised sub used to implement perl's C<format> feature); else a
12642normal C<sub>.
12643
12644C<flags> are added to the flags for C<PL_compcv>.  C<flags> may include the
12645C<CVf_IsMETHOD> bit, which causes the new subroutine to be a method.
12646
12647This returns the value of C<PL_savestack_ix> that was in effect upon entry to
12648the function;
12649
12650=cut
12651*/
12652
12653I32
12654Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12655{
12656    const I32 oldsavestack_ix = PL_savestack_ix;
12657    CV* const outsidecv = PL_compcv;
12658    bool is_method = flags & CVf_IsMETHOD;
12659
12660    if (is_method)
12661        croak_kw_unless_class("method");
12662
12663    SAVEI32(PL_subline);
12664    save_item(PL_subname);
12665    SAVESPTR(PL_compcv);
12666
12667    PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12668    CvFLAGS(PL_compcv) |= flags;
12669
12670    PL_subline = CopLINE(PL_curcop);
12671    CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12672    CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12673    CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12674    if (outsidecv && CvPADLIST(outsidecv))
12675        CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12676    if (is_method)
12677        class_prepare_method_parse(PL_compcv);
12678
12679    return oldsavestack_ix;
12680}
12681
12682/* If o represents a builtin attribute, apply it to cv and returns true.
12683 * Otherwise does nothing and returns false
12684 */
12685
12686STATIC bool
12687S_apply_builtin_cv_attribute(pTHX_ CV *cv, OP *o)
12688{
12689    assert(o->op_type == OP_CONST);
12690    SV *sv = cSVOPo_sv;
12691    STRLEN len = SvCUR(sv);
12692
12693    /* NOTE: any CV attrs applied here need to be part of
12694       the CVf_BUILTIN_ATTRS define in cv.h! */
12695
12696    if(memEQs(SvPVX(sv), len, "lvalue"))
12697        CvLVALUE_on(cv);
12698    else if(memEQs(SvPVX(sv), len, "method"))
12699        CvNOWARN_AMBIGUOUS_on(cv);
12700    else if(memEQs(SvPVX(sv), len, "const")) {
12701        Perl_ck_warner_d(aTHX_
12702            packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
12703           ":const is experimental"
12704        );
12705        CvANONCONST_on(cv);
12706        if (!CvANON(cv))
12707            yyerror(":const is not permitted on named subroutines");
12708    }
12709    else
12710        return false;
12711
12712    return true;
12713}
12714
12715/*
12716=for apidoc apply_builtin_cv_attributes
12717
12718Given an OP_LIST containing attribute definitions, filter it for known builtin
12719attributes to apply to the cv, returning a possibly-smaller list containing
12720just the remaining ones.
12721
12722=cut
12723*/
12724
12725OP *
12726Perl_apply_builtin_cv_attributes(pTHX_ CV *cv, OP *attrlist)
12727{
12728    PERL_ARGS_ASSERT_APPLY_BUILTIN_CV_ATTRIBUTES;
12729
12730    if(!attrlist)
12731        return attrlist;
12732
12733    if(attrlist->op_type != OP_LIST) {
12734        /* Not in fact a list but just a single attribute */
12735        if(S_apply_builtin_cv_attribute(aTHX_ cv, attrlist)) {
12736            op_free(attrlist);
12737            return NULL;
12738        }
12739
12740        return attrlist;
12741    }
12742
12743    OP *prev = cLISTOPx(attrlist)->op_first;
12744    assert(prev->op_type == OP_PUSHMARK);
12745    OP *o = OpSIBLING(prev);
12746
12747    OP *next;
12748    for(; o; o = next) {
12749        next = OpSIBLING(o);
12750
12751        if(S_apply_builtin_cv_attribute(aTHX_ cv, o)) {
12752            op_sibling_splice(attrlist, prev, 1, NULL);
12753            op_free(o);
12754        }
12755        else {
12756            prev = o;
12757        }
12758    }
12759
12760    if(OpHAS_SIBLING(cLISTOPx(attrlist)->op_first))
12761        return attrlist;
12762
12763    /* The list is now entirely empty, we might as well discard it */
12764    op_free(attrlist);
12765    return NULL;
12766}
12767
12768
12769/* Do extra initialisation of a CV (typically one just created by
12770 * start_subparse()) if that CV is for a named sub
12771 */
12772
12773void
12774Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12775{
12776    PERL_ARGS_ASSERT_INIT_NAMED_CV;
12777
12778    if (nameop->op_type == OP_CONST) {
12779        const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12780        if (   strEQ(name, "BEGIN")
12781            || strEQ(name, "END")
12782            || strEQ(name, "INIT")
12783            || strEQ(name, "CHECK")
12784            || strEQ(name, "UNITCHECK")
12785        )
12786          CvSPECIAL_on(cv);
12787    }
12788    else
12789    /* State subs inside anonymous subs need to be
12790     clonable themselves. */
12791    if (   CvANON(CvOUTSIDE(cv))
12792        || CvCLONE(CvOUTSIDE(cv))
12793        || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12794                        CvOUTSIDE(cv)
12795                     ))[nameop->op_targ])
12796    )
12797      CvCLONE_on(cv);
12798}
12799
12800
12801static int
12802S_yywarn(pTHX_ const char *const s, U32 flags)
12803{
12804    PERL_ARGS_ASSERT_YYWARN;
12805
12806    PL_in_eval |= EVAL_WARNONLY;
12807    yyerror_pv(s, flags);
12808    return 0;
12809}
12810
12811void
12812Perl_abort_execution(pTHX_ SV* msg_sv, const char * const name)
12813{
12814    PERL_ARGS_ASSERT_ABORT_EXECUTION;
12815
12816    if (msg_sv) {
12817        if (PL_minus_c)
12818            Perl_croak(aTHX_ "%" SVf "%s had compilation errors.\n", SVfARG(msg_sv), name);
12819        else {
12820            Perl_croak(aTHX_
12821                    "%" SVf "Execution of %s aborted due to compilation errors.\n", SVfARG(msg_sv), name);
12822        }
12823    } else {
12824        if (PL_minus_c)
12825            Perl_croak(aTHX_ "%s had compilation errors.\n", name);
12826        else {
12827            Perl_croak(aTHX_
12828                    "Execution of %s aborted due to compilation errors.\n", name);
12829        }
12830    }
12831
12832    NOT_REACHED; /* NOTREACHED */
12833}
12834
12835void
12836Perl_yyquit(pTHX)
12837{
12838    /* Called, after at least one error has been found, to abort the parse now,
12839     * instead of trying to forge ahead */
12840
12841    yyerror_pvn(NULL, 0, 0);
12842}
12843
12844int
12845Perl_yyerror(pTHX_ const char *const s)
12846{
12847    PERL_ARGS_ASSERT_YYERROR;
12848    int r = yyerror_pvn(s, strlen(s), 0);
12849    return r;
12850}
12851
12852int
12853Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12854{
12855    PERL_ARGS_ASSERT_YYERROR_PV;
12856    int r = yyerror_pvn(s, strlen(s), flags);
12857    return r;
12858}
12859
12860int
12861Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12862{
12863    const char *context = NULL;
12864    int contlen = -1;
12865    SV *msg;
12866    SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12867    int yychar  = PL_parser->yychar;
12868
12869    /* Output error message 's' with length 'len'.  'flags' are SV flags that
12870     * apply.  If the number of errors found is large enough, it abandons
12871     * parsing.  If 's' is NULL, there is no message, and it abandons
12872     * processing unconditionally */
12873
12874    if (s != NULL) {
12875        if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
12876            sv_catpvs(where_sv, "at EOF");
12877        else if (   PL_oldoldbufptr
12878                 && PL_bufptr > PL_oldoldbufptr
12879                 && PL_bufptr - PL_oldoldbufptr < 200
12880                 && PL_oldoldbufptr != PL_oldbufptr
12881                 && PL_oldbufptr != PL_bufptr)
12882        {
12883            while (isSPACE(*PL_oldoldbufptr))
12884                PL_oldoldbufptr++;
12885            context = PL_oldoldbufptr;
12886            contlen = PL_bufptr - PL_oldoldbufptr;
12887        }
12888        else if (  PL_oldbufptr
12889                && PL_bufptr > PL_oldbufptr
12890                && PL_bufptr - PL_oldbufptr < 200
12891                && PL_oldbufptr != PL_bufptr)
12892        {
12893            while (isSPACE(*PL_oldbufptr))
12894                PL_oldbufptr++;
12895            context = PL_oldbufptr;
12896            contlen = PL_bufptr - PL_oldbufptr;
12897        }
12898        else if (yychar > 255)
12899            sv_catpvs(where_sv, "next token ???");
12900        else if (yychar == YYEMPTY) {
12901            if (PL_lex_state == LEX_NORMAL)
12902                sv_catpvs(where_sv, "at end of line");
12903            else if (PL_lex_inpat)
12904                sv_catpvs(where_sv, "within pattern");
12905            else
12906                sv_catpvs(where_sv, "within string");
12907        }
12908        else {
12909            sv_catpvs(where_sv, "next char ");
12910            if (yychar < 32)
12911                Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12912            else if (isPRINT_LC(yychar)) {
12913                const char string = yychar;
12914                sv_catpvn(where_sv, &string, 1);
12915            }
12916            else
12917                Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12918        }
12919        msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12920        Perl_sv_catpvf(aTHX_ msg, " at %s line %" LINE_Tf ", ",
12921            OutCopFILE(PL_curcop),
12922            (PL_parser->preambling == NOLINE
12923                   ? CopLINE(PL_curcop)
12924                   : PL_parser->preambling));
12925        if (context)
12926            Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12927                                 UTF8fARG(UTF, contlen, context));
12928        else
12929            Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12930        if (   PL_multi_start < PL_multi_end
12931            && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12932        {
12933            Perl_sv_catpvf(aTHX_ msg,
12934            "  (Might be a runaway multi-line %c%c string starting on"
12935            " line %" LINE_Tf ")\n",
12936                    (int)PL_multi_open,(int)PL_multi_close,(line_t)PL_multi_start);
12937            PL_multi_end = 0;
12938        }
12939        if (PL_in_eval & EVAL_WARNONLY) {
12940            PL_in_eval &= ~EVAL_WARNONLY;
12941            Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12942        }
12943        else {
12944            qerror(msg);
12945        }
12946    }
12947    /* if there was no message then this is a yyquit(), which is actualy handled
12948     * by qerror() with a NULL argument */
12949    if (s == NULL)
12950        qerror(NULL);
12951
12952    PL_in_my = 0;
12953    PL_in_my_stash = NULL;
12954    return 0;
12955}
12956
12957STATIC char*
12958S_swallow_bom(pTHX_ U8 *s)
12959{
12960    const STRLEN slen = SvCUR(PL_linestr);
12961
12962    PERL_ARGS_ASSERT_SWALLOW_BOM;
12963
12964    switch (s[0]) {
12965    case 0xFF:
12966        if (s[1] == 0xFE) {
12967            /* UTF-16 little-endian? (or UTF-32LE?) */
12968            if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12969                /* diag_listed_as: Unsupported script encoding %s */
12970                Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12971#ifndef PERL_NO_UTF16_FILTER
12972#ifdef DEBUGGING
12973            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12974#endif
12975            s += 2;
12976            if (PL_bufend > (char*)s) {
12977                s = add_utf16_textfilter(s, TRUE);
12978            }
12979#else
12980            /* diag_listed_as: Unsupported script encoding %s */
12981            Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12982#endif
12983        }
12984        break;
12985    case 0xFE:
12986        if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12987#ifndef PERL_NO_UTF16_FILTER
12988#ifdef DEBUGGING
12989            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12990#endif
12991            s += 2;
12992            if (PL_bufend > (char *)s) {
12993                s = add_utf16_textfilter(s, FALSE);
12994            }
12995#else
12996            /* diag_listed_as: Unsupported script encoding %s */
12997            Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12998#endif
12999        }
13000        break;
13001    case BOM_UTF8_FIRST_BYTE: {
13002        if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
13003#ifdef DEBUGGING
13004            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13005#endif
13006            s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
13007        }
13008        break;
13009    }
13010    case 0:
13011        if (slen > 3) {
13012             if (s[1] == 0) {
13013                  if (s[2] == 0xFE && s[3] == 0xFF) {
13014                       /* UTF-32 big-endian */
13015                       /* diag_listed_as: Unsupported script encoding %s */
13016                       Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13017                  }
13018             }
13019             else if (s[2] == 0 && s[3] != 0) {
13020                  /* Leading bytes
13021                   * 00 xx 00 xx
13022                   * are a good indicator of UTF-16BE. */
13023#ifndef PERL_NO_UTF16_FILTER
13024#ifdef DEBUGGING
13025                  if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13026#endif
13027                  s = add_utf16_textfilter(s, FALSE);
13028#else
13029                  /* diag_listed_as: Unsupported script encoding %s */
13030                  Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13031#endif
13032             }
13033        }
13034        break;
13035
13036    default:
13037         if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13038                  /* Leading bytes
13039                   * xx 00 xx 00
13040                   * are a good indicator of UTF-16LE. */
13041#ifndef PERL_NO_UTF16_FILTER
13042#ifdef DEBUGGING
13043              if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13044#endif
13045              s = add_utf16_textfilter(s, TRUE);
13046#else
13047              /* diag_listed_as: Unsupported script encoding %s */
13048              Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13049#endif
13050         }
13051    }
13052    return (char*)s;
13053}
13054
13055
13056#ifndef PERL_NO_UTF16_FILTER
13057static I32
13058S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13059{
13060    SV *const filter = FILTER_DATA(idx);
13061    /* We re-use this each time round, throwing the contents away before we
13062       return.  */
13063    SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13064    SV *const utf8_buffer = filter;
13065    IV status = IoPAGE(filter);
13066    const bool reverse = cBOOL(IoLINES(filter));
13067    I32 retval;
13068
13069    PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13070
13071    /* As we're automatically added, at the lowest level, and hence only called
13072       from this file, we can be sure that we're not called in block mode. Hence
13073       don't bother writing code to deal with block mode.  */
13074    if (maxlen) {
13075        Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13076    }
13077    if (status < 0) {
13078        Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
13079    }
13080    DEBUG_P(PerlIO_printf(Perl_debug_log,
13081                          "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13082                          FPTR2DPTR(void *, S_utf16_textfilter),
13083                          reverse ? 'l' : 'b', idx, maxlen, status,
13084                          (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13085
13086    while (1) {
13087        STRLEN chars;
13088        STRLEN have;
13089        Size_t newlen;
13090        U8 *end;
13091        /* First, look in our buffer of existing UTF-8 data:  */
13092        char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13093
13094        if (nl) {
13095            ++nl;
13096        } else if (status == 0) {
13097            /* EOF */
13098            IoPAGE(filter) = 0;
13099            nl = SvEND(utf8_buffer);
13100        }
13101        if (nl) {
13102            STRLEN got = nl - SvPVX(utf8_buffer);
13103            /* Did we have anything to append?  */
13104            retval = got != 0;
13105            sv_catpvn(sv, SvPVX(utf8_buffer), got);
13106            /* Everything else in this code works just fine if SVp_POK isn't
13107               set.  This, however, needs it, and we need it to work, else
13108               we loop infinitely because the buffer is never consumed.  */
13109            sv_chop(utf8_buffer, nl);
13110            break;
13111        }
13112
13113        /* OK, not a complete line there, so need to read some more UTF-16.
13114           Read an extra octect if the buffer currently has an odd number. */
13115        while (1) {
13116            if (status <= 0)
13117                break;
13118            if (SvCUR(utf16_buffer) >= 2) {
13119                /* Location of the high octet of the last complete code point.
13120                   Gosh, UTF-16 is a pain. All the benefits of variable length,
13121                   *coupled* with all the benefits of partial reads and
13122                   endianness.  */
13123                const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13124                    + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13125
13126                if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13127                    break;
13128                }
13129
13130                /* We have the first half of a surrogate. Read more.  */
13131                DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13132            }
13133
13134            status = FILTER_READ(idx + 1, utf16_buffer,
13135                                 160 + (SvCUR(utf16_buffer) & 1));
13136            DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
13137            DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13138            if (status < 0) {
13139                /* Error */
13140                IoPAGE(filter) = status;
13141                return status;
13142            }
13143        }
13144
13145        /* 'chars' isn't quite the right name, as code points above 0xFFFF
13146         * require 4 bytes per char */
13147        chars = SvCUR(utf16_buffer) >> 1;
13148        have = SvCUR(utf8_buffer);
13149
13150        /* Assume the worst case size as noted by the functions: twice the
13151         * number of input bytes */
13152        SvGROW(utf8_buffer, have + chars * 4 + 1);
13153
13154        if (reverse) {
13155            end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13156                                         (U8*)SvPVX_const(utf8_buffer) + have,
13157                                         chars * 2, &newlen);
13158        } else {
13159            end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13160                                (U8*)SvPVX_const(utf8_buffer) + have,
13161                                chars * 2, &newlen);
13162        }
13163        SvCUR_set(utf8_buffer, have + newlen);
13164        *end = '\0';
13165
13166        /* No need to keep this SV "well-formed" with a '\0' after the end, as
13167           it's private to us, and utf16_to_utf8{,reversed} take a
13168           (pointer,length) pair, rather than a NUL-terminated string.  */
13169        if(SvCUR(utf16_buffer) & 1) {
13170            *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13171            SvCUR_set(utf16_buffer, 1);
13172        } else {
13173            SvCUR_set(utf16_buffer, 0);
13174        }
13175    }
13176    DEBUG_P(PerlIO_printf(Perl_debug_log,
13177                          "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13178                          status,
13179                          (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13180    DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13181    return retval;
13182}
13183
13184static U8 *
13185S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13186{
13187    SV *filter = filter_add(S_utf16_textfilter, NULL);
13188
13189    PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13190
13191    IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13192    SvPVCLEAR(filter);
13193    IoLINES(filter) = reversed;
13194    IoPAGE(filter) = 1; /* Not EOF */
13195
13196    /* Sadly, we have to return a valid pointer, come what may, so we have to
13197       ignore any error return from this.  */
13198    SvCUR_set(PL_linestr, 0);
13199    if (FILTER_READ(0, PL_linestr, 0)) {
13200        SvUTF8_on(PL_linestr);
13201    } else {
13202        SvUTF8_on(PL_linestr);
13203    }
13204    PL_bufend = SvEND(PL_linestr);
13205    return (U8*)SvPVX(PL_linestr);
13206}
13207#endif
13208
13209/*
13210=for apidoc scan_vstring
13211
13212Returns a pointer to the next character after the parsed
13213vstring, as well as updating the passed in sv.
13214
13215Function must be called like
13216
13217        sv = sv_2mortal(newSV(5));
13218        s = scan_vstring(s,e,sv);
13219
13220where s and e are the start and end of the string.
13221The sv should already be large enough to store the vstring
13222passed in, for performance reasons.
13223
13224This function may croak if fatal warnings are enabled in the
13225calling scope, hence the sv_2mortal in the example (to prevent
13226a leak).  Make sure to do SvREFCNT_inc afterwards if you use
13227sv_2mortal.
13228
13229=cut
13230*/
13231
13232char *
13233Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13234{
13235    const char *pos = s;
13236    const char *start = s;
13237
13238    PERL_ARGS_ASSERT_SCAN_VSTRING;
13239
13240    if (*pos == 'v') pos++;  /* get past 'v' */
13241    while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13242        pos++;
13243    if ( *pos != '.') {
13244        /* this may not be a v-string if followed by => */
13245        const char *next = pos;
13246        while (next < e && isSPACE(*next))
13247            ++next;
13248        if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13249            /* return string not v-string */
13250            sv_setpvn(sv,(char *)s,pos-s);
13251            return (char *)pos;
13252        }
13253    }
13254
13255    if (!isALPHA(*pos)) {
13256        U8 tmpbuf[UTF8_MAXBYTES+1];
13257
13258        if (*s == 'v')
13259            s++;  /* get past 'v' */
13260
13261        SvPVCLEAR(sv);
13262
13263        for (;;) {
13264            /* this is atoi() that tolerates underscores */
13265            U8 *tmpend;
13266            UV rev = 0;
13267            const char *end = pos;
13268            UV mult = 1;
13269            while (--end >= s) {
13270                if (*end != '_') {
13271                    const UV orev = rev;
13272                    rev += (*end - '0') * mult;
13273                    mult *= 10;
13274                    if (orev > rev)
13275                        /* diag_listed_as: Integer overflow in %s number */
13276                        Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13277                                         "Integer overflow in decimal number");
13278                }
13279            }
13280
13281            /* Append native character for the rev point */
13282            tmpend = uvchr_to_utf8(tmpbuf, rev);
13283            sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13284            if (!UVCHR_IS_INVARIANT(rev))
13285                 SvUTF8_on(sv);
13286            if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13287                 s = ++pos;
13288            else {
13289                 s = pos;
13290                 break;
13291            }
13292            while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13293                 pos++;
13294        }
13295        SvPOK_on(sv);
13296        sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13297        SvRMAGICAL_on(sv);
13298    }
13299    return (char *)s;
13300}
13301
13302int
13303Perl_keyword_plugin_standard(pTHX_
13304        char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13305{
13306    PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13307    PERL_UNUSED_CONTEXT;
13308    PERL_UNUSED_ARG(keyword_ptr);
13309    PERL_UNUSED_ARG(keyword_len);
13310    PERL_UNUSED_ARG(op_ptr);
13311    return KEYWORD_PLUGIN_DECLINE;
13312}
13313
13314STRLEN
13315Perl_infix_plugin_standard(pTHX_
13316        char *operator_ptr, STRLEN operator_len, struct Perl_custom_infix **def)
13317{
13318    PERL_ARGS_ASSERT_INFIX_PLUGIN_STANDARD;
13319    PERL_UNUSED_CONTEXT;
13320    PERL_UNUSED_ARG(operator_ptr);
13321    PERL_UNUSED_ARG(operator_len);
13322    PERL_UNUSED_ARG(def);
13323    return 0;
13324}
13325
13326/*
13327=for apidoc_section $lexer
13328=for apidoc wrap_keyword_plugin
13329
13330Puts a C function into the chain of keyword plugins.  This is the
13331preferred way to manipulate the L</PL_keyword_plugin> variable.
13332C<new_plugin> is a pointer to the C function that is to be added to the
13333keyword plugin chain, and C<old_plugin_p> points to the storage location
13334where a pointer to the next function in the chain will be stored.  The
13335value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
13336while the value previously stored there is written to C<*old_plugin_p>.
13337
13338L</PL_keyword_plugin> is global to an entire process, and a module wishing
13339to hook keyword parsing may find itself invoked more than once per
13340process, typically in different threads.  To handle that situation, this
13341function is idempotent.  The location C<*old_plugin_p> must initially
13342(once per process) contain a null pointer.  A C variable of static
13343duration (declared at file scope, typically also marked C<static> to give
13344it internal linkage) will be implicitly initialised appropriately, if it
13345does not have an explicit initialiser.  This function will only actually
13346modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
13347function is also thread safe on the small scale.  It uses appropriate
13348locking to avoid race conditions in accessing L</PL_keyword_plugin>.
13349
13350When this function is called, the function referenced by C<new_plugin>
13351must be ready to be called, except for C<*old_plugin_p> being unfilled.
13352In a threading situation, C<new_plugin> may be called immediately, even
13353before this function has returned.  C<*old_plugin_p> will always be
13354appropriately set before C<new_plugin> is called.  If C<new_plugin>
13355decides not to do anything special with the identifier that it is given
13356(which is the usual case for most calls to a keyword plugin), it must
13357chain the plugin function referenced by C<*old_plugin_p>.
13358
13359Taken all together, XS code to install a keyword plugin should typically
13360look something like this:
13361
13362    static Perl_keyword_plugin_t next_keyword_plugin;
13363    static OP *my_keyword_plugin(pTHX_
13364        char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13365    {
13366        if (memEQs(keyword_ptr, keyword_len,
13367                   "my_new_keyword")) {
13368            ...
13369        } else {
13370            return next_keyword_plugin(aTHX_
13371                keyword_ptr, keyword_len, op_ptr);
13372        }
13373    }
13374    BOOT:
13375        wrap_keyword_plugin(my_keyword_plugin,
13376                            &next_keyword_plugin);
13377
13378Direct access to L</PL_keyword_plugin> should be avoided.
13379
13380=cut
13381*/
13382
13383void
13384Perl_wrap_keyword_plugin(pTHX_
13385    Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
13386{
13387
13388    PERL_UNUSED_CONTEXT;
13389    PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
13390    if (*old_plugin_p) return;
13391    KEYWORD_PLUGIN_MUTEX_LOCK;
13392    if (!*old_plugin_p) {
13393        *old_plugin_p = PL_keyword_plugin;
13394        PL_keyword_plugin = new_plugin;
13395    }
13396    KEYWORD_PLUGIN_MUTEX_UNLOCK;
13397}
13398
13399/*
13400=for apidoc wrap_infix_plugin
13401
13402B<NOTE:> This API exists entirely for the purpose of making the CPAN module
13403C<XS::Parse::Infix> work. It is not expected that additional modules will make
13404use of it; rather, that they should use C<XS::Parse::Infix> to provide parsing
13405of new infix operators.
13406
13407Puts a C function into the chain of infix plugins.  This is the preferred
13408way to manipulate the L</PL_infix_plugin> variable.  C<new_plugin> is a
13409pointer to the C function that is to be added to the infix plugin chain, and
13410C<old_plugin_p> points to a storage location where a pointer to the next
13411function in the chain will be stored.  The value of C<new_plugin> is written
13412into the L</PL_infix_plugin> variable, while the value previously stored there
13413is written to C<*old_plugin_p>.
13414
13415Direct access to L</PL_infix_plugin> should be avoided.
13416
13417=cut
13418*/
13419
13420void
13421Perl_wrap_infix_plugin(pTHX_
13422    Perl_infix_plugin_t new_plugin, Perl_infix_plugin_t *old_plugin_p)
13423{
13424
13425    PERL_UNUSED_CONTEXT;
13426    PERL_ARGS_ASSERT_WRAP_INFIX_PLUGIN;
13427    if (*old_plugin_p) return;
13428    /* We use the same mutex as for PL_keyword_plugin as it's so rare either
13429     * of them is actually updated; no need for a dedicated one each */
13430    KEYWORD_PLUGIN_MUTEX_LOCK;
13431    if (!*old_plugin_p) {
13432        *old_plugin_p = PL_infix_plugin;
13433        PL_infix_plugin = new_plugin;
13434    }
13435    KEYWORD_PLUGIN_MUTEX_UNLOCK;
13436}
13437
13438#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
13439static void
13440S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
13441{
13442    SAVEI32(PL_lex_brackets);
13443    if (PL_lex_brackets > 100)
13444        Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13445    PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13446    SAVEI32(PL_lex_allbrackets);
13447    PL_lex_allbrackets = 0;
13448    SAVEI8(PL_lex_fakeeof);
13449    PL_lex_fakeeof = (U8)fakeeof;
13450    if(yyparse(gramtype) && !PL_parser->error_count)
13451        qerror(Perl_mess(aTHX_ "Parse error"));
13452}
13453
13454#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
13455static OP *
13456S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13457{
13458    OP *o;
13459    ENTER;
13460    SAVEVPTR(PL_eval_root);
13461    PL_eval_root = NULL;
13462    parse_recdescent(gramtype, fakeeof);
13463    o = PL_eval_root;
13464    LEAVE;
13465    return o;
13466}
13467
13468#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13469static OP *
13470S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13471{
13472    OP *exprop;
13473    if (flags & ~PARSE_OPTIONAL)
13474        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13475    exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13476    if (!exprop && !(flags & PARSE_OPTIONAL)) {
13477        if (!PL_parser->error_count)
13478            qerror(Perl_mess(aTHX_ "Parse error"));
13479        exprop = newOP(OP_NULL, 0);
13480    }
13481    return exprop;
13482}
13483
13484/*
13485=for apidoc parse_arithexpr
13486
13487Parse a Perl arithmetic expression.  This may contain operators of precedence
13488down to the bit shift operators.  The expression must be followed (and thus
13489terminated) either by a comparison or lower-precedence operator or by
13490something that would normally terminate an expression such as semicolon.
13491If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13492otherwise it is mandatory.  It is up to the caller to ensure that the
13493dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13494the source of the code to be parsed and the lexical context for the
13495expression.
13496
13497The op tree representing the expression is returned.  If an optional
13498expression is absent, a null pointer is returned, otherwise the pointer
13499will be non-null.
13500
13501If an error occurs in parsing or compilation, in most cases a valid op
13502tree is returned anyway.  The error is reflected in the parser state,
13503normally resulting in a single exception at the top level of parsing
13504which covers all the compilation errors that occurred.  Some compilation
13505errors, however, will throw an exception immediately.
13506
13507=for apidoc Amnh||PARSE_OPTIONAL
13508
13509=cut
13510
13511*/
13512
13513OP *
13514Perl_parse_arithexpr(pTHX_ U32 flags)
13515{
13516    return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13517}
13518
13519/*
13520=for apidoc parse_termexpr
13521
13522Parse a Perl term expression.  This may contain operators of precedence
13523down to the assignment operators.  The expression must be followed (and thus
13524terminated) either by a comma or lower-precedence operator or by
13525something that would normally terminate an expression such as semicolon.
13526If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13527otherwise it is mandatory.  It is up to the caller to ensure that the
13528dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13529the source of the code to be parsed and the lexical context for the
13530expression.
13531
13532The op tree representing the expression is returned.  If an optional
13533expression is absent, a null pointer is returned, otherwise the pointer
13534will be non-null.
13535
13536If an error occurs in parsing or compilation, in most cases a valid op
13537tree is returned anyway.  The error is reflected in the parser state,
13538normally resulting in a single exception at the top level of parsing
13539which covers all the compilation errors that occurred.  Some compilation
13540errors, however, will throw an exception immediately.
13541
13542=cut
13543*/
13544
13545OP *
13546Perl_parse_termexpr(pTHX_ U32 flags)
13547{
13548    return parse_expr(LEX_FAKEEOF_COMMA, flags);
13549}
13550
13551/*
13552=for apidoc parse_listexpr
13553
13554Parse a Perl list expression.  This may contain operators of precedence
13555down to the comma operator.  The expression must be followed (and thus
13556terminated) either by a low-precedence logic operator such as C<or> or by
13557something that would normally terminate an expression such as semicolon.
13558If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13559otherwise it is mandatory.  It is up to the caller to ensure that the
13560dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13561the source of the code to be parsed and the lexical context for the
13562expression.
13563
13564The op tree representing the expression is returned.  If an optional
13565expression is absent, a null pointer is returned, otherwise the pointer
13566will be non-null.
13567
13568If an error occurs in parsing or compilation, in most cases a valid op
13569tree is returned anyway.  The error is reflected in the parser state,
13570normally resulting in a single exception at the top level of parsing
13571which covers all the compilation errors that occurred.  Some compilation
13572errors, however, will throw an exception immediately.
13573
13574=cut
13575*/
13576
13577OP *
13578Perl_parse_listexpr(pTHX_ U32 flags)
13579{
13580    return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13581}
13582
13583/*
13584=for apidoc parse_fullexpr
13585
13586Parse a single complete Perl expression.  This allows the full
13587expression grammar, including the lowest-precedence operators such
13588as C<or>.  The expression must be followed (and thus terminated) by a
13589token that an expression would normally be terminated by: end-of-file,
13590closing bracketing punctuation, semicolon, or one of the keywords that
13591signals a postfix expression-statement modifier.  If C<flags> has the
13592C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13593mandatory.  It is up to the caller to ensure that the dynamic parser
13594state (L</PL_parser> et al) is correctly set to reflect the source of
13595the code to be parsed and the lexical context for the expression.
13596
13597The op tree representing the expression is returned.  If an optional
13598expression is absent, a null pointer is returned, otherwise the pointer
13599will be non-null.
13600
13601If an error occurs in parsing or compilation, in most cases a valid op
13602tree is returned anyway.  The error is reflected in the parser state,
13603normally resulting in a single exception at the top level of parsing
13604which covers all the compilation errors that occurred.  Some compilation
13605errors, however, will throw an exception immediately.
13606
13607=cut
13608*/
13609
13610OP *
13611Perl_parse_fullexpr(pTHX_ U32 flags)
13612{
13613    return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13614}
13615
13616/*
13617=for apidoc parse_block
13618
13619Parse a single complete Perl code block.  This consists of an opening
13620brace, a sequence of statements, and a closing brace.  The block
13621constitutes a lexical scope, so C<my> variables and various compile-time
13622effects can be contained within it.  It is up to the caller to ensure
13623that the dynamic parser state (L</PL_parser> et al) is correctly set to
13624reflect the source of the code to be parsed and the lexical context for
13625the statement.
13626
13627The op tree representing the code block is returned.  This is always a
13628real op, never a null pointer.  It will normally be a C<lineseq> list,
13629including C<nextstate> or equivalent ops.  No ops to construct any kind
13630of runtime scope are included by virtue of it being a block.
13631
13632If an error occurs in parsing or compilation, in most cases a valid op
13633tree (most likely null) is returned anyway.  The error is reflected in
13634the parser state, normally resulting in a single exception at the top
13635level of parsing which covers all the compilation errors that occurred.
13636Some compilation errors, however, will throw an exception immediately.
13637
13638The C<flags> parameter is reserved for future use, and must always
13639be zero.
13640
13641=cut
13642*/
13643
13644OP *
13645Perl_parse_block(pTHX_ U32 flags)
13646{
13647    if (flags)
13648        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13649    return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13650}
13651
13652/*
13653=for apidoc parse_barestmt
13654
13655Parse a single unadorned Perl statement.  This may be a normal imperative
13656statement or a declaration that has compile-time effect.  It does not
13657include any label or other affixture.  It is up to the caller to ensure
13658that the dynamic parser state (L</PL_parser> et al) is correctly set to
13659reflect the source of the code to be parsed and the lexical context for
13660the statement.
13661
13662The op tree representing the statement is returned.  This may be a
13663null pointer if the statement is null, for example if it was actually
13664a subroutine definition (which has compile-time side effects).  If not
13665null, it will be ops directly implementing the statement, suitable to
13666pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13667equivalent op (except for those embedded in a scope contained entirely
13668within the statement).
13669
13670If an error occurs in parsing or compilation, in most cases a valid op
13671tree (most likely null) is returned anyway.  The error is reflected in
13672the parser state, normally resulting in a single exception at the top
13673level of parsing which covers all the compilation errors that occurred.
13674Some compilation errors, however, will throw an exception immediately.
13675
13676The C<flags> parameter is reserved for future use, and must always
13677be zero.
13678
13679=cut
13680*/
13681
13682OP *
13683Perl_parse_barestmt(pTHX_ U32 flags)
13684{
13685    if (flags)
13686        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13687    return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13688}
13689
13690/*
13691=for apidoc parse_label
13692
13693Parse a single label, possibly optional, of the type that may prefix a
13694Perl statement.  It is up to the caller to ensure that the dynamic parser
13695state (L</PL_parser> et al) is correctly set to reflect the source of
13696the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13697label is optional, otherwise it is mandatory.
13698
13699The name of the label is returned in the form of a fresh scalar.  If an
13700optional label is absent, a null pointer is returned.
13701
13702If an error occurs in parsing, which can only occur if the label is
13703mandatory, a valid label is returned anyway.  The error is reflected in
13704the parser state, normally resulting in a single exception at the top
13705level of parsing which covers all the compilation errors that occurred.
13706
13707=cut
13708*/
13709
13710SV *
13711Perl_parse_label(pTHX_ U32 flags)
13712{
13713    if (flags & ~PARSE_OPTIONAL)
13714        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13715    if (PL_nexttoke) {
13716        PL_parser->yychar = yylex();
13717        if (PL_parser->yychar == LABEL) {
13718            SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13719            PL_parser->yychar = YYEMPTY;
13720            cSVOPx(pl_yylval.opval)->op_sv = NULL;
13721            op_free(pl_yylval.opval);
13722            return labelsv;
13723        } else {
13724            yyunlex();
13725            goto no_label;
13726        }
13727    } else {
13728        char *s, *t;
13729        STRLEN wlen, bufptr_pos;
13730        lex_read_space(0);
13731        t = s = PL_bufptr;
13732        if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13733            goto no_label;
13734        t = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen, FALSE);
13735        if (word_takes_any_delimiter(s, wlen))
13736            goto no_label;
13737        bufptr_pos = s - SvPVX(PL_linestr);
13738        PL_bufptr = t;
13739        lex_read_space(LEX_KEEP_PREVIOUS);
13740        t = PL_bufptr;
13741        s = SvPVX(PL_linestr) + bufptr_pos;
13742        if (t[0] == ':' && t[1] != ':') {
13743            PL_oldoldbufptr = PL_oldbufptr;
13744            PL_oldbufptr = s;
13745            PL_bufptr = t+1;
13746            return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13747        } else {
13748            PL_bufptr = s;
13749            no_label:
13750            if (flags & PARSE_OPTIONAL) {
13751                return NULL;
13752            } else {
13753                qerror(Perl_mess(aTHX_ "Parse error"));
13754                return newSVpvs("x");
13755            }
13756        }
13757    }
13758}
13759
13760/*
13761=for apidoc parse_fullstmt
13762
13763Parse a single complete Perl statement.  This may be a normal imperative
13764statement or a declaration that has compile-time effect, and may include
13765optional labels.  It is up to the caller to ensure that the dynamic
13766parser state (L</PL_parser> et al) is correctly set to reflect the source
13767of the code to be parsed and the lexical context for the statement.
13768
13769The op tree representing the statement is returned.  This may be a
13770null pointer if the statement is null, for example if it was actually
13771a subroutine definition (which has compile-time side effects).  If not
13772null, it will be the result of a L</newSTATEOP> call, normally including
13773a C<nextstate> or equivalent op.
13774
13775If an error occurs in parsing or compilation, in most cases a valid op
13776tree (most likely null) is returned anyway.  The error is reflected in
13777the parser state, normally resulting in a single exception at the top
13778level of parsing which covers all the compilation errors that occurred.
13779Some compilation errors, however, will throw an exception immediately.
13780
13781The C<flags> parameter is reserved for future use, and must always
13782be zero.
13783
13784=cut
13785*/
13786
13787OP *
13788Perl_parse_fullstmt(pTHX_ U32 flags)
13789{
13790    if (flags)
13791        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13792    return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13793}
13794
13795/*
13796=for apidoc parse_stmtseq
13797
13798Parse a sequence of zero or more Perl statements.  These may be normal
13799imperative statements, including optional labels, or declarations
13800that have compile-time effect, or any mixture thereof.  The statement
13801sequence ends when a closing brace or end-of-file is encountered in a
13802place where a new statement could have validly started.  It is up to
13803the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13804is correctly set to reflect the source of the code to be parsed and the
13805lexical context for the statements.
13806
13807The op tree representing the statement sequence is returned.  This may
13808be a null pointer if the statements were all null, for example if there
13809were no statements or if there were only subroutine definitions (which
13810have compile-time side effects).  If not null, it will be a C<lineseq>
13811list, normally including C<nextstate> or equivalent ops.
13812
13813If an error occurs in parsing or compilation, in most cases a valid op
13814tree is returned anyway.  The error is reflected in the parser state,
13815normally resulting in a single exception at the top level of parsing
13816which covers all the compilation errors that occurred.  Some compilation
13817errors, however, will throw an exception immediately.
13818
13819The C<flags> parameter is reserved for future use, and must always
13820be zero.
13821
13822=cut
13823*/
13824
13825OP *
13826Perl_parse_stmtseq(pTHX_ U32 flags)
13827{
13828    OP *stmtseqop;
13829    I32 c;
13830    if (flags)
13831        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13832    stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13833    c = lex_peek_unichar(0);
13834    if (c != -1 && c != /*{*/'}')
13835        qerror(Perl_mess(aTHX_ "Parse error"));
13836    return stmtseqop;
13837}
13838
13839/*
13840=for apidoc parse_subsignature
13841
13842Parse a subroutine signature declaration. This is the contents of the
13843parentheses following a named or anonymous subroutine declaration when the
13844C<signatures> feature is enabled. Note that this function neither expects
13845nor consumes the opening and closing parentheses around the signature; it
13846is the caller's job to handle these.
13847
13848This function must only be called during parsing of a subroutine; after
13849L</start_subparse> has been called. It might allocate lexical variables on
13850the pad for the current subroutine.
13851
13852The op tree to unpack the arguments from the stack at runtime is returned.
13853This op tree should appear at the beginning of the compiled function. The
13854caller may wish to use L</op_append_list> to build their function body
13855after it, or splice it together with the body before calling L</newATTRSUB>.
13856
13857The C<flags> parameter is reserved for future use, and must always
13858be zero.
13859
13860=cut
13861*/
13862
13863OP *
13864Perl_parse_subsignature(pTHX_ U32 flags)
13865{
13866    if (flags)
13867        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13868    return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13869}
13870
13871/*
13872 * ex: set ts=8 sts=4 sw=4 et:
13873 */
13874