toke.c revision 1.27
1/*    toke.c
2 *
3 *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 *    You may distribute under the terms of either the GNU General Public
7 *    License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 *  'It all comes from here, the stench and the peril.'    --Frodo
13 *
14 *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15 */
16
17/*
18 * This file is the lexer for Perl.  It's closely linked to the
19 * parser, perly.y.
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
24/*
25=head1 Lexer interface
26This is the lower layer of the Perl parser, managing characters and tokens.
27
28=for apidoc AmnU|yy_parser *|PL_parser
29
30Pointer to a structure encapsulating the state of the parsing operation
31currently in progress.  The pointer can be locally changed to perform
32a nested parse without interfering with the state of an outer parse.
33Individual members of C<PL_parser> have their own documentation.
34
35=cut
36*/
37
38#include "EXTERN.h"
39#define PERL_IN_TOKE_C
40#include "perl.h"
41#include "invlist_inline.h"
42
43#define new_constant(a,b,c,d,e,f,g, h)	\
44        S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
45
46#define pl_yylval	(PL_parser->yylval)
47
48/* XXX temporary backwards compatibility */
49#define PL_lex_brackets		(PL_parser->lex_brackets)
50#define PL_lex_allbrackets	(PL_parser->lex_allbrackets)
51#define PL_lex_fakeeof		(PL_parser->lex_fakeeof)
52#define PL_lex_brackstack	(PL_parser->lex_brackstack)
53#define PL_lex_casemods		(PL_parser->lex_casemods)
54#define PL_lex_casestack        (PL_parser->lex_casestack)
55#define PL_lex_dojoin		(PL_parser->lex_dojoin)
56#define PL_lex_formbrack        (PL_parser->lex_formbrack)
57#define PL_lex_inpat		(PL_parser->lex_inpat)
58#define PL_lex_inwhat		(PL_parser->lex_inwhat)
59#define PL_lex_op		(PL_parser->lex_op)
60#define PL_lex_repl		(PL_parser->lex_repl)
61#define PL_lex_starts		(PL_parser->lex_starts)
62#define PL_lex_stuff		(PL_parser->lex_stuff)
63#define PL_multi_start		(PL_parser->multi_start)
64#define PL_multi_open		(PL_parser->multi_open)
65#define PL_multi_close		(PL_parser->multi_close)
66#define PL_preambled		(PL_parser->preambled)
67#define PL_linestr		(PL_parser->linestr)
68#define PL_expect		(PL_parser->expect)
69#define PL_copline		(PL_parser->copline)
70#define PL_bufptr		(PL_parser->bufptr)
71#define PL_oldbufptr		(PL_parser->oldbufptr)
72#define PL_oldoldbufptr		(PL_parser->oldoldbufptr)
73#define PL_linestart		(PL_parser->linestart)
74#define PL_bufend		(PL_parser->bufend)
75#define PL_last_uni		(PL_parser->last_uni)
76#define PL_last_lop		(PL_parser->last_lop)
77#define PL_last_lop_op		(PL_parser->last_lop_op)
78#define PL_lex_state		(PL_parser->lex_state)
79#define PL_rsfp			(PL_parser->rsfp)
80#define PL_rsfp_filters		(PL_parser->rsfp_filters)
81#define PL_in_my		(PL_parser->in_my)
82#define PL_in_my_stash		(PL_parser->in_my_stash)
83#define PL_tokenbuf		(PL_parser->tokenbuf)
84#define PL_multi_end		(PL_parser->multi_end)
85#define PL_error_count		(PL_parser->error_count)
86
87#  define PL_nexttoke		(PL_parser->nexttoke)
88#  define PL_nexttype		(PL_parser->nexttype)
89#  define PL_nextval		(PL_parser->nextval)
90
91
92#define SvEVALED(sv) \
93    (SvTYPE(sv) >= SVt_PVNV \
94    && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
95
96static const char ident_too_long[] = "Identifier too long";
97static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
98
99#  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100
101#define XENUMMASK  0x3f
102#define XFAKEEOF   0x40
103#define XFAKEBRACK 0x80
104
105#ifdef USE_UTF8_SCRIPTS
106#   define UTF cBOOL(!IN_BYTES)
107#else
108#   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
109#endif
110
111/* The maximum number of characters preceding the unrecognized one to display */
112#define UNRECOGNIZED_PRECEDE_COUNT 10
113
114/* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116#define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
117
118#define SPACE_OR_TAB(c) isBLANK_A(c)
119
120#define HEXFP_PEEK(s)     \
121    (((s[0] == '.') && \
122      (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123     isALPHA_FOLD_EQ(s[0], 'p'))
124
125/* LEX_* are values for PL_lex_state, the state of the lexer.
126 * They are arranged oddly so that the guard on the switch statement
127 * can get by with a single comparison (if the compiler is smart enough).
128 *
129 * These values refer to the various states within a sublex parse,
130 * i.e. within a double quotish string
131 */
132
133/* #define LEX_NOTPARSING		11 is done in perl.h. */
134
135#define LEX_NORMAL		10 /* normal code (ie not within "...")     */
136#define LEX_INTERPNORMAL	 9 /* code within a string, eg "$foo[$x+1]" */
137#define LEX_INTERPCASEMOD	 8 /* expecting a \U, \Q or \E etc          */
138#define LEX_INTERPPUSH		 7 /* starting a new sublex parse level     */
139#define LEX_INTERPSTART		 6 /* expecting the start of a $var         */
140
141                                   /* at end of code, eg "$x" followed by:  */
142#define LEX_INTERPEND		 5 /* ... eg not one of [, { or ->          */
143#define LEX_INTERPENDMAYBE	 4 /* ... eg one of [, { or ->              */
144
145#define LEX_INTERPCONCAT	 3 /* expecting anything, eg at start of
146                                        string or after \E, $foo, etc       */
147#define LEX_INTERPCONST		 2 /* NOT USED */
148#define LEX_FORMLINE		 1 /* expecting a format line               */
149
150/* returned to yyl_try() to request it to retry the parse loop, expected to only
151   be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
152   can also return it.
153
154   yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
155   other token values are 258 or higher (see perly.h), so -1 should be
156   a safe value here.
157*/
158#define YYL_RETRY (-1)
159
160#ifdef DEBUGGING
161static const char* const lex_state_names[] = {
162    "KNOWNEXT",
163    "FORMLINE",
164    "INTERPCONST",
165    "INTERPCONCAT",
166    "INTERPENDMAYBE",
167    "INTERPEND",
168    "INTERPSTART",
169    "INTERPPUSH",
170    "INTERPCASEMOD",
171    "INTERPNORMAL",
172    "NORMAL"
173};
174#endif
175
176#include "keywords.h"
177
178/* CLINE is a macro that ensures PL_copline has a sane value */
179
180#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
181
182/*
183 * Convenience functions to return different tokens and prime the
184 * lexer for the next token.  They all take an argument.
185 *
186 * TOKEN        : generic token (used for '(', DOLSHARP, etc)
187 * OPERATOR     : generic operator
188 * AOPERATOR    : assignment operator
189 * PREBLOCK     : beginning the block after an if, while, foreach, ...
190 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
191 * PREREF       : *EXPR where EXPR is not a simple identifier
192 * TERM         : expression term
193 * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
194 * LOOPX        : loop exiting command (goto, last, dump, etc)
195 * FTST         : file test operator
196 * FUN0         : zero-argument function
197 * FUN0OP       : zero-argument function, with its op created in this file
198 * FUN1         : not used, except for not, which isn't a UNIOP
199 * BOop         : bitwise or or xor
200 * BAop         : bitwise and
201 * BCop         : bitwise complement
202 * SHop         : shift operator
203 * PWop         : power operator
204 * PMop         : pattern-matching operator
205 * Aop          : addition-level operator
206 * AopNOASSIGN  : addition-level operator that is never part of .=
207 * Mop          : multiplication-level operator
208 * ChEop        : chaining equality-testing operator
209 * NCEop        : non-chaining comparison operator at equality precedence
210 * ChRop        : chaining relational operator <= != gt
211 * NCRop        : non-chaining relational operator isa
212 *
213 * Also see LOP and lop() below.
214 */
215
216#ifdef DEBUGGING /* Serve -DT. */
217#   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
218#else
219#   define REPORT(retval) (retval)
220#endif
221
222#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
223#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
224#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
225#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
226#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
227#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
228#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
229#define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
230#define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
231                         pl_yylval.ival=f, \
232                         PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
233                         REPORT((int)LOOPEX))
234#define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
235#define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
236#define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
237#define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
238#define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
239#define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
240#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
241                       REPORT(PERLY_TILDE)
242#define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
243#define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
244#define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
245#define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
246#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
247#define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
248#define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
249#define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
250#define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
251#define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
252
253/* This bit of chicanery makes a unary function followed by
254 * a parenthesis into a function with one argument, highest precedence.
255 * The UNIDOR macro is for unary functions that can be followed by the //
256 * operator (such as C<shift // 0>).
257 */
258#define UNI3(f,x,have_x) { \
259        pl_yylval.ival = f; \
260        if (have_x) PL_expect = x; \
261        PL_bufptr = s; \
262        PL_last_uni = PL_oldbufptr; \
263        PL_last_lop_op = (f) < 0 ? -(f) : (f); \
264        if (*s == '(') \
265            return REPORT( (int)FUNC1 ); \
266        s = skipspace(s); \
267        return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
268        }
269#define UNI(f)    UNI3(f,XTERM,1)
270#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
271#define UNIPROTO(f,optional) { \
272        if (optional) PL_last_uni = PL_oldbufptr; \
273        OPERATOR(f); \
274        }
275
276#define UNIBRACK(f) UNI3(f,0,0)
277
278/* return has special case parsing.
279 *
280 * List operators have low precedence. Functions have high precedence.
281 * Every built in, *except return*, if written with () around its arguments, is
282 * parsed as a function. Hence every other list built in:
283 *
284 * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
285 * 429
286 * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
287 * 639
288 * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
289 * Useless use of a constant (2) in void context at -e line 1.
290 * Useless use of a constant (4) in void context at -e line 1.
291 *
292 * $
293 *
294 * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
295 * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
296 *
297 * Whereas return:
298 *
299 * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
300 * 2
301 * 4
302 * 9
303 * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
304 * Useless use of a constant (2) in void context at -e line 1.
305 * Useless use of a constant (4) in void context at -e line 1.
306 * 9
307 * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
308 * Useless use of a constant (2) in void context at -e line 1.
309 * Useless use of a constant (4) in void context at -e line 1.
310 * 9
311 * $
312 *
313 * and:
314 * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
315 * 2
316 * 4
317 * 6
318 *
319 * This last example is what we expect, but it's clearly inconsistent with how
320 * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
321 * followed.
322 *
323 *
324 * Perl 3 attempted to be consistent:
325 *
326 *   The rules are more consistent about where parens are needed and
327 *   where they are not.  In particular, unary operators and list operators now
328 *   behave like functions if they're called like functions.
329 *
330 * However, the behaviour for return was reverted to the "old" parsing with
331 * patches 9-12:
332 *
333 *   The construct
334 *   return (1,2,3);
335 *   did not do what was expected, since return was swallowing the
336 *   parens in order to consider itself a function.  The solution,
337 *   since return never wants any trailing expression such as
338 *   return (1,2,3) + 2;
339 *   is to simply make return an exception to the paren-makes-a-function
340 *   rule, and treat it the way it always was, so that it doesn't
341 *   strip the parens.
342 *
343 * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
344 * LOP(OP_RETURN, XTERM);
345 *
346 * and constructs such as
347 *
348 *     return (Internals::V())[2]
349 *
350 * turn into syntax errors
351 */
352
353#define OLDLOP(f) \
354        do { \
355            if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
356                PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
357            pl_yylval.ival = (f); \
358            PL_expect = XTERM; \
359            PL_bufptr = s; \
360            return (int)LSTOP; \
361        } while(0)
362
363#define COPLINE_INC_WITH_HERELINES		    \
364    STMT_START {				     \
365        CopLINE_inc(PL_curcop);			      \
366        if (PL_parser->herelines)		       \
367            CopLINE(PL_curcop) += PL_parser->herelines, \
368            PL_parser->herelines = 0;			 \
369    } STMT_END
370/* Called after scan_str to update CopLINE(PL_curcop), but only when there
371 * is no sublex_push to follow. */
372#define COPLINE_SET_FROM_MULTI_END	      \
373    STMT_START {			       \
374        CopLINE_set(PL_curcop, PL_multi_end);	\
375        if (PL_multi_end != PL_multi_start)	 \
376            PL_parser->herelines = 0;		  \
377    } STMT_END
378
379
380/* A file-local structure for passing around information about subroutines and
381 * related definable words */
382struct code {
383    SV *sv;
384    CV *cv;
385    GV *gv, **gvp;
386    OP *rv2cv_op;
387    PADOFFSET off;
388    bool lex;
389};
390
391static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
392
393#ifdef DEBUGGING
394
395/* how to interpret the pl_yylval associated with the token */
396enum token_type {
397    TOKENTYPE_NONE,
398    TOKENTYPE_IVAL,
399    TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
400    TOKENTYPE_PVAL,
401    TOKENTYPE_OPVAL
402};
403
404#define DEBUG_TOKEN(Type, Name)                                         \
405    { Name, TOKENTYPE_##Type, #Name }
406
407static struct debug_tokens {
408    const int token;
409    enum token_type type;
410    const char *name;
411} const debug_tokens[] =
412{
413    { ADDOP,		TOKENTYPE_OPNUM,	"ADDOP" },
414    { ANDAND,		TOKENTYPE_NONE,		"ANDAND" },
415    { ANDOP,		TOKENTYPE_NONE,		"ANDOP" },
416    { ANONSUB,		TOKENTYPE_IVAL,		"ANONSUB" },
417    { ANON_SIGSUB,	TOKENTYPE_IVAL,		"ANON_SIGSUB" },
418    { ARROW,		TOKENTYPE_NONE,		"ARROW" },
419    { ASSIGNOP,		TOKENTYPE_OPNUM,	"ASSIGNOP" },
420    { BITANDOP,		TOKENTYPE_OPNUM,	"BITANDOP" },
421    { BITOROP,		TOKENTYPE_OPNUM,	"BITOROP" },
422    { CATCH,		TOKENTYPE_IVAL,		"CATCH" },
423    { CHEQOP,		TOKENTYPE_OPNUM,	"CHEQOP" },
424    { CHRELOP,		TOKENTYPE_OPNUM,	"CHRELOP" },
425    { COLONATTR,	TOKENTYPE_NONE,		"COLONATTR" },
426    { CONTINUE,		TOKENTYPE_NONE,		"CONTINUE" },
427    { DEFAULT,		TOKENTYPE_NONE,		"DEFAULT" },
428    { DO,		TOKENTYPE_NONE,		"DO" },
429    { DOLSHARP,		TOKENTYPE_NONE,		"DOLSHARP" },
430    { DORDOR,		TOKENTYPE_NONE,		"DORDOR" },
431    { DOTDOT,		TOKENTYPE_IVAL,		"DOTDOT" },
432    { ELSE,		TOKENTYPE_NONE,		"ELSE" },
433    { ELSIF,		TOKENTYPE_IVAL,		"ELSIF" },
434    { FOR,		TOKENTYPE_IVAL,		"FOR" },
435    { FORMAT,		TOKENTYPE_NONE,		"FORMAT" },
436    { FORMLBRACK,	TOKENTYPE_NONE,		"FORMLBRACK" },
437    { FORMRBRACK,	TOKENTYPE_NONE,		"FORMRBRACK" },
438    { FUNC,		TOKENTYPE_OPNUM,	"FUNC" },
439    { FUNC0,		TOKENTYPE_OPNUM,	"FUNC0" },
440    { FUNC0OP,		TOKENTYPE_OPVAL,	"FUNC0OP" },
441    { FUNC0SUB,		TOKENTYPE_OPVAL,	"FUNC0SUB" },
442    { FUNC1,		TOKENTYPE_OPNUM,	"FUNC1" },
443    { FUNCMETH,		TOKENTYPE_OPVAL,	"FUNCMETH" },
444    { GIVEN,		TOKENTYPE_IVAL,		"GIVEN" },
445    { HASHBRACK,	TOKENTYPE_NONE,		"HASHBRACK" },
446    { IF,		TOKENTYPE_IVAL,		"IF" },
447    { LABEL,		TOKENTYPE_OPVAL,	"LABEL" },
448    { LOCAL,		TOKENTYPE_IVAL,		"LOCAL" },
449    { LOOPEX,		TOKENTYPE_OPNUM,	"LOOPEX" },
450    { LSTOP,		TOKENTYPE_OPNUM,	"LSTOP" },
451    { LSTOPSUB,		TOKENTYPE_OPVAL,	"LSTOPSUB" },
452    { MATCHOP,		TOKENTYPE_OPNUM,	"MATCHOP" },
453    { METHOD,		TOKENTYPE_OPVAL,	"METHOD" },
454    { MULOP,		TOKENTYPE_OPNUM,	"MULOP" },
455    { MY,		TOKENTYPE_IVAL,		"MY" },
456    { NCEQOP,		TOKENTYPE_OPNUM,	"NCEQOP" },
457    { NCRELOP,		TOKENTYPE_OPNUM,	"NCRELOP" },
458    { NOAMP,		TOKENTYPE_NONE,		"NOAMP" },
459    { NOTOP,		TOKENTYPE_NONE,		"NOTOP" },
460    { OROP,		TOKENTYPE_IVAL,		"OROP" },
461    { OROR,		TOKENTYPE_NONE,		"OROR" },
462    { PACKAGE,		TOKENTYPE_NONE,		"PACKAGE" },
463    DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
464    DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
465    DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
466    DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
467    DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
468    DEBUG_TOKEN (IVAL, PERLY_COLON),
469    DEBUG_TOKEN (IVAL, PERLY_COMMA),
470    DEBUG_TOKEN (IVAL, PERLY_DOT),
471    DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
472    DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
473    DEBUG_TOKEN (IVAL, PERLY_MINUS),
474    DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN),
475    DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
476    DEBUG_TOKEN (IVAL, PERLY_PLUS),
477    DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
478    DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
479    DEBUG_TOKEN (IVAL, PERLY_SLASH),
480    DEBUG_TOKEN (IVAL, PERLY_SNAIL),
481    DEBUG_TOKEN (IVAL, PERLY_STAR),
482    DEBUG_TOKEN (IVAL, PERLY_TILDE),
483    { PLUGEXPR,		TOKENTYPE_OPVAL,	"PLUGEXPR" },
484    { PLUGSTMT,		TOKENTYPE_OPVAL,	"PLUGSTMT" },
485    { PMFUNC,		TOKENTYPE_OPVAL,	"PMFUNC" },
486    { POSTJOIN,		TOKENTYPE_NONE,		"POSTJOIN" },
487    { POSTDEC,		TOKENTYPE_NONE,		"POSTDEC" },
488    { POSTINC,		TOKENTYPE_NONE,		"POSTINC" },
489    { POWOP,		TOKENTYPE_OPNUM,	"POWOP" },
490    { PREDEC,		TOKENTYPE_NONE,		"PREDEC" },
491    { PREINC,		TOKENTYPE_NONE,		"PREINC" },
492    { PRIVATEREF,	TOKENTYPE_OPVAL,	"PRIVATEREF" },
493    { QWLIST,		TOKENTYPE_OPVAL,	"QWLIST" },
494    { REFGEN,		TOKENTYPE_NONE,		"REFGEN" },
495    { REQUIRE,		TOKENTYPE_NONE,		"REQUIRE" },
496    { SHIFTOP,		TOKENTYPE_OPNUM,	"SHIFTOP" },
497    { SIGSUB,		TOKENTYPE_NONE,		"SIGSUB" },
498    { SUB,		TOKENTYPE_NONE,		"SUB" },
499    { SUBLEXEND,	TOKENTYPE_NONE,		"SUBLEXEND" },
500    { SUBLEXSTART,	TOKENTYPE_NONE,		"SUBLEXSTART" },
501    { THING,		TOKENTYPE_OPVAL,	"THING" },
502    { TRY,		TOKENTYPE_IVAL,		"TRY" },
503    { UMINUS,		TOKENTYPE_NONE,		"UMINUS" },
504    { UNIOP,		TOKENTYPE_OPNUM,	"UNIOP" },
505    { UNIOPSUB,		TOKENTYPE_OPVAL,	"UNIOPSUB" },
506    { UNLESS,		TOKENTYPE_IVAL,		"UNLESS" },
507    { UNTIL,		TOKENTYPE_IVAL,		"UNTIL" },
508    { USE,		TOKENTYPE_IVAL,		"USE" },
509    { WHEN,		TOKENTYPE_IVAL,		"WHEN" },
510    { WHILE,		TOKENTYPE_IVAL,		"WHILE" },
511    { BAREWORD,		TOKENTYPE_OPVAL,	"BAREWORD" },
512    { YADAYADA,		TOKENTYPE_IVAL,		"YADAYADA" },
513    { 0,		TOKENTYPE_NONE,		NULL }
514};
515
516#undef DEBUG_TOKEN
517
518/* dump the returned token in rv, plus any optional arg in pl_yylval */
519
520STATIC int
521S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
522{
523    PERL_ARGS_ASSERT_TOKEREPORT;
524
525    if (DEBUG_T_TEST) {
526        const char *name = NULL;
527        enum token_type type = TOKENTYPE_NONE;
528        const struct debug_tokens *p;
529        SV* const report = newSVpvs("<== ");
530
531        for (p = debug_tokens; p->token; p++) {
532            if (p->token == (int)rv) {
533                name = p->name;
534                type = p->type;
535                break;
536            }
537        }
538        if (name)
539            Perl_sv_catpv(aTHX_ report, name);
540        else if (isGRAPH(rv))
541        {
542            Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
543            if ((char)rv == 'p')
544                sv_catpvs(report, " (pending identifier)");
545        }
546        else if (!rv)
547            sv_catpvs(report, "EOF");
548        else
549            Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
550        switch (type) {
551        case TOKENTYPE_NONE:
552            break;
553        case TOKENTYPE_IVAL:
554            Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
555            break;
556        case TOKENTYPE_OPNUM:
557            Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
558                                    PL_op_name[lvalp->ival]);
559            break;
560        case TOKENTYPE_PVAL:
561            Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
562            break;
563        case TOKENTYPE_OPVAL:
564            if (lvalp->opval) {
565                Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
566                                    PL_op_name[lvalp->opval->op_type]);
567                if (lvalp->opval->op_type == OP_CONST) {
568                    Perl_sv_catpvf(aTHX_ report, " %s",
569                        SvPEEK(cSVOPx_sv(lvalp->opval)));
570                }
571
572            }
573            else
574                sv_catpvs(report, "(opval=null)");
575            break;
576        }
577        PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
578    };
579    return (int)rv;
580}
581
582
583/* print the buffer with suitable escapes */
584
585STATIC void
586S_printbuf(pTHX_ const char *const fmt, const char *const s)
587{
588    SV* const tmp = newSVpvs("");
589
590    PERL_ARGS_ASSERT_PRINTBUF;
591
592    GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
593    PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
594    GCC_DIAG_RESTORE_STMT;
595    SvREFCNT_dec(tmp);
596}
597
598#endif
599
600/*
601 * S_ao
602 *
603 * This subroutine looks for an '=' next to the operator that has just been
604 * parsed and turns it into an ASSIGNOP if it finds one.
605 */
606
607STATIC int
608S_ao(pTHX_ int toketype)
609{
610    if (*PL_bufptr == '=') {
611        PL_bufptr++;
612
613        switch (toketype) {
614            case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
615            case OROR:   pl_yylval.ival = OP_ORASSIGN;  break;
616            case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
617        }
618
619        toketype = ASSIGNOP;
620    }
621    return REPORT(toketype);
622}
623
624/*
625 * S_no_op
626 * When Perl expects an operator and finds something else, no_op
627 * prints the warning.  It always prints "<something> found where
628 * operator expected.  It prints "Missing semicolon on previous line?"
629 * if the surprise occurs at the start of the line.  "do you need to
630 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
631 * where the compiler doesn't know if foo is a method call or a function.
632 * It prints "Missing operator before end of line" if there's nothing
633 * after the missing operator, or "... before <...>" if there is something
634 * after the missing operator.
635 *
636 * PL_bufptr is expected to point to the start of the thing that was found,
637 * and s after the next token or partial token.
638 */
639
640STATIC void
641S_no_op(pTHX_ const char *const what, char *s)
642{
643    char * const oldbp = PL_bufptr;
644    const bool is_first = (PL_oldbufptr == PL_linestart);
645
646    PERL_ARGS_ASSERT_NO_OP;
647
648    if (!s)
649        s = oldbp;
650    else
651        PL_bufptr = s;
652    yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
653    if (ckWARN_d(WARN_SYNTAX)) {
654        if (is_first)
655            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
656                    "\t(Missing semicolon on previous line?)\n");
657        else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
658                                                           PL_bufend,
659                                                           UTF))
660        {
661            const char *t;
662            for (t = PL_oldoldbufptr;
663                 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
664                 t += UTF ? UTF8SKIP(t) : 1)
665            {
666                NOOP;
667            }
668            if (t < PL_bufptr && isSPACE(*t))
669                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
670                        "\t(Do you need to predeclare %" UTF8f "?)\n",
671                      UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
672        }
673        else {
674            assert(s >= oldbp);
675            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
676                    "\t(Missing operator before %" UTF8f "?)\n",
677                     UTF8fARG(UTF, s - oldbp, oldbp));
678        }
679    }
680    PL_bufptr = oldbp;
681}
682
683/*
684 * S_missingterm
685 * Complain about missing quote/regexp/heredoc terminator.
686 * If it's called with NULL then it cauterizes the line buffer.
687 * If we're in a delimited string and the delimiter is a control
688 * character, it's reformatted into a two-char sequence like ^C.
689 * This is fatal.
690 */
691
692STATIC void
693S_missingterm(pTHX_ char *s, STRLEN len)
694{
695    char tmpbuf[UTF8_MAXBYTES + 1];
696    char q;
697    bool uni = FALSE;
698    if (s) {
699        char * const nl = (char *) my_memrchr(s, '\n', len);
700        if (nl) {
701            *nl = '\0';
702            len = nl - s;
703        }
704        uni = UTF;
705    }
706    else if (PL_multi_close < 32) {
707        *tmpbuf = '^';
708        tmpbuf[1] = (char)toCTRL(PL_multi_close);
709        tmpbuf[2] = '\0';
710        s = tmpbuf;
711        len = 2;
712    }
713    else {
714        if (! UTF && LIKELY(PL_multi_close < 256)) {
715            *tmpbuf = (char)PL_multi_close;
716            tmpbuf[1] = '\0';
717            len = 1;
718        }
719        else {
720            char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
721            *end = '\0';
722            len = end - tmpbuf;
723            uni = TRUE;
724        }
725        s = tmpbuf;
726    }
727    q = memchr(s, '"', len) ? '\'' : '"';
728    Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
729                     " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
730}
731
732#include "feature.h"
733
734/*
735 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
736 * utf16-to-utf8-reversed.
737 */
738
739#ifdef PERL_CR_FILTER
740static void
741strip_return(SV *sv)
742{
743    const char *s = SvPVX_const(sv);
744    const char * const e = s + SvCUR(sv);
745
746    PERL_ARGS_ASSERT_STRIP_RETURN;
747
748    /* outer loop optimized to do nothing if there are no CR-LFs */
749    while (s < e) {
750        if (*s++ == '\r' && *s == '\n') {
751            /* hit a CR-LF, need to copy the rest */
752            char *d = s - 1;
753            *d++ = *s++;
754            while (s < e) {
755                if (*s == '\r' && s[1] == '\n')
756                    s++;
757                *d++ = *s++;
758            }
759            SvCUR(sv) -= s - d;
760            return;
761        }
762    }
763}
764
765STATIC I32
766S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
767{
768    const I32 count = FILTER_READ(idx+1, sv, maxlen);
769    if (count > 0 && !maxlen)
770        strip_return(sv);
771    return count;
772}
773#endif
774
775/*
776=for apidoc lex_start
777
778Creates and initialises a new lexer/parser state object, supplying
779a context in which to lex and parse from a new source of Perl code.
780A pointer to the new state object is placed in L</PL_parser>.  An entry
781is made on the save stack so that upon unwinding, the new state object
782will be destroyed and the former value of L</PL_parser> will be restored.
783Nothing else need be done to clean up the parsing context.
784
785The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
786non-null, provides a string (in SV form) containing code to be parsed.
787A copy of the string is made, so subsequent modification of C<line>
788does not affect parsing.  C<rsfp>, if non-null, provides an input stream
789from which code will be read to be parsed.  If both are non-null, the
790code in C<line> comes first and must consist of complete lines of input,
791and C<rsfp> supplies the remainder of the source.
792
793The C<flags> parameter is reserved for future use.  Currently it is only
794used by perl internally, so extensions should always pass zero.
795
796=cut
797*/
798
799/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
800   can share filters with the current parser.
801   LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
802   caller, hence isn't owned by the parser, so shouldn't be closed on parser
803   destruction. This is used to handle the case of defaulting to reading the
804   script from the standard input because no filename was given on the command
805   line (without getting confused by situation where STDIN has been closed, so
806   the script handle is opened on fd 0)  */
807
808void
809Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
810{
811    const char *s = NULL;
812    yy_parser *parser, *oparser;
813
814    if (flags && flags & ~LEX_START_FLAGS)
815        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
816
817    /* create and initialise a parser */
818
819    Newxz(parser, 1, yy_parser);
820    parser->old_parser = oparser = PL_parser;
821    PL_parser = parser;
822
823    parser->stack = NULL;
824    parser->stack_max1 = NULL;
825    parser->ps = NULL;
826
827    /* on scope exit, free this parser and restore any outer one */
828    SAVEPARSER(parser);
829    parser->saved_curcop = PL_curcop;
830
831    /* initialise lexer state */
832
833    parser->nexttoke = 0;
834    parser->error_count = oparser ? oparser->error_count : 0;
835    parser->copline = parser->preambling = NOLINE;
836    parser->lex_state = LEX_NORMAL;
837    parser->expect = XSTATE;
838    parser->rsfp = rsfp;
839    parser->recheck_utf8_validity = TRUE;
840    parser->rsfp_filters =
841      !(flags & LEX_START_SAME_FILTER) || !oparser
842        ? NULL
843        : MUTABLE_AV(SvREFCNT_inc(
844            oparser->rsfp_filters
845             ? oparser->rsfp_filters
846             : (oparser->rsfp_filters = newAV())
847          ));
848
849    Newx(parser->lex_brackstack, 120, char);
850    Newx(parser->lex_casestack, 12, char);
851    *parser->lex_casestack = '\0';
852    Newxz(parser->lex_shared, 1, LEXSHARED);
853
854    if (line) {
855        STRLEN len;
856        const U8* first_bad_char_loc;
857
858        s = SvPV_const(line, len);
859
860        if (   SvUTF8(line)
861            && UNLIKELY(! is_utf8_string_loc((U8 *) s,
862                                             SvCUR(line),
863                                             &first_bad_char_loc)))
864        {
865            _force_out_malformed_utf8_message(first_bad_char_loc,
866                                              (U8 *) s + SvCUR(line),
867                                              0,
868                                              1 /* 1 means die */ );
869            NOT_REACHED; /* NOTREACHED */
870        }
871
872        parser->linestr = flags & LEX_START_COPIED
873                            ? SvREFCNT_inc_simple_NN(line)
874                            : newSVpvn_flags(s, len, SvUTF8(line));
875        if (!rsfp)
876            sv_catpvs(parser->linestr, "\n;");
877    } else {
878        parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
879    }
880
881    parser->oldoldbufptr =
882        parser->oldbufptr =
883        parser->bufptr =
884        parser->linestart = SvPVX(parser->linestr);
885    parser->bufend = parser->bufptr + SvCUR(parser->linestr);
886    parser->last_lop = parser->last_uni = NULL;
887
888    STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
889                                                        |LEX_DONT_CLOSE_RSFP));
890    parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
891                                                        |LEX_DONT_CLOSE_RSFP));
892
893    parser->in_pod = parser->filtered = 0;
894}
895
896
897/* delete a parser object */
898
899void
900Perl_parser_free(pTHX_  const yy_parser *parser)
901{
902    PERL_ARGS_ASSERT_PARSER_FREE;
903
904    PL_curcop = parser->saved_curcop;
905    SvREFCNT_dec(parser->linestr);
906
907    if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
908        PerlIO_clearerr(parser->rsfp);
909    else if (parser->rsfp && (!parser->old_parser
910          || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
911        PerlIO_close(parser->rsfp);
912    SvREFCNT_dec(parser->rsfp_filters);
913    SvREFCNT_dec(parser->lex_stuff);
914    SvREFCNT_dec(parser->lex_sub_repl);
915
916    Safefree(parser->lex_brackstack);
917    Safefree(parser->lex_casestack);
918    Safefree(parser->lex_shared);
919    PL_parser = parser->old_parser;
920    Safefree(parser);
921}
922
923void
924Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
925{
926    I32 nexttoke = parser->nexttoke;
927    PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
928    while (nexttoke--) {
929        if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
930         && parser->nextval[nexttoke].opval
931         && parser->nextval[nexttoke].opval->op_slabbed
932         && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
933            op_free(parser->nextval[nexttoke].opval);
934            parser->nextval[nexttoke].opval = NULL;
935        }
936    }
937}
938
939
940/*
941=for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
942
943Buffer scalar containing the chunk currently under consideration of the
944text currently being lexed.  This is always a plain string scalar (for
945which C<SvPOK> is true).  It is not intended to be used as a scalar by
946normal scalar means; instead refer to the buffer directly by the pointer
947variables described below.
948
949The lexer maintains various C<char*> pointers to things in the
950C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
951reallocated, all of these pointers must be updated.  Don't attempt to
952do this manually, but rather use L</lex_grow_linestr> if you need to
953reallocate the buffer.
954
955The content of the text chunk in the buffer is commonly exactly one
956complete line of input, up to and including a newline terminator,
957but there are situations where it is otherwise.  The octets of the
958buffer may be intended to be interpreted as either UTF-8 or Latin-1.
959The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
960flag on this scalar, which may disagree with it.
961
962For direct examination of the buffer, the variable
963L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
964lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
965of these pointers is usually preferable to examination of the scalar
966through normal scalar means.
967
968=for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
969
970Direct pointer to the end of the chunk of text currently being lexed, the
971end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
972+ SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
973always located at the end of the buffer, and does not count as part of
974the buffer's contents.
975
976=for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
977
978Points to the current position of lexing inside the lexer buffer.
979Characters around this point may be freely examined, within
980the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
981L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
982interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
983
984Lexing code (whether in the Perl core or not) moves this pointer past
985the characters that it consumes.  It is also expected to perform some
986bookkeeping whenever a newline character is consumed.  This movement
987can be more conveniently performed by the function L</lex_read_to>,
988which handles newlines appropriately.
989
990Interpretation of the buffer's octets can be abstracted out by
991using the slightly higher-level functions L</lex_peek_unichar> and
992L</lex_read_unichar>.
993
994=for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
995
996Points to the start of the current line inside the lexer buffer.
997This is useful for indicating at which column an error occurred, and
998not much else.  This must be updated by any lexing code that consumes
999a newline; the function L</lex_read_to> handles this detail.
1000
1001=cut
1002*/
1003
1004/*
1005=for apidoc lex_bufutf8
1006
1007Indicates whether the octets in the lexer buffer
1008(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1009of Unicode characters.  If not, they should be interpreted as Latin-1
1010characters.  This is analogous to the C<SvUTF8> flag for scalars.
1011
1012In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1013contains valid UTF-8.  Lexing code must be robust in the face of invalid
1014encoding.
1015
1016The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1017is significant, but not the whole story regarding the input character
1018encoding.  Normally, when a file is being read, the scalar contains octets
1019and its C<SvUTF8> flag is off, but the octets should be interpreted as
1020UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
1021however, the scalar may have the C<SvUTF8> flag on, and in this case its
1022octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1023is in effect.  This logic may change in the future; use this function
1024instead of implementing the logic yourself.
1025
1026=cut
1027*/
1028
1029bool
1030Perl_lex_bufutf8(pTHX)
1031{
1032    return UTF;
1033}
1034
1035/*
1036=for apidoc lex_grow_linestr
1037
1038Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1039at least C<len> octets (including terminating C<NUL>).  Returns a
1040pointer to the reallocated buffer.  This is necessary before making
1041any direct modification of the buffer that would increase its length.
1042L</lex_stuff_pvn> provides a more convenient way to insert text into
1043the buffer.
1044
1045Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1046this function updates all of the lexer's variables that point directly
1047into the buffer.
1048
1049=cut
1050*/
1051
1052char *
1053Perl_lex_grow_linestr(pTHX_ STRLEN len)
1054{
1055    SV *linestr;
1056    char *buf;
1057    STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1058    STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1059    bool current;
1060
1061    linestr = PL_parser->linestr;
1062    buf = SvPVX(linestr);
1063    if (len <= SvLEN(linestr))
1064        return buf;
1065
1066    /* Is the lex_shared linestr SV the same as the current linestr SV?
1067     * Only in this case does re_eval_start need adjusting, since it
1068     * points within lex_shared->ls_linestr's buffer */
1069    current = (   !PL_parser->lex_shared->ls_linestr
1070               || linestr == PL_parser->lex_shared->ls_linestr);
1071
1072    bufend_pos = PL_parser->bufend - buf;
1073    bufptr_pos = PL_parser->bufptr - buf;
1074    oldbufptr_pos = PL_parser->oldbufptr - buf;
1075    oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1076    linestart_pos = PL_parser->linestart - buf;
1077    last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1078    last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1079    re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1080                            PL_parser->lex_shared->re_eval_start - buf : 0;
1081
1082    buf = sv_grow(linestr, len);
1083
1084    PL_parser->bufend = buf + bufend_pos;
1085    PL_parser->bufptr = buf + bufptr_pos;
1086    PL_parser->oldbufptr = buf + oldbufptr_pos;
1087    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1088    PL_parser->linestart = buf + linestart_pos;
1089    if (PL_parser->last_uni)
1090        PL_parser->last_uni = buf + last_uni_pos;
1091    if (PL_parser->last_lop)
1092        PL_parser->last_lop = buf + last_lop_pos;
1093    if (current && PL_parser->lex_shared->re_eval_start)
1094        PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
1095    return buf;
1096}
1097
1098/*
1099=for apidoc lex_stuff_pvn
1100
1101Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1102immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1103reallocating the buffer if necessary.  This means that lexing code that
1104runs later will see the characters as if they had appeared in the input.
1105It is not recommended to do this as part of normal parsing, and most
1106uses of this facility run the risk of the inserted characters being
1107interpreted in an unintended manner.
1108
1109The string to be inserted is represented by C<len> octets starting
1110at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1111according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1112The characters are recoded for the lexer buffer, according to how the
1113buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1114to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1115function is more convenient.
1116
1117=for apidoc Amnh||LEX_STUFF_UTF8
1118
1119=cut
1120*/
1121
1122void
1123Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1124{
1125    char *bufptr;
1126    PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1127    if (flags & ~(LEX_STUFF_UTF8))
1128        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1129    if (UTF) {
1130        if (flags & LEX_STUFF_UTF8) {
1131            goto plain_copy;
1132        } else {
1133            STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1134                                                       (U8 *) pv + len);
1135            const char *p, *e = pv+len;;
1136            if (!highhalf)
1137                goto plain_copy;
1138            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1139            bufptr = PL_parser->bufptr;
1140            Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1141            SvCUR_set(PL_parser->linestr,
1142                SvCUR(PL_parser->linestr) + len+highhalf);
1143            PL_parser->bufend += len+highhalf;
1144            for (p = pv; p != e; p++) {
1145                append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1146            }
1147        }
1148    } else {
1149        if (flags & LEX_STUFF_UTF8) {
1150            STRLEN highhalf = 0;
1151            const char *p, *e = pv+len;
1152            for (p = pv; p != e; p++) {
1153                U8 c = (U8)*p;
1154                if (UTF8_IS_ABOVE_LATIN1(c)) {
1155                    Perl_croak(aTHX_ "Lexing code attempted to stuff "
1156                                "non-Latin-1 character into Latin-1 input");
1157                } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1158                    p++;
1159                    highhalf++;
1160                } else assert(UTF8_IS_INVARIANT(c));
1161            }
1162            if (!highhalf)
1163                goto plain_copy;
1164            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1165            bufptr = PL_parser->bufptr;
1166            Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1167            SvCUR_set(PL_parser->linestr,
1168                SvCUR(PL_parser->linestr) + len-highhalf);
1169            PL_parser->bufend += len-highhalf;
1170            p = pv;
1171            while (p < e) {
1172                if (UTF8_IS_INVARIANT(*p)) {
1173                    *bufptr++ = *p;
1174                    p++;
1175                }
1176                else {
1177                    assert(p < e -1 );
1178                    *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1179                    p += 2;
1180                }
1181            }
1182        } else {
1183          plain_copy:
1184            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1185            bufptr = PL_parser->bufptr;
1186            Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1187            SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1188            PL_parser->bufend += len;
1189            Copy(pv, bufptr, len, char);
1190        }
1191    }
1192}
1193
1194/*
1195=for apidoc lex_stuff_pv
1196
1197Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1198immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1199reallocating the buffer if necessary.  This means that lexing code that
1200runs later will see the characters as if they had appeared in the input.
1201It is not recommended to do this as part of normal parsing, and most
1202uses of this facility run the risk of the inserted characters being
1203interpreted in an unintended manner.
1204
1205The string to be inserted is represented by octets starting at C<pv>
1206and continuing to the first nul.  These octets are interpreted as either
1207UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1208in C<flags>.  The characters are recoded for the lexer buffer, according
1209to how the buffer is currently being interpreted (L</lex_bufutf8>).
1210If it is not convenient to nul-terminate a string to be inserted, the
1211L</lex_stuff_pvn> function is more appropriate.
1212
1213=cut
1214*/
1215
1216void
1217Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1218{
1219    PERL_ARGS_ASSERT_LEX_STUFF_PV;
1220    lex_stuff_pvn(pv, strlen(pv), flags);
1221}
1222
1223/*
1224=for apidoc lex_stuff_sv
1225
1226Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1227immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1228reallocating the buffer if necessary.  This means that lexing code that
1229runs later will see the characters as if they had appeared in the input.
1230It is not recommended to do this as part of normal parsing, and most
1231uses of this facility run the risk of the inserted characters being
1232interpreted in an unintended manner.
1233
1234The string to be inserted is the string value of C<sv>.  The characters
1235are recoded for the lexer buffer, according to how the buffer is currently
1236being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1237not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1238need to construct a scalar.
1239
1240=cut
1241*/
1242
1243void
1244Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1245{
1246    char *pv;
1247    STRLEN len;
1248    PERL_ARGS_ASSERT_LEX_STUFF_SV;
1249    if (flags)
1250        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1251    pv = SvPV(sv, len);
1252    lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1253}
1254
1255/*
1256=for apidoc lex_unstuff
1257
1258Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1259C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1260This hides the discarded text from any lexing code that runs later,
1261as if the text had never appeared.
1262
1263This is not the normal way to consume lexed text.  For that, use
1264L</lex_read_to>.
1265
1266=cut
1267*/
1268
1269void
1270Perl_lex_unstuff(pTHX_ char *ptr)
1271{
1272    char *buf, *bufend;
1273    STRLEN unstuff_len;
1274    PERL_ARGS_ASSERT_LEX_UNSTUFF;
1275    buf = PL_parser->bufptr;
1276    if (ptr < buf)
1277        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1278    if (ptr == buf)
1279        return;
1280    bufend = PL_parser->bufend;
1281    if (ptr > bufend)
1282        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1283    unstuff_len = ptr - buf;
1284    Move(ptr, buf, bufend+1-ptr, char);
1285    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1286    PL_parser->bufend = bufend - unstuff_len;
1287}
1288
1289/*
1290=for apidoc lex_read_to
1291
1292Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1293to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1294performing the correct bookkeeping whenever a newline character is passed.
1295This is the normal way to consume lexed text.
1296
1297Interpretation of the buffer's octets can be abstracted out by
1298using the slightly higher-level functions L</lex_peek_unichar> and
1299L</lex_read_unichar>.
1300
1301=cut
1302*/
1303
1304void
1305Perl_lex_read_to(pTHX_ char *ptr)
1306{
1307    char *s;
1308    PERL_ARGS_ASSERT_LEX_READ_TO;
1309    s = PL_parser->bufptr;
1310    if (ptr < s || ptr > PL_parser->bufend)
1311        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1312    for (; s != ptr; s++)
1313        if (*s == '\n') {
1314            COPLINE_INC_WITH_HERELINES;
1315            PL_parser->linestart = s+1;
1316        }
1317    PL_parser->bufptr = ptr;
1318}
1319
1320/*
1321=for apidoc lex_discard_to
1322
1323Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1324up to C<ptr>.  The remaining content of the buffer will be moved, and
1325all pointers into the buffer updated appropriately.  C<ptr> must not
1326be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1327it is not permitted to discard text that has yet to be lexed.
1328
1329Normally it is not necessarily to do this directly, because it suffices to
1330use the implicit discarding behaviour of L</lex_next_chunk> and things
1331based on it.  However, if a token stretches across multiple lines,
1332and the lexing code has kept multiple lines of text in the buffer for
1333that purpose, then after completion of the token it would be wise to
1334explicitly discard the now-unneeded earlier lines, to avoid future
1335multi-line tokens growing the buffer without bound.
1336
1337=cut
1338*/
1339
1340void
1341Perl_lex_discard_to(pTHX_ char *ptr)
1342{
1343    char *buf;
1344    STRLEN discard_len;
1345    PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1346    buf = SvPVX(PL_parser->linestr);
1347    if (ptr < buf)
1348        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1349    if (ptr == buf)
1350        return;
1351    if (ptr > PL_parser->bufptr)
1352        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1353    discard_len = ptr - buf;
1354    if (PL_parser->oldbufptr < ptr)
1355        PL_parser->oldbufptr = ptr;
1356    if (PL_parser->oldoldbufptr < ptr)
1357        PL_parser->oldoldbufptr = ptr;
1358    if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1359        PL_parser->last_uni = NULL;
1360    if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1361        PL_parser->last_lop = NULL;
1362    Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1363    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1364    PL_parser->bufend -= discard_len;
1365    PL_parser->bufptr -= discard_len;
1366    PL_parser->oldbufptr -= discard_len;
1367    PL_parser->oldoldbufptr -= discard_len;
1368    if (PL_parser->last_uni)
1369        PL_parser->last_uni -= discard_len;
1370    if (PL_parser->last_lop)
1371        PL_parser->last_lop -= discard_len;
1372}
1373
1374void
1375Perl_notify_parser_that_changed_to_utf8(pTHX)
1376{
1377    /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1378     * off to on.  At compile time, this has the effect of entering a 'use
1379     * utf8' section.  This means that any input was not previously checked for
1380     * UTF-8 (because it was off), but now we do need to check it, or our
1381     * assumptions about the input being sane could be wrong, and we could
1382     * segfault.  This routine just sets a flag so that the next time we look
1383     * at the input we do the well-formed UTF-8 check.  If we aren't in the
1384     * proper phase, there may not be a parser object, but if there is, setting
1385     * the flag is harmless */
1386
1387    if (PL_parser) {
1388        PL_parser->recheck_utf8_validity = TRUE;
1389    }
1390}
1391
1392/*
1393=for apidoc lex_next_chunk
1394
1395Reads in the next chunk of text to be lexed, appending it to
1396L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1397looked to the end of the current chunk and wants to know more.  It is
1398usual, but not necessary, for lexing to have consumed the entirety of
1399the current chunk at this time.
1400
1401If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1402chunk (i.e., the current chunk has been entirely consumed), normally the
1403current chunk will be discarded at the same time that the new chunk is
1404read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1405will not be discarded.  If the current chunk has not been entirely
1406consumed, then it will not be discarded regardless of the flag.
1407
1408Returns true if some new text was added to the buffer, or false if the
1409buffer has reached the end of the input text.
1410
1411=for apidoc Amnh||LEX_KEEP_PREVIOUS
1412
1413=cut
1414*/
1415
1416#define LEX_FAKE_EOF 0x80000000
1417#define LEX_NO_TERM  0x40000000 /* here-doc */
1418
1419bool
1420Perl_lex_next_chunk(pTHX_ U32 flags)
1421{
1422    SV *linestr;
1423    char *buf;
1424    STRLEN old_bufend_pos, new_bufend_pos;
1425    STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1426    STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1427    bool got_some_for_debugger = 0;
1428    bool got_some;
1429
1430    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1431        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1432    if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1433        return FALSE;
1434    linestr = PL_parser->linestr;
1435    buf = SvPVX(linestr);
1436    if (!(flags & LEX_KEEP_PREVIOUS)
1437          && PL_parser->bufptr == PL_parser->bufend)
1438    {
1439        old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1440        linestart_pos = 0;
1441        if (PL_parser->last_uni != PL_parser->bufend)
1442            PL_parser->last_uni = NULL;
1443        if (PL_parser->last_lop != PL_parser->bufend)
1444            PL_parser->last_lop = NULL;
1445        last_uni_pos = last_lop_pos = 0;
1446        *buf = 0;
1447        SvCUR_set(linestr, 0);
1448    } else {
1449        old_bufend_pos = PL_parser->bufend - buf;
1450        bufptr_pos = PL_parser->bufptr - buf;
1451        oldbufptr_pos = PL_parser->oldbufptr - buf;
1452        oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1453        linestart_pos = PL_parser->linestart - buf;
1454        last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1455        last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1456    }
1457    if (flags & LEX_FAKE_EOF) {
1458        goto eof;
1459    } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1460        got_some = 0;
1461    } else if (filter_gets(linestr, old_bufend_pos)) {
1462        got_some = 1;
1463        got_some_for_debugger = 1;
1464    } else if (flags & LEX_NO_TERM) {
1465        got_some = 0;
1466    } else {
1467        if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1468            SvPVCLEAR(linestr);
1469        eof:
1470        /* End of real input.  Close filehandle (unless it was STDIN),
1471         * then add implicit termination.
1472         */
1473        if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1474            PerlIO_clearerr(PL_parser->rsfp);
1475        else if (PL_parser->rsfp)
1476            (void)PerlIO_close(PL_parser->rsfp);
1477        PL_parser->rsfp = NULL;
1478        PL_parser->in_pod = PL_parser->filtered = 0;
1479        if (!PL_in_eval && PL_minus_p) {
1480            sv_catpvs(linestr,
1481                /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1482            PL_minus_n = PL_minus_p = 0;
1483        } else if (!PL_in_eval && PL_minus_n) {
1484            sv_catpvs(linestr, /*{*/";}");
1485            PL_minus_n = 0;
1486        } else
1487            sv_catpvs(linestr, ";");
1488        got_some = 1;
1489    }
1490    buf = SvPVX(linestr);
1491    new_bufend_pos = SvCUR(linestr);
1492    PL_parser->bufend = buf + new_bufend_pos;
1493    PL_parser->bufptr = buf + bufptr_pos;
1494
1495    if (UTF) {
1496        const U8* first_bad_char_loc;
1497        if (UNLIKELY(! is_utf8_string_loc(
1498                            (U8 *) PL_parser->bufptr,
1499                                   PL_parser->bufend - PL_parser->bufptr,
1500                                   &first_bad_char_loc)))
1501        {
1502            _force_out_malformed_utf8_message(first_bad_char_loc,
1503                                              (U8 *) PL_parser->bufend,
1504                                              0,
1505                                              1 /* 1 means die */ );
1506            NOT_REACHED; /* NOTREACHED */
1507        }
1508    }
1509
1510    PL_parser->oldbufptr = buf + oldbufptr_pos;
1511    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1512    PL_parser->linestart = buf + linestart_pos;
1513    if (PL_parser->last_uni)
1514        PL_parser->last_uni = buf + last_uni_pos;
1515    if (PL_parser->last_lop)
1516        PL_parser->last_lop = buf + last_lop_pos;
1517    if (PL_parser->preambling != NOLINE) {
1518        CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1519        PL_parser->preambling = NOLINE;
1520    }
1521    if (   got_some_for_debugger
1522        && PERLDB_LINE_OR_SAVESRC
1523        && PL_curstash != PL_debstash)
1524    {
1525        /* debugger active and we're not compiling the debugger code,
1526         * so store the line into the debugger's array of lines
1527         */
1528        update_debugger_info(NULL, buf+old_bufend_pos,
1529            new_bufend_pos-old_bufend_pos);
1530    }
1531    return got_some;
1532}
1533
1534/*
1535=for apidoc lex_peek_unichar
1536
1537Looks ahead one (Unicode) character in the text currently being lexed.
1538Returns the codepoint (unsigned integer value) of the next character,
1539or -1 if lexing has reached the end of the input text.  To consume the
1540peeked character, use L</lex_read_unichar>.
1541
1542If the next character is in (or extends into) the next chunk of input
1543text, the next chunk will be read in.  Normally the current chunk will be
1544discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1545bit set, then the current chunk will not be discarded.
1546
1547If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1548is encountered, an exception is generated.
1549
1550=cut
1551*/
1552
1553I32
1554Perl_lex_peek_unichar(pTHX_ U32 flags)
1555{
1556    char *s, *bufend;
1557    if (flags & ~(LEX_KEEP_PREVIOUS))
1558        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1559    s = PL_parser->bufptr;
1560    bufend = PL_parser->bufend;
1561    if (UTF) {
1562        U8 head;
1563        I32 unichar;
1564        STRLEN len, retlen;
1565        if (s == bufend) {
1566            if (!lex_next_chunk(flags))
1567                return -1;
1568            s = PL_parser->bufptr;
1569            bufend = PL_parser->bufend;
1570        }
1571        head = (U8)*s;
1572        if (UTF8_IS_INVARIANT(head))
1573            return head;
1574        if (UTF8_IS_START(head)) {
1575            len = UTF8SKIP(&head);
1576            while ((STRLEN)(bufend-s) < len) {
1577                if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1578                    break;
1579                s = PL_parser->bufptr;
1580                bufend = PL_parser->bufend;
1581            }
1582        }
1583        unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1584        if (retlen == (STRLEN)-1) {
1585            _force_out_malformed_utf8_message((U8 *) s,
1586                                              (U8 *) bufend,
1587                                              0,
1588                                              1 /* 1 means die */ );
1589            NOT_REACHED; /* NOTREACHED */
1590        }
1591        return unichar;
1592    } else {
1593        if (s == bufend) {
1594            if (!lex_next_chunk(flags))
1595                return -1;
1596            s = PL_parser->bufptr;
1597        }
1598        return (U8)*s;
1599    }
1600}
1601
1602/*
1603=for apidoc lex_read_unichar
1604
1605Reads the next (Unicode) character in the text currently being lexed.
1606Returns the codepoint (unsigned integer value) of the character read,
1607and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1608if lexing has reached the end of the input text.  To non-destructively
1609examine the next character, use L</lex_peek_unichar> instead.
1610
1611If the next character is in (or extends into) the next chunk of input
1612text, the next chunk will be read in.  Normally the current chunk will be
1613discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1614bit set, then the current chunk will not be discarded.
1615
1616If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1617is encountered, an exception is generated.
1618
1619=cut
1620*/
1621
1622I32
1623Perl_lex_read_unichar(pTHX_ U32 flags)
1624{
1625    I32 c;
1626    if (flags & ~(LEX_KEEP_PREVIOUS))
1627        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1628    c = lex_peek_unichar(flags);
1629    if (c != -1) {
1630        if (c == '\n')
1631            COPLINE_INC_WITH_HERELINES;
1632        if (UTF)
1633            PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1634        else
1635            ++(PL_parser->bufptr);
1636    }
1637    return c;
1638}
1639
1640/*
1641=for apidoc lex_read_space
1642
1643Reads optional spaces, in Perl style, in the text currently being
1644lexed.  The spaces may include ordinary whitespace characters and
1645Perl-style comments.  C<#line> directives are processed if encountered.
1646L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1647at a non-space character (or the end of the input text).
1648
1649If spaces extend into the next chunk of input text, the next chunk will
1650be read in.  Normally the current chunk will be discarded at the same
1651time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1652chunk will not be discarded.
1653
1654=cut
1655*/
1656
1657#define LEX_NO_INCLINE    0x40000000
1658#define LEX_NO_NEXT_CHUNK 0x80000000
1659
1660void
1661Perl_lex_read_space(pTHX_ U32 flags)
1662{
1663    char *s, *bufend;
1664    const bool can_incline = !(flags & LEX_NO_INCLINE);
1665    bool need_incline = 0;
1666    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1667        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1668    s = PL_parser->bufptr;
1669    bufend = PL_parser->bufend;
1670    while (1) {
1671        char c = *s;
1672        if (c == '#') {
1673            do {
1674                c = *++s;
1675            } while (!(c == '\n' || (c == 0 && s == bufend)));
1676        } else if (c == '\n') {
1677            s++;
1678            if (can_incline) {
1679                PL_parser->linestart = s;
1680                if (s == bufend)
1681                    need_incline = 1;
1682                else
1683                    incline(s, bufend);
1684            }
1685        } else if (isSPACE(c)) {
1686            s++;
1687        } else if (c == 0 && s == bufend) {
1688            bool got_more;
1689            line_t l;
1690            if (flags & LEX_NO_NEXT_CHUNK)
1691                break;
1692            PL_parser->bufptr = s;
1693            l = CopLINE(PL_curcop);
1694            CopLINE(PL_curcop) += PL_parser->herelines + 1;
1695            got_more = lex_next_chunk(flags);
1696            CopLINE_set(PL_curcop, l);
1697            s = PL_parser->bufptr;
1698            bufend = PL_parser->bufend;
1699            if (!got_more)
1700                break;
1701            if (can_incline && need_incline && PL_parser->rsfp) {
1702                incline(s, bufend);
1703                need_incline = 0;
1704            }
1705        } else if (!c) {
1706            s++;
1707        } else {
1708            break;
1709        }
1710    }
1711    PL_parser->bufptr = s;
1712}
1713
1714/*
1715
1716=for apidoc validate_proto
1717
1718This function performs syntax checking on a prototype, C<proto>.
1719If C<warn> is true, any illegal characters or mismatched brackets
1720will trigger illegalproto warnings, declaring that they were
1721detected in the prototype for C<name>.
1722
1723The return value is C<true> if this is a valid prototype, and
1724C<false> if it is not, regardless of whether C<warn> was C<true> or
1725C<false>.
1726
1727Note that C<NULL> is a valid C<proto> and will always return C<true>.
1728
1729=cut
1730
1731 */
1732
1733bool
1734Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1735{
1736    STRLEN len, origlen;
1737    char *p;
1738    bool bad_proto = FALSE;
1739    bool in_brackets = FALSE;
1740    bool after_slash = FALSE;
1741    char greedy_proto = ' ';
1742    bool proto_after_greedy_proto = FALSE;
1743    bool must_be_last = FALSE;
1744    bool underscore = FALSE;
1745    bool bad_proto_after_underscore = FALSE;
1746
1747    PERL_ARGS_ASSERT_VALIDATE_PROTO;
1748
1749    if (!proto)
1750        return TRUE;
1751
1752    p = SvPV(proto, len);
1753    origlen = len;
1754    for (; len--; p++) {
1755        if (!isSPACE(*p)) {
1756            if (must_be_last)
1757                proto_after_greedy_proto = TRUE;
1758            if (underscore) {
1759                if (!memCHRs(";@%", *p))
1760                    bad_proto_after_underscore = TRUE;
1761                underscore = FALSE;
1762            }
1763            if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1764                bad_proto = TRUE;
1765            }
1766            else {
1767                if (*p == '[')
1768                    in_brackets = TRUE;
1769                else if (*p == ']')
1770                    in_brackets = FALSE;
1771                else if ((*p == '@' || *p == '%')
1772                         && !after_slash
1773                         && !in_brackets )
1774                {
1775                    must_be_last = TRUE;
1776                    greedy_proto = *p;
1777                }
1778                else if (*p == '_')
1779                    underscore = TRUE;
1780            }
1781            if (*p == '\\')
1782                after_slash = TRUE;
1783            else
1784                after_slash = FALSE;
1785        }
1786    }
1787
1788    if (warn) {
1789        SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1790        p -= origlen;
1791        p = SvUTF8(proto)
1792            ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1793                             origlen, UNI_DISPLAY_ISPRINT)
1794            : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1795
1796        if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1797            SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1798            sv_catpvs(name2, "::");
1799            sv_catsv(name2, (SV *)name);
1800            name = name2;
1801        }
1802
1803        if (proto_after_greedy_proto)
1804            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1805                        "Prototype after '%c' for %" SVf " : %s",
1806                        greedy_proto, SVfARG(name), p);
1807        if (in_brackets)
1808            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1809                        "Missing ']' in prototype for %" SVf " : %s",
1810                        SVfARG(name), p);
1811        if (bad_proto)
1812            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1813                        "Illegal character in prototype for %" SVf " : %s",
1814                        SVfARG(name), p);
1815        if (bad_proto_after_underscore)
1816            Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1817                        "Illegal character after '_' in prototype for %" SVf " : %s",
1818                        SVfARG(name), p);
1819    }
1820
1821    return (! (proto_after_greedy_proto || bad_proto) );
1822}
1823
1824/*
1825 * S_incline
1826 * This subroutine has nothing to do with tilting, whether at windmills
1827 * or pinball tables.  Its name is short for "increment line".  It
1828 * increments the current line number in CopLINE(PL_curcop) and checks
1829 * to see whether the line starts with a comment of the form
1830 *    # line 500 "foo.pm"
1831 * If so, it sets the current line number and file to the values in the comment.
1832 */
1833
1834STATIC void
1835S_incline(pTHX_ const char *s, const char *end)
1836{
1837    const char *t;
1838    const char *n;
1839    const char *e;
1840    line_t line_num;
1841    UV uv;
1842
1843    PERL_ARGS_ASSERT_INCLINE;
1844
1845    assert(end >= s);
1846
1847    COPLINE_INC_WITH_HERELINES;
1848    if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1849     && s+1 == PL_bufend && *s == ';') {
1850        /* fake newline in string eval */
1851        CopLINE_dec(PL_curcop);
1852        return;
1853    }
1854    if (*s++ != '#')
1855        return;
1856    while (SPACE_OR_TAB(*s))
1857        s++;
1858    if (memBEGINs(s, (STRLEN) (end - s), "line"))
1859        s += sizeof("line") - 1;
1860    else
1861        return;
1862    if (SPACE_OR_TAB(*s))
1863        s++;
1864    else
1865        return;
1866    while (SPACE_OR_TAB(*s))
1867        s++;
1868    if (!isDIGIT(*s))
1869        return;
1870
1871    n = s;
1872    while (isDIGIT(*s))
1873        s++;
1874    if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1875        return;
1876    while (SPACE_OR_TAB(*s))
1877        s++;
1878    if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1879        s++;
1880        e = t + 1;
1881    }
1882    else {
1883        t = s;
1884        while (*t && !isSPACE(*t))
1885            t++;
1886        e = t;
1887    }
1888    while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1889        e++;
1890    if (*e != '\n' && *e != '\0')
1891        return;		/* false alarm */
1892
1893    if (!grok_atoUV(n, &uv, &e))
1894        return;
1895    line_num = ((line_t)uv) - 1;
1896
1897    if (t - s > 0) {
1898        const STRLEN len = t - s;
1899
1900        if (!PL_rsfp && !PL_parser->filtered) {
1901            /* must copy *{"::_<(eval N)[oldfilename:L]"}
1902             * to *{"::_<newfilename"} */
1903            /* However, the long form of evals is only turned on by the
1904               debugger - usually they're "(eval %lu)" */
1905            GV * const cfgv = CopFILEGV(PL_curcop);
1906            if (cfgv) {
1907                char smallbuf[128];
1908                STRLEN tmplen2 = len;
1909                char *tmpbuf2;
1910                GV *gv2;
1911
1912                if (tmplen2 + 2 <= sizeof smallbuf)
1913                    tmpbuf2 = smallbuf;
1914                else
1915                    Newx(tmpbuf2, tmplen2 + 2, char);
1916
1917                tmpbuf2[0] = '_';
1918                tmpbuf2[1] = '<';
1919
1920                memcpy(tmpbuf2 + 2, s, tmplen2);
1921                tmplen2 += 2;
1922
1923                gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1924                if (!isGV(gv2)) {
1925                    gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1926                    /* adjust ${"::_<newfilename"} to store the new file name */
1927                    GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1928                    /* The line number may differ. If that is the case,
1929                       alias the saved lines that are in the array.
1930                       Otherwise alias the whole array. */
1931                    if (CopLINE(PL_curcop) == line_num) {
1932                        GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1933                        GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1934                    }
1935                    else if (GvAV(cfgv)) {
1936                        AV * const av = GvAV(cfgv);
1937                        const line_t start = CopLINE(PL_curcop)+1;
1938                        SSize_t items = AvFILLp(av) - start;
1939                        if (items > 0) {
1940                            AV * const av2 = GvAVn(gv2);
1941                            SV **svp = AvARRAY(av) + start;
1942                            Size_t l = line_num+1;
1943                            while (items-- && l < SSize_t_MAX && l == (line_t)l)
1944                                av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1945                        }
1946                    }
1947                }
1948
1949                if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1950            }
1951        }
1952        CopFILE_free(PL_curcop);
1953        CopFILE_setn(PL_curcop, s, len);
1954    }
1955    CopLINE_set(PL_curcop, line_num);
1956}
1957
1958STATIC void
1959S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1960{
1961    AV *av = CopFILEAVx(PL_curcop);
1962    if (av) {
1963        SV * sv;
1964        if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1965        else {
1966            sv = *av_fetch(av, 0, 1);
1967            SvUPGRADE(sv, SVt_PVMG);
1968        }
1969        if (!SvPOK(sv)) SvPVCLEAR(sv);
1970        if (orig_sv)
1971            sv_catsv(sv, orig_sv);
1972        else
1973            sv_catpvn(sv, buf, len);
1974        if (!SvIOK(sv)) {
1975            (void)SvIOK_on(sv);
1976            SvIV_set(sv, 0);
1977        }
1978        if (PL_parser->preambling == NOLINE)
1979            av_store(av, CopLINE(PL_curcop), sv);
1980    }
1981}
1982
1983/*
1984 * skipspace
1985 * Called to gobble the appropriate amount and type of whitespace.
1986 * Skips comments as well.
1987 * Returns the next character after the whitespace that is skipped.
1988 *
1989 * peekspace
1990 * Same thing, but look ahead without incrementing line numbers or
1991 * adjusting PL_linestart.
1992 */
1993
1994#define skipspace(s) skipspace_flags(s, 0)
1995#define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1996
1997char *
1998Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1999{
2000    PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2001    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2002        while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2003            s++;
2004    } else {
2005        STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2006        PL_bufptr = s;
2007        lex_read_space(flags | LEX_KEEP_PREVIOUS |
2008                (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2009                    LEX_NO_NEXT_CHUNK : 0));
2010        s = PL_bufptr;
2011        PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2012        if (PL_linestart > PL_bufptr)
2013            PL_bufptr = PL_linestart;
2014        return s;
2015    }
2016    return s;
2017}
2018
2019/*
2020 * S_check_uni
2021 * Check the unary operators to ensure there's no ambiguity in how they're
2022 * used.  An ambiguous piece of code would be:
2023 *     rand + 5
2024 * This doesn't mean rand() + 5.  Because rand() is a unary operator,
2025 * the +5 is its argument.
2026 */
2027
2028STATIC void
2029S_check_uni(pTHX)
2030{
2031    const char *s;
2032
2033    if (PL_oldoldbufptr != PL_last_uni)
2034        return;
2035    while (isSPACE(*PL_last_uni))
2036        PL_last_uni++;
2037    s = PL_last_uni;
2038    while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2039        s += UTF ? UTF8SKIP(s) : 1;
2040    if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2041        return;
2042
2043    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2044                     "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2045                     UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2046}
2047
2048/*
2049 * LOP : macro to build a list operator.  Its behaviour has been replaced
2050 * with a subroutine, S_lop() for which LOP is just another name.
2051 */
2052
2053#define LOP(f,x) return lop(f,x,s)
2054
2055/*
2056 * S_lop
2057 * Build a list operator (or something that might be one).  The rules:
2058 *  - if we have a next token, then it's a list operator (no parens) for
2059 *    which the next token has already been parsed; e.g.,
2060 *       sort foo @args
2061 *       sort foo (@args)
2062 *  - if the next thing is an opening paren, then it's a function
2063 *  - else it's a list operator
2064 */
2065
2066STATIC I32
2067S_lop(pTHX_ I32 f, U8 x, char *s)
2068{
2069    PERL_ARGS_ASSERT_LOP;
2070
2071    pl_yylval.ival = f;
2072    CLINE;
2073    PL_bufptr = s;
2074    PL_last_lop = PL_oldbufptr;
2075    PL_last_lop_op = (OPCODE)f;
2076    if (PL_nexttoke)
2077        goto lstop;
2078    PL_expect = x;
2079    if (*s == '(')
2080        return REPORT(FUNC);
2081    s = skipspace(s);
2082    if (*s == '(')
2083        return REPORT(FUNC);
2084    else {
2085        lstop:
2086        if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2087            PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2088        return REPORT(LSTOP);
2089    }
2090}
2091
2092/*
2093 * S_force_next
2094 * When the lexer realizes it knows the next token (for instance,
2095 * it is reordering tokens for the parser) then it can call S_force_next
2096 * to know what token to return the next time the lexer is called.  Caller
2097 * will need to set PL_nextval[] and possibly PL_expect to ensure
2098 * the lexer handles the token correctly.
2099 */
2100
2101STATIC void
2102S_force_next(pTHX_ I32 type)
2103{
2104#ifdef DEBUGGING
2105    if (DEBUG_T_TEST) {
2106        PerlIO_printf(Perl_debug_log, "### forced token:\n");
2107        tokereport(type, &NEXTVAL_NEXTTOKE);
2108    }
2109#endif
2110    assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2111    PL_nexttype[PL_nexttoke] = type;
2112    PL_nexttoke++;
2113}
2114
2115/*
2116 * S_postderef
2117 *
2118 * This subroutine handles postfix deref syntax after the arrow has already
2119 * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2120 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2121 * only the first, leaving yylex to find the next.
2122 */
2123
2124static int
2125S_postderef(pTHX_ int const funny, char const next)
2126{
2127    assert(funny == DOLSHARP
2128        || funny == PERLY_DOLLAR
2129        || funny == PERLY_SNAIL
2130        || funny == PERLY_PERCENT_SIGN
2131        || funny == PERLY_AMPERSAND
2132        || funny == PERLY_STAR
2133    );
2134    if (next == '*') {
2135        PL_expect = XOPERATOR;
2136        if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2137            assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2138            PL_lex_state = LEX_INTERPEND;
2139            if (PERLY_SNAIL == funny)
2140                force_next(POSTJOIN);
2141        }
2142        force_next(PERLY_STAR);
2143        PL_bufptr+=2;
2144    }
2145    else {
2146        if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2147         && !PL_lex_brackets)
2148            PL_lex_dojoin = 2;
2149        PL_expect = XOPERATOR;
2150        PL_bufptr++;
2151    }
2152    return funny;
2153}
2154
2155void
2156Perl_yyunlex(pTHX)
2157{
2158    int yyc = PL_parser->yychar;
2159    if (yyc != YYEMPTY) {
2160        if (yyc) {
2161            NEXTVAL_NEXTTOKE = PL_parser->yylval;
2162            if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2163                PL_lex_allbrackets--;
2164                PL_lex_brackets--;
2165                yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2166            } else if (yyc == PERLY_PAREN_OPEN) {
2167                PL_lex_allbrackets--;
2168                yyc |= (2<<24);
2169            }
2170            force_next(yyc);
2171        }
2172        PL_parser->yychar = YYEMPTY;
2173    }
2174}
2175
2176STATIC SV *
2177S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2178{
2179    SV * const sv = newSVpvn_utf8(start, len,
2180                    ! IN_BYTES
2181                  &&  UTF
2182                  &&  len != 0
2183                  &&  is_utf8_non_invariant_string((const U8*)start, len));
2184    return sv;
2185}
2186
2187/*
2188 * S_force_word
2189 * When the lexer knows the next thing is a word (for instance, it has
2190 * just seen -> and it knows that the next char is a word char, then
2191 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2192 * lookahead.
2193 *
2194 * Arguments:
2195 *   char *start : buffer position (must be within PL_linestr)
2196 *   int token   : PL_next* will be this type of bare word
2197 *                 (e.g., METHOD,BAREWORD)
2198 *   int check_keyword : if true, Perl checks to make sure the word isn't
2199 *       a keyword (do this if the word is a label, e.g. goto FOO)
2200 *   int allow_pack : if true, : characters will also be allowed (require,
2201 *       use, etc. do this)
2202 */
2203
2204STATIC char *
2205S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2206{
2207    char *s;
2208    STRLEN len;
2209
2210    PERL_ARGS_ASSERT_FORCE_WORD;
2211
2212    start = skipspace(start);
2213    s = start;
2214    if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2215        || (allow_pack && *s == ':' && s[1] == ':') )
2216    {
2217        s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2218        if (check_keyword) {
2219          char *s2 = PL_tokenbuf;
2220          STRLEN len2 = len;
2221          if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2222            s2 += sizeof("CORE::") - 1;
2223            len2 -= sizeof("CORE::") - 1;
2224          }
2225          if (keyword(s2, len2, 0))
2226            return start;
2227        }
2228        if (token == METHOD) {
2229            s = skipspace(s);
2230            if (*s == '(')
2231                PL_expect = XTERM;
2232            else {
2233                PL_expect = XOPERATOR;
2234            }
2235        }
2236        NEXTVAL_NEXTTOKE.opval
2237            = newSVOP(OP_CONST,0,
2238                           S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2239        NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2240        force_next(token);
2241    }
2242    return s;
2243}
2244
2245/*
2246 * S_force_ident
2247 * Called when the lexer wants $foo *foo &foo etc, but the program
2248 * text only contains the "foo" portion.  The first argument is a pointer
2249 * to the "foo", and the second argument is the type symbol to prefix.
2250 * Forces the next token to be a "BAREWORD".
2251 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2252 */
2253
2254STATIC void
2255S_force_ident(pTHX_ const char *s, int kind)
2256{
2257    PERL_ARGS_ASSERT_FORCE_IDENT;
2258
2259    if (s[0]) {
2260        const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2261        OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2262                                                                UTF ? SVf_UTF8 : 0));
2263        NEXTVAL_NEXTTOKE.opval = o;
2264        force_next(BAREWORD);
2265        if (kind) {
2266            o->op_private = OPpCONST_ENTERED;
2267            /* XXX see note in pp_entereval() for why we forgo typo
2268               warnings if the symbol must be introduced in an eval.
2269               GSAR 96-10-12 */
2270            gv_fetchpvn_flags(s, len,
2271                              (PL_in_eval ? GV_ADDMULTI
2272                              : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2273                              kind == PERLY_DOLLAR ? SVt_PV :
2274                              kind == PERLY_SNAIL ? SVt_PVAV :
2275                              kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2276                              SVt_PVGV
2277                              );
2278        }
2279    }
2280}
2281
2282static void
2283S_force_ident_maybe_lex(pTHX_ char pit)
2284{
2285    NEXTVAL_NEXTTOKE.ival = pit;
2286    force_next('p');
2287}
2288
2289NV
2290Perl_str_to_version(pTHX_ SV *sv)
2291{
2292    NV retval = 0.0;
2293    NV nshift = 1.0;
2294    STRLEN len;
2295    const char *start = SvPV_const(sv,len);
2296    const char * const end = start + len;
2297    const bool utf = cBOOL(SvUTF8(sv));
2298
2299    PERL_ARGS_ASSERT_STR_TO_VERSION;
2300
2301    while (start < end) {
2302        STRLEN skip;
2303        UV n;
2304        if (utf)
2305            n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2306        else {
2307            n = *(U8*)start;
2308            skip = 1;
2309        }
2310        retval += ((NV)n)/nshift;
2311        start += skip;
2312        nshift *= 1000;
2313    }
2314    return retval;
2315}
2316
2317/*
2318 * S_force_version
2319 * Forces the next token to be a version number.
2320 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2321 * and if "guessing" is TRUE, then no new token is created (and the caller
2322 * must use an alternative parsing method).
2323 */
2324
2325STATIC char *
2326S_force_version(pTHX_ char *s, int guessing)
2327{
2328    OP *version = NULL;
2329    char *d;
2330
2331    PERL_ARGS_ASSERT_FORCE_VERSION;
2332
2333    s = skipspace(s);
2334
2335    d = s;
2336    if (*d == 'v')
2337        d++;
2338    if (isDIGIT(*d)) {
2339        while (isDIGIT(*d) || *d == '_' || *d == '.')
2340            d++;
2341        if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2342            SV *ver;
2343            s = scan_num(s, &pl_yylval);
2344            version = pl_yylval.opval;
2345            ver = cSVOPx(version)->op_sv;
2346            if (SvPOK(ver) && !SvNIOK(ver)) {
2347                SvUPGRADE(ver, SVt_PVNV);
2348                SvNV_set(ver, str_to_version(ver));
2349                SvNOK_on(ver);		/* hint that it is a version */
2350            }
2351        }
2352        else if (guessing) {
2353            return s;
2354        }
2355    }
2356
2357    /* NOTE: The parser sees the package name and the VERSION swapped */
2358    NEXTVAL_NEXTTOKE.opval = version;
2359    force_next(BAREWORD);
2360
2361    return s;
2362}
2363
2364/*
2365 * S_force_strict_version
2366 * Forces the next token to be a version number using strict syntax rules.
2367 */
2368
2369STATIC char *
2370S_force_strict_version(pTHX_ char *s)
2371{
2372    OP *version = NULL;
2373    const char *errstr = NULL;
2374
2375    PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2376
2377    while (isSPACE(*s)) /* leading whitespace */
2378        s++;
2379
2380    if (is_STRICT_VERSION(s,&errstr)) {
2381        SV *ver = newSV_type(SVt_NULL);
2382        s = (char *)scan_version(s, ver, 0);
2383        version = newSVOP(OP_CONST, 0, ver);
2384    }
2385    else if ((*s != ';' && *s != '{' && *s != '}' )
2386             && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2387    {
2388        PL_bufptr = s;
2389        if (errstr)
2390            yyerror(errstr); /* version required */
2391        return s;
2392    }
2393
2394    /* NOTE: The parser sees the package name and the VERSION swapped */
2395    NEXTVAL_NEXTTOKE.opval = version;
2396    force_next(BAREWORD);
2397
2398    return s;
2399}
2400
2401/*
2402 * S_tokeq
2403 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2404 * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2405 * unchanged, and a new SV containing the modified input is returned.
2406 */
2407
2408STATIC SV *
2409S_tokeq(pTHX_ SV *sv)
2410{
2411    char *s;
2412    char *send;
2413    char *d;
2414    SV *pv = sv;
2415
2416    PERL_ARGS_ASSERT_TOKEQ;
2417
2418    assert (SvPOK(sv));
2419    assert (SvLEN(sv));
2420    assert (!SvIsCOW(sv));
2421    if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2422        goto finish;
2423    s = SvPVX(sv);
2424    send = SvEND(sv);
2425    /* This is relying on the SV being "well formed" with a trailing '\0'  */
2426    while (s < send && !(*s == '\\' && s[1] == '\\'))
2427        s++;
2428    if (s == send)
2429        goto finish;
2430    d = s;
2431    if ( PL_hints & HINT_NEW_STRING ) {
2432        pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2433                            SVs_TEMP | SvUTF8(sv));
2434    }
2435    while (s < send) {
2436        if (*s == '\\') {
2437            if (s + 1 < send && (s[1] == '\\'))
2438                s++;		/* all that, just for this */
2439        }
2440        *d++ = *s++;
2441    }
2442    *d = '\0';
2443    SvCUR_set(sv, d - SvPVX_const(sv));
2444  finish:
2445    if ( PL_hints & HINT_NEW_STRING )
2446       return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2447    return sv;
2448}
2449
2450/*
2451 * Now come three functions related to double-quote context,
2452 * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2453 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2454 * interact with PL_lex_state, and create fake ( ... ) argument lists
2455 * to handle functions and concatenation.
2456 * For example,
2457 *   "foo\lbar"
2458 * is tokenised as
2459 *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2460 */
2461
2462/*
2463 * S_sublex_start
2464 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2465 *
2466 * Pattern matching will set PL_lex_op to the pattern-matching op to
2467 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2468 *
2469 * OP_CONST is easy--just make the new op and return.
2470 *
2471 * Everything else becomes a FUNC.
2472 *
2473 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2474 * had an OP_CONST.  This just sets us up for a
2475 * call to S_sublex_push().
2476 */
2477
2478STATIC I32
2479S_sublex_start(pTHX)
2480{
2481    const I32 op_type = pl_yylval.ival;
2482
2483    if (op_type == OP_NULL) {
2484        pl_yylval.opval = PL_lex_op;
2485        PL_lex_op = NULL;
2486        return THING;
2487    }
2488    if (op_type == OP_CONST) {
2489        SV *sv = PL_lex_stuff;
2490        PL_lex_stuff = NULL;
2491        sv = tokeq(sv);
2492
2493        if (SvTYPE(sv) == SVt_PVIV) {
2494            /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2495            STRLEN len;
2496            const char * const p = SvPV_const(sv, len);
2497            SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2498            SvREFCNT_dec(sv);
2499            sv = nsv;
2500        }
2501        pl_yylval.opval = newSVOP(op_type, 0, sv);
2502        return THING;
2503    }
2504
2505    PL_parser->lex_super_state = PL_lex_state;
2506    PL_parser->lex_sub_inwhat = (U16)op_type;
2507    PL_parser->lex_sub_op = PL_lex_op;
2508    PL_parser->sub_no_recover = FALSE;
2509    PL_parser->sub_error_count = PL_error_count;
2510    PL_lex_state = LEX_INTERPPUSH;
2511
2512    PL_expect = XTERM;
2513    if (PL_lex_op) {
2514        pl_yylval.opval = PL_lex_op;
2515        PL_lex_op = NULL;
2516        return PMFUNC;
2517    }
2518    else
2519        return FUNC;
2520}
2521
2522/*
2523 * S_sublex_push
2524 * Create a new scope to save the lexing state.  The scope will be
2525 * ended in S_sublex_done.  Returns a '(', starting the function arguments
2526 * to the uc, lc, etc. found before.
2527 * Sets PL_lex_state to LEX_INTERPCONCAT.
2528 */
2529
2530STATIC I32
2531S_sublex_push(pTHX)
2532{
2533    LEXSHARED *shared;
2534    const bool is_heredoc = PL_multi_close == '<';
2535    ENTER;
2536
2537    PL_lex_state = PL_parser->lex_super_state;
2538    SAVEI8(PL_lex_dojoin);
2539    SAVEI32(PL_lex_brackets);
2540    SAVEI32(PL_lex_allbrackets);
2541    SAVEI32(PL_lex_formbrack);
2542    SAVEI8(PL_lex_fakeeof);
2543    SAVEI32(PL_lex_casemods);
2544    SAVEI32(PL_lex_starts);
2545    SAVEI8(PL_lex_state);
2546    SAVESPTR(PL_lex_repl);
2547    SAVEVPTR(PL_lex_inpat);
2548    SAVEI16(PL_lex_inwhat);
2549    if (is_heredoc)
2550    {
2551        SAVECOPLINE(PL_curcop);
2552        SAVEI32(PL_multi_end);
2553        SAVEI32(PL_parser->herelines);
2554        PL_parser->herelines = 0;
2555    }
2556    SAVEIV(PL_multi_close);
2557    SAVEPPTR(PL_bufptr);
2558    SAVEPPTR(PL_bufend);
2559    SAVEPPTR(PL_oldbufptr);
2560    SAVEPPTR(PL_oldoldbufptr);
2561    SAVEPPTR(PL_last_lop);
2562    SAVEPPTR(PL_last_uni);
2563    SAVEPPTR(PL_linestart);
2564    SAVESPTR(PL_linestr);
2565    SAVEGENERICPV(PL_lex_brackstack);
2566    SAVEGENERICPV(PL_lex_casestack);
2567    SAVEGENERICPV(PL_parser->lex_shared);
2568    SAVEBOOL(PL_parser->lex_re_reparsing);
2569    SAVEI32(PL_copline);
2570
2571    /* The here-doc parser needs to be able to peek into outer lexing
2572       scopes to find the body of the here-doc.  So we put PL_linestr and
2573       PL_bufptr into lex_shared, to 'share' those values.
2574     */
2575    PL_parser->lex_shared->ls_linestr = PL_linestr;
2576    PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2577
2578    PL_linestr = PL_lex_stuff;
2579    PL_lex_repl = PL_parser->lex_sub_repl;
2580    PL_lex_stuff = NULL;
2581    PL_parser->lex_sub_repl = NULL;
2582
2583    /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2584       set for an inner quote-like operator and then an error causes scope-
2585       popping.  We must not have a PL_lex_stuff value left dangling, as
2586       that breaks assumptions elsewhere.  See bug #123617.  */
2587    SAVEGENERICSV(PL_lex_stuff);
2588    SAVEGENERICSV(PL_parser->lex_sub_repl);
2589
2590    PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2591        = SvPVX(PL_linestr);
2592    PL_bufend += SvCUR(PL_linestr);
2593    PL_last_lop = PL_last_uni = NULL;
2594    SAVEFREESV(PL_linestr);
2595    if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2596
2597    PL_lex_dojoin = FALSE;
2598    PL_lex_brackets = PL_lex_formbrack = 0;
2599    PL_lex_allbrackets = 0;
2600    PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2601    Newx(PL_lex_brackstack, 120, char);
2602    Newx(PL_lex_casestack, 12, char);
2603    PL_lex_casemods = 0;
2604    *PL_lex_casestack = '\0';
2605    PL_lex_starts = 0;
2606    PL_lex_state = LEX_INTERPCONCAT;
2607    if (is_heredoc)
2608        CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2609    PL_copline = NOLINE;
2610
2611    Newxz(shared, 1, LEXSHARED);
2612    shared->ls_prev = PL_parser->lex_shared;
2613    PL_parser->lex_shared = shared;
2614
2615    PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2616    if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2617    if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2618        PL_lex_inpat = PL_parser->lex_sub_op;
2619    else
2620        PL_lex_inpat = NULL;
2621
2622    PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2623    PL_in_eval &= ~EVAL_RE_REPARSING;
2624
2625    return SUBLEXSTART;
2626}
2627
2628/*
2629 * S_sublex_done
2630 * Restores lexer state after a S_sublex_push.
2631 */
2632
2633STATIC I32
2634S_sublex_done(pTHX)
2635{
2636    if (!PL_lex_starts++) {
2637        SV * const sv = newSVpvs("");
2638        if (SvUTF8(PL_linestr))
2639            SvUTF8_on(sv);
2640        PL_expect = XOPERATOR;
2641        pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2642        return THING;
2643    }
2644
2645    if (PL_lex_casemods) {		/* oops, we've got some unbalanced parens */
2646        PL_lex_state = LEX_INTERPCASEMOD;
2647        return yylex();
2648    }
2649
2650    /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2651    assert(PL_lex_inwhat != OP_TRANSR);
2652    if (PL_lex_repl) {
2653        assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2654        PL_linestr = PL_lex_repl;
2655        PL_lex_inpat = 0;
2656        PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2657        PL_bufend += SvCUR(PL_linestr);
2658        PL_last_lop = PL_last_uni = NULL;
2659        PL_lex_dojoin = FALSE;
2660        PL_lex_brackets = 0;
2661        PL_lex_allbrackets = 0;
2662        PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2663        PL_lex_casemods = 0;
2664        *PL_lex_casestack = '\0';
2665        PL_lex_starts = 0;
2666        if (SvEVALED(PL_lex_repl)) {
2667            PL_lex_state = LEX_INTERPNORMAL;
2668            PL_lex_starts++;
2669            /*	we don't clear PL_lex_repl here, so that we can check later
2670                whether this is an evalled subst; that means we rely on the
2671                logic to ensure sublex_done() is called again only via the
2672                branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2673        }
2674        else {
2675            PL_lex_state = LEX_INTERPCONCAT;
2676            PL_lex_repl = NULL;
2677        }
2678        if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2679            CopLINE(PL_curcop) +=
2680                ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2681                 + PL_parser->herelines;
2682            PL_parser->herelines = 0;
2683        }
2684        return PERLY_SLASH;
2685    }
2686    else {
2687        const line_t l = CopLINE(PL_curcop);
2688        LEAVE;
2689        if (PL_parser->sub_error_count != PL_error_count) {
2690            if (PL_parser->sub_no_recover) {
2691                yyquit();
2692                NOT_REACHED;
2693            }
2694        }
2695        if (PL_multi_close == '<')
2696            PL_parser->herelines += l - PL_multi_end;
2697        PL_bufend = SvPVX(PL_linestr);
2698        PL_bufend += SvCUR(PL_linestr);
2699        PL_expect = XOPERATOR;
2700        return SUBLEXEND;
2701    }
2702}
2703
2704HV *
2705Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2706                          const STRLEN context_len, const char ** error_msg)
2707{
2708    /* Load the official _charnames module if not already there.  The
2709     * parameters are just to give info for any error messages generated:
2710     *  char_name   a name to look up which is the reason for loading this
2711     *  context     'char_name' in the context in the input in which it appears
2712     *  context_len how many bytes 'context' occupies
2713     *  error_msg   *error_msg will be set to any error
2714     *
2715     *  Returns the ^H table if success; otherwise NULL */
2716
2717    unsigned int i;
2718    HV * table;
2719    SV **cvp;
2720    SV * res;
2721
2722    PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2723
2724    /* This loop is executed 1 1/2 times.  On the first time through, if it
2725     * isn't already loaded, try loading it, and iterate just once to see if it
2726     * worked.  */
2727    for (i = 0; i < 2; i++) {
2728        table = GvHV(PL_hintgv);		 /* ^H */
2729
2730        if (    table
2731            && (PL_hints & HINT_LOCALIZE_HH)
2732            && (cvp = hv_fetchs(table, "charnames", FALSE))
2733            &&  SvOK(*cvp))
2734        {
2735            return table;   /* Quit if already loaded */
2736        }
2737
2738        if (i == 0) {
2739            Perl_load_module(aTHX_
2740                0,
2741                newSVpvs("_charnames"),
2742
2743                /* version parameter; no need to specify it, as if we get too early
2744                * a version, will fail anyway, not being able to find 'charnames'
2745                * */
2746                NULL,
2747                newSVpvs(":full"),
2748                newSVpvs(":short"),
2749                NULL);
2750        }
2751    }
2752
2753    /* Here, it failed; new_constant will give appropriate error messages */
2754    *error_msg = NULL;
2755    res = new_constant( NULL, 0, "charnames", char_name, NULL,
2756                        context, context_len, error_msg);
2757    SvREFCNT_dec(res);
2758
2759    return NULL;
2760}
2761
2762STATIC SV*
2763S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2764{
2765    /* This justs wraps get_and_check_backslash_N_name() to output any error
2766     * message it returns. */
2767
2768    const char * error_msg = NULL;
2769    SV * result;
2770
2771    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2772
2773    /* charnames doesn't work well if there have been errors found */
2774    if (PL_error_count > 0) {
2775        return NULL;
2776    }
2777
2778    result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2779
2780    if (error_msg) {
2781        yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2782    }
2783
2784    return result;
2785}
2786
2787SV*
2788Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2789                                          const char* e,
2790                                          const bool is_utf8,
2791                                          const char ** error_msg)
2792{
2793    /* <s> points to first character of interior of \N{}, <e> to one beyond the
2794     * interior, hence to the "}".  Finds what the name resolves to, returning
2795     * an SV* containing it; NULL if no valid one found.
2796     *
2797     * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2798     * doesn't have to be. */
2799
2800    SV* char_name;
2801    SV* res;
2802    HV * table;
2803    SV **cvp;
2804    SV *cv;
2805    SV *rv;
2806    HV *stash;
2807
2808    /* Points to the beginning of the \N{... so that any messages include the
2809     * context of what's failing*/
2810    const char* context = s - 3;
2811    STRLEN context_len = e - context + 1; /* include all of \N{...} */
2812
2813
2814    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2815
2816    assert(e >= s);
2817    assert(s > (char *) 3);
2818
2819    while (s < e && isBLANK(*s)) {
2820        s++;
2821    }
2822
2823    while (s < e && isBLANK(*(e - 1))) {
2824        e--;
2825    }
2826
2827    char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2828
2829    if (!SvCUR(char_name)) {
2830        SvREFCNT_dec_NN(char_name);
2831        /* diag_listed_as: Unknown charname '%s' */
2832        *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2833        return NULL;
2834    }
2835
2836    /* Autoload the charnames module */
2837
2838    table = load_charnames(char_name, context, context_len, error_msg);
2839    if (table == NULL) {
2840        return NULL;
2841    }
2842
2843    *error_msg = NULL;
2844    res = new_constant( NULL, 0, "charnames", char_name, NULL,
2845                        context, context_len, error_msg);
2846    if (*error_msg) {
2847        *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2848
2849        SvREFCNT_dec(res);
2850        return NULL;
2851    }
2852
2853    /* See if the charnames handler is the Perl core's, and if so, we can skip
2854     * the validation needed for a user-supplied one, as Perl's does its own
2855     * validation. */
2856    cvp = hv_fetchs(table, "charnames", FALSE);
2857    if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2858        SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2859    {
2860        const char * const name = HvNAME(stash);
2861         if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2862           return res;
2863       }
2864    }
2865
2866    /* Here, it isn't Perl's charname handler.  We can't rely on a
2867     * user-supplied handler to validate the input name.  For non-ut8 input,
2868     * look to see that the first character is legal.  Then loop through the
2869     * rest checking that each is a continuation */
2870
2871    /* This code makes the reasonable assumption that the only Latin1-range
2872     * characters that begin a character name alias are alphabetic, otherwise
2873     * would have to create a isCHARNAME_BEGIN macro */
2874
2875    if (! is_utf8) {
2876        if (! isALPHAU(*s)) {
2877            goto bad_charname;
2878        }
2879        s++;
2880        while (s < e) {
2881            if (! isCHARNAME_CONT(*s)) {
2882                goto bad_charname;
2883            }
2884            if (*s == ' ' && *(s-1) == ' ') {
2885                goto multi_spaces;
2886            }
2887            s++;
2888        }
2889    }
2890    else {
2891        /* Similarly for utf8.  For invariants can check directly; for other
2892         * Latin1, can calculate their code point and check; otherwise  use an
2893         * inversion list */
2894        if (UTF8_IS_INVARIANT(*s)) {
2895            if (! isALPHAU(*s)) {
2896                goto bad_charname;
2897            }
2898            s++;
2899        } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2900            if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2901                goto bad_charname;
2902            }
2903            s += 2;
2904        }
2905        else {
2906            if (! _invlist_contains_cp(PL_utf8_charname_begin,
2907                                       utf8_to_uvchr_buf((U8 *) s,
2908                                                         (U8 *) e,
2909                                                         NULL)))
2910            {
2911                goto bad_charname;
2912            }
2913            s += UTF8SKIP(s);
2914        }
2915
2916        while (s < e) {
2917            if (UTF8_IS_INVARIANT(*s)) {
2918                if (! isCHARNAME_CONT(*s)) {
2919                    goto bad_charname;
2920                }
2921                if (*s == ' ' && *(s-1) == ' ') {
2922                    goto multi_spaces;
2923                }
2924                s++;
2925            }
2926            else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2927                if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2928                {
2929                    goto bad_charname;
2930                }
2931                s += 2;
2932            }
2933            else {
2934                if (! _invlist_contains_cp(PL_utf8_charname_continue,
2935                                           utf8_to_uvchr_buf((U8 *) s,
2936                                                             (U8 *) e,
2937                                                             NULL)))
2938                {
2939                    goto bad_charname;
2940                }
2941                s += UTF8SKIP(s);
2942            }
2943        }
2944    }
2945    if (*(s-1) == ' ') {
2946        /* diag_listed_as: charnames alias definitions may not contain
2947                           trailing white-space; marked by <-- HERE in %s
2948         */
2949        *error_msg = Perl_form(aTHX_
2950            "charnames alias definitions may not contain trailing "
2951            "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2952            (int)(s - context + 1), context,
2953            (int)(e - s + 1), s + 1);
2954        return NULL;
2955    }
2956
2957    if (SvUTF8(res)) { /* Don't accept malformed charname value */
2958        const U8* first_bad_char_loc;
2959        STRLEN len;
2960        const char* const str = SvPV_const(res, len);
2961        if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2962                                          &first_bad_char_loc)))
2963        {
2964            _force_out_malformed_utf8_message(first_bad_char_loc,
2965                                              (U8 *) PL_parser->bufend,
2966                                              0,
2967                                              0 /* 0 means don't die */ );
2968            /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2969                               immediately after '%s' */
2970            *error_msg = Perl_form(aTHX_
2971                "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2972                 (int) context_len, context,
2973                 (int) ((char *) first_bad_char_loc - str), str);
2974            return NULL;
2975        }
2976    }
2977
2978    return res;
2979
2980  bad_charname: {
2981
2982        /* The final %.*s makes sure that should the trailing NUL be missing
2983         * that this print won't run off the end of the string */
2984        /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2985                           in \N{%s} */
2986        *error_msg = Perl_form(aTHX_
2987            "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2988            (int)(s - context + 1), context,
2989            (int)(e - s + 1), s + 1);
2990        return NULL;
2991    }
2992
2993  multi_spaces:
2994        /* diag_listed_as: charnames alias definitions may not contain a
2995                           sequence of multiple spaces; marked by <-- HERE
2996                           in %s */
2997        *error_msg = Perl_form(aTHX_
2998            "charnames alias definitions may not contain a sequence of "
2999            "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3000            (int)(s - context + 1), context,
3001            (int)(e - s + 1), s + 1);
3002        return NULL;
3003}
3004
3005/*
3006  scan_const
3007
3008  Extracts the next constant part of a pattern, double-quoted string,
3009  or transliteration.  This is terrifying code.
3010
3011  For example, in parsing the double-quoted string "ab\x63$d", it would
3012  stop at the '$' and return an OP_CONST containing 'abc'.
3013
3014  It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3015  processing a pattern (PL_lex_inpat is true), a transliteration
3016  (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3017
3018  Returns a pointer to the character scanned up to. If this is
3019  advanced from the start pointer supplied (i.e. if anything was
3020  successfully parsed), will leave an OP_CONST for the substring scanned
3021  in pl_yylval. Caller must intuit reason for not parsing further
3022  by looking at the next characters herself.
3023
3024  In patterns:
3025    expand:
3026      \N{FOO}  => \N{U+hex_for_character_FOO}
3027      (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3028
3029    pass through:
3030        all other \-char, including \N and \N{ apart from \N{ABC}
3031
3032    stops on:
3033        @ and $ where it appears to be a var, but not for $ as tail anchor
3034        \l \L \u \U \Q \E
3035        (?{  or  (??{
3036
3037  In transliterations:
3038    characters are VERY literal, except for - not at the start or end
3039    of the string, which indicates a range.  However some backslash sequences
3040    are recognized: \r, \n, and the like
3041                    \007 \o{}, \x{}, \N{}
3042    If all elements in the transliteration are below 256,
3043    scan_const expands the range to the full set of intermediate
3044    characters. If the range is in utf8, the hyphen is replaced with
3045    a certain range mark which will be handled by pmtrans() in op.c.
3046
3047  In double-quoted strings:
3048    backslashes:
3049      all those recognized in transliterations
3050      deprecated backrefs: \1 (in substitution replacements)
3051      case and quoting: \U \Q \E
3052    stops on @ and $
3053
3054  scan_const does *not* construct ops to handle interpolated strings.
3055  It stops processing as soon as it finds an embedded $ or @ variable
3056  and leaves it to the caller to work out what's going on.
3057
3058  embedded arrays (whether in pattern or not) could be:
3059      @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3060
3061  $ in double-quoted strings must be the symbol of an embedded scalar.
3062
3063  $ in pattern could be $foo or could be tail anchor.  Assumption:
3064  it's a tail anchor if $ is the last thing in the string, or if it's
3065  followed by one of "()| \r\n\t"
3066
3067  \1 (backreferences) are turned into $1 in substitutions
3068
3069  The structure of the code is
3070      while (there's a character to process) {
3071          handle transliteration ranges
3072          skip regexp comments /(?#comment)/ and codes /(?{code})/
3073          skip #-initiated comments in //x patterns
3074          check for embedded arrays
3075          check for embedded scalars
3076          if (backslash) {
3077              deprecate \1 in substitution replacements
3078              handle string-changing backslashes \l \U \Q \E, etc.
3079              switch (what was escaped) {
3080                  handle \- in a transliteration (becomes a literal -)
3081                  if a pattern and not \N{, go treat as regular character
3082                  handle \132 (octal characters)
3083                  handle \x15 and \x{1234} (hex characters)
3084                  handle \N{name} (named characters, also \N{3,5} in a pattern)
3085                  handle \cV (control characters)
3086                  handle printf-style backslashes (\f, \r, \n, etc)
3087              } (end switch)
3088              continue
3089          } (end if backslash)
3090          handle regular character
3091    } (end while character to read)
3092
3093*/
3094
3095STATIC char *
3096S_scan_const(pTHX_ char *start)
3097{
3098    const char * const send = PL_bufend;/* end of the constant */
3099    SV *sv = newSV(send - start);       /* sv for the constant.  See note below
3100                                           on sizing. */
3101    char *s = start;			/* start of the constant */
3102    char *d = SvPVX(sv);		/* destination for copies */
3103    bool dorange = FALSE;               /* are we in a translit range? */
3104    bool didrange = FALSE;              /* did we just finish a range? */
3105    bool in_charclass = FALSE;          /* within /[...]/ */
3106    const bool s_is_utf8 = cBOOL(UTF);  /* Is the source string assumed to be
3107                                           UTF8?  But, this can show as true
3108                                           when the source isn't utf8, as for
3109                                           example when it is entirely composed
3110                                           of hex constants */
3111    bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3112    STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3113                                           number of characters found so far
3114                                           that will expand (into 2 bytes)
3115                                           should we have to convert to
3116                                           UTF-8) */
3117    SV *res;		                /* result from charnames */
3118    STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3119                                   high-end character is temporarily placed */
3120
3121    /* Does something require special handling in tr/// ?  This avoids extra
3122     * work in a less likely case.  As such, khw didn't feel it was worth
3123     * adding any branches to the more mainline code to handle this, which
3124     * means that this doesn't get set in some circumstances when things like
3125     * \x{100} get expanded out.  As a result there needs to be extra testing
3126     * done in the tr code */
3127    bool has_above_latin1 = FALSE;
3128
3129    /* Note on sizing:  The scanned constant is placed into sv, which is
3130     * initialized by newSV() assuming one byte of output for every byte of
3131     * input.  This routine expects newSV() to allocate an extra byte for a
3132     * trailing NUL, which this routine will append if it gets to the end of
3133     * the input.  There may be more bytes of input than output (eg., \N{LATIN
3134     * CAPITAL LETTER A}), or more output than input if the constant ends up
3135     * recoded to utf8, but each time a construct is found that might increase
3136     * the needed size, SvGROW() is called.  Its size parameter each time is
3137     * based on the best guess estimate at the time, namely the length used so
3138     * far, plus the length the current construct will occupy, plus room for
3139     * the trailing NUL, plus one byte for every input byte still unscanned */
3140
3141    UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3142                       before set */
3143#ifdef EBCDIC
3144    int backslash_N = 0;            /* ? was the character from \N{} */
3145    int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3146                                       platform-specific like \x65 */
3147#endif
3148
3149    PERL_ARGS_ASSERT_SCAN_CONST;
3150
3151    assert(PL_lex_inwhat != OP_TRANSR);
3152
3153    /* Protect sv from errors and fatal warnings. */
3154    ENTER_with_name("scan_const");
3155    SAVEFREESV(sv);
3156
3157    /* A bunch of code in the loop below assumes that if s[n] exists and is not
3158     * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3159     * valid */
3160    assert(*send == '\0');
3161
3162    while (s < send
3163           || dorange   /* Handle tr/// range at right edge of input */
3164    ) {
3165
3166        /* get transliterations out of the way (they're most literal) */
3167        if (PL_lex_inwhat == OP_TRANS) {
3168
3169            /* But there isn't any special handling necessary unless there is a
3170             * range, so for most cases we just drop down and handle the value
3171             * as any other.  There are two exceptions.
3172             *
3173             * 1.  A hyphen indicates that we are actually going to have a
3174             *     range.  In this case, skip the '-', set a flag, then drop
3175             *     down to handle what should be the end range value.
3176             * 2.  After we've handled that value, the next time through, that
3177             *     flag is set and we fix up the range.
3178             *
3179             * Ranges entirely within Latin1 are expanded out entirely, in
3180             * order to make the transliteration a simple table look-up.
3181             * Ranges that extend above Latin1 have to be done differently, so
3182             * there is no advantage to expanding them here, so they are
3183             * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3184             * a byte that can't occur in legal UTF-8, and hence can signify a
3185             * hyphen without any possible ambiguity.  On EBCDIC machines, if
3186             * the range is expressed as Unicode, the Latin1 portion is
3187             * expanded out even if the range extends above Latin1.  This is
3188             * because each code point in it has to be processed here
3189             * individually to get its native translation */
3190
3191            if (! dorange) {
3192
3193                /* Here, we don't think we're in a range.  If the new character
3194                 * is not a hyphen; or if it is a hyphen, but it's too close to
3195                 * either edge to indicate a range, or if we haven't output any
3196                 * characters yet then it's a regular character. */
3197                if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3198                {
3199
3200                    /* A regular character.  Process like any other, but first
3201                     * clear any flags */
3202                    didrange = FALSE;
3203                    dorange = FALSE;
3204#ifdef EBCDIC
3205                    non_portable_endpoint = 0;
3206                    backslash_N = 0;
3207#endif
3208                    /* The tests here for being above Latin1 and similar ones
3209                     * in the following 'else' suffice to find all such
3210                     * occurences in the constant, except those added by a
3211                     * backslash escape sequence, like \x{100}.  Mostly, those
3212                     * set 'has_above_latin1' as appropriate */
3213                    if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3214                        has_above_latin1 = TRUE;
3215                    }
3216
3217                    /* Drops down to generic code to process current byte */
3218                }
3219                else {  /* Is a '-' in the context where it means a range */
3220                    if (didrange) { /* Something like y/A-C-Z// */
3221                        Perl_croak(aTHX_ "Ambiguous range in transliteration"
3222                                         " operator");
3223                    }
3224
3225                    dorange = TRUE;
3226
3227                    s++;    /* Skip past the hyphen */
3228
3229                    /* d now points to where the end-range character will be
3230                     * placed.  Drop down to get that character.  We'll finish
3231                     * processing the range the next time through the loop */
3232
3233                    if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3234                        has_above_latin1 = TRUE;
3235                    }
3236
3237                    /* Drops down to generic code to process current byte */
3238                }
3239            }  /* End of not a range */
3240            else {
3241                /* Here we have parsed a range.  Now must handle it.  At this
3242                 * point:
3243                 * 'sv' is a SV* that contains the output string we are
3244                 *      constructing.  The final two characters in that string
3245                 *      are the range start and range end, in order.
3246                 * 'd'  points to just beyond the range end in the 'sv' string,
3247                 *      where we would next place something
3248                 */
3249                char * max_ptr;
3250                char * min_ptr;
3251                IV range_min;
3252                IV range_max;	/* last character in range */
3253                STRLEN grow;
3254                Size_t offset_to_min = 0;
3255                Size_t extras = 0;
3256#ifdef EBCDIC
3257                bool convert_unicode;
3258                IV real_range_max = 0;
3259#endif
3260                /* Get the code point values of the range ends. */
3261                max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3262                offset_to_max = max_ptr - SvPVX_const(sv);
3263                if (d_is_utf8) {
3264                    /* We know the utf8 is valid, because we just constructed
3265                     * it ourselves in previous loop iterations */
3266                    min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3267                    range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3268                    range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3269
3270                    /* This compensates for not all code setting
3271                     * 'has_above_latin1', so that we don't skip stuff that
3272                     * should be executed */
3273                    if (range_max > 255) {
3274                        has_above_latin1 = TRUE;
3275                    }
3276                }
3277                else {
3278                    min_ptr = max_ptr - 1;
3279                    range_min = * (U8*) min_ptr;
3280                    range_max = * (U8*) max_ptr;
3281                }
3282
3283                /* If the range is just a single code point, like tr/a-a/.../,
3284                 * that code point is already in the output, twice.  We can
3285                 * just back up over the second instance and avoid all the rest
3286                 * of the work.  But if it is a variant character, it's been
3287                 * counted twice, so decrement.  (This unlikely scenario is
3288                 * special cased, like the one for a range of 2 code points
3289                 * below, only because the main-line code below needs a range
3290                 * of 3 or more to work without special casing.  Might as well
3291                 * get it out of the way now.) */
3292                if (UNLIKELY(range_max == range_min)) {
3293                    d = max_ptr;
3294                    if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3295                        utf8_variant_count--;
3296                    }
3297                    goto range_done;
3298                }
3299
3300#ifdef EBCDIC
3301                /* On EBCDIC platforms, we may have to deal with portable
3302                 * ranges.  These happen if at least one range endpoint is a
3303                 * Unicode value (\N{...}), or if the range is a subset of
3304                 * [A-Z] or [a-z], and both ends are literal characters,
3305                 * like 'A', and not like \x{C1} */
3306                convert_unicode =
3307                               cBOOL(backslash_N)   /* \N{} forces Unicode,
3308                                                       hence portable range */
3309                    || (     ! non_portable_endpoint
3310                        && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3311                           || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3312                if (convert_unicode) {
3313
3314                    /* Special handling is needed for these portable ranges.
3315                     * They are defined to be in Unicode terms, which includes
3316                     * all the Unicode code points between the end points.
3317                     * Convert to Unicode to get the Unicode range.  Later we
3318                     * will convert each code point in the range back to
3319                     * native.  */
3320                    range_min = NATIVE_TO_UNI(range_min);
3321                    range_max = NATIVE_TO_UNI(range_max);
3322                }
3323#endif
3324
3325                if (range_min > range_max) {
3326#ifdef EBCDIC
3327                    if (convert_unicode) {
3328                        /* Need to convert back to native for meaningful
3329                         * messages for this platform */
3330                        range_min = UNI_TO_NATIVE(range_min);
3331                        range_max = UNI_TO_NATIVE(range_max);
3332                    }
3333#endif
3334                    /* Use the characters themselves for the error message if
3335                     * ASCII printables; otherwise some visible representation
3336                     * of them */
3337                    if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3338                        Perl_croak(aTHX_
3339                         "Invalid range \"%c-%c\" in transliteration operator",
3340                         (char)range_min, (char)range_max);
3341                    }
3342#ifdef EBCDIC
3343                    else if (convert_unicode) {
3344        /* diag_listed_as: Invalid range "%s" in transliteration operator */
3345                        Perl_croak(aTHX_
3346                           "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3347                           UVXf "}\" in transliteration operator",
3348                           range_min, range_max);
3349                    }
3350#endif
3351                    else {
3352        /* diag_listed_as: Invalid range "%s" in transliteration operator */
3353                        Perl_croak(aTHX_
3354                           "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3355                           " in transliteration operator",
3356                           range_min, range_max);
3357                    }
3358                }
3359
3360                /* If the range is exactly two code points long, they are
3361                 * already both in the output */
3362                if (UNLIKELY(range_min + 1 == range_max)) {
3363                    goto range_done;
3364                }
3365
3366                /* Here the range contains at least 3 code points */
3367
3368                if (d_is_utf8) {
3369
3370                    /* If everything in the transliteration is below 256, we
3371                     * can avoid special handling later.  A translation table
3372                     * for each of those bytes is created by op.c.  So we
3373                     * expand out all ranges to their constituent code points.
3374                     * But if we've encountered something above 255, the
3375                     * expanding won't help, so skip doing that.  But if it's
3376                     * EBCDIC, we may have to look at each character below 256
3377                     * if we have to convert to/from Unicode values */
3378                    if (   has_above_latin1
3379#ifdef EBCDIC
3380                        && (range_min > 255 || ! convert_unicode)
3381#endif
3382                    ) {
3383                        const STRLEN off = d - SvPVX(sv);
3384                        const STRLEN extra = 1 + (send - s) + 1;
3385                        char *e;
3386
3387                        /* Move the high character one byte to the right; then
3388                         * insert between it and the range begin, an illegal
3389                         * byte which serves to indicate this is a range (using
3390                         * a '-' would be ambiguous). */
3391
3392                        if (off + extra > SvLEN(sv)) {
3393                            d = off + SvGROW(sv, off + extra);
3394                            max_ptr = d - off + offset_to_max;
3395                        }
3396
3397                        e = d++;
3398                        while (e-- > max_ptr) {
3399                            *(e + 1) = *e;
3400                        }
3401                        *(e + 1) = (char) RANGE_INDICATOR;
3402                        goto range_done;
3403                    }
3404
3405                    /* Here, we're going to expand out the range.  For EBCDIC
3406                     * the range can extend above 255 (not so in ASCII), so
3407                     * for EBCDIC, split it into the parts above and below
3408                     * 255/256 */
3409#ifdef EBCDIC
3410                    if (range_max > 255) {
3411                        real_range_max = range_max;
3412                        range_max = 255;
3413                    }
3414#endif
3415                }
3416
3417                /* Here we need to expand out the string to contain each
3418                 * character in the range.  Grow the output to handle this.
3419                 * For non-UTF8, we need a byte for each code point in the
3420                 * range, minus the three that we've already allocated for: the
3421                 * hyphen, the min, and the max.  For UTF-8, we need this
3422                 * plus an extra byte for each code point that occupies two
3423                 * bytes (is variant) when in UTF-8 (except we've already
3424                 * allocated for the end points, including if they are
3425                 * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3426                 * platforms, it's easy to calculate a precise number.  To
3427                 * start, we count the variants in the range, which we need
3428                 * elsewhere in this function anyway.  (For the case where it
3429                 * isn't easy to calculate, 'extras' has been initialized to 0,
3430                 * and the calculation is done in a loop further down.) */
3431#ifdef EBCDIC
3432                if (convert_unicode)
3433#endif
3434                {
3435                    /* This is executed unconditionally on ASCII, and for
3436                     * Unicode ranges on EBCDIC.  Under these conditions, all
3437                     * code points above a certain value are variant; and none
3438                     * under that value are.  We just need to find out how much
3439                     * of the range is above that value.  We don't count the
3440                     * end points here, as they will already have been counted
3441                     * as they were parsed. */
3442                    if (range_min >= UTF_CONTINUATION_MARK) {
3443
3444                        /* The whole range is made up of variants */
3445                        extras = (range_max - 1) - (range_min + 1) + 1;
3446                    }
3447                    else if (range_max >= UTF_CONTINUATION_MARK) {
3448
3449                        /* Only the higher portion of the range is variants */
3450                        extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3451                    }
3452
3453                    utf8_variant_count += extras;
3454                }
3455
3456                /* The base growth is the number of code points in the range,
3457                 * not including the endpoints, which have already been sized
3458                 * for (and output).  We don't subtract for the hyphen, as it
3459                 * has been parsed but not output, and the SvGROW below is
3460                 * based only on what's been output plus what's left to parse.
3461                 * */
3462                grow = (range_max - 1) - (range_min + 1) + 1;
3463
3464                if (d_is_utf8) {
3465#ifdef EBCDIC
3466                    /* In some cases in EBCDIC, we haven't yet calculated a
3467                     * precise amount needed for the UTF-8 variants.  Just
3468                     * assume the worst case, that everything will expand by a
3469                     * byte */
3470                    if (! convert_unicode) {
3471                        grow *= 2;
3472                    }
3473                    else
3474#endif
3475                    {
3476                        /* Otherwise we know exactly how many variants there
3477                         * are in the range. */
3478                        grow += extras;
3479                    }
3480                }
3481
3482                /* Grow, but position the output to overwrite the range min end
3483                 * point, because in some cases we overwrite that */
3484                SvCUR_set(sv, d - SvPVX_const(sv));
3485                offset_to_min = min_ptr - SvPVX_const(sv);
3486
3487                /* See Note on sizing above. */
3488                d = offset_to_min + SvGROW(sv, SvCUR(sv)
3489                                             + (send - s)
3490                                             + grow
3491                                             + 1 /* Trailing NUL */ );
3492
3493                /* Now, we can expand out the range. */
3494#ifdef EBCDIC
3495                if (convert_unicode) {
3496                    SSize_t i;
3497
3498                    /* Recall that the min and max are now in Unicode terms, so
3499                     * we have to convert each character to its native
3500                     * equivalent */
3501                    if (d_is_utf8) {
3502                        for (i = range_min; i <= range_max; i++) {
3503                            append_utf8_from_native_byte(
3504                                                    LATIN1_TO_NATIVE((U8) i),
3505                                                    (U8 **) &d);
3506                        }
3507                    }
3508                    else {
3509                        for (i = range_min; i <= range_max; i++) {
3510                            *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3511                        }
3512                    }
3513                }
3514                else
3515#endif
3516                /* Always gets run for ASCII, and sometimes for EBCDIC. */
3517                {
3518                    /* Here, no conversions are necessary, which means that the
3519                     * first character in the range is already in 'd' and
3520                     * valid, so we can skip overwriting it */
3521                    if (d_is_utf8) {
3522                        SSize_t i;
3523                        d += UTF8SKIP(d);
3524                        for (i = range_min + 1; i <= range_max; i++) {
3525                            append_utf8_from_native_byte((U8) i, (U8 **) &d);
3526                        }
3527                    }
3528                    else {
3529                        SSize_t i;
3530                        d++;
3531                        assert(range_min + 1 <= range_max);
3532                        for (i = range_min + 1; i < range_max; i++) {
3533#ifdef EBCDIC
3534                            /* In this case on EBCDIC, we haven't calculated
3535                             * the variants.  Do it here, as we go along */
3536                            if (! UVCHR_IS_INVARIANT(i)) {
3537                                utf8_variant_count++;
3538                            }
3539#endif
3540                            *d++ = (char)i;
3541                        }
3542
3543                        /* The range_max is done outside the loop so as to
3544                         * avoid having to special case not incrementing
3545                         * 'utf8_variant_count' on EBCDIC (it's already been
3546                         * counted when originally parsed) */
3547                        *d++ = (char) range_max;
3548                    }
3549                }
3550
3551#ifdef EBCDIC
3552                /* If the original range extended above 255, add in that
3553                 * portion. */
3554                if (real_range_max) {
3555                    *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3556                    *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3557                    if (real_range_max > 0x100) {
3558                        if (real_range_max > 0x101) {
3559                            *d++ = (char) RANGE_INDICATOR;
3560                        }
3561                        d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3562                    }
3563                }
3564#endif
3565
3566              range_done:
3567                /* mark the range as done, and continue */
3568                didrange = TRUE;
3569                dorange = FALSE;
3570#ifdef EBCDIC
3571                non_portable_endpoint = 0;
3572                backslash_N = 0;
3573#endif
3574                continue;
3575            } /* End of is a range */
3576        } /* End of transliteration.  Joins main code after these else's */
3577        else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3578            char *s1 = s-1;
3579            int esc = 0;
3580            while (s1 >= start && *s1-- == '\\')
3581                esc = !esc;
3582            if (!esc)
3583                in_charclass = TRUE;
3584        }
3585        else if (*s == ']' && PL_lex_inpat && in_charclass) {
3586            char *s1 = s-1;
3587            int esc = 0;
3588            while (s1 >= start && *s1-- == '\\')
3589                esc = !esc;
3590            if (!esc)
3591                in_charclass = FALSE;
3592        }
3593            /* skip for regexp comments /(?#comment)/, except for the last
3594             * char, which will be done separately.  Stop on (?{..}) and
3595             * friends */
3596        else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3597            if (s[2] == '#') {
3598                if (s_is_utf8) {
3599                    PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3600
3601                    while (s + len < send && *s != ')') {
3602                        Copy(s, d, len, U8);
3603                        d += len;
3604                        s += len;
3605                        len = UTF8_SAFE_SKIP(s, send);
3606                    }
3607                }
3608                else while (s+1 < send && *s != ')') {
3609                    *d++ = *s++;
3610                }
3611            }
3612            else if (!PL_lex_casemods
3613                     && (    s[2] == '{' /* This should match regcomp.c */
3614                         || (s[2] == '?' && s[3] == '{')))
3615            {
3616                break;
3617            }
3618        }
3619            /* likewise skip #-initiated comments in //x patterns */
3620        else if (*s == '#'
3621                 && PL_lex_inpat
3622                 && !in_charclass
3623                 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3624        {
3625            while (s < send && *s != '\n')
3626                *d++ = *s++;
3627        }
3628            /* no further processing of single-quoted regex */
3629        else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3630            goto default_action;
3631
3632            /* check for embedded arrays
3633             * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3634             */
3635        else if (*s == '@' && s[1]) {
3636            if (UTF
3637               ? isIDFIRST_utf8_safe(s+1, send)
3638               : isWORDCHAR_A(s[1]))
3639            {
3640                break;
3641            }
3642            if (memCHRs(":'{$", s[1]))
3643                break;
3644            if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3645                break; /* in regexp, neither @+ nor @- are interpolated */
3646        }
3647            /* check for embedded scalars.  only stop if we're sure it's a
3648             * variable.  */
3649        else if (*s == '$') {
3650            if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
3651                break;
3652            if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3653                if (s[1] == '\\') {
3654                    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3655                                   "Possible unintended interpolation of $\\ in regex");
3656                }
3657                break;		/* in regexp, $ might be tail anchor */
3658            }
3659        }
3660
3661        /* End of else if chain - OP_TRANS rejoin rest */
3662
3663        if (UNLIKELY(s >= send)) {
3664            assert(s == send);
3665            break;
3666        }
3667
3668        /* backslashes */
3669        if (*s == '\\' && s+1 < send) {
3670            char* bslash = s;   /* point to beginning \ */
3671            char* rbrace;	/* point to ending '}' */
3672            char* e;	        /* 1 past the meat (non-blanks) before the
3673                                   brace */
3674            s++;
3675
3676            /* warn on \1 - \9 in substitution replacements, but note that \11
3677             * is an octal; and \19 is \1 followed by '9' */
3678            if (PL_lex_inwhat == OP_SUBST
3679                && !PL_lex_inpat
3680                && isDIGIT(*s)
3681                && *s != '0'
3682                && !isDIGIT(s[1]))
3683            {
3684                /* diag_listed_as: \%d better written as $%d */
3685                Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3686                s = bslash;
3687                *s = '$';
3688                break;
3689            }
3690
3691            /* string-change backslash escapes */
3692            if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3693                s = bslash;
3694                break;
3695            }
3696            /* In a pattern, process \N, but skip any other backslash escapes.
3697             * This is because we don't want to translate an escape sequence
3698             * into a meta symbol and have the regex compiler use the meta
3699             * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3700             * in spite of this, we do have to process \N here while the proper
3701             * charnames handler is in scope.  See bugs #56444 and #62056.
3702             *
3703             * There is a complication because \N in a pattern may also stand
3704             * for 'match a non-nl', and not mean a charname, in which case its
3705             * processing should be deferred to the regex compiler.  To be a
3706             * charname it must be followed immediately by a '{', and not look
3707             * like \N followed by a curly quantifier, i.e., not something like
3708             * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3709             * quantifier */
3710            else if (PL_lex_inpat
3711                    && (*s != 'N'
3712                        || s[1] != '{'
3713                        || regcurly(s + 1, send, NULL)))
3714            {
3715                *d++ = '\\';
3716                goto default_action;
3717            }
3718
3719            switch (*s) {
3720            default:
3721                {
3722                    if ((isALPHANUMERIC(*s)))
3723                        Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3724                                       "Unrecognized escape \\%c passed through",
3725                                       *s);
3726                    /* default action is to copy the quoted character */
3727                    goto default_action;
3728                }
3729
3730            /* eg. \132 indicates the octal constant 0132 */
3731            case '0': case '1': case '2': case '3':
3732            case '4': case '5': case '6': case '7':
3733                {
3734                    I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3735                              | PERL_SCAN_NOTIFY_ILLDIGIT;
3736                    STRLEN len = 3;
3737                    uv = grok_oct(s, &len, &flags, NULL);
3738                    s += len;
3739                    if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3740                        && s < send
3741                        && isDIGIT(*s)  /* like \08, \178 */
3742                        && ckWARN(WARN_MISC))
3743                    {
3744                        Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3745                            form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3746                    }
3747                }
3748                goto NUM_ESCAPE_INSERT;
3749
3750            /* eg. \o{24} indicates the octal constant \024 */
3751            case 'o':
3752                {
3753                    const char* error;
3754
3755                    if (! grok_bslash_o(&s, send,
3756                                               &uv, &error,
3757                                               NULL,
3758                                               FALSE, /* Not strict */
3759                                               FALSE, /* No illegal cp's */
3760                                               UTF))
3761                    {
3762                        yyerror(error);
3763                        uv = 0; /* drop through to ensure range ends are set */
3764                    }
3765                    goto NUM_ESCAPE_INSERT;
3766                }
3767
3768            /* eg. \x24 indicates the hex constant 0x24 */
3769            case 'x':
3770                {
3771                    const char* error;
3772
3773                    if (! grok_bslash_x(&s, send,
3774                                               &uv, &error,
3775                                               NULL,
3776                                               FALSE, /* Not strict */
3777                                               FALSE, /* No illegal cp's */
3778                                               UTF))
3779                    {
3780                        yyerror(error);
3781                        uv = 0; /* drop through to ensure range ends are set */
3782                    }
3783                }
3784
3785              NUM_ESCAPE_INSERT:
3786                /* Insert oct or hex escaped character. */
3787
3788                /* Here uv is the ordinal of the next character being added */
3789                if (UVCHR_IS_INVARIANT(uv)) {
3790                    *d++ = (char) uv;
3791                }
3792                else {
3793                    if (!d_is_utf8 && uv > 255) {
3794
3795                        /* Here, 'uv' won't fit unless we convert to UTF-8.
3796                         * If we've only seen invariants so far, all we have to
3797                         * do is turn on the flag */
3798                        if (utf8_variant_count == 0) {
3799                            SvUTF8_on(sv);
3800                        }
3801                        else {
3802                            SvCUR_set(sv, d - SvPVX_const(sv));
3803                            SvPOK_on(sv);
3804                            *d = '\0';
3805
3806                            sv_utf8_upgrade_flags_grow(
3807                                           sv,
3808                                           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3809
3810                                           /* Since we're having to grow here,
3811                                            * make sure we have enough room for
3812                                            * this escape and a NUL, so the
3813                                            * code immediately below won't have
3814                                            * to actually grow again */
3815                                          UVCHR_SKIP(uv)
3816                                        + (STRLEN)(send - s) + 1);
3817                            d = SvPVX(sv) + SvCUR(sv);
3818                        }
3819
3820                        has_above_latin1 = TRUE;
3821                        d_is_utf8 = TRUE;
3822                    }
3823
3824                    if (! d_is_utf8) {
3825                        *d++ = (char)uv;
3826                        utf8_variant_count++;
3827                    }
3828                    else {
3829                       /* Usually, there will already be enough room in 'sv'
3830                        * since such escapes are likely longer than any UTF-8
3831                        * sequence they can end up as.  This isn't the case on
3832                        * EBCDIC where \x{40000000} contains 12 bytes, and the
3833                        * UTF-8 for it contains 14.  And, we have to allow for
3834                        * a trailing NUL.  It probably can't happen on ASCII
3835                        * platforms, but be safe.  See Note on sizing above. */
3836                        const STRLEN needed = d - SvPVX(sv)
3837                                            + UVCHR_SKIP(uv)
3838                                            + (send - s)
3839                                            + 1;
3840                        if (UNLIKELY(needed > SvLEN(sv))) {
3841                            SvCUR_set(sv, d - SvPVX_const(sv));
3842                            d = SvCUR(sv) + SvGROW(sv, needed);
3843                        }
3844
3845                        d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3846                                                   (ckWARN(WARN_PORTABLE))
3847                                                   ? UNICODE_WARN_PERL_EXTENDED
3848                                                   : 0);
3849                    }
3850                }
3851#ifdef EBCDIC
3852                non_portable_endpoint++;
3853#endif
3854                continue;
3855
3856            case 'N':
3857                /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3858                 * named character, like \N{LATIN SMALL LETTER A}, or a named
3859                 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3860                 * GRAVE} (except y/// can't handle the latter, croaking).  For
3861                 * convenience all three forms are referred to as "named
3862                 * characters" below.
3863                 *
3864                 * For patterns, \N also can mean to match a non-newline.  Code
3865                 * before this 'switch' statement should already have handled
3866                 * this situation, and hence this code only has to deal with
3867                 * the named character cases.
3868                 *
3869                 * For non-patterns, the named characters are converted to
3870                 * their string equivalents.  In patterns, named characters are
3871                 * not converted to their ultimate forms for the same reasons
3872                 * that other escapes aren't (mainly that the ultimate
3873                 * character could be considered a meta-symbol by the regex
3874                 * compiler).  Instead, they are converted to the \N{U+...}
3875                 * form to get the value from the charnames that is in effect
3876                 * right now, while preserving the fact that it was a named
3877                 * character, so that the regex compiler knows this.
3878                 *
3879                 * The structure of this section of code (besides checking for
3880                 * errors and upgrading to utf8) is:
3881                 *    If the named character is of the form \N{U+...}, pass it
3882                 *      through if a pattern; otherwise convert the code point
3883                 *      to utf8
3884                 *    Otherwise must be some \N{NAME}: convert to
3885                 *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3886                 *
3887                 * Transliteration is an exception.  The conversion to utf8 is
3888                 * only done if the code point requires it to be representable.
3889                 *
3890                 * Here, 's' points to the 'N'; the test below is guaranteed to
3891                 * succeed if we are being called on a pattern, as we already
3892                 * know from a test above that the next character is a '{'.  A
3893                 * non-pattern \N must mean 'named character', which requires
3894                 * braces */
3895                s++;
3896                if (*s != '{') {
3897                    yyerror("Missing braces on \\N{}");
3898                    *d++ = '\0';
3899                    continue;
3900                }
3901                s++;
3902
3903                /* If there is no matching '}', it is an error. */
3904                if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3905                    if (! PL_lex_inpat) {
3906                        yyerror("Missing right brace on \\N{}");
3907                    } else {
3908                        yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3909                    }
3910                    yyquit(); /* Have exhausted the input. */
3911                }
3912
3913                /* Here it looks like a named character */
3914                while (s < rbrace && isBLANK(*s)) {
3915                    s++;
3916                }
3917
3918                e = rbrace;
3919                while (s < e && isBLANK(*(e - 1))) {
3920                    e--;
3921                }
3922
3923                if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3924                    s += 2;	    /* Skip to next char after the 'U+' */
3925                    if (PL_lex_inpat) {
3926
3927                        /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3928                        /* Check the syntax.  */
3929                        if (!isXDIGIT(*s)) {
3930                          bad_NU:
3931                            yyerror(
3932                                "Invalid hexadecimal number in \\N{U+...}"
3933                            );
3934                            s = rbrace + 1;
3935                            *d++ = '\0';
3936                            continue;
3937                        }
3938                        while (++s < e) {
3939                            if (isXDIGIT(*s))
3940                                continue;
3941                            else if ((*s == '.' || *s == '_')
3942                                  && isXDIGIT(s[1]))
3943                                continue;
3944                            goto bad_NU;
3945                        }
3946
3947                        /* Pass everything through unchanged.
3948                         * +1 is to include the '}' */
3949                        Copy(bslash, d, rbrace - bslash + 1, char);
3950                        d += rbrace - bslash + 1;
3951                    }
3952                    else {  /* Not a pattern: convert the hex to string */
3953                        I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3954                                  | PERL_SCAN_SILENT_ILLDIGIT
3955                                  | PERL_SCAN_SILENT_OVERFLOW
3956                                  | PERL_SCAN_DISALLOW_PREFIX;
3957                        STRLEN len = e - s;
3958
3959                        uv = grok_hex(s, &len, &flags, NULL);
3960                        if (len == 0 || (len != (STRLEN)(e - s)))
3961                            goto bad_NU;
3962
3963                        if (    uv > MAX_LEGAL_CP
3964                            || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3965                        {
3966                            yyerror(form_cp_too_large_msg(16, s, len, 0));
3967                            uv = 0; /* drop through to ensure range ends are
3968                                       set */
3969                        }
3970
3971                         /* For non-tr///, if the destination is not in utf8,
3972                          * unconditionally recode it to be so.  This is
3973                          * because \N{} implies Unicode semantics, and scalars
3974                          * have to be in utf8 to guarantee those semantics.
3975                          * tr/// doesn't care about Unicode rules, so no need
3976                          * there to upgrade to UTF-8 for small enough code
3977                          * points */
3978                        if (! d_is_utf8 && (   uv > 0xFF
3979                                           || PL_lex_inwhat != OP_TRANS))
3980                        {
3981                            /* See Note on sizing above.  */
3982                            const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
3983
3984                            SvCUR_set(sv, d - SvPVX_const(sv));
3985                            SvPOK_on(sv);
3986                            *d = '\0';
3987
3988                            if (utf8_variant_count == 0) {
3989                                SvUTF8_on(sv);
3990                                d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3991                            }
3992                            else {
3993                                sv_utf8_upgrade_flags_grow(
3994                                               sv,
3995                                               SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3996                                               extra);
3997                                d = SvPVX(sv) + SvCUR(sv);
3998                            }
3999
4000                            d_is_utf8 = TRUE;
4001                            has_above_latin1 = TRUE;
4002                        }
4003
4004                        /* Add the (Unicode) code point to the output. */
4005                        if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
4006                            *d++ = (char) LATIN1_TO_NATIVE(uv);
4007                        }
4008                        else {
4009                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4010                                                   (ckWARN(WARN_PORTABLE))
4011                                                   ? UNICODE_WARN_PERL_EXTENDED
4012                                                   : 0);
4013                        }
4014                    }
4015                }
4016                else     /* Here is \N{NAME} but not \N{U+...}. */
4017                     if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4018                {   /* Failed.  We should die eventually, but for now use a NUL
4019                       to keep parsing */
4020                    *d++ = '\0';
4021                }
4022                else {  /* Successfully evaluated the name */
4023                    STRLEN len;
4024                    const char *str = SvPV_const(res, len);
4025                    if (PL_lex_inpat) {
4026
4027                        if (! len) { /* The name resolved to an empty string */
4028                            const char empty_N[] = "\\N{_}";
4029                            Copy(empty_N, d, sizeof(empty_N) - 1, char);
4030                            d += sizeof(empty_N) - 1;
4031                        }
4032                        else {
4033                            /* In order to not lose information for the regex
4034                            * compiler, pass the result in the specially made
4035                            * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4036                            * the code points in hex of each character
4037                            * returned by charnames */
4038
4039                            const char *str_end = str + len;
4040                            const STRLEN off = d - SvPVX_const(sv);
4041
4042                            if (! SvUTF8(res)) {
4043                                /* For the non-UTF-8 case, we can determine the
4044                                 * exact length needed without having to parse
4045                                 * through the string.  Each character takes up
4046                                 * 2 hex digits plus either a trailing dot or
4047                                 * the "}" */
4048                                const char initial_text[] = "\\N{U+";
4049                                const STRLEN initial_len = sizeof(initial_text)
4050                                                           - 1;
4051                                d = off + SvGROW(sv, off
4052                                                    + 3 * len
4053
4054                                                    /* +1 for trailing NUL */
4055                                                    + initial_len + 1
4056
4057                                                    + (STRLEN)(send - rbrace));
4058                                Copy(initial_text, d, initial_len, char);
4059                                d += initial_len;
4060                                while (str < str_end) {
4061                                    char hex_string[4];
4062                                    int len =
4063                                        my_snprintf(hex_string,
4064                                                  sizeof(hex_string),
4065                                                  "%02X.",
4066
4067                                                  /* The regex compiler is
4068                                                   * expecting Unicode, not
4069                                                   * native */
4070                                                  NATIVE_TO_LATIN1(*str));
4071                                    PERL_MY_SNPRINTF_POST_GUARD(len,
4072                                                           sizeof(hex_string));
4073                                    Copy(hex_string, d, 3, char);
4074                                    d += 3;
4075                                    str++;
4076                                }
4077                                d--;    /* Below, we will overwrite the final
4078                                           dot with a right brace */
4079                            }
4080                            else {
4081                                STRLEN char_length; /* cur char's byte length */
4082
4083                                /* and the number of bytes after this is
4084                                 * translated into hex digits */
4085                                STRLEN output_length;
4086
4087                                /* 2 hex per byte; 2 chars for '\N'; 2 chars
4088                                 * for max('U+', '.'); and 1 for NUL */
4089                                char hex_string[2 * UTF8_MAXBYTES + 5];
4090
4091                                /* Get the first character of the result. */
4092                                U32 uv = utf8n_to_uvchr((U8 *) str,
4093                                                        len,
4094                                                        &char_length,
4095                                                        UTF8_ALLOW_ANYUV);
4096                                /* Convert first code point to Unicode hex,
4097                                 * including the boiler plate before it. */
4098                                output_length =
4099                                    my_snprintf(hex_string, sizeof(hex_string),
4100                                             "\\N{U+%X",
4101                                             (unsigned int) NATIVE_TO_UNI(uv));
4102
4103                                /* Make sure there is enough space to hold it */
4104                                d = off + SvGROW(sv, off
4105                                                    + output_length
4106                                                    + (STRLEN)(send - rbrace)
4107                                                    + 2);	/* '}' + NUL */
4108                                /* And output it */
4109                                Copy(hex_string, d, output_length, char);
4110                                d += output_length;
4111
4112                                /* For each subsequent character, append dot and
4113                                * its Unicode code point in hex */
4114                                while ((str += char_length) < str_end) {
4115                                    const STRLEN off = d - SvPVX_const(sv);
4116                                    U32 uv = utf8n_to_uvchr((U8 *) str,
4117                                                            str_end - str,
4118                                                            &char_length,
4119                                                            UTF8_ALLOW_ANYUV);
4120                                    output_length =
4121                                        my_snprintf(hex_string,
4122                                             sizeof(hex_string),
4123                                             ".%X",
4124                                             (unsigned int) NATIVE_TO_UNI(uv));
4125
4126                                    d = off + SvGROW(sv, off
4127                                                        + output_length
4128                                                        + (STRLEN)(send - rbrace)
4129                                                        + 2);	/* '}' +  NUL */
4130                                    Copy(hex_string, d, output_length, char);
4131                                    d += output_length;
4132                                }
4133                            }
4134
4135                            *d++ = '}';	/* Done.  Add the trailing brace */
4136                        }
4137                    }
4138                    else { /* Here, not in a pattern.  Convert the name to a
4139                            * string. */
4140
4141                        if (PL_lex_inwhat == OP_TRANS) {
4142                            str = SvPV_const(res, len);
4143                            if (len > ((SvUTF8(res))
4144                                       ? UTF8SKIP(str)
4145                                       : 1U))
4146                            {
4147                                yyerror(Perl_form(aTHX_
4148                                    "%.*s must not be a named sequence"
4149                                    " in transliteration operator",
4150                                        /*  +1 to include the "}" */
4151                                    (int) (rbrace + 1 - start), start));
4152                                *d++ = '\0';
4153                                goto end_backslash_N;
4154                            }
4155
4156                            if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4157                                has_above_latin1 = TRUE;
4158                            }
4159
4160                        }
4161                        else if (! SvUTF8(res)) {
4162                            /* Make sure \N{} return is UTF-8.  This is because
4163                             * \N{} implies Unicode semantics, and scalars have
4164                             * to be in utf8 to guarantee those semantics; but
4165                             * not needed in tr/// */
4166                            sv_utf8_upgrade_flags(res, 0);
4167                            str = SvPV_const(res, len);
4168                        }
4169
4170                         /* Upgrade destination to be utf8 if this new
4171                          * component is */
4172                        if (! d_is_utf8 && SvUTF8(res)) {
4173                            /* See Note on sizing above.  */
4174                            const STRLEN extra = len + (send - s) + 1;
4175
4176                            SvCUR_set(sv, d - SvPVX_const(sv));
4177                            SvPOK_on(sv);
4178                            *d = '\0';
4179
4180                            if (utf8_variant_count == 0) {
4181                                SvUTF8_on(sv);
4182                                d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4183                            }
4184                            else {
4185                                sv_utf8_upgrade_flags_grow(sv,
4186                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4187                                                extra);
4188                                d = SvPVX(sv) + SvCUR(sv);
4189                            }
4190                            d_is_utf8 = TRUE;
4191                        } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4192
4193                            /* See Note on sizing above.  (NOTE: SvCUR() is not
4194                             * set correctly here). */
4195                            const STRLEN extra = len + (send - rbrace) + 1;
4196                            const STRLEN off = d - SvPVX_const(sv);
4197                            d = off + SvGROW(sv, off + extra);
4198                        }
4199                        Copy(str, d, len, char);
4200                        d += len;
4201                    }
4202
4203                    SvREFCNT_dec(res);
4204
4205                } /* End \N{NAME} */
4206
4207              end_backslash_N:
4208#ifdef EBCDIC
4209                backslash_N++; /* \N{} is defined to be Unicode */
4210#endif
4211                s = rbrace + 1;  /* Point to just after the '}' */
4212                continue;
4213
4214            /* \c is a control character */
4215            case 'c':
4216                s++;
4217                if (s < send) {
4218                    const char * message;
4219
4220                    if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4221                        yyerror(message);
4222                        yyquit();   /* Have always immediately croaked on
4223                                       errors in this */
4224                    }
4225                    d++;
4226                }
4227                else {
4228                    yyerror("Missing control char name in \\c");
4229                    yyquit();   /* Are at end of input, no sense continuing */
4230                }
4231#ifdef EBCDIC
4232                non_portable_endpoint++;
4233#endif
4234                break;
4235
4236            /* printf-style backslashes, formfeeds, newlines, etc */
4237            case 'b':
4238                *d++ = '\b';
4239                break;
4240            case 'n':
4241                *d++ = '\n';
4242                break;
4243            case 'r':
4244                *d++ = '\r';
4245                break;
4246            case 'f':
4247                *d++ = '\f';
4248                break;
4249            case 't':
4250                *d++ = '\t';
4251                break;
4252            case 'e':
4253                *d++ = ESC_NATIVE;
4254                break;
4255            case 'a':
4256                *d++ = '\a';
4257                break;
4258            } /* end switch */
4259
4260            s++;
4261            continue;
4262        } /* end if (backslash) */
4263
4264    default_action:
4265        /* Just copy the input to the output, though we may have to convert
4266         * to/from UTF-8.
4267         *
4268         * If the input has the same representation in UTF-8 as not, it will be
4269         * a single byte, and we don't care about UTF8ness; just copy the byte */
4270        if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4271            *d++ = *s++;
4272        }
4273        else if (! s_is_utf8 && ! d_is_utf8) {
4274            /* If neither source nor output is UTF-8, is also a single byte,
4275             * just copy it; but this byte counts should we later have to
4276             * convert to UTF-8 */
4277            *d++ = *s++;
4278            utf8_variant_count++;
4279        }
4280        else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4281            const STRLEN len = UTF8SKIP(s);
4282
4283            /* We expect the source to have already been checked for
4284             * malformedness */
4285            assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4286
4287            Copy(s, d, len, U8);
4288            d += len;
4289            s += len;
4290        }
4291        else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4292            STRLEN need = send - s + 1; /* See Note on sizing above. */
4293
4294            SvCUR_set(sv, d - SvPVX_const(sv));
4295            SvPOK_on(sv);
4296            *d = '\0';
4297
4298            if (utf8_variant_count == 0) {
4299                SvUTF8_on(sv);
4300                d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4301            }
4302            else {
4303                sv_utf8_upgrade_flags_grow(sv,
4304                                           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4305                                           need);
4306                d = SvPVX(sv) + SvCUR(sv);
4307            }
4308            d_is_utf8 = TRUE;
4309            goto default_action; /* Redo, having upgraded so both are UTF-8 */
4310        }
4311        else {  /* UTF8ness matters: convert this non-UTF8 source char to
4312                   UTF-8 for output.  It will occupy 2 bytes, but don't include
4313                   the input byte since we haven't incremented 's' yet. See
4314                   Note on sizing above. */
4315            const STRLEN off = d - SvPVX(sv);
4316            const STRLEN extra = 2 + (send - s - 1) + 1;
4317            if (off + extra > SvLEN(sv)) {
4318                d = off + SvGROW(sv, off + extra);
4319            }
4320            *d++ = UTF8_EIGHT_BIT_HI(*s);
4321            *d++ = UTF8_EIGHT_BIT_LO(*s);
4322            s++;
4323        }
4324    } /* while loop to process each character */
4325
4326    {
4327        const STRLEN off = d - SvPVX(sv);
4328
4329        /* See if room for the terminating NUL */
4330        if (UNLIKELY(off >= SvLEN(sv))) {
4331
4332#ifndef DEBUGGING
4333
4334            if (off > SvLEN(sv))
4335#endif
4336                Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4337                        " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4338
4339            /* Whew!  Here we don't have room for the terminating NUL, but
4340             * everything else so far has fit.  It's not too late to grow
4341             * to fit the NUL and continue on.  But it is a bug, as the code
4342             * above was supposed to have made room for this, so under
4343             * DEBUGGING builds, we panic anyway.  */
4344            d = off + SvGROW(sv, off + 1);
4345        }
4346    }
4347
4348    /* terminate the string and set up the sv */
4349    *d = '\0';
4350    SvCUR_set(sv, d - SvPVX_const(sv));
4351
4352    SvPOK_on(sv);
4353    if (d_is_utf8) {
4354        SvUTF8_on(sv);
4355    }
4356
4357    /* shrink the sv if we allocated more than we used */
4358    if (SvCUR(sv) + 5 < SvLEN(sv)) {
4359        SvPV_shrink_to_cur(sv);
4360    }
4361
4362    /* return the substring (via pl_yylval) only if we parsed anything */
4363    if (s > start) {
4364        char *s2 = start;
4365        for (; s2 < s; s2++) {
4366            if (*s2 == '\n')
4367                COPLINE_INC_WITH_HERELINES;
4368        }
4369        SvREFCNT_inc_simple_void_NN(sv);
4370        if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4371            && ! PL_parser->lex_re_reparsing)
4372        {
4373            const char *const key = PL_lex_inpat ? "qr" : "q";
4374            const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4375            const char *type;
4376            STRLEN typelen;
4377
4378            if (PL_lex_inwhat == OP_TRANS) {
4379                type = "tr";
4380                typelen = 2;
4381            } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4382                type = "s";
4383                typelen = 1;
4384            } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4385                type = "q";
4386                typelen = 1;
4387            } else {
4388                type = "qq";
4389                typelen = 2;
4390            }
4391
4392            sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4393                                type, typelen, NULL);
4394        }
4395        pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4396    }
4397    LEAVE_with_name("scan_const");
4398    return s;
4399}
4400
4401/* S_intuit_more
4402 * Returns TRUE if there's more to the expression (e.g., a subscript),
4403 * FALSE otherwise.
4404 *
4405 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4406 *
4407 * ->[ and ->{ return TRUE
4408 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4409 * { and [ outside a pattern are always subscripts, so return TRUE
4410 * if we're outside a pattern and it's not { or [, then return FALSE
4411 * if we're in a pattern and the first char is a {
4412 *   {4,5} (any digits around the comma) returns FALSE
4413 * if we're in a pattern and the first char is a [
4414 *   [] returns FALSE
4415 *   [SOMETHING] has a funky algorithm to decide whether it's a
4416 *      character class or not.  It has to deal with things like
4417 *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4418 * anything else returns TRUE
4419 */
4420
4421/* This is the one truly awful dwimmer necessary to conflate C and sed. */
4422
4423STATIC int
4424S_intuit_more(pTHX_ char *s, char *e)
4425{
4426    PERL_ARGS_ASSERT_INTUIT_MORE;
4427
4428    if (PL_lex_brackets)
4429        return TRUE;
4430    if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4431        return TRUE;
4432    if (*s == '-' && s[1] == '>'
4433     && FEATURE_POSTDEREF_QQ_IS_ENABLED
4434     && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4435        ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4436        return TRUE;
4437    if (*s != '{' && *s != '[')
4438        return FALSE;
4439    PL_parser->sub_no_recover = TRUE;
4440    if (!PL_lex_inpat)
4441        return TRUE;
4442
4443    /* In a pattern, so maybe we have {n,m}. */
4444    if (*s == '{') {
4445        if (regcurly(s, e, NULL)) {
4446            return FALSE;
4447        }
4448        return TRUE;
4449    }
4450
4451    /* On the other hand, maybe we have a character class */
4452
4453    s++;
4454    if (*s == ']' || *s == '^')
4455        return FALSE;
4456    else {
4457        /* this is terrifying, and it works */
4458        int weight;
4459        char seen[256];
4460        const char * const send = (char *) memchr(s, ']', e - s);
4461        unsigned char un_char, last_un_char;
4462        char tmpbuf[sizeof PL_tokenbuf * 4];
4463
4464        if (!send)		/* has to be an expression */
4465            return TRUE;
4466        weight = 2;		/* let's weigh the evidence */
4467
4468        if (*s == '$')
4469            weight -= 3;
4470        else if (isDIGIT(*s)) {
4471            if (s[1] != ']') {
4472                if (isDIGIT(s[1]) && s[2] == ']')
4473                    weight -= 10;
4474            }
4475            else
4476                weight -= 100;
4477        }
4478        Zero(seen,256,char);
4479        un_char = 255;
4480        for (; s < send; s++) {
4481            last_un_char = un_char;
4482            un_char = (unsigned char)*s;
4483            switch (*s) {
4484            case '@':
4485            case '&':
4486            case '$':
4487                weight -= seen[un_char] * 10;
4488                if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4489                    int len;
4490                    scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4491                    len = (int)strlen(tmpbuf);
4492                    if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4493                                                    UTF ? SVf_UTF8 : 0, SVt_PV))
4494                        weight -= 100;
4495                    else
4496                        weight -= 10;
4497                }
4498                else if (*s == '$'
4499                         && s[1]
4500                         && memCHRs("[#!%*<>()-=",s[1]))
4501                {
4502                    if (/*{*/ memCHRs("])} =",s[2]))
4503                        weight -= 10;
4504                    else
4505                        weight -= 1;
4506                }
4507                break;
4508            case '\\':
4509                un_char = 254;
4510                if (s[1]) {
4511                    if (memCHRs("wds]",s[1]))
4512                        weight += 100;
4513                    else if (seen[(U8)'\''] || seen[(U8)'"'])
4514                        weight += 1;
4515                    else if (memCHRs("rnftbxcav",s[1]))
4516                        weight += 40;
4517                    else if (isDIGIT(s[1])) {
4518                        weight += 40;
4519                        while (s[1] && isDIGIT(s[1]))
4520                            s++;
4521                    }
4522                }
4523                else
4524                    weight += 100;
4525                break;
4526            case '-':
4527                if (s[1] == '\\')
4528                    weight += 50;
4529                if (memCHRs("aA01! ",last_un_char))
4530                    weight += 30;
4531                if (memCHRs("zZ79~",s[1]))
4532                    weight += 30;
4533                if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4534                    weight -= 5;	/* cope with negative subscript */
4535                break;
4536            default:
4537                if (!isWORDCHAR(last_un_char)
4538                    && !(last_un_char == '$' || last_un_char == '@'
4539                         || last_un_char == '&')
4540                    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4541                    char *d = s;
4542                    while (isALPHA(*s))
4543                        s++;
4544                    if (keyword(d, s - d, 0))
4545                        weight -= 150;
4546                }
4547                if (un_char == last_un_char + 1)
4548                    weight += 5;
4549                weight -= seen[un_char];
4550                break;
4551            }
4552            seen[un_char]++;
4553        }
4554        if (weight >= 0)	/* probably a character class */
4555            return FALSE;
4556    }
4557
4558    return TRUE;
4559}
4560
4561/*
4562 * S_intuit_method
4563 *
4564 * Does all the checking to disambiguate
4565 *   foo bar
4566 * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4567 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4568 *
4569 * First argument is the stuff after the first token, e.g. "bar".
4570 *
4571 * Not a method if foo is a filehandle.
4572 * Not a method if foo is a subroutine prototyped to take a filehandle.
4573 * Not a method if it's really "Foo $bar"
4574 * Method if it's "foo $bar"
4575 * Not a method if it's really "print foo $bar"
4576 * Method if it's really "foo package::" (interpreted as package->foo)
4577 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4578 * Not a method if bar is a filehandle or package, but is quoted with
4579 *   =>
4580 */
4581
4582STATIC int
4583S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4584{
4585    char *s = start + (*start == '$');
4586    char tmpbuf[sizeof PL_tokenbuf];
4587    STRLEN len;
4588    GV* indirgv;
4589        /* Mustn't actually add anything to a symbol table.
4590           But also don't want to "initialise" any placeholder
4591           constants that might already be there into full
4592           blown PVGVs with attached PVCV.  */
4593    GV * const gv =
4594        ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4595
4596    PERL_ARGS_ASSERT_INTUIT_METHOD;
4597
4598    if (!FEATURE_INDIRECT_IS_ENABLED)
4599        return 0;
4600
4601    if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4602            return 0;
4603    if (cv && SvPOK(cv)) {
4604        const char *proto = CvPROTO(cv);
4605        if (proto) {
4606            while (*proto && (isSPACE(*proto) || *proto == ';'))
4607                proto++;
4608            if (*proto == '*')
4609                return 0;
4610        }
4611    }
4612
4613    if (*start == '$') {
4614        SSize_t start_off = start - SvPVX(PL_linestr);
4615        if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4616            || isUPPER(*PL_tokenbuf))
4617            return 0;
4618        /* this could be $# */
4619        if (isSPACE(*s))
4620            s = skipspace(s);
4621        PL_bufptr = SvPVX(PL_linestr) + start_off;
4622        PL_expect = XREF;
4623        return *s == '(' ? FUNCMETH : METHOD;
4624    }
4625
4626    s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4627    /* start is the beginning of the possible filehandle/object,
4628     * and s is the end of it
4629     * tmpbuf is a copy of it (but with single quotes as double colons)
4630     */
4631
4632    if (!keyword(tmpbuf, len, 0)) {
4633        if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4634            len -= 2;
4635            tmpbuf[len] = '\0';
4636            goto bare_package;
4637        }
4638        indirgv = gv_fetchpvn_flags(tmpbuf, len,
4639                                    GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4640                                    SVt_PVCV);
4641        if (indirgv && SvTYPE(indirgv) != SVt_NULL
4642         && (!isGV(indirgv) || GvCVu(indirgv)))
4643            return 0;
4644        /* filehandle or package name makes it a method */
4645        if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4646            s = skipspace(s);
4647            if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4648                return 0;	/* no assumptions -- "=>" quotes bareword */
4649      bare_package:
4650            NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4651                                                  S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4652            NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4653            PL_expect = XTERM;
4654            force_next(BAREWORD);
4655            PL_bufptr = s;
4656            return *s == '(' ? FUNCMETH : METHOD;
4657        }
4658    }
4659    return 0;
4660}
4661
4662/* Encoded script support. filter_add() effectively inserts a
4663 * 'pre-processing' function into the current source input stream.
4664 * Note that the filter function only applies to the current source file
4665 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4666 *
4667 * The datasv parameter (which may be NULL) can be used to pass
4668 * private data to this instance of the filter. The filter function
4669 * can recover the SV using the FILTER_DATA macro and use it to
4670 * store private buffers and state information.
4671 *
4672 * The supplied datasv parameter is upgraded to a PVIO type
4673 * and the IoDIRP/IoANY field is used to store the function pointer,
4674 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4675 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4676 * private use must be set using malloc'd pointers.
4677 */
4678
4679SV *
4680Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4681{
4682    if (!funcp)
4683        return NULL;
4684
4685    if (!PL_parser)
4686        return NULL;
4687
4688    if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4689        Perl_croak(aTHX_ "Source filters apply only to byte streams");
4690
4691    if (!PL_rsfp_filters)
4692        PL_rsfp_filters = newAV();
4693    if (!datasv)
4694        datasv = newSV(0);
4695    SvUPGRADE(datasv, SVt_PVIO);
4696    IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4697    IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4698    DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4699                          FPTR2DPTR(void *, IoANY(datasv)),
4700                          SvPV_nolen(datasv)));
4701    av_unshift(PL_rsfp_filters, 1);
4702    av_store(PL_rsfp_filters, 0, datasv) ;
4703    if (
4704        !PL_parser->filtered
4705     && PL_parser->lex_flags & LEX_EVALBYTES
4706     && PL_bufptr < PL_bufend
4707    ) {
4708        const char *s = PL_bufptr;
4709        while (s < PL_bufend) {
4710            if (*s == '\n') {
4711                SV *linestr = PL_parser->linestr;
4712                char *buf = SvPVX(linestr);
4713                STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4714                STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4715                STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4716                STRLEN const linestart_pos = PL_parser->linestart - buf;
4717                STRLEN const last_uni_pos =
4718                    PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4719                STRLEN const last_lop_pos =
4720                    PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4721                av_push(PL_rsfp_filters, linestr);
4722                PL_parser->linestr =
4723                    newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4724                buf = SvPVX(PL_parser->linestr);
4725                PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4726                PL_parser->bufptr = buf + bufptr_pos;
4727                PL_parser->oldbufptr = buf + oldbufptr_pos;
4728                PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4729                PL_parser->linestart = buf + linestart_pos;
4730                if (PL_parser->last_uni)
4731                    PL_parser->last_uni = buf + last_uni_pos;
4732                if (PL_parser->last_lop)
4733                    PL_parser->last_lop = buf + last_lop_pos;
4734                SvLEN_set(linestr, SvCUR(linestr));
4735                SvCUR_set(linestr, s - SvPVX(linestr));
4736                PL_parser->filtered = 1;
4737                break;
4738            }
4739            s++;
4740        }
4741    }
4742    return(datasv);
4743}
4744
4745/*
4746=for apidoc_section $filters
4747=for apidoc filter_del
4748
4749Delete most recently added instance of the filter function argument
4750
4751=cut
4752*/
4753
4754void
4755Perl_filter_del(pTHX_ filter_t funcp)
4756{
4757    SV *datasv;
4758
4759    PERL_ARGS_ASSERT_FILTER_DEL;
4760
4761#ifdef DEBUGGING
4762    DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4763                          FPTR2DPTR(void*, funcp)));
4764#endif
4765    if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4766        return;
4767    /* if filter is on top of stack (usual case) just pop it off */
4768    datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4769    if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4770        sv_free(av_pop(PL_rsfp_filters));
4771
4772        return;
4773    }
4774    /* we need to search for the correct entry and clear it	*/
4775    Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4776}
4777
4778
4779/* Invoke the idxth filter function for the current rsfp.	 */
4780/* maxlen 0 = read one text line */
4781I32
4782Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4783{
4784    filter_t funcp;
4785    I32 ret;
4786    SV *datasv = NULL;
4787    /* This API is bad. It should have been using unsigned int for maxlen.
4788       Not sure if we want to change the API, but if not we should sanity
4789       check the value here.  */
4790    unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4791
4792    PERL_ARGS_ASSERT_FILTER_READ;
4793
4794    if (!PL_parser || !PL_rsfp_filters)
4795        return -1;
4796    if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?	*/
4797        /* Provide a default input filter to make life easy.	*/
4798        /* Note that we append to the line. This is handy.	*/
4799        DEBUG_P(PerlIO_printf(Perl_debug_log,
4800                              "filter_read %d: from rsfp\n", idx));
4801        if (correct_length) {
4802            /* Want a block */
4803            int len ;
4804            const int old_len = SvCUR(buf_sv);
4805
4806            /* ensure buf_sv is large enough */
4807            SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4808            if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4809                                   correct_length)) <= 0) {
4810                if (PerlIO_error(PL_rsfp))
4811                    return -1;		/* error */
4812                else
4813                    return 0 ;		/* end of file */
4814            }
4815            SvCUR_set(buf_sv, old_len + len) ;
4816            SvPVX(buf_sv)[old_len + len] = '\0';
4817        } else {
4818            /* Want a line */
4819            if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4820                if (PerlIO_error(PL_rsfp))
4821                    return -1;		/* error */
4822                else
4823                    return 0 ;		/* end of file */
4824            }
4825        }
4826        return SvCUR(buf_sv);
4827    }
4828    /* Skip this filter slot if filter has been deleted	*/
4829    if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4830        DEBUG_P(PerlIO_printf(Perl_debug_log,
4831                              "filter_read %d: skipped (filter deleted)\n",
4832                              idx));
4833        return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4834    }
4835    if (SvTYPE(datasv) != SVt_PVIO) {
4836        if (correct_length) {
4837            /* Want a block */
4838            const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4839            if (!remainder) return 0; /* eof */
4840            if (correct_length > remainder) correct_length = remainder;
4841            sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4842            SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4843        } else {
4844            /* Want a line */
4845            const char *s = SvEND(datasv);
4846            const char *send = SvPVX(datasv) + SvLEN(datasv);
4847            while (s < send) {
4848                if (*s == '\n') {
4849                    s++;
4850                    break;
4851                }
4852                s++;
4853            }
4854            if (s == send) return 0; /* eof */
4855            sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4856            SvCUR_set(datasv, s-SvPVX(datasv));
4857        }
4858        return SvCUR(buf_sv);
4859    }
4860    /* Get function pointer hidden within datasv	*/
4861    funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4862    DEBUG_P(PerlIO_printf(Perl_debug_log,
4863                          "filter_read %d: via function %p (%s)\n",
4864                          idx, (void*)datasv, SvPV_nolen_const(datasv)));
4865    /* Call function. The function is expected to 	*/
4866    /* call "FILTER_READ(idx+1, buf_sv)" first.		*/
4867    /* Return: <0:error, =0:eof, >0:not eof 		*/
4868    ENTER;
4869    save_scalar(PL_errgv);
4870    ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4871    LEAVE;
4872    return ret;
4873}
4874
4875STATIC char *
4876S_filter_gets(pTHX_ SV *sv, STRLEN append)
4877{
4878    PERL_ARGS_ASSERT_FILTER_GETS;
4879
4880#ifdef PERL_CR_FILTER
4881    if (!PL_rsfp_filters) {
4882        filter_add(S_cr_textfilter,NULL);
4883    }
4884#endif
4885    if (PL_rsfp_filters) {
4886        if (!append)
4887            SvCUR_set(sv, 0);	/* start with empty line	*/
4888        if (FILTER_READ(0, sv, 0) > 0)
4889            return ( SvPVX(sv) ) ;
4890        else
4891            return NULL ;
4892    }
4893    else
4894        return (sv_gets(sv, PL_rsfp, append));
4895}
4896
4897STATIC HV *
4898S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4899{
4900    GV *gv;
4901
4902    PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4903
4904    if (memEQs(pkgname, len, "__PACKAGE__"))
4905        return PL_curstash;
4906
4907    if (len > 2
4908        && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4909        && (gv = gv_fetchpvn_flags(pkgname,
4910                                   len,
4911                                   ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4912    {
4913        return GvHV(gv);			/* Foo:: */
4914    }
4915
4916    /* use constant CLASS => 'MyClass' */
4917    gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4918    if (gv && GvCV(gv)) {
4919        SV * const sv = cv_const_sv(GvCV(gv));
4920        if (sv)
4921            return gv_stashsv(sv, 0);
4922    }
4923
4924    return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4925}
4926
4927
4928STATIC char *
4929S_tokenize_use(pTHX_ int is_use, char *s) {
4930    PERL_ARGS_ASSERT_TOKENIZE_USE;
4931
4932    if (PL_expect != XSTATE)
4933        /* diag_listed_as: "use" not allowed in expression */
4934        yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4935                    is_use ? "use" : "no"));
4936    PL_expect = XTERM;
4937    s = skipspace(s);
4938    if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4939        s = force_version(s, TRUE);
4940        if (*s == ';' || *s == '}'
4941                || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4942            NEXTVAL_NEXTTOKE.opval = NULL;
4943            force_next(BAREWORD);
4944        }
4945        else if (*s == 'v') {
4946            s = force_word(s,BAREWORD,FALSE,TRUE);
4947            s = force_version(s, FALSE);
4948        }
4949    }
4950    else {
4951        s = force_word(s,BAREWORD,FALSE,TRUE);
4952        s = force_version(s, FALSE);
4953    }
4954    pl_yylval.ival = is_use;
4955    return s;
4956}
4957#ifdef DEBUGGING
4958    static const char* const exp_name[] =
4959        { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4960          "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4961          "SIGVAR", "TERMORDORDOR"
4962        };
4963#endif
4964
4965#define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4966STATIC bool
4967S_word_takes_any_delimiter(char *p, STRLEN len)
4968{
4969    return (len == 1 && memCHRs("msyq", p[0]))
4970            || (len == 2
4971                && ((p[0] == 't' && p[1] == 'r')
4972                    || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4973}
4974
4975static void
4976S_check_scalar_slice(pTHX_ char *s)
4977{
4978    s++;
4979    while (SPACE_OR_TAB(*s)) s++;
4980    if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4981                                                             PL_bufend,
4982                                                             UTF))
4983    {
4984        return;
4985    }
4986    while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4987           || (*s && memCHRs(" \t$#+-'\"", *s)))
4988    {
4989        s += UTF ? UTF8SKIP(s) : 1;
4990    }
4991    if (*s == '}' || *s == ']')
4992        pl_yylval.ival = OPpSLICEWARNING;
4993}
4994
4995#define lex_token_boundary() S_lex_token_boundary(aTHX)
4996static void
4997S_lex_token_boundary(pTHX)
4998{
4999    PL_oldoldbufptr = PL_oldbufptr;
5000    PL_oldbufptr = PL_bufptr;
5001}
5002
5003#define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5004static char *
5005S_vcs_conflict_marker(pTHX_ char *s)
5006{
5007    lex_token_boundary();
5008    PL_bufptr = s;
5009    yyerror("Version control conflict marker");
5010    while (s < PL_bufend && *s != '\n')
5011        s++;
5012    return s;
5013}
5014
5015static int
5016yyl_sigvar(pTHX_ char *s)
5017{
5018    /* we expect the sigil and optional var name part of a
5019     * signature element here. Since a '$' is not necessarily
5020     * followed by a var name, handle it specially here; the general
5021     * yylex code would otherwise try to interpret whatever follows
5022     * as a var; e.g. ($, ...) would be seen as the var '$,'
5023     */
5024
5025    U8 sigil;
5026
5027    s = skipspace(s);
5028    sigil = *s++;
5029    PL_bufptr = s; /* for error reporting */
5030    switch (sigil) {
5031    case '$':
5032    case '@':
5033    case '%':
5034        /* spot stuff that looks like an prototype */
5035        if (memCHRs("$:@%&*;\\[]", *s)) {
5036            yyerror("Illegal character following sigil in a subroutine signature");
5037            break;
5038        }
5039        /* '$#' is banned, while '$ # comment' isn't */
5040        if (*s == '#') {
5041            yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5042            break;
5043        }
5044        s = skipspace(s);
5045        if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5046            char *dest = PL_tokenbuf + 1;
5047            /* read var name, including sigil, into PL_tokenbuf */
5048            PL_tokenbuf[0] = sigil;
5049            parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5050                0, cBOOL(UTF), FALSE, FALSE);
5051            *dest = '\0';
5052            assert(PL_tokenbuf[1]); /* we have a variable name */
5053        }
5054        else {
5055            *PL_tokenbuf = 0;
5056            PL_in_my = 0;
5057        }
5058
5059        s = skipspace(s);
5060        /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5061         * as the ASSIGNOP, and exclude other tokens that start with =
5062         */
5063        if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5064            /* save now to report with the same context as we did when
5065             * all ASSIGNOPS were accepted */
5066            PL_oldbufptr = s;
5067
5068            ++s;
5069            NEXTVAL_NEXTTOKE.ival = 0;
5070            force_next(ASSIGNOP);
5071            PL_expect = XTERM;
5072        }
5073        else if (*s == ',' || *s == ')') {
5074            PL_expect = XOPERATOR;
5075        }
5076        else {
5077            /* make sure the context shows the unexpected character and
5078             * hopefully a bit more */
5079            if (*s) ++s;
5080            while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5081                s++;
5082            PL_bufptr = s; /* for error reporting */
5083            yyerror("Illegal operator following parameter in a subroutine signature");
5084            PL_in_my = 0;
5085        }
5086        if (*PL_tokenbuf) {
5087            NEXTVAL_NEXTTOKE.ival = sigil;
5088            force_next('p'); /* force a signature pending identifier */
5089        }
5090        break;
5091
5092    case ')':
5093        PL_expect = XBLOCK;
5094        break;
5095    case ',': /* handle ($a,,$b) */
5096        break;
5097
5098    default:
5099        PL_in_my = 0;
5100        yyerror("A signature parameter must start with '$', '@' or '%'");
5101        /* very crude error recovery: skip to likely next signature
5102         * element */
5103        while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5104            s++;
5105        break;
5106    }
5107
5108    switch (sigil) {
5109        case ',': TOKEN (PERLY_COMMA);
5110        case '$': TOKEN (PERLY_DOLLAR);
5111        case '@': TOKEN (PERLY_SNAIL);
5112        case '%': TOKEN (PERLY_PERCENT_SIGN);
5113        case ')': TOKEN (PERLY_PAREN_CLOSE);
5114        default:  TOKEN (sigil);
5115    }
5116}
5117
5118static int
5119yyl_dollar(pTHX_ char *s)
5120{
5121    CLINE;
5122
5123    if (PL_expect == XPOSTDEREF) {
5124        if (s[1] == '#') {
5125            s++;
5126            POSTDEREF(DOLSHARP);
5127        }
5128        POSTDEREF(PERLY_DOLLAR);
5129    }
5130
5131    if (   s[1] == '#'
5132        && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5133            || memCHRs("{$:+-@", s[2])))
5134    {
5135        PL_tokenbuf[0] = '@';
5136        s = scan_ident(s + 1, PL_tokenbuf + 1,
5137                       sizeof PL_tokenbuf - 1, FALSE);
5138        if (PL_expect == XOPERATOR) {
5139            char *d = s;
5140            if (PL_bufptr > s) {
5141                d = PL_bufptr-1;
5142                PL_bufptr = PL_oldbufptr;
5143            }
5144            no_op("Array length", d);
5145        }
5146        if (!PL_tokenbuf[1])
5147            PREREF(DOLSHARP);
5148        PL_expect = XOPERATOR;
5149        force_ident_maybe_lex('#');
5150        TOKEN(DOLSHARP);
5151    }
5152
5153    PL_tokenbuf[0] = '$';
5154    s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5155    if (PL_expect == XOPERATOR) {
5156        char *d = s;
5157        if (PL_bufptr > s) {
5158            d = PL_bufptr-1;
5159            PL_bufptr = PL_oldbufptr;
5160        }
5161        no_op("Scalar", d);
5162    }
5163    if (!PL_tokenbuf[1]) {
5164        if (s == PL_bufend)
5165            yyerror("Final $ should be \\$ or $name");
5166        PREREF(PERLY_DOLLAR);
5167    }
5168
5169    {
5170        const char tmp = *s;
5171        if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5172            s = skipspace(s);
5173
5174        if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5175            && intuit_more(s, PL_bufend)) {
5176            if (*s == '[') {
5177                PL_tokenbuf[0] = '@';
5178                if (ckWARN(WARN_SYNTAX)) {
5179                    char *t = s+1;
5180
5181                    while ( t < PL_bufend ) {
5182                        if (isSPACE(*t)) {
5183                            do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5184                            /* consumed one or more space chars */
5185                        } else if (*t == '$' || *t == '@') {
5186                            /* could be more than one '$' like $$ref or @$ref */
5187                            do { t++; } while (t < PL_bufend && *t == '$');
5188
5189                            /* could be an abigail style identifier like $ foo */
5190                            while (t < PL_bufend && *t == ' ') t++;
5191
5192                            /* strip off the name of the var */
5193                            while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5194                                t += UTF ? UTF8SKIP(t) : 1;
5195                            /* consumed a varname */
5196                        } else if (isDIGIT(*t)) {
5197                            /* deal with hex constants like 0x11 */
5198                            if (t[0] == '0' && t[1] == 'x') {
5199                                t += 2;
5200                                while (t < PL_bufend && isXDIGIT(*t)) t++;
5201                            } else {
5202                                /* deal with decimal/octal constants like 1 and 0123 */
5203                                do { t++; } while (isDIGIT(*t));
5204                                if (t<PL_bufend && *t == '.') {
5205                                    do { t++; } while (isDIGIT(*t));
5206                                }
5207                            }
5208                            /* consumed a number */
5209                        } else {
5210                            /* not a var nor a space nor a number */
5211                            break;
5212                        }
5213                    }
5214                    if (t < PL_bufend && *t++ == ',') {
5215                        PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5216                        while (t < PL_bufend && *t != ']')
5217                            t++;
5218                        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5219                                    "Multidimensional syntax %" UTF8f " not supported",
5220                                    UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5221                    }
5222                }
5223            }
5224            else if (*s == '{') {
5225                char *t;
5226                PL_tokenbuf[0] = '%';
5227                if (    strEQ(PL_tokenbuf+1, "SIG")
5228                    && ckWARN(WARN_SYNTAX)
5229                    && (t = (char *) memchr(s, '}', PL_bufend - s))
5230                    && (t = (char *) memchr(t, '=', PL_bufend - t)))
5231                {
5232                    char tmpbuf[sizeof PL_tokenbuf];
5233                    do {
5234                        t++;
5235                    } while (isSPACE(*t));
5236                    if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5237                        STRLEN len;
5238                        t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5239                                        &len);
5240                        while (isSPACE(*t))
5241                            t++;
5242                        if (  *t == ';'
5243                            && get_cvn_flags(tmpbuf, len, UTF
5244                                                            ? SVf_UTF8
5245                                                            : 0))
5246                        {
5247                            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5248                                "You need to quote \"%" UTF8f "\"",
5249                                    UTF8fARG(UTF, len, tmpbuf));
5250                        }
5251                    }
5252                }
5253            }
5254        }
5255
5256        PL_expect = XOPERATOR;
5257        if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5258            const bool islop = (PL_last_lop == PL_oldoldbufptr);
5259            if (!islop || PL_last_lop_op == OP_GREPSTART)
5260                PL_expect = XOPERATOR;
5261            else if (memCHRs("$@\"'`q", *s))
5262                PL_expect = XTERM;		/* e.g. print $fh "foo" */
5263            else if (   memCHRs("&*<%", *s)
5264                     && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5265            {
5266                PL_expect = XTERM;		/* e.g. print $fh &sub */
5267            }
5268            else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5269                char tmpbuf[sizeof PL_tokenbuf];
5270                int t2;
5271                STRLEN len;
5272                scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5273                if ((t2 = keyword(tmpbuf, len, 0))) {
5274                    /* binary operators exclude handle interpretations */
5275                    switch (t2) {
5276                    case -KEY_x:
5277                    case -KEY_eq:
5278                    case -KEY_ne:
5279                    case -KEY_gt:
5280                    case -KEY_lt:
5281                    case -KEY_ge:
5282                    case -KEY_le:
5283                    case -KEY_cmp:
5284                        break;
5285                    default:
5286                        PL_expect = XTERM;	/* e.g. print $fh length() */
5287                        break;
5288                    }
5289                }
5290                else {
5291                    PL_expect = XTERM;	/* e.g. print $fh subr() */
5292                }
5293            }
5294            else if (isDIGIT(*s))
5295                PL_expect = XTERM;		/* e.g. print $fh 3 */
5296            else if (*s == '.' && isDIGIT(s[1]))
5297                PL_expect = XTERM;		/* e.g. print $fh .3 */
5298            else if ((*s == '?' || *s == '-' || *s == '+')
5299                && !isSPACE(s[1]) && s[1] != '=')
5300                PL_expect = XTERM;		/* e.g. print $fh -1 */
5301            else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5302                     && s[1] != '/')
5303                PL_expect = XTERM;		/* e.g. print $fh /.../
5304                                               XXX except DORDOR operator
5305                                            */
5306            else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5307                     && s[2] != '=')
5308                PL_expect = XTERM;		/* print $fh <<"EOF" */
5309        }
5310    }
5311    force_ident_maybe_lex('$');
5312    TOKEN(PERLY_DOLLAR);
5313}
5314
5315static int
5316yyl_sub(pTHX_ char *s, const int key)
5317{
5318    char * const tmpbuf = PL_tokenbuf + 1;
5319    bool have_name, have_proto;
5320    STRLEN len;
5321    SV *format_name = NULL;
5322    bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5323
5324    SSize_t off = s-SvPVX(PL_linestr);
5325    char *d;
5326
5327    s = skipspace(s); /* can move PL_linestr */
5328
5329    d = SvPVX(PL_linestr)+off;
5330
5331    SAVEBOOL(PL_parser->sig_seen);
5332    PL_parser->sig_seen = FALSE;
5333
5334    if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5335        || *s == '\''
5336        || (*s == ':' && s[1] == ':'))
5337    {
5338
5339        PL_expect = XATTRBLOCK;
5340        d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5341                      &len);
5342        if (key == KEY_format)
5343            format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5344        *PL_tokenbuf = '&';
5345        if (memchr(tmpbuf, ':', len) || key != KEY_sub
5346         || pad_findmy_pvn(
5347                PL_tokenbuf, len + 1, 0
5348            ) != NOT_IN_PAD)
5349            sv_setpvn(PL_subname, tmpbuf, len);
5350        else {
5351            sv_setsv(PL_subname,PL_curstname);
5352            sv_catpvs(PL_subname,"::");
5353            sv_catpvn(PL_subname,tmpbuf,len);
5354        }
5355        if (SvUTF8(PL_linestr))
5356            SvUTF8_on(PL_subname);
5357        have_name = TRUE;
5358
5359        s = skipspace(d);
5360    }
5361    else {
5362        if (key == KEY_my || key == KEY_our || key==KEY_state) {
5363            *d = '\0';
5364            /* diag_listed_as: Missing name in "%s sub" */
5365            Perl_croak(aTHX_
5366                      "Missing name in \"%s\"", PL_bufptr);
5367        }
5368        PL_expect = XATTRTERM;
5369        sv_setpvs(PL_subname,"?");
5370        have_name = FALSE;
5371    }
5372
5373    if (key == KEY_format) {
5374        if (format_name) {
5375            NEXTVAL_NEXTTOKE.opval
5376                = newSVOP(OP_CONST,0, format_name);
5377            NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5378            force_next(BAREWORD);
5379        }
5380        PREBLOCK(FORMAT);
5381    }
5382
5383    /* Look for a prototype */
5384    if (*s == '(' && !is_sigsub) {
5385        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5386        if (!s)
5387            Perl_croak(aTHX_ "Prototype not terminated");
5388        COPLINE_SET_FROM_MULTI_END;
5389        (void)validate_proto(PL_subname, PL_lex_stuff,
5390                             ckWARN(WARN_ILLEGALPROTO), 0);
5391        have_proto = TRUE;
5392
5393        s = skipspace(s);
5394    }
5395    else
5396        have_proto = FALSE;
5397
5398    if (  !(*s == ':' && s[1] != ':')
5399        && (*s != '{' && *s != '(') && key != KEY_format)
5400    {
5401        assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5402               key == KEY_DESTROY || key == KEY_BEGIN ||
5403               key == KEY_UNITCHECK || key == KEY_CHECK ||
5404               key == KEY_INIT || key == KEY_END ||
5405               key == KEY_my || key == KEY_state ||
5406               key == KEY_our);
5407        if (!have_name)
5408            Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5409        else if (*s != ';' && *s != '}')
5410            Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5411    }
5412
5413    if (have_proto) {
5414        NEXTVAL_NEXTTOKE.opval =
5415            newSVOP(OP_CONST, 0, PL_lex_stuff);
5416        PL_lex_stuff = NULL;
5417        force_next(THING);
5418    }
5419    if (!have_name) {
5420        if (PL_curstash)
5421            sv_setpvs(PL_subname, "__ANON__");
5422        else
5423            sv_setpvs(PL_subname, "__ANON__::__ANON__");
5424        if (is_sigsub)
5425            TOKEN(ANON_SIGSUB);
5426        else
5427            TOKEN(ANONSUB);
5428    }
5429    force_ident_maybe_lex('&');
5430    if (is_sigsub)
5431        TOKEN(SIGSUB);
5432    else
5433        TOKEN(SUB);
5434}
5435
5436static int
5437yyl_interpcasemod(pTHX_ char *s)
5438{
5439#ifdef DEBUGGING
5440    if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5441        Perl_croak(aTHX_
5442                   "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5443                   PL_bufptr, PL_bufend, *PL_bufptr);
5444#endif
5445
5446    if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5447        /* if at a \E */
5448        if (PL_lex_casemods) {
5449            const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5450            PL_lex_casestack[PL_lex_casemods] = '\0';
5451
5452            if (PL_bufptr != PL_bufend
5453                && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5454                    || oldmod == 'F')) {
5455                PL_bufptr += 2;
5456                PL_lex_state = LEX_INTERPCONCAT;
5457            }
5458            PL_lex_allbrackets--;
5459            return REPORT(PERLY_PAREN_CLOSE);
5460        }
5461        else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5462           /* Got an unpaired \E */
5463           Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5464                    "Useless use of \\E");
5465        }
5466        if (PL_bufptr != PL_bufend)
5467            PL_bufptr += 2;
5468        PL_lex_state = LEX_INTERPCONCAT;
5469        return yylex();
5470    }
5471    else {
5472        DEBUG_T({
5473            PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5474        });
5475        s = PL_bufptr + 1;
5476        if (s[1] == '\\' && s[2] == 'E') {
5477            PL_bufptr = s + 3;
5478            PL_lex_state = LEX_INTERPCONCAT;
5479            return yylex();
5480        }
5481        else {
5482            I32 tmp;
5483            if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5484                || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5485            {
5486                tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
5487            }
5488            if ((*s == 'L' || *s == 'U' || *s == 'F')
5489                && (strpbrk(PL_lex_casestack, "LUF")))
5490            {
5491                PL_lex_casestack[--PL_lex_casemods] = '\0';
5492                PL_lex_allbrackets--;
5493                return REPORT(PERLY_PAREN_CLOSE);
5494            }
5495            if (PL_lex_casemods > 10)
5496                Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5497            PL_lex_casestack[PL_lex_casemods++] = *s;
5498            PL_lex_casestack[PL_lex_casemods] = '\0';
5499            PL_lex_state = LEX_INTERPCONCAT;
5500            NEXTVAL_NEXTTOKE.ival = 0;
5501            force_next((2<<24)|PERLY_PAREN_OPEN);
5502            if (*s == 'l')
5503                NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5504            else if (*s == 'u')
5505                NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5506            else if (*s == 'L')
5507                NEXTVAL_NEXTTOKE.ival = OP_LC;
5508            else if (*s == 'U')
5509                NEXTVAL_NEXTTOKE.ival = OP_UC;
5510            else if (*s == 'Q')
5511                NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5512            else if (*s == 'F')
5513                NEXTVAL_NEXTTOKE.ival = OP_FC;
5514            else
5515                Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5516            PL_bufptr = s + 1;
5517        }
5518        force_next(FUNC);
5519        if (PL_lex_starts) {
5520            s = PL_bufptr;
5521            PL_lex_starts = 0;
5522            /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5523            if (PL_lex_casemods == 1 && PL_lex_inpat)
5524                TOKEN(PERLY_COMMA);
5525            else
5526                AopNOASSIGN(OP_CONCAT);
5527        }
5528        else
5529            return yylex();
5530    }
5531}
5532
5533static int
5534yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5535                        GV **pgv, GV ***pgvp)
5536{
5537    GV *ogv = NULL;	/* override (winner) */
5538    GV *hgv = NULL;	/* hidden (loser) */
5539    GV *gv = *pgv;
5540
5541    if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5542        CV *cv;
5543        if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5544                                    (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5545                                    SVt_PVCV))
5546            && (cv = GvCVu(gv)))
5547        {
5548            if (GvIMPORTED_CV(gv))
5549                ogv = gv;
5550            else if (! CvMETHOD(cv))
5551                hgv = gv;
5552        }
5553        if (!ogv
5554            && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5555            && (gv = **pgvp)
5556            && (isGV_with_GP(gv)
5557                ? GvCVu(gv) && GvIMPORTED_CV(gv)
5558                :   SvPCS_IMPORTED(gv)
5559                && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5560                                                         len, 0), 1)))
5561        {
5562            ogv = gv;
5563        }
5564    }
5565
5566    *pgv = gv;
5567
5568    if (ogv) {
5569        *orig_keyword = key;
5570        return 0;		/* overridden by import or by GLOBAL */
5571    }
5572    else if (gv && !*pgvp
5573             && -key==KEY_lock	/* XXX generalizable kludge */
5574             && GvCVu(gv))
5575    {
5576        return 0;		/* any sub overrides "weak" keyword */
5577    }
5578    else {			/* no override */
5579        key = -key;
5580        if (key == KEY_dump) {
5581            Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5582        }
5583        *pgv = NULL;
5584        *pgvp = 0;
5585        if (hgv && key != KEY_x)	/* never ambiguous */
5586            Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5587                           "Ambiguous call resolved as CORE::%s(), "
5588                           "qualify as such or use &",
5589                           GvENAME(hgv));
5590        return key;
5591    }
5592}
5593
5594static int
5595yyl_qw(pTHX_ char *s, STRLEN len)
5596{
5597    OP *words = NULL;
5598
5599    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5600    if (!s)
5601        missingterm(NULL, 0);
5602
5603    COPLINE_SET_FROM_MULTI_END;
5604    PL_expect = XOPERATOR;
5605    if (SvCUR(PL_lex_stuff)) {
5606        int warned_comma = !ckWARN(WARN_QW);
5607        int warned_comment = warned_comma;
5608        char *d = SvPV_force(PL_lex_stuff, len);
5609        while (len) {
5610            for (; isSPACE(*d) && len; --len, ++d)
5611                /**/;
5612            if (len) {
5613                SV *sv;
5614                const char *b = d;
5615                if (!warned_comma || !warned_comment) {
5616                    for (; !isSPACE(*d) && len; --len, ++d) {
5617                        if (!warned_comma && *d == ',') {
5618                            Perl_warner(aTHX_ packWARN(WARN_QW),
5619                                "Possible attempt to separate words with commas");
5620                            ++warned_comma;
5621                        }
5622                        else if (!warned_comment && *d == '#') {
5623                            Perl_warner(aTHX_ packWARN(WARN_QW),
5624                                "Possible attempt to put comments in qw() list");
5625                            ++warned_comment;
5626                        }
5627                    }
5628                }
5629                else {
5630                    for (; !isSPACE(*d) && len; --len, ++d)
5631                        /**/;
5632                }
5633                sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5634                words = op_append_elem(OP_LIST, words,
5635                                       newSVOP(OP_CONST, 0, tokeq(sv)));
5636            }
5637        }
5638    }
5639    if (!words)
5640        words = newNULLLIST();
5641    SvREFCNT_dec_NN(PL_lex_stuff);
5642    PL_lex_stuff = NULL;
5643    PL_expect = XOPERATOR;
5644    pl_yylval.opval = sawparens(words);
5645    TOKEN(QWLIST);
5646}
5647
5648static int
5649yyl_hyphen(pTHX_ char *s)
5650{
5651    if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5652        I32 ftst = 0;
5653        char tmp;
5654
5655        s++;
5656        PL_bufptr = s;
5657        tmp = *s++;
5658
5659        while (s < PL_bufend && SPACE_OR_TAB(*s))
5660            s++;
5661
5662        if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5663            s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5664            DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5665            OPERATOR(PERLY_MINUS);              /* unary minus */
5666        }
5667        switch (tmp) {
5668        case 'r': ftst = OP_FTEREAD;    break;
5669        case 'w': ftst = OP_FTEWRITE;   break;
5670        case 'x': ftst = OP_FTEEXEC;    break;
5671        case 'o': ftst = OP_FTEOWNED;   break;
5672        case 'R': ftst = OP_FTRREAD;    break;
5673        case 'W': ftst = OP_FTRWRITE;   break;
5674        case 'X': ftst = OP_FTREXEC;    break;
5675        case 'O': ftst = OP_FTROWNED;   break;
5676        case 'e': ftst = OP_FTIS;       break;
5677        case 'z': ftst = OP_FTZERO;     break;
5678        case 's': ftst = OP_FTSIZE;     break;
5679        case 'f': ftst = OP_FTFILE;     break;
5680        case 'd': ftst = OP_FTDIR;      break;
5681        case 'l': ftst = OP_FTLINK;     break;
5682        case 'p': ftst = OP_FTPIPE;     break;
5683        case 'S': ftst = OP_FTSOCK;     break;
5684        case 'u': ftst = OP_FTSUID;     break;
5685        case 'g': ftst = OP_FTSGID;     break;
5686        case 'k': ftst = OP_FTSVTX;     break;
5687        case 'b': ftst = OP_FTBLK;      break;
5688        case 'c': ftst = OP_FTCHR;      break;
5689        case 't': ftst = OP_FTTTY;      break;
5690        case 'T': ftst = OP_FTTEXT;     break;
5691        case 'B': ftst = OP_FTBINARY;   break;
5692        case 'M': case 'A': case 'C':
5693            gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5694            switch (tmp) {
5695            case 'M': ftst = OP_FTMTIME; break;
5696            case 'A': ftst = OP_FTATIME; break;
5697            case 'C': ftst = OP_FTCTIME; break;
5698            default:                     break;
5699            }
5700            break;
5701        default:
5702            break;
5703        }
5704        if (ftst) {
5705            PL_last_uni = PL_oldbufptr;
5706            PL_last_lop_op = (OPCODE)ftst;
5707            DEBUG_T( {
5708                PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5709            } );
5710            FTST(ftst);
5711        }
5712        else {
5713            /* Assume it was a minus followed by a one-letter named
5714             * subroutine call (or a -bareword), then. */
5715            DEBUG_T( {
5716                PerlIO_printf(Perl_debug_log,
5717                    "### '-%c' looked like a file test but was not\n",
5718                    (int) tmp);
5719            } );
5720            s = --PL_bufptr;
5721        }
5722    }
5723    {
5724        const char tmp = *s++;
5725        if (*s == tmp) {
5726            s++;
5727            if (PL_expect == XOPERATOR)
5728                TERM(POSTDEC);
5729            else
5730                OPERATOR(PREDEC);
5731        }
5732        else if (*s == '>') {
5733            s++;
5734            s = skipspace(s);
5735            if (((*s == '$' || *s == '&') && s[1] == '*')
5736              ||(*s == '$' && s[1] == '#' && s[2] == '*')
5737              ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5738              ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5739             )
5740            {
5741                PL_expect = XPOSTDEREF;
5742                TOKEN(ARROW);
5743            }
5744            if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5745                s = force_word(s,METHOD,FALSE,TRUE);
5746                TOKEN(ARROW);
5747            }
5748            else if (*s == '$')
5749                OPERATOR(ARROW);
5750            else
5751                TERM(ARROW);
5752        }
5753        if (PL_expect == XOPERATOR) {
5754            if (*s == '='
5755                && !PL_lex_allbrackets
5756                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5757            {
5758                s--;
5759                TOKEN(0);
5760            }
5761            Aop(OP_SUBTRACT);
5762        }
5763        else {
5764            if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5765                check_uni();
5766            OPERATOR(PERLY_MINUS);              /* unary minus */
5767        }
5768    }
5769}
5770
5771static int
5772yyl_plus(pTHX_ char *s)
5773{
5774    const char tmp = *s++;
5775    if (*s == tmp) {
5776        s++;
5777        if (PL_expect == XOPERATOR)
5778            TERM(POSTINC);
5779        else
5780            OPERATOR(PREINC);
5781    }
5782    if (PL_expect == XOPERATOR) {
5783        if (*s == '='
5784            && !PL_lex_allbrackets
5785            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5786        {
5787            s--;
5788            TOKEN(0);
5789        }
5790        Aop(OP_ADD);
5791    }
5792    else {
5793        if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5794            check_uni();
5795        OPERATOR(PERLY_PLUS);
5796    }
5797}
5798
5799static int
5800yyl_star(pTHX_ char *s)
5801{
5802    if (PL_expect == XPOSTDEREF)
5803        POSTDEREF(PERLY_STAR);
5804
5805    if (PL_expect != XOPERATOR) {
5806        s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5807        PL_expect = XOPERATOR;
5808        force_ident(PL_tokenbuf, PERLY_STAR);
5809        if (!*PL_tokenbuf)
5810            PREREF(PERLY_STAR);
5811        TERM(PERLY_STAR);
5812    }
5813
5814    s++;
5815    if (*s == '*') {
5816        s++;
5817        if (*s == '=' && !PL_lex_allbrackets
5818            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5819        {
5820            s -= 2;
5821            TOKEN(0);
5822        }
5823        PWop(OP_POW);
5824    }
5825
5826    if (*s == '='
5827        && !PL_lex_allbrackets
5828        && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5829    {
5830        s--;
5831        TOKEN(0);
5832    }
5833
5834    Mop(OP_MULTIPLY);
5835}
5836
5837static int
5838yyl_percent(pTHX_ char *s)
5839{
5840    if (PL_expect == XOPERATOR) {
5841        if (s[1] == '='
5842            && !PL_lex_allbrackets
5843            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5844        {
5845            TOKEN(0);
5846        }
5847        ++s;
5848        Mop(OP_MODULO);
5849    }
5850    else if (PL_expect == XPOSTDEREF)
5851        POSTDEREF(PERLY_PERCENT_SIGN);
5852
5853    PL_tokenbuf[0] = '%';
5854    s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5855    pl_yylval.ival = 0;
5856    if (!PL_tokenbuf[1]) {
5857        PREREF(PERLY_PERCENT_SIGN);
5858    }
5859    if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5860        && intuit_more(s, PL_bufend)) {
5861        if (*s == '[')
5862            PL_tokenbuf[0] = '@';
5863    }
5864    PL_expect = XOPERATOR;
5865    force_ident_maybe_lex('%');
5866    TERM(PERLY_PERCENT_SIGN);
5867}
5868
5869static int
5870yyl_caret(pTHX_ char *s)
5871{
5872    char *d = s;
5873    const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5874    if (bof && s[1] == '.')
5875        s++;
5876    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5877            (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5878    {
5879        s = d;
5880        TOKEN(0);
5881    }
5882    s++;
5883    BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5884}
5885
5886static int
5887yyl_colon(pTHX_ char *s)
5888{
5889    OP *attrs;
5890
5891    switch (PL_expect) {
5892    case XOPERATOR:
5893        if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5894            break;
5895        PL_bufptr = s;	/* update in case we back off */
5896        if (*s == '=') {
5897            Perl_croak(aTHX_
5898                       "Use of := for an empty attribute list is not allowed");
5899        }
5900        goto grabattrs;
5901    case XATTRBLOCK:
5902        PL_expect = XBLOCK;
5903        goto grabattrs;
5904    case XATTRTERM:
5905        PL_expect = XTERMBLOCK;
5906     grabattrs:
5907        /* NB: as well as parsing normal attributes, we also end up
5908         * here if there is something looking like attributes
5909         * following a signature (which is illegal, but used to be
5910         * legal in 5.20..5.26). If the latter, we still parse the
5911         * attributes so that error messages(s) are less confusing,
5912         * but ignore them (parser->sig_seen).
5913         */
5914        s = skipspace(s);
5915        attrs = NULL;
5916        while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5917            bool sig = PL_parser->sig_seen;
5918            I32 tmp;
5919            SV *sv;
5920            STRLEN len;
5921            char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5922            if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5923                if (tmp < 0) tmp = -tmp;
5924                switch (tmp) {
5925                case KEY_or:
5926                case KEY_and:
5927                case KEY_for:
5928                case KEY_foreach:
5929                case KEY_unless:
5930                case KEY_if:
5931                case KEY_while:
5932                case KEY_until:
5933                    goto got_attrs;
5934                default:
5935                    break;
5936                }
5937            }
5938            sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5939            if (*d == '(') {
5940                d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5941                if (!d) {
5942                    if (attrs)
5943                        op_free(attrs);
5944                    sv_free(sv);
5945                    Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5946                }
5947                COPLINE_SET_FROM_MULTI_END;
5948            }
5949            if (PL_lex_stuff) {
5950                sv_catsv(sv, PL_lex_stuff);
5951                attrs = op_append_elem(OP_LIST, attrs,
5952                                    newSVOP(OP_CONST, 0, sv));
5953                SvREFCNT_dec_NN(PL_lex_stuff);
5954                PL_lex_stuff = NULL;
5955            }
5956            else {
5957                /* NOTE: any CV attrs applied here need to be part of
5958                   the CVf_BUILTIN_ATTRS define in cv.h! */
5959                if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5960                    sv_free(sv);
5961                    if (!sig)
5962                        CvLVALUE_on(PL_compcv);
5963                }
5964                else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5965                    sv_free(sv);
5966                    if (!sig)
5967                        CvMETHOD_on(PL_compcv);
5968                }
5969                else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5970                    sv_free(sv);
5971                    if (!sig) {
5972                        Perl_ck_warner_d(aTHX_
5973                            packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5974                           ":const is experimental"
5975                        );
5976                        CvANONCONST_on(PL_compcv);
5977                        if (!CvANON(PL_compcv))
5978                            yyerror(":const is not permitted on named "
5979                                    "subroutines");
5980                    }
5981                }
5982                /* After we've set the flags, it could be argued that
5983                   we don't need to do the attributes.pm-based setting
5984                   process, and shouldn't bother appending recognized
5985                   flags.  To experiment with that, uncomment the
5986                   following "else".  (Note that's already been
5987                   uncommented.  That keeps the above-applied built-in
5988                   attributes from being intercepted (and possibly
5989                   rejected) by a package's attribute routines, but is
5990                   justified by the performance win for the common case
5991                   of applying only built-in attributes.) */
5992                else
5993                    attrs = op_append_elem(OP_LIST, attrs,
5994                                        newSVOP(OP_CONST, 0,
5995                                                sv));
5996            }
5997            s = skipspace(d);
5998            if (*s == ':' && s[1] != ':')
5999                s = skipspace(s+1);
6000            else if (s == d)
6001                break;	/* require real whitespace or :'s */
6002            /* XXX losing whitespace on sequential attributes here */
6003        }
6004
6005        if (*s != ';'
6006            && *s != '}'
6007            && !(PL_expect == XOPERATOR
6008                 ? (*s == '=' ||  *s == ')')
6009                 : (*s == '{' ||  *s == '(')))
6010        {
6011            const char q = ((*s == '\'') ? '"' : '\'');
6012            /* If here for an expression, and parsed no attrs, back off. */
6013            if (PL_expect == XOPERATOR && !attrs) {
6014                s = PL_bufptr;
6015                break;
6016            }
6017            /* MUST advance bufptr here to avoid bogus "at end of line"
6018               context messages from yyerror().
6019            */
6020            PL_bufptr = s;
6021            yyerror( (const char *)
6022                     (*s
6023                      ? Perl_form(aTHX_ "Invalid separator character "
6024                                  "%c%c%c in attribute list", q, *s, q)
6025                      : "Unterminated attribute list" ) );
6026            if (attrs)
6027                op_free(attrs);
6028            OPERATOR(PERLY_COLON);
6029        }
6030
6031    got_attrs:
6032        if (PL_parser->sig_seen) {
6033            /* see comment about about sig_seen and parser error
6034             * handling */
6035            if (attrs)
6036                op_free(attrs);
6037            Perl_croak(aTHX_ "Subroutine attributes must come "
6038                             "before the signature");
6039        }
6040        if (attrs) {
6041            NEXTVAL_NEXTTOKE.opval = attrs;
6042            force_next(THING);
6043        }
6044        TOKEN(COLONATTR);
6045    }
6046
6047    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6048        s--;
6049        TOKEN(0);
6050    }
6051
6052    PL_lex_allbrackets--;
6053    OPERATOR(PERLY_COLON);
6054}
6055
6056static int
6057yyl_subproto(pTHX_ char *s, CV *cv)
6058{
6059    STRLEN protolen = CvPROTOLEN(cv);
6060    const char *proto = CvPROTO(cv);
6061    bool optional;
6062
6063    proto = S_strip_spaces(aTHX_ proto, &protolen);
6064    if (!protolen)
6065        TERM(FUNC0SUB);
6066    if ((optional = *proto == ';')) {
6067        do {
6068            proto++;
6069        } while (*proto == ';');
6070    }
6071
6072    if (
6073        (
6074            (
6075                *proto == '$' || *proto == '_'
6076             || *proto == '*' || *proto == '+'
6077            )
6078         && proto[1] == '\0'
6079        )
6080     || (
6081         *proto == '\\' && proto[1] && proto[2] == '\0'
6082        )
6083    ) {
6084        UNIPROTO(UNIOPSUB,optional);
6085    }
6086
6087    if (*proto == '\\' && proto[1] == '[') {
6088        const char *p = proto + 2;
6089        while(*p && *p != ']')
6090            ++p;
6091        if(*p == ']' && !p[1])
6092            UNIPROTO(UNIOPSUB,optional);
6093    }
6094
6095    if (*proto == '&' && *s == '{') {
6096        if (PL_curstash)
6097            sv_setpvs(PL_subname, "__ANON__");
6098        else
6099            sv_setpvs(PL_subname, "__ANON__::__ANON__");
6100        if (!PL_lex_allbrackets
6101            && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6102        {
6103            PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6104        }
6105        PREBLOCK(LSTOPSUB);
6106    }
6107
6108    return KEY_NULL;
6109}
6110
6111static int
6112yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6113{
6114    char *d;
6115    if (PL_lex_brackets > 100) {
6116        Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6117    }
6118
6119    switch (PL_expect) {
6120    case XTERM:
6121    case XTERMORDORDOR:
6122        PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6123        PL_lex_allbrackets++;
6124        OPERATOR(HASHBRACK);
6125    case XOPERATOR:
6126        while (s < PL_bufend && SPACE_OR_TAB(*s))
6127            s++;
6128        d = s;
6129        PL_tokenbuf[0] = '\0';
6130        if (d < PL_bufend && *d == '-') {
6131            PL_tokenbuf[0] = '-';
6132            d++;
6133            while (d < PL_bufend && SPACE_OR_TAB(*d))
6134                d++;
6135        }
6136        if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6137            STRLEN len;
6138            d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6139                          FALSE, &len);
6140            while (d < PL_bufend && SPACE_OR_TAB(*d))
6141                d++;
6142            if (*d == '}') {
6143                const char minus = (PL_tokenbuf[0] == '-');
6144                s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6145                if (minus)
6146                    force_next(PERLY_MINUS);
6147            }
6148        }
6149        /* FALLTHROUGH */
6150    case XATTRTERM:
6151    case XTERMBLOCK:
6152        PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6153        PL_lex_allbrackets++;
6154        PL_expect = XSTATE;
6155        break;
6156    case XATTRBLOCK:
6157    case XBLOCK:
6158        PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6159        PL_lex_allbrackets++;
6160        PL_expect = XSTATE;
6161        break;
6162    case XBLOCKTERM:
6163        PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6164        PL_lex_allbrackets++;
6165        PL_expect = XSTATE;
6166        break;
6167    default: {
6168            const char *t;
6169            if (PL_oldoldbufptr == PL_last_lop)
6170                PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6171            else
6172                PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6173            PL_lex_allbrackets++;
6174            s = skipspace(s);
6175            if (*s == '}') {
6176                if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6177                    PL_expect = XTERM;
6178                    /* This hack is to get the ${} in the message. */
6179                    PL_bufptr = s+1;
6180                    yyerror("syntax error");
6181                    break;
6182                }
6183                OPERATOR(HASHBRACK);
6184            }
6185            if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6186                /* ${...} or @{...} etc., but not print {...}
6187                 * Skip the disambiguation and treat this as a block.
6188                 */
6189                goto block_expectation;
6190            }
6191            /* This hack serves to disambiguate a pair of curlies
6192             * as being a block or an anon hash.  Normally, expectation
6193             * determines that, but in cases where we're not in a
6194             * position to expect anything in particular (like inside
6195             * eval"") we have to resolve the ambiguity.  This code
6196             * covers the case where the first term in the curlies is a
6197             * quoted string.  Most other cases need to be explicitly
6198             * disambiguated by prepending a "+" before the opening
6199             * curly in order to force resolution as an anon hash.
6200             *
6201             * XXX should probably propagate the outer expectation
6202             * into eval"" to rely less on this hack, but that could
6203             * potentially break current behavior of eval"".
6204             * GSAR 97-07-21
6205             */
6206            t = s;
6207            if (*s == '\'' || *s == '"' || *s == '`') {
6208                /* common case: get past first string, handling escapes */
6209                for (t++; t < PL_bufend && *t != *s;)
6210                    if (*t++ == '\\')
6211                        t++;
6212                t++;
6213            }
6214            else if (*s == 'q') {
6215                if (++t < PL_bufend
6216                    && (!isWORDCHAR(*t)
6217                        || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6218                            && !isWORDCHAR(*t))))
6219                {
6220                    /* skip q//-like construct */
6221                    const char *tmps;
6222                    char open, close, term;
6223                    I32 brackets = 1;
6224
6225                    while (t < PL_bufend && isSPACE(*t))
6226                        t++;
6227                    /* check for q => */
6228                    if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6229                        OPERATOR(HASHBRACK);
6230                    }
6231                    term = *t;
6232                    open = term;
6233                    if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6234                        term = tmps[5];
6235                    close = term;
6236                    if (open == close)
6237                        for (t++; t < PL_bufend; t++) {
6238                            if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6239                                t++;
6240                            else if (*t == open)
6241                                break;
6242                        }
6243                    else {
6244                        for (t++; t < PL_bufend; t++) {
6245                            if (*t == '\\' && t+1 < PL_bufend)
6246                                t++;
6247                            else if (*t == close && --brackets <= 0)
6248                                break;
6249                            else if (*t == open)
6250                                brackets++;
6251                        }
6252                    }
6253                    t++;
6254                }
6255                else
6256                    /* skip plain q word */
6257                    while (   t < PL_bufend
6258                           && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6259                    {
6260                        t += UTF ? UTF8SKIP(t) : 1;
6261                    }
6262            }
6263            else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6264                t += UTF ? UTF8SKIP(t) : 1;
6265                while (   t < PL_bufend
6266                       && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6267                {
6268                    t += UTF ? UTF8SKIP(t) : 1;
6269                }
6270            }
6271            while (t < PL_bufend && isSPACE(*t))
6272                t++;
6273            /* if comma follows first term, call it an anon hash */
6274            /* XXX it could be a comma expression with loop modifiers */
6275            if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6276                               || (*t == '=' && t[1] == '>')))
6277                OPERATOR(HASHBRACK);
6278            if (PL_expect == XREF) {
6279              block_expectation:
6280                /* If there is an opening brace or 'sub:', treat it
6281                   as a term to make ${{...}}{k} and &{sub:attr...}
6282                   dwim.  Otherwise, treat it as a statement, so
6283                   map {no strict; ...} works.
6284                 */
6285                s = skipspace(s);
6286                if (*s == '{') {
6287                    PL_expect = XTERM;
6288                    break;
6289                }
6290                if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6291                    PL_bufptr = s;
6292                    d = s + 3;
6293                    d = skipspace(d);
6294                    s = PL_bufptr;
6295                    if (*d == ':') {
6296                        PL_expect = XTERM;
6297                        break;
6298                    }
6299                }
6300                PL_expect = XSTATE;
6301            }
6302            else {
6303                PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6304                PL_expect = XSTATE;
6305            }
6306        }
6307        break;
6308    }
6309
6310    pl_yylval.ival = CopLINE(PL_curcop);
6311    PL_copline = NOLINE;   /* invalidate current command line number */
6312    TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6313}
6314
6315static int
6316yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6317{
6318    assert(s != PL_bufend);
6319    s++;
6320
6321    if (PL_lex_brackets <= 0)
6322        /* diag_listed_as: Unmatched right %s bracket */
6323        yyerror("Unmatched right curly bracket");
6324    else
6325        PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6326
6327    PL_lex_allbrackets--;
6328
6329    if (PL_lex_state == LEX_INTERPNORMAL) {
6330        if (PL_lex_brackets == 0) {
6331            if (PL_expect & XFAKEBRACK) {
6332                PL_expect &= XENUMMASK;
6333                PL_lex_state = LEX_INTERPEND;
6334                PL_bufptr = s;
6335                return yylex();	/* ignore fake brackets */
6336            }
6337            if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6338             && SvEVALED(PL_lex_repl))
6339                PL_lex_state = LEX_INTERPEND;
6340            else if (*s == '-' && s[1] == '>')
6341                PL_lex_state = LEX_INTERPENDMAYBE;
6342            else if (*s != '[' && *s != '{')
6343                PL_lex_state = LEX_INTERPEND;
6344        }
6345    }
6346
6347    if (PL_expect & XFAKEBRACK) {
6348        PL_expect &= XENUMMASK;
6349        PL_bufptr = s;
6350        return yylex();		/* ignore fake brackets */
6351    }
6352
6353    force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6354    if (formbrack) LEAVE_with_name("lex_format");
6355    if (formbrack == 2) { /* means . where arguments were expected */
6356        force_next(PERLY_SEMICOLON);
6357        TOKEN(FORMRBRACK);
6358    }
6359
6360    TOKEN(PERLY_SEMICOLON);
6361}
6362
6363static int
6364yyl_ampersand(pTHX_ char *s)
6365{
6366    if (PL_expect == XPOSTDEREF)
6367        POSTDEREF(PERLY_AMPERSAND);
6368
6369    s++;
6370    if (*s++ == '&') {
6371        if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6372                (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6373            s -= 2;
6374            TOKEN(0);
6375        }
6376        AOPERATOR(ANDAND);
6377    }
6378    s--;
6379
6380    if (PL_expect == XOPERATOR) {
6381        char *d;
6382        bool bof;
6383        if (   PL_bufptr == PL_linestart
6384            && ckWARN(WARN_SEMICOLON)
6385            && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6386        {
6387            CopLINE_dec(PL_curcop);
6388            Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6389            CopLINE_inc(PL_curcop);
6390        }
6391        d = s;
6392        if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6393            s++;
6394        if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6395                (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6396            s = d;
6397            s--;
6398            TOKEN(0);
6399        }
6400        if (d == s)
6401            BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6402        else
6403            BAop(OP_SBIT_AND);
6404    }
6405
6406    PL_tokenbuf[0] = '&';
6407    s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6408    pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6409
6410    if (PL_tokenbuf[1])
6411        force_ident_maybe_lex('&');
6412    else
6413        PREREF(PERLY_AMPERSAND);
6414
6415    TERM(PERLY_AMPERSAND);
6416}
6417
6418static int
6419yyl_verticalbar(pTHX_ char *s)
6420{
6421    char *d;
6422    bool bof;
6423
6424    s++;
6425    if (*s++ == '|') {
6426        if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6427                (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6428            s -= 2;
6429            TOKEN(0);
6430        }
6431        AOPERATOR(OROR);
6432    }
6433
6434    s--;
6435    d = s;
6436    if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6437        s++;
6438
6439    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6440            (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6441        s = d - 1;
6442        TOKEN(0);
6443    }
6444
6445    BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6446}
6447
6448static int
6449yyl_bang(pTHX_ char *s)
6450{
6451    const char tmp = *s++;
6452    if (tmp == '=') {
6453        /* was this !=~ where !~ was meant?
6454         * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6455
6456        if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6457            const char *t = s+1;
6458
6459            while (t < PL_bufend && isSPACE(*t))
6460                ++t;
6461
6462            if (*t == '/' || *t == '?'
6463                || ((*t == 'm' || *t == 's' || *t == 'y')
6464                    && !isWORDCHAR(t[1]))
6465                || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6466                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6467                            "!=~ should be !~");
6468        }
6469
6470        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6471            s -= 2;
6472            TOKEN(0);
6473        }
6474
6475        ChEop(OP_NE);
6476    }
6477
6478    if (tmp == '~')
6479        PMop(OP_NOT);
6480
6481    s--;
6482    OPERATOR(PERLY_EXCLAMATION_MARK);
6483}
6484
6485static int
6486yyl_snail(pTHX_ char *s)
6487{
6488    if (PL_expect == XPOSTDEREF)
6489        POSTDEREF(PERLY_SNAIL);
6490    PL_tokenbuf[0] = '@';
6491    s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6492    if (PL_expect == XOPERATOR) {
6493        char *d = s;
6494        if (PL_bufptr > s) {
6495            d = PL_bufptr-1;
6496            PL_bufptr = PL_oldbufptr;
6497        }
6498        no_op("Array", d);
6499    }
6500    pl_yylval.ival = 0;
6501    if (!PL_tokenbuf[1]) {
6502        PREREF(PERLY_SNAIL);
6503    }
6504    if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6505        s = skipspace(s);
6506    if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6507        && intuit_more(s, PL_bufend))
6508    {
6509        if (*s == '{')
6510            PL_tokenbuf[0] = '%';
6511
6512        /* Warn about @ where they meant $. */
6513        if (*s == '[' || *s == '{') {
6514            if (ckWARN(WARN_SYNTAX)) {
6515                S_check_scalar_slice(aTHX_ s);
6516            }
6517        }
6518    }
6519    PL_expect = XOPERATOR;
6520    force_ident_maybe_lex('@');
6521    TERM(PERLY_SNAIL);
6522}
6523
6524static int
6525yyl_slash(pTHX_ char *s)
6526{
6527    if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6528        if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6529                (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6530            TOKEN(0);
6531        s += 2;
6532        AOPERATOR(DORDOR);
6533    }
6534    else if (PL_expect == XOPERATOR) {
6535        s++;
6536        if (*s == '=' && !PL_lex_allbrackets
6537            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6538        {
6539            s--;
6540            TOKEN(0);
6541        }
6542        Mop(OP_DIVIDE);
6543    }
6544    else {
6545        /* Disable warning on "study /blah/" */
6546        if (    PL_oldoldbufptr == PL_last_uni
6547            && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6548                || memNE(PL_last_uni, "study", 5)
6549                || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6550         ))
6551            check_uni();
6552        s = scan_pat(s,OP_MATCH);
6553        TERM(sublex_start());
6554    }
6555}
6556
6557static int
6558yyl_leftsquare(pTHX_ char *s)
6559{
6560    if (PL_lex_brackets > 100)
6561        Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6562    PL_lex_brackstack[PL_lex_brackets++] = 0;
6563    PL_lex_allbrackets++;
6564    s++;
6565    OPERATOR(PERLY_BRACKET_OPEN);
6566}
6567
6568static int
6569yyl_rightsquare(pTHX_ char *s)
6570{
6571    if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6572        TOKEN(0);
6573    s++;
6574    if (PL_lex_brackets <= 0)
6575        /* diag_listed_as: Unmatched right %s bracket */
6576        yyerror("Unmatched right square bracket");
6577    else
6578        --PL_lex_brackets;
6579    PL_lex_allbrackets--;
6580    if (PL_lex_state == LEX_INTERPNORMAL) {
6581        if (PL_lex_brackets == 0) {
6582            if (*s == '-' && s[1] == '>')
6583                PL_lex_state = LEX_INTERPENDMAYBE;
6584            else if (*s != '[' && *s != '{')
6585                PL_lex_state = LEX_INTERPEND;
6586        }
6587    }
6588    TERM(PERLY_BRACKET_CLOSE);
6589}
6590
6591static int
6592yyl_tilde(pTHX_ char *s)
6593{
6594    bool bof;
6595    if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6596        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6597            TOKEN(0);
6598        s += 2;
6599        Perl_ck_warner_d(aTHX_
6600            packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6601            "Smartmatch is experimental");
6602        NCEop(OP_SMARTMATCH);
6603    }
6604    s++;
6605    if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6606        s++;
6607        BCop(OP_SCOMPLEMENT);
6608    }
6609    BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6610}
6611
6612static int
6613yyl_leftparen(pTHX_ char *s)
6614{
6615    if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6616        PL_oldbufptr = PL_oldoldbufptr;		/* allow print(STDOUT 123) */
6617    else
6618        PL_expect = XTERM;
6619    s = skipspace(s);
6620    PL_lex_allbrackets++;
6621    TOKEN(PERLY_PAREN_OPEN);
6622}
6623
6624static int
6625yyl_rightparen(pTHX_ char *s)
6626{
6627    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6628        TOKEN(0);
6629    s++;
6630    PL_lex_allbrackets--;
6631    s = skipspace(s);
6632    if (*s == '{')
6633        PREBLOCK(PERLY_PAREN_CLOSE);
6634    TERM(PERLY_PAREN_CLOSE);
6635}
6636
6637static int
6638yyl_leftpointy(pTHX_ char *s)
6639{
6640    char tmp;
6641
6642    if (PL_expect != XOPERATOR) {
6643        if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6644            check_uni();
6645        if (s[1] == '<' && s[2] != '>')
6646            s = scan_heredoc(s);
6647        else
6648            s = scan_inputsymbol(s);
6649        PL_expect = XOPERATOR;
6650        TOKEN(sublex_start());
6651    }
6652
6653    s++;
6654
6655    tmp = *s++;
6656    if (tmp == '<') {
6657        if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6658            s -= 2;
6659            TOKEN(0);
6660        }
6661        SHop(OP_LEFT_SHIFT);
6662    }
6663    if (tmp == '=') {
6664        tmp = *s++;
6665        if (tmp == '>') {
6666            if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6667                s -= 3;
6668                TOKEN(0);
6669            }
6670            NCEop(OP_NCMP);
6671        }
6672        s--;
6673        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6674            s -= 2;
6675            TOKEN(0);
6676        }
6677        ChRop(OP_LE);
6678    }
6679
6680    s--;
6681    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6682        s--;
6683        TOKEN(0);
6684    }
6685
6686    ChRop(OP_LT);
6687}
6688
6689static int
6690yyl_rightpointy(pTHX_ char *s)
6691{
6692    const char tmp = *s++;
6693
6694    if (tmp == '>') {
6695        if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6696            s -= 2;
6697            TOKEN(0);
6698        }
6699        SHop(OP_RIGHT_SHIFT);
6700    }
6701    else if (tmp == '=') {
6702        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6703            s -= 2;
6704            TOKEN(0);
6705        }
6706        ChRop(OP_GE);
6707    }
6708
6709    s--;
6710    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6711        s--;
6712        TOKEN(0);
6713    }
6714
6715    ChRop(OP_GT);
6716}
6717
6718static int
6719yyl_sglquote(pTHX_ char *s)
6720{
6721    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6722    if (!s)
6723        missingterm(NULL, 0);
6724    COPLINE_SET_FROM_MULTI_END;
6725    DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6726    if (PL_expect == XOPERATOR) {
6727        no_op("String",s);
6728    }
6729    pl_yylval.ival = OP_CONST;
6730    TERM(sublex_start());
6731}
6732
6733static int
6734yyl_dblquote(pTHX_ char *s)
6735{
6736    char *d;
6737    STRLEN len;
6738    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6739    DEBUG_T( {
6740        if (s)
6741            printbuf("### Saw string before %s\n", s);
6742        else
6743            PerlIO_printf(Perl_debug_log,
6744                         "### Saw unterminated string\n");
6745    } );
6746    if (PL_expect == XOPERATOR) {
6747            no_op("String",s);
6748    }
6749    if (!s)
6750        missingterm(NULL, 0);
6751    pl_yylval.ival = OP_CONST;
6752    /* FIXME. I think that this can be const if char *d is replaced by
6753       more localised variables.  */
6754    for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6755        if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6756            pl_yylval.ival = OP_STRINGIFY;
6757            break;
6758        }
6759    }
6760    if (pl_yylval.ival == OP_CONST)
6761        COPLINE_SET_FROM_MULTI_END;
6762    TERM(sublex_start());
6763}
6764
6765static int
6766yyl_backtick(pTHX_ char *s)
6767{
6768    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6769    DEBUG_T( {
6770        if (s)
6771            printbuf("### Saw backtick string before %s\n", s);
6772        else
6773            PerlIO_printf(Perl_debug_log,
6774                         "### Saw unterminated backtick string\n");
6775    } );
6776    if (PL_expect == XOPERATOR)
6777        no_op("Backticks",s);
6778    if (!s)
6779        missingterm(NULL, 0);
6780    pl_yylval.ival = OP_BACKTICK;
6781    TERM(sublex_start());
6782}
6783
6784static int
6785yyl_backslash(pTHX_ char *s)
6786{
6787    if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6788        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6789                       *s, *s);
6790    if (PL_expect == XOPERATOR)
6791        no_op("Backslash",s);
6792    OPERATOR(REFGEN);
6793}
6794
6795static void
6796yyl_data_handle(pTHX)
6797{
6798    HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6799                            ? PL_curstash
6800                            : PL_defstash;
6801    GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6802
6803    if (!isGV(gv))
6804        gv_init(gv,stash,"DATA",4,0);
6805
6806    GvMULTI_on(gv);
6807    if (!GvIO(gv))
6808        GvIOp(gv) = newIO();
6809    IoIFP(GvIOp(gv)) = PL_rsfp;
6810
6811    /* Mark this internal pseudo-handle as clean */
6812    IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6813    if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6814        IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6815    else
6816        IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6817
6818#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6819    /* if the script was opened in binmode, we need to revert
6820     * it to text mode for compatibility; but only iff it has CRs
6821     * XXX this is a questionable hack at best. */
6822    if (PL_bufend-PL_bufptr > 2
6823        && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6824    {
6825        Off_t loc = 0;
6826        if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6827            loc = PerlIO_tell(PL_rsfp);
6828            (void)PerlIO_seek(PL_rsfp, 0L, 0);
6829        }
6830        if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6831            if (loc > 0)
6832                PerlIO_seek(PL_rsfp, loc, 0);
6833        }
6834    }
6835#endif
6836
6837#ifdef PERLIO_LAYERS
6838    if (!IN_BYTES) {
6839        if (UTF)
6840            PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6841    }
6842#endif
6843
6844    PL_rsfp = NULL;
6845}
6846
6847PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6848    __attribute__noreturn__;
6849
6850PERL_STATIC_NO_RET void
6851yyl_croak_unrecognised(pTHX_ char *s)
6852{
6853    SV *dsv = newSVpvs_flags("", SVs_TEMP);
6854    const char *c;
6855    char *d;
6856    STRLEN len;
6857
6858    if (UTF) {
6859        STRLEN skiplen = UTF8SKIP(s);
6860        STRLEN stravail = PL_bufend - s;
6861        c = sv_uni_display(dsv, newSVpvn_flags(s,
6862                                               skiplen > stravail ? stravail : skiplen,
6863                                               SVs_TEMP | SVf_UTF8),
6864                           10, UNI_DISPLAY_ISPRINT);
6865    }
6866    else {
6867        c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6868    }
6869
6870    if (s >= PL_linestart) {
6871        d = PL_linestart;
6872    }
6873    else {
6874        /* somehow (probably due to a parse failure), PL_linestart has advanced
6875         * pass PL_bufptr, get a reasonable beginning of line
6876         */
6877        d = s;
6878        while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6879            --d;
6880    }
6881    len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6882    if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6883        d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6884    }
6885
6886    Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6887                      UTF8fARG(UTF, (s - d), d),
6888                     (int) len + 1);
6889}
6890
6891static int
6892yyl_require(pTHX_ char *s, I32 orig_keyword)
6893{
6894    s = skipspace(s);
6895    if (isDIGIT(*s)) {
6896        s = force_version(s, FALSE);
6897    }
6898    else if (*s != 'v' || !isDIGIT(s[1])
6899            || (s = force_version(s, TRUE), *s == 'v'))
6900    {
6901        *PL_tokenbuf = '\0';
6902        s = force_word(s,BAREWORD,TRUE,TRUE);
6903        if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6904                                   PL_tokenbuf + sizeof(PL_tokenbuf),
6905                                   UTF))
6906        {
6907            gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6908                        GV_ADD | (UTF ? SVf_UTF8 : 0));
6909        }
6910        else if (*s == '<')
6911            yyerror("<> at require-statement should be quotes");
6912    }
6913
6914    if (orig_keyword == KEY_require)
6915        pl_yylval.ival = 1;
6916    else
6917        pl_yylval.ival = 0;
6918
6919    PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6920    PL_bufptr = s;
6921    PL_last_uni = PL_oldbufptr;
6922    PL_last_lop_op = OP_REQUIRE;
6923    s = skipspace(s);
6924    return REPORT( (int)REQUIRE );
6925}
6926
6927static int
6928yyl_foreach(pTHX_ char *s)
6929{
6930    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6931        return REPORT(0);
6932    pl_yylval.ival = CopLINE(PL_curcop);
6933    s = skipspace(s);
6934    if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6935        char *p = s;
6936        SSize_t s_off = s - SvPVX(PL_linestr);
6937        bool paren_is_valid = FALSE;
6938        bool maybe_package = FALSE;
6939        bool saw_core = FALSE;
6940        bool core_valid = FALSE;
6941
6942        if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
6943            saw_core = TRUE;
6944            p += 6;
6945        }
6946        if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
6947            core_valid = TRUE;
6948            paren_is_valid = TRUE;
6949            if (isSPACE(p[2])) {
6950                p = skipspace(p + 3);
6951                maybe_package = TRUE;
6952            }
6953            else {
6954                p += 2;
6955            }
6956        }
6957        else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
6958            core_valid = TRUE;
6959            if (isSPACE(p[3])) {
6960                p = skipspace(p + 4);
6961                maybe_package = TRUE;
6962            }
6963            else {
6964                p += 3;
6965            }
6966        }
6967        else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
6968            core_valid = TRUE;
6969            if (isSPACE(p[5])) {
6970                p = skipspace(p + 6);
6971            }
6972            else {
6973                p += 5;
6974            }
6975        }
6976        if (saw_core && !core_valid) {
6977            Perl_croak(aTHX_ "Missing $ on loop variable");
6978        }
6979
6980        if (maybe_package && !saw_core) {
6981            /* skip optional package name, as in "for my abc $x (..)" */
6982            if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
6983                STRLEN len;
6984                p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6985                p = skipspace(p);
6986                paren_is_valid = FALSE;
6987            }
6988        }
6989
6990        if (UNLIKELY(paren_is_valid && *p == '(')) {
6991            Perl_ck_warner_d(aTHX_
6992                             packWARN(WARN_EXPERIMENTAL__FOR_LIST),
6993                             "for my (...) is experimental");
6994        }
6995        else if (UNLIKELY(*p != '$' && *p != '\\')) {
6996            /* "for myfoo (" will end up here, but with p pointing at the 'f' */
6997            Perl_croak(aTHX_ "Missing $ on loop variable");
6998        }
6999        /* The buffer may have been reallocated, update s */
7000        s = SvPVX(PL_linestr) + s_off;
7001    }
7002    OPERATOR(FOR);
7003}
7004
7005static int
7006yyl_do(pTHX_ char *s, I32 orig_keyword)
7007{
7008    s = skipspace(s);
7009    if (*s == '{')
7010        PRETERMBLOCK(DO);
7011    if (*s != '\'') {
7012        char *d;
7013        STRLEN len;
7014        *PL_tokenbuf = '&';
7015        d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7016                      1, &len);
7017        if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7018         && !keyword(PL_tokenbuf + 1, len, 0)) {
7019            SSize_t off = s-SvPVX(PL_linestr);
7020            d = skipspace(d);
7021            s = SvPVX(PL_linestr)+off;
7022            if (*d == '(') {
7023                force_ident_maybe_lex('&');
7024                s = d;
7025            }
7026        }
7027    }
7028    if (orig_keyword == KEY_do)
7029        pl_yylval.ival = 1;
7030    else
7031        pl_yylval.ival = 0;
7032    OPERATOR(DO);
7033}
7034
7035static int
7036yyl_my(pTHX_ char *s, I32 my)
7037{
7038    if (PL_in_my) {
7039        PL_bufptr = s;
7040        yyerror(Perl_form(aTHX_
7041                          "Can't redeclare \"%s\" in \"%s\"",
7042                           my       == KEY_my    ? "my" :
7043                           my       == KEY_state ? "state" : "our",
7044                           PL_in_my == KEY_my    ? "my" :
7045                           PL_in_my == KEY_state ? "state" : "our"));
7046    }
7047    PL_in_my = (U16)my;
7048    s = skipspace(s);
7049    if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7050        STRLEN len;
7051        s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7052        if (memEQs(PL_tokenbuf, len, "sub"))
7053            return yyl_sub(aTHX_ s, my);
7054        PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7055        if (!PL_in_my_stash) {
7056            char tmpbuf[1024];
7057            int i;
7058            PL_bufptr = s;
7059            i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7060            PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7061            yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7062        }
7063    }
7064    else if (*s == '\\') {
7065        if (!FEATURE_MYREF_IS_ENABLED)
7066            Perl_croak(aTHX_ "The experimental declared_refs "
7067                             "feature is not enabled");
7068        Perl_ck_warner_d(aTHX_
7069             packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7070            "Declaring references is experimental");
7071    }
7072    OPERATOR(MY);
7073}
7074
7075static int yyl_try(pTHX_ char*);
7076
7077static bool
7078yyl_eol_needs_semicolon(pTHX_ char **ps)
7079{
7080    char *s = *ps;
7081    if (PL_lex_state != LEX_NORMAL
7082        || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7083    {
7084        const bool in_comment = *s == '#';
7085        char *d;
7086        if (*s == '#' && s == PL_linestart && PL_in_eval
7087         && !PL_rsfp && !PL_parser->filtered) {
7088            /* handle eval qq[#line 1 "foo"\n ...] */
7089            CopLINE_dec(PL_curcop);
7090            incline(s, PL_bufend);
7091        }
7092        d = s;
7093        while (d < PL_bufend && *d != '\n')
7094            d++;
7095        if (d < PL_bufend)
7096            d++;
7097        s = d;
7098        if (in_comment && d == PL_bufend
7099            && PL_lex_state == LEX_INTERPNORMAL
7100            && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7101            && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7102        else
7103            incline(s, PL_bufend);
7104        if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7105            PL_lex_state = LEX_FORMLINE;
7106            force_next(FORMRBRACK);
7107            *ps = s;
7108            return TRUE;
7109        }
7110    }
7111    else {
7112        while (s < PL_bufend && *s != '\n')
7113            s++;
7114        if (s < PL_bufend) {
7115            s++;
7116            if (s < PL_bufend)
7117                incline(s, PL_bufend);
7118        }
7119    }
7120    *ps = s;
7121    return FALSE;
7122}
7123
7124static int
7125yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7126{
7127    char *d;
7128
7129    goto start;
7130
7131    do {
7132        fake_eof = 0;
7133        bof = cBOOL(PL_rsfp);
7134      start:
7135
7136        PL_bufptr = PL_bufend;
7137        COPLINE_INC_WITH_HERELINES;
7138        if (!lex_next_chunk(fake_eof)) {
7139            CopLINE_dec(PL_curcop);
7140            s = PL_bufptr;
7141            TOKEN(PERLY_SEMICOLON);	/* not infinite loop because rsfp is NULL now */
7142        }
7143        CopLINE_dec(PL_curcop);
7144        s = PL_bufptr;
7145        /* If it looks like the start of a BOM or raw UTF-16,
7146         * check if it in fact is. */
7147        if (bof && PL_rsfp
7148            && (   *s == 0
7149                || *(U8*)s == BOM_UTF8_FIRST_BYTE
7150                || *(U8*)s >= 0xFE
7151                || s[1] == 0))
7152        {
7153            Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7154            bof = (offset == (Off_t)SvCUR(PL_linestr));
7155#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7156            /* offset may include swallowed CR */
7157            if (!bof)
7158                bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7159#endif
7160            if (bof) {
7161                PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7162                s = swallow_bom((U8*)s);
7163            }
7164        }
7165        if (PL_parser->in_pod) {
7166            /* Incest with pod. */
7167            if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7168                && !isALPHA(s[4]))
7169            {
7170                SvPVCLEAR(PL_linestr);
7171                PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7172                PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7173                PL_last_lop = PL_last_uni = NULL;
7174                PL_parser->in_pod = 0;
7175            }
7176        }
7177        if (PL_rsfp || PL_parser->filtered)
7178            incline(s, PL_bufend);
7179    } while (PL_parser->in_pod);
7180
7181    PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7182    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7183    PL_last_lop = PL_last_uni = NULL;
7184    if (CopLINE(PL_curcop) == 1) {
7185        while (s < PL_bufend && isSPACE(*s))
7186            s++;
7187        if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7188            s++;
7189        d = NULL;
7190        if (!PL_in_eval) {
7191            if (*s == '#' && *(s+1) == '!')
7192                d = s + 2;
7193#ifdef ALTERNATE_SHEBANG
7194            else {
7195                static char const as[] = ALTERNATE_SHEBANG;
7196                if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7197                    d = s + (sizeof(as) - 1);
7198            }
7199#endif /* ALTERNATE_SHEBANG */
7200        }
7201        if (d) {
7202            char *ipath;
7203            char *ipathend;
7204
7205            while (isSPACE(*d))
7206                d++;
7207            ipath = d;
7208            while (*d && !isSPACE(*d))
7209                d++;
7210            ipathend = d;
7211
7212#ifdef ARG_ZERO_IS_SCRIPT
7213            if (ipathend > ipath) {
7214                /*
7215                 * HP-UX (at least) sets argv[0] to the script name,
7216                 * which makes $^X incorrect.  And Digital UNIX and Linux,
7217                 * at least, set argv[0] to the basename of the Perl
7218                 * interpreter. So, having found "#!", we'll set it right.
7219                 */
7220                SV* copfilesv = CopFILESV(PL_curcop);
7221                if (copfilesv) {
7222                    SV * const x =
7223                        GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7224                                         SVt_PV)); /* $^X */
7225                    assert(SvPOK(x) || SvGMAGICAL(x));
7226                    if (sv_eq(x, copfilesv)) {
7227                        sv_setpvn(x, ipath, ipathend - ipath);
7228                        SvSETMAGIC(x);
7229                    }
7230                    else {
7231                        STRLEN blen;
7232                        STRLEN llen;
7233                        const char *bstart = SvPV_const(copfilesv, blen);
7234                        const char * const lstart = SvPV_const(x, llen);
7235                        if (llen < blen) {
7236                            bstart += blen - llen;
7237                            if (strnEQ(bstart, lstart, llen) &&	bstart[-1] == '/') {
7238                                sv_setpvn(x, ipath, ipathend - ipath);
7239                                SvSETMAGIC(x);
7240                            }
7241                        }
7242                    }
7243                }
7244                else {
7245                    /* Anything to do if no copfilesv? */
7246                }
7247                TAINT_NOT;	/* $^X is always tainted, but that's OK */
7248            }
7249#endif /* ARG_ZERO_IS_SCRIPT */
7250
7251            /*
7252             * Look for options.
7253             */
7254            d = instr(s,"perl -");
7255            if (!d) {
7256                d = instr(s,"perl");
7257#if defined(DOSISH)
7258                /* avoid getting into infinite loops when shebang
7259                 * line contains "Perl" rather than "perl" */
7260                if (!d) {
7261                    for (d = ipathend-4; d >= ipath; --d) {
7262                        if (isALPHA_FOLD_EQ(*d, 'p')
7263                            && !ibcmp(d, "perl", 4))
7264                        {
7265                            break;
7266                        }
7267                    }
7268                    if (d < ipath)
7269                        d = NULL;
7270                }
7271#endif
7272            }
7273#ifdef ALTERNATE_SHEBANG
7274            /*
7275             * If the ALTERNATE_SHEBANG on this system starts with a
7276             * character that can be part of a Perl expression, then if
7277             * we see it but not "perl", we're probably looking at the
7278             * start of Perl code, not a request to hand off to some
7279             * other interpreter.  Similarly, if "perl" is there, but
7280             * not in the first 'word' of the line, we assume the line
7281             * contains the start of the Perl program.
7282             */
7283            if (d && *s != '#') {
7284                const char *c = ipath;
7285                while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7286                    c++;
7287                if (c < d)
7288                    d = NULL;	/* "perl" not in first word; ignore */
7289                else
7290                    *s = '#';	/* Don't try to parse shebang line */
7291            }
7292#endif /* ALTERNATE_SHEBANG */
7293            if (!d
7294                && *s == '#'
7295                && ipathend > ipath
7296                && !PL_minus_c
7297                && !instr(s,"indir")
7298                && instr(PL_origargv[0],"perl"))
7299            {
7300                char **newargv;
7301
7302                *ipathend = '\0';
7303                s = ipathend + 1;
7304                while (s < PL_bufend && isSPACE(*s))
7305                    s++;
7306                if (s < PL_bufend) {
7307                    Newx(newargv,PL_origargc+3,char*);
7308                    newargv[1] = s;
7309                    while (s < PL_bufend && !isSPACE(*s))
7310                        s++;
7311                    *s = '\0';
7312                    Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7313                }
7314                else
7315                    newargv = PL_origargv;
7316                newargv[0] = ipath;
7317                PERL_FPU_PRE_EXEC
7318                PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7319                PERL_FPU_POST_EXEC
7320                Perl_croak(aTHX_ "Can't exec %s", ipath);
7321            }
7322            if (d) {
7323                while (*d && !isSPACE(*d))
7324                    d++;
7325                while (SPACE_OR_TAB(*d))
7326                    d++;
7327
7328                if (*d++ == '-') {
7329                    const bool switches_done = PL_doswitches;
7330                    const U32 oldpdb = PL_perldb;
7331                    const bool oldn = PL_minus_n;
7332                    const bool oldp = PL_minus_p;
7333                    const char *d1 = d;
7334
7335                    do {
7336                        bool baduni = FALSE;
7337                        if (*d1 == 'C') {
7338                            const char *d2 = d1 + 1;
7339                            if (parse_unicode_opts((const char **)&d2)
7340                                != PL_unicode)
7341                                baduni = TRUE;
7342                        }
7343                        if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7344                            const char * const m = d1;
7345                            while (*d1 && !isSPACE(*d1))
7346                                d1++;
7347                            Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7348                                  (int)(d1 - m), m);
7349                        }
7350                        d1 = moreswitches(d1);
7351                    } while (d1);
7352                    if (PL_doswitches && !switches_done) {
7353                        int argc = PL_origargc;
7354                        char **argv = PL_origargv;
7355                        do {
7356                            argc--,argv++;
7357                        } while (argc && argv[0][0] == '-' && argv[0][1]);
7358                        init_argv_symbols(argc,argv);
7359                    }
7360                    if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7361                        || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7362                          /* if we have already added "LINE: while (<>) {",
7363                             we must not do it again */
7364                    {
7365                        SvPVCLEAR(PL_linestr);
7366                        PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7367                        PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7368                        PL_last_lop = PL_last_uni = NULL;
7369                        PL_preambled = FALSE;
7370                        if (PERLDB_LINE_OR_SAVESRC)
7371                            (void)gv_fetchfile(PL_origfilename);
7372                        return YYL_RETRY;
7373                    }
7374                }
7375            }
7376        }
7377    }
7378
7379    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7380        PL_lex_state = LEX_FORMLINE;
7381        force_next(FORMRBRACK);
7382        TOKEN(PERLY_SEMICOLON);
7383    }
7384
7385    PL_bufptr = s;
7386    return YYL_RETRY;
7387}
7388
7389static int
7390yyl_fatcomma(pTHX_ char *s, STRLEN len)
7391{
7392    CLINE;
7393    pl_yylval.opval
7394        = newSVOP(OP_CONST, 0,
7395                       S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7396    pl_yylval.opval->op_private = OPpCONST_BARE;
7397    TERM(BAREWORD);
7398}
7399
7400static int
7401yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7402{
7403    if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7404        && PL_parser->saw_infix_sigil)
7405    {
7406        Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7407                         "Operator or semicolon missing before %c%" UTF8f,
7408                         lastchar,
7409                         UTF8fARG(UTF, strlen(PL_tokenbuf),
7410                                  PL_tokenbuf));
7411        Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7412                         "Ambiguous use of %c resolved as operator %c",
7413                         lastchar, lastchar);
7414    }
7415    TOKEN(BAREWORD);
7416}
7417
7418static int
7419yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7420{
7421    if (sv) {
7422        op_free(rv2cv_op);
7423        SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7424        ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7425        if (SvTYPE(sv) == SVt_PVAV)
7426            pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7427                                      pl_yylval.opval);
7428        else {
7429            pl_yylval.opval->op_private = 0;
7430            pl_yylval.opval->op_folded = 1;
7431            pl_yylval.opval->op_flags |= OPf_SPECIAL;
7432        }
7433        TOKEN(BAREWORD);
7434    }
7435
7436    op_free(pl_yylval.opval);
7437    pl_yylval.opval =
7438        off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7439    pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7440    PL_last_lop = PL_oldbufptr;
7441    PL_last_lop_op = OP_ENTERSUB;
7442
7443    /* Is there a prototype? */
7444    if (SvPOK(cv)) {
7445        int k = yyl_subproto(aTHX_ s, cv);
7446        if (k != KEY_NULL)
7447            return k;
7448    }
7449
7450    NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7451    PL_expect = XTERM;
7452    force_next(off ? PRIVATEREF : BAREWORD);
7453    if (!PL_lex_allbrackets
7454        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7455    {
7456        PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7457    }
7458
7459    TOKEN(NOAMP);
7460}
7461
7462/* Honour "reserved word" warnings, and enforce strict subs */
7463static void
7464yyl_strictwarn_bareword(pTHX_ const char lastchar)
7465{
7466    /* after "print" and similar functions (corresponding to
7467     * "F? L" in opcode.pl), whatever wasn't already parsed as
7468     * a filehandle should be subject to "strict subs".
7469     * Likewise for the optional indirect-object argument to system
7470     * or exec, which can't be a bareword */
7471    if ((PL_last_lop_op == OP_PRINT
7472            || PL_last_lop_op == OP_PRTF
7473            || PL_last_lop_op == OP_SAY
7474            || PL_last_lop_op == OP_SYSTEM
7475            || PL_last_lop_op == OP_EXEC)
7476        && (PL_hints & HINT_STRICT_SUBS))
7477    {
7478        pl_yylval.opval->op_private |= OPpCONST_STRICT;
7479    }
7480
7481    if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7482        char *d = PL_tokenbuf;
7483        while (isLOWER(*d))
7484            d++;
7485        if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7486            /* PL_warn_reserved is constant */
7487            GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7488            Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7489                        PL_tokenbuf);
7490            GCC_DIAG_RESTORE_STMT;
7491        }
7492    }
7493}
7494
7495static int
7496yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7497{
7498    int pkgname = 0;
7499    const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7500    bool safebw;
7501    bool no_op_error = FALSE;
7502    /* Use this var to track whether intuit_method has been
7503       called.  intuit_method returns 0 or > 255.  */
7504    int key = 1;
7505
7506    if (PL_expect == XOPERATOR) {
7507        if (PL_bufptr == PL_linestart) {
7508            CopLINE_dec(PL_curcop);
7509            Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7510            CopLINE_inc(PL_curcop);
7511        }
7512        else
7513            /* We want to call no_op with s pointing after the
7514               bareword, so defer it.  But we want it to come
7515               before the Bad name croak.  */
7516            no_op_error = TRUE;
7517    }
7518
7519    /* Get the rest if it looks like a package qualifier */
7520
7521    if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7522        STRLEN morelen;
7523        s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7524                      TRUE, &morelen);
7525        if (no_op_error) {
7526            no_op("Bareword",s);
7527            no_op_error = FALSE;
7528        }
7529        if (!morelen)
7530            Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7531                    UTF8fARG(UTF, len, PL_tokenbuf),
7532                    *s == '\'' ? "'" : "::");
7533        len += morelen;
7534        pkgname = 1;
7535    }
7536
7537    if (no_op_error)
7538        no_op("Bareword",s);
7539
7540    /* See if the name is "Foo::",
7541       in which case Foo is a bareword
7542       (and a package name). */
7543
7544    if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7545        if (ckWARN(WARN_BAREWORD)
7546            && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7547            Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7548                        "Bareword \"%" UTF8f
7549                        "\" refers to nonexistent package",
7550                        UTF8fARG(UTF, len, PL_tokenbuf));
7551        len -= 2;
7552        PL_tokenbuf[len] = '\0';
7553        c.gv = NULL;
7554        c.gvp = 0;
7555        safebw = TRUE;
7556    }
7557    else {
7558        safebw = FALSE;
7559    }
7560
7561    /* if we saw a global override before, get the right name */
7562
7563    if (!c.sv)
7564        c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7565    if (c.gvp) {
7566        SV *sv = newSVpvs("CORE::GLOBAL::");
7567        sv_catsv(sv, c.sv);
7568        SvREFCNT_dec(c.sv);
7569        c.sv = sv;
7570    }
7571
7572    /* Presume this is going to be a bareword of some sort. */
7573    CLINE;
7574    pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7575    pl_yylval.opval->op_private = OPpCONST_BARE;
7576
7577    /* And if "Foo::", then that's what it certainly is. */
7578    if (safebw)
7579        return yyl_safe_bareword(aTHX_ s, lastchar);
7580
7581    if (!c.off) {
7582        OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7583        const_op->op_private = OPpCONST_BARE;
7584        c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7585        c.cv = c.lex
7586            ? isGV(c.gv)
7587                ? GvCV(c.gv)
7588                : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7589                    ? (CV *)SvRV(c.gv)
7590                    : ((CV *)c.gv)
7591            : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7592    }
7593
7594    /* See if it's the indirect object for a list operator. */
7595
7596    if (PL_oldoldbufptr
7597        && PL_oldoldbufptr < PL_bufptr
7598        && (PL_oldoldbufptr == PL_last_lop
7599            || PL_oldoldbufptr == PL_last_uni)
7600        && /* NO SKIPSPACE BEFORE HERE! */
7601           (PL_expect == XREF
7602            || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7603                                                   == OA_FILEREF))
7604    {
7605        bool immediate_paren = *s == '(';
7606        SSize_t s_off;
7607
7608        /* (Now we can afford to cross potential line boundary.) */
7609        s = skipspace(s);
7610
7611        /* intuit_method() can indirectly call lex_next_chunk(),
7612         * invalidating s
7613         */
7614        s_off = s - SvPVX(PL_linestr);
7615        /* Two barewords in a row may indicate method call. */
7616        if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7617                || *s == '$')
7618            && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7619        {
7620            /* the code at method: doesn't use s */
7621            goto method;
7622        }
7623        s = SvPVX(PL_linestr) + s_off;
7624
7625        if (((PL_opargs[PL_last_lop_op] >> OASHIFT) & 7) == OA_FILEREF
7626            && !immediate_paren && !c.cv
7627            && !FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
7628            no_bareword_filehandle(PL_tokenbuf);
7629        }
7630
7631        /* If not a declared subroutine, it's an indirect object. */
7632        /* (But it's an indir obj regardless for sort.) */
7633        /* Also, if "_" follows a filetest operator, it's a bareword */
7634
7635        if (
7636            ( !immediate_paren && (PL_last_lop_op == OP_SORT
7637             || (!c.cv
7638                 && (PL_last_lop_op != OP_MAPSTART
7639                     && PL_last_lop_op != OP_GREPSTART))))
7640           || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7641                && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7642                                                == OA_FILESTATOP))
7643           )
7644        {
7645            PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7646            yyl_strictwarn_bareword(aTHX_ lastchar);
7647            op_free(c.rv2cv_op);
7648            return yyl_safe_bareword(aTHX_ s, lastchar);
7649        }
7650    }
7651
7652    PL_expect = XOPERATOR;
7653    s = skipspace(s);
7654
7655    /* Is this a word before a => operator? */
7656    if (*s == '=' && s[1] == '>' && !pkgname) {
7657        op_free(c.rv2cv_op);
7658        CLINE;
7659        if (c.gvp || (c.lex && !c.off)) {
7660            assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7661            /* This is our own scalar, created a few lines
7662               above, so this is safe. */
7663            SvREADONLY_off(c.sv);
7664            sv_setpv(c.sv, PL_tokenbuf);
7665            if (UTF && !IN_BYTES
7666             && is_utf8_string((U8*)PL_tokenbuf, len))
7667                  SvUTF8_on(c.sv);
7668            SvREADONLY_on(c.sv);
7669        }
7670        TERM(BAREWORD);
7671    }
7672
7673    /* If followed by a paren, it's certainly a subroutine. */
7674    if (*s == '(') {
7675        CLINE;
7676        if (c.cv) {
7677            char *d = s + 1;
7678            while (SPACE_OR_TAB(*d))
7679                d++;
7680            if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7681                return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7682        }
7683        NEXTVAL_NEXTTOKE.opval =
7684            c.off ? c.rv2cv_op : pl_yylval.opval;
7685        if (c.off)
7686             op_free(pl_yylval.opval), force_next(PRIVATEREF);
7687        else op_free(c.rv2cv_op),      force_next(BAREWORD);
7688        pl_yylval.ival = 0;
7689        TOKEN(PERLY_AMPERSAND);
7690    }
7691
7692    /* If followed by var or block, call it a method (unless sub) */
7693
7694    if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7695        op_free(c.rv2cv_op);
7696        PL_last_lop = PL_oldbufptr;
7697        PL_last_lop_op = OP_METHOD;
7698        if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7699            PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7700        PL_expect = XBLOCKTERM;
7701        PL_bufptr = s;
7702        return REPORT(METHOD);
7703    }
7704
7705    /* If followed by a bareword, see if it looks like indir obj. */
7706
7707    if (   key == 1
7708        && !orig_keyword
7709        && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7710        && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7711    {
7712      method:
7713        if (c.lex && !c.off) {
7714            assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7715            SvREADONLY_off(c.sv);
7716            sv_setpvn(c.sv, PL_tokenbuf, len);
7717            if (UTF && !IN_BYTES
7718             && is_utf8_string((U8*)PL_tokenbuf, len))
7719                SvUTF8_on(c.sv);
7720            else SvUTF8_off(c.sv);
7721        }
7722        op_free(c.rv2cv_op);
7723        if (key == METHOD && !PL_lex_allbrackets
7724            && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7725        {
7726            PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7727        }
7728        return REPORT(key);
7729    }
7730
7731    /* Not a method, so call it a subroutine (if defined) */
7732
7733    if (c.cv) {
7734        /* Check for a constant sub */
7735        c.sv = cv_const_sv_or_av(c.cv);
7736        return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7737    }
7738
7739    /* Call it a bare word */
7740
7741    if (PL_hints & HINT_STRICT_SUBS)
7742        pl_yylval.opval->op_private |= OPpCONST_STRICT;
7743    else
7744        yyl_strictwarn_bareword(aTHX_ lastchar);
7745
7746    op_free(c.rv2cv_op);
7747
7748    return yyl_safe_bareword(aTHX_ s, lastchar);
7749}
7750
7751static int
7752yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7753{
7754    switch (key) {
7755    default:			/* not a keyword */
7756        return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7757
7758    case KEY___FILE__:
7759        FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7760
7761    case KEY___LINE__:
7762        FUN0OP(
7763            newSVOP(OP_CONST, 0,
7764                Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7765        );
7766
7767    case KEY___PACKAGE__:
7768        FUN0OP(
7769            newSVOP(OP_CONST, 0, (PL_curstash
7770                                     ? newSVhek(HvNAME_HEK(PL_curstash))
7771                                     : &PL_sv_undef))
7772        );
7773
7774    case KEY___DATA__:
7775    case KEY___END__:
7776        if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7777            yyl_data_handle(aTHX);
7778        return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7779
7780    case KEY___SUB__:
7781        FUN0OP(CvCLONE(PL_compcv)
7782                    ? newOP(OP_RUNCV, 0)
7783                    : newPVOP(OP_RUNCV,0,NULL));
7784
7785    case KEY_AUTOLOAD:
7786    case KEY_DESTROY:
7787    case KEY_BEGIN:
7788    case KEY_UNITCHECK:
7789    case KEY_CHECK:
7790    case KEY_INIT:
7791    case KEY_END:
7792        if (PL_expect == XSTATE)
7793            return yyl_sub(aTHX_ PL_bufptr, key);
7794        return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7795
7796    case KEY_abs:
7797        UNI(OP_ABS);
7798
7799    case KEY_alarm:
7800        UNI(OP_ALARM);
7801
7802    case KEY_accept:
7803        LOP(OP_ACCEPT,XTERM);
7804
7805    case KEY_and:
7806        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7807            return REPORT(0);
7808        OPERATOR(ANDOP);
7809
7810    case KEY_atan2:
7811        LOP(OP_ATAN2,XTERM);
7812
7813    case KEY_bind:
7814        LOP(OP_BIND,XTERM);
7815
7816    case KEY_binmode:
7817        LOP(OP_BINMODE,XTERM);
7818
7819    case KEY_bless:
7820        LOP(OP_BLESS,XTERM);
7821
7822    case KEY_break:
7823        FUN0(OP_BREAK);
7824
7825    case KEY_catch:
7826        Perl_ck_warner_d(aTHX_
7827            packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
7828        PREBLOCK(CATCH);
7829
7830    case KEY_chop:
7831        UNI(OP_CHOP);
7832
7833    case KEY_continue:
7834        /* We have to disambiguate the two senses of
7835          "continue". If the next token is a '{' then
7836          treat it as the start of a continue block;
7837          otherwise treat it as a control operator.
7838         */
7839        s = skipspace(s);
7840        if (*s == '{')
7841            PREBLOCK(CONTINUE);
7842        else
7843            FUN0(OP_CONTINUE);
7844
7845    case KEY_chdir:
7846        /* may use HOME */
7847        (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7848        UNI(OP_CHDIR);
7849
7850    case KEY_close:
7851        UNI(OP_CLOSE);
7852
7853    case KEY_closedir:
7854        UNI(OP_CLOSEDIR);
7855
7856    case KEY_cmp:
7857        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7858            return REPORT(0);
7859        NCEop(OP_SCMP);
7860
7861    case KEY_caller:
7862        UNI(OP_CALLER);
7863
7864    case KEY_crypt:
7865
7866        LOP(OP_CRYPT,XTERM);
7867
7868    case KEY_chmod:
7869        LOP(OP_CHMOD,XTERM);
7870
7871    case KEY_chown:
7872        LOP(OP_CHOWN,XTERM);
7873
7874    case KEY_connect:
7875        LOP(OP_CONNECT,XTERM);
7876
7877    case KEY_chr:
7878        UNI(OP_CHR);
7879
7880    case KEY_cos:
7881        UNI(OP_COS);
7882
7883    case KEY_chroot:
7884        UNI(OP_CHROOT);
7885
7886    case KEY_default:
7887        PREBLOCK(DEFAULT);
7888
7889    case KEY_defer:
7890        Perl_ck_warner_d(aTHX_
7891            packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
7892        PREBLOCK(DEFER);
7893
7894    case KEY_do:
7895        return yyl_do(aTHX_ s, orig_keyword);
7896
7897    case KEY_die:
7898        PL_hints |= HINT_BLOCK_SCOPE;
7899        LOP(OP_DIE,XTERM);
7900
7901    case KEY_defined:
7902        UNI(OP_DEFINED);
7903
7904    case KEY_delete:
7905        UNI(OP_DELETE);
7906
7907    case KEY_dbmopen:
7908        Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7909                          STR_WITH_LEN("NDBM_File::"),
7910                          STR_WITH_LEN("DB_File::"),
7911                          STR_WITH_LEN("GDBM_File::"),
7912                          STR_WITH_LEN("SDBM_File::"),
7913                          STR_WITH_LEN("ODBM_File::"),
7914                          NULL);
7915        LOP(OP_DBMOPEN,XTERM);
7916
7917    case KEY_dbmclose:
7918        UNI(OP_DBMCLOSE);
7919
7920    case KEY_dump:
7921        LOOPX(OP_DUMP);
7922
7923    case KEY_else:
7924        PREBLOCK(ELSE);
7925
7926    case KEY_elsif:
7927        pl_yylval.ival = CopLINE(PL_curcop);
7928        OPERATOR(ELSIF);
7929
7930    case KEY_eq:
7931        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7932            return REPORT(0);
7933        ChEop(OP_SEQ);
7934
7935    case KEY_exists:
7936        UNI(OP_EXISTS);
7937
7938    case KEY_exit:
7939        UNI(OP_EXIT);
7940
7941    case KEY_eval:
7942        s = skipspace(s);
7943        if (*s == '{') { /* block eval */
7944            PL_expect = XTERMBLOCK;
7945            UNIBRACK(OP_ENTERTRY);
7946        }
7947        else { /* string eval */
7948            PL_expect = XTERM;
7949            UNIBRACK(OP_ENTEREVAL);
7950        }
7951
7952    case KEY_evalbytes:
7953        PL_expect = XTERM;
7954        UNIBRACK(-OP_ENTEREVAL);
7955
7956    case KEY_eof:
7957        UNI(OP_EOF);
7958
7959    case KEY_exp:
7960        UNI(OP_EXP);
7961
7962    case KEY_each:
7963        UNI(OP_EACH);
7964
7965    case KEY_exec:
7966        LOP(OP_EXEC,XREF);
7967
7968    case KEY_endhostent:
7969        FUN0(OP_EHOSTENT);
7970
7971    case KEY_endnetent:
7972        FUN0(OP_ENETENT);
7973
7974    case KEY_endservent:
7975        FUN0(OP_ESERVENT);
7976
7977    case KEY_endprotoent:
7978        FUN0(OP_EPROTOENT);
7979
7980    case KEY_endpwent:
7981        FUN0(OP_EPWENT);
7982
7983    case KEY_endgrent:
7984        FUN0(OP_EGRENT);
7985
7986    case KEY_finally:
7987        Perl_ck_warner_d(aTHX_
7988            packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
7989        PREBLOCK(FINALLY);
7990
7991    case KEY_for:
7992    case KEY_foreach:
7993        return yyl_foreach(aTHX_ s);
7994
7995    case KEY_formline:
7996        LOP(OP_FORMLINE,XTERM);
7997
7998    case KEY_fork:
7999        FUN0(OP_FORK);
8000
8001    case KEY_fc:
8002        UNI(OP_FC);
8003
8004    case KEY_fcntl:
8005        LOP(OP_FCNTL,XTERM);
8006
8007    case KEY_fileno:
8008        UNI(OP_FILENO);
8009
8010    case KEY_flock:
8011        LOP(OP_FLOCK,XTERM);
8012
8013    case KEY_gt:
8014        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8015            return REPORT(0);
8016        ChRop(OP_SGT);
8017
8018    case KEY_ge:
8019        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8020            return REPORT(0);
8021        ChRop(OP_SGE);
8022
8023    case KEY_grep:
8024        LOP(OP_GREPSTART, XREF);
8025
8026    case KEY_goto:
8027        LOOPX(OP_GOTO);
8028
8029    case KEY_gmtime:
8030        UNI(OP_GMTIME);
8031
8032    case KEY_getc:
8033        UNIDOR(OP_GETC);
8034
8035    case KEY_getppid:
8036        FUN0(OP_GETPPID);
8037
8038    case KEY_getpgrp:
8039        UNI(OP_GETPGRP);
8040
8041    case KEY_getpriority:
8042        LOP(OP_GETPRIORITY,XTERM);
8043
8044    case KEY_getprotobyname:
8045        UNI(OP_GPBYNAME);
8046
8047    case KEY_getprotobynumber:
8048        LOP(OP_GPBYNUMBER,XTERM);
8049
8050    case KEY_getprotoent:
8051        FUN0(OP_GPROTOENT);
8052
8053    case KEY_getpwent:
8054        FUN0(OP_GPWENT);
8055
8056    case KEY_getpwnam:
8057        UNI(OP_GPWNAM);
8058
8059    case KEY_getpwuid:
8060        UNI(OP_GPWUID);
8061
8062    case KEY_getpeername:
8063        UNI(OP_GETPEERNAME);
8064
8065    case KEY_gethostbyname:
8066        UNI(OP_GHBYNAME);
8067
8068    case KEY_gethostbyaddr:
8069        LOP(OP_GHBYADDR,XTERM);
8070
8071    case KEY_gethostent:
8072        FUN0(OP_GHOSTENT);
8073
8074    case KEY_getnetbyname:
8075        UNI(OP_GNBYNAME);
8076
8077    case KEY_getnetbyaddr:
8078        LOP(OP_GNBYADDR,XTERM);
8079
8080    case KEY_getnetent:
8081        FUN0(OP_GNETENT);
8082
8083    case KEY_getservbyname:
8084        LOP(OP_GSBYNAME,XTERM);
8085
8086    case KEY_getservbyport:
8087        LOP(OP_GSBYPORT,XTERM);
8088
8089    case KEY_getservent:
8090        FUN0(OP_GSERVENT);
8091
8092    case KEY_getsockname:
8093        UNI(OP_GETSOCKNAME);
8094
8095    case KEY_getsockopt:
8096        LOP(OP_GSOCKOPT,XTERM);
8097
8098    case KEY_getgrent:
8099        FUN0(OP_GGRENT);
8100
8101    case KEY_getgrnam:
8102        UNI(OP_GGRNAM);
8103
8104    case KEY_getgrgid:
8105        UNI(OP_GGRGID);
8106
8107    case KEY_getlogin:
8108        FUN0(OP_GETLOGIN);
8109
8110    case KEY_given:
8111        pl_yylval.ival = CopLINE(PL_curcop);
8112        Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8113                         "given is experimental");
8114        OPERATOR(GIVEN);
8115
8116    case KEY_glob:
8117        LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8118
8119    case KEY_hex:
8120        UNI(OP_HEX);
8121
8122    case KEY_if:
8123        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8124            return REPORT(0);
8125        pl_yylval.ival = CopLINE(PL_curcop);
8126        OPERATOR(IF);
8127
8128    case KEY_index:
8129        LOP(OP_INDEX,XTERM);
8130
8131    case KEY_int:
8132        UNI(OP_INT);
8133
8134    case KEY_ioctl:
8135        LOP(OP_IOCTL,XTERM);
8136
8137    case KEY_isa:
8138        NCRop(OP_ISA);
8139
8140    case KEY_join:
8141        LOP(OP_JOIN,XTERM);
8142
8143    case KEY_keys:
8144        UNI(OP_KEYS);
8145
8146    case KEY_kill:
8147        LOP(OP_KILL,XTERM);
8148
8149    case KEY_last:
8150        LOOPX(OP_LAST);
8151
8152    case KEY_lc:
8153        UNI(OP_LC);
8154
8155    case KEY_lcfirst:
8156        UNI(OP_LCFIRST);
8157
8158    case KEY_local:
8159        OPERATOR(LOCAL);
8160
8161    case KEY_length:
8162        UNI(OP_LENGTH);
8163
8164    case KEY_lt:
8165        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8166            return REPORT(0);
8167        ChRop(OP_SLT);
8168
8169    case KEY_le:
8170        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8171            return REPORT(0);
8172        ChRop(OP_SLE);
8173
8174    case KEY_localtime:
8175        UNI(OP_LOCALTIME);
8176
8177    case KEY_log:
8178        UNI(OP_LOG);
8179
8180    case KEY_link:
8181        LOP(OP_LINK,XTERM);
8182
8183    case KEY_listen:
8184        LOP(OP_LISTEN,XTERM);
8185
8186    case KEY_lock:
8187        UNI(OP_LOCK);
8188
8189    case KEY_lstat:
8190        UNI(OP_LSTAT);
8191
8192    case KEY_m:
8193        s = scan_pat(s,OP_MATCH);
8194        TERM(sublex_start());
8195
8196    case KEY_map:
8197        LOP(OP_MAPSTART, XREF);
8198
8199    case KEY_mkdir:
8200        LOP(OP_MKDIR,XTERM);
8201
8202    case KEY_msgctl:
8203        LOP(OP_MSGCTL,XTERM);
8204
8205    case KEY_msgget:
8206        LOP(OP_MSGGET,XTERM);
8207
8208    case KEY_msgrcv:
8209        LOP(OP_MSGRCV,XTERM);
8210
8211    case KEY_msgsnd:
8212        LOP(OP_MSGSND,XTERM);
8213
8214    case KEY_our:
8215    case KEY_my:
8216    case KEY_state:
8217        return yyl_my(aTHX_ s, key);
8218
8219    case KEY_next:
8220        LOOPX(OP_NEXT);
8221
8222    case KEY_ne:
8223        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8224            return REPORT(0);
8225        ChEop(OP_SNE);
8226
8227    case KEY_no:
8228        s = tokenize_use(0, s);
8229        TOKEN(USE);
8230
8231    case KEY_not:
8232        if (*s == '(' || (s = skipspace(s), *s == '('))
8233            FUN1(OP_NOT);
8234        else {
8235            if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8236                PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8237            OPERATOR(NOTOP);
8238        }
8239
8240    case KEY_open:
8241        s = skipspace(s);
8242        if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8243            const char *t;
8244            char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8245            for (t=d; isSPACE(*t);)
8246                t++;
8247            if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8248                /* [perl #16184] */
8249                && !(t[0] == '=' && t[1] == '>')
8250                && !(t[0] == ':' && t[1] == ':')
8251                && !keyword(s, d-s, 0)
8252            ) {
8253                Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8254                   "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8255                    UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8256            }
8257        }
8258        LOP(OP_OPEN,XTERM);
8259
8260    case KEY_or:
8261        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8262            return REPORT(0);
8263        pl_yylval.ival = OP_OR;
8264        OPERATOR(OROP);
8265
8266    case KEY_ord:
8267        UNI(OP_ORD);
8268
8269    case KEY_oct:
8270        UNI(OP_OCT);
8271
8272    case KEY_opendir:
8273        LOP(OP_OPEN_DIR,XTERM);
8274
8275    case KEY_print:
8276        checkcomma(s,PL_tokenbuf,"filehandle");
8277        LOP(OP_PRINT,XREF);
8278
8279    case KEY_printf:
8280        checkcomma(s,PL_tokenbuf,"filehandle");
8281        LOP(OP_PRTF,XREF);
8282
8283    case KEY_prototype:
8284        UNI(OP_PROTOTYPE);
8285
8286    case KEY_push:
8287        LOP(OP_PUSH,XTERM);
8288
8289    case KEY_pop:
8290        UNIDOR(OP_POP);
8291
8292    case KEY_pos:
8293        UNIDOR(OP_POS);
8294
8295    case KEY_pack:
8296        LOP(OP_PACK,XTERM);
8297
8298    case KEY_package:
8299        s = force_word(s,BAREWORD,FALSE,TRUE);
8300        s = skipspace(s);
8301        s = force_strict_version(s);
8302        PREBLOCK(PACKAGE);
8303
8304    case KEY_pipe:
8305        LOP(OP_PIPE_OP,XTERM);
8306
8307    case KEY_q:
8308        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8309        if (!s)
8310            missingterm(NULL, 0);
8311        COPLINE_SET_FROM_MULTI_END;
8312        pl_yylval.ival = OP_CONST;
8313        TERM(sublex_start());
8314
8315    case KEY_quotemeta:
8316        UNI(OP_QUOTEMETA);
8317
8318    case KEY_qw:
8319        return yyl_qw(aTHX_ s, len);
8320
8321    case KEY_qq:
8322        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8323        if (!s)
8324            missingterm(NULL, 0);
8325        pl_yylval.ival = OP_STRINGIFY;
8326        if (SvIVX(PL_lex_stuff) == '\'')
8327            SvIV_set(PL_lex_stuff, 0);	/* qq'$foo' should interpolate */
8328        TERM(sublex_start());
8329
8330    case KEY_qr:
8331        s = scan_pat(s,OP_QR);
8332        TERM(sublex_start());
8333
8334    case KEY_qx:
8335        s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8336        if (!s)
8337            missingterm(NULL, 0);
8338        pl_yylval.ival = OP_BACKTICK;
8339        TERM(sublex_start());
8340
8341    case KEY_return:
8342        OLDLOP(OP_RETURN);
8343
8344    case KEY_require:
8345        return yyl_require(aTHX_ s, orig_keyword);
8346
8347    case KEY_reset:
8348        UNI(OP_RESET);
8349
8350    case KEY_redo:
8351        LOOPX(OP_REDO);
8352
8353    case KEY_rename:
8354        LOP(OP_RENAME,XTERM);
8355
8356    case KEY_rand:
8357        UNI(OP_RAND);
8358
8359    case KEY_rmdir:
8360        UNI(OP_RMDIR);
8361
8362    case KEY_rindex:
8363        LOP(OP_RINDEX,XTERM);
8364
8365    case KEY_read:
8366        LOP(OP_READ,XTERM);
8367
8368    case KEY_readdir:
8369        UNI(OP_READDIR);
8370
8371    case KEY_readline:
8372        UNIDOR(OP_READLINE);
8373
8374    case KEY_readpipe:
8375        UNIDOR(OP_BACKTICK);
8376
8377    case KEY_rewinddir:
8378        UNI(OP_REWINDDIR);
8379
8380    case KEY_recv:
8381        LOP(OP_RECV,XTERM);
8382
8383    case KEY_reverse:
8384        LOP(OP_REVERSE,XTERM);
8385
8386    case KEY_readlink:
8387        UNIDOR(OP_READLINK);
8388
8389    case KEY_ref:
8390        UNI(OP_REF);
8391
8392    case KEY_s:
8393        s = scan_subst(s);
8394        if (pl_yylval.opval)
8395            TERM(sublex_start());
8396        else
8397            TOKEN(1);	/* force error */
8398
8399    case KEY_say:
8400        checkcomma(s,PL_tokenbuf,"filehandle");
8401        LOP(OP_SAY,XREF);
8402
8403    case KEY_chomp:
8404        UNI(OP_CHOMP);
8405
8406    case KEY_scalar:
8407        UNI(OP_SCALAR);
8408
8409    case KEY_select:
8410        LOP(OP_SELECT,XTERM);
8411
8412    case KEY_seek:
8413        LOP(OP_SEEK,XTERM);
8414
8415    case KEY_semctl:
8416        LOP(OP_SEMCTL,XTERM);
8417
8418    case KEY_semget:
8419        LOP(OP_SEMGET,XTERM);
8420
8421    case KEY_semop:
8422        LOP(OP_SEMOP,XTERM);
8423
8424    case KEY_send:
8425        LOP(OP_SEND,XTERM);
8426
8427    case KEY_setpgrp:
8428        LOP(OP_SETPGRP,XTERM);
8429
8430    case KEY_setpriority:
8431        LOP(OP_SETPRIORITY,XTERM);
8432
8433    case KEY_sethostent:
8434        UNI(OP_SHOSTENT);
8435
8436    case KEY_setnetent:
8437        UNI(OP_SNETENT);
8438
8439    case KEY_setservent:
8440        UNI(OP_SSERVENT);
8441
8442    case KEY_setprotoent:
8443        UNI(OP_SPROTOENT);
8444
8445    case KEY_setpwent:
8446        FUN0(OP_SPWENT);
8447
8448    case KEY_setgrent:
8449        FUN0(OP_SGRENT);
8450
8451    case KEY_seekdir:
8452        LOP(OP_SEEKDIR,XTERM);
8453
8454    case KEY_setsockopt:
8455        LOP(OP_SSOCKOPT,XTERM);
8456
8457    case KEY_shift:
8458        UNIDOR(OP_SHIFT);
8459
8460    case KEY_shmctl:
8461        LOP(OP_SHMCTL,XTERM);
8462
8463    case KEY_shmget:
8464        LOP(OP_SHMGET,XTERM);
8465
8466    case KEY_shmread:
8467        LOP(OP_SHMREAD,XTERM);
8468
8469    case KEY_shmwrite:
8470        LOP(OP_SHMWRITE,XTERM);
8471
8472    case KEY_shutdown:
8473        LOP(OP_SHUTDOWN,XTERM);
8474
8475    case KEY_sin:
8476        UNI(OP_SIN);
8477
8478    case KEY_sleep:
8479        UNI(OP_SLEEP);
8480
8481    case KEY_socket:
8482        LOP(OP_SOCKET,XTERM);
8483
8484    case KEY_socketpair:
8485        LOP(OP_SOCKPAIR,XTERM);
8486
8487    case KEY_sort:
8488        checkcomma(s,PL_tokenbuf,"subroutine name");
8489        s = skipspace(s);
8490        PL_expect = XTERM;
8491        s = force_word(s,BAREWORD,TRUE,TRUE);
8492        LOP(OP_SORT,XREF);
8493
8494    case KEY_split:
8495        LOP(OP_SPLIT,XTERM);
8496
8497    case KEY_sprintf:
8498        LOP(OP_SPRINTF,XTERM);
8499
8500    case KEY_splice:
8501        LOP(OP_SPLICE,XTERM);
8502
8503    case KEY_sqrt:
8504        UNI(OP_SQRT);
8505
8506    case KEY_srand:
8507        UNI(OP_SRAND);
8508
8509    case KEY_stat:
8510        UNI(OP_STAT);
8511
8512    case KEY_study:
8513        UNI(OP_STUDY);
8514
8515    case KEY_substr:
8516        LOP(OP_SUBSTR,XTERM);
8517
8518    case KEY_format:
8519    case KEY_sub:
8520        return yyl_sub(aTHX_ s, key);
8521
8522    case KEY_system:
8523        LOP(OP_SYSTEM,XREF);
8524
8525    case KEY_symlink:
8526        LOP(OP_SYMLINK,XTERM);
8527
8528    case KEY_syscall:
8529        LOP(OP_SYSCALL,XTERM);
8530
8531    case KEY_sysopen:
8532        LOP(OP_SYSOPEN,XTERM);
8533
8534    case KEY_sysseek:
8535        LOP(OP_SYSSEEK,XTERM);
8536
8537    case KEY_sysread:
8538        LOP(OP_SYSREAD,XTERM);
8539
8540    case KEY_syswrite:
8541        LOP(OP_SYSWRITE,XTERM);
8542
8543    case KEY_tr:
8544    case KEY_y:
8545        s = scan_trans(s);
8546        TERM(sublex_start());
8547
8548    case KEY_tell:
8549        UNI(OP_TELL);
8550
8551    case KEY_telldir:
8552        UNI(OP_TELLDIR);
8553
8554    case KEY_tie:
8555        LOP(OP_TIE,XTERM);
8556
8557    case KEY_tied:
8558        UNI(OP_TIED);
8559
8560    case KEY_time:
8561        FUN0(OP_TIME);
8562
8563    case KEY_times:
8564        FUN0(OP_TMS);
8565
8566    case KEY_truncate:
8567        LOP(OP_TRUNCATE,XTERM);
8568
8569    case KEY_try:
8570        pl_yylval.ival = CopLINE(PL_curcop);
8571        Perl_ck_warner_d(aTHX_
8572            packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8573        PREBLOCK(TRY);
8574
8575    case KEY_uc:
8576        UNI(OP_UC);
8577
8578    case KEY_ucfirst:
8579        UNI(OP_UCFIRST);
8580
8581    case KEY_untie:
8582        UNI(OP_UNTIE);
8583
8584    case KEY_until:
8585        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8586            return REPORT(0);
8587        pl_yylval.ival = CopLINE(PL_curcop);
8588        OPERATOR(UNTIL);
8589
8590    case KEY_unless:
8591        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8592            return REPORT(0);
8593        pl_yylval.ival = CopLINE(PL_curcop);
8594        OPERATOR(UNLESS);
8595
8596    case KEY_unlink:
8597        LOP(OP_UNLINK,XTERM);
8598
8599    case KEY_undef:
8600        UNIDOR(OP_UNDEF);
8601
8602    case KEY_unpack:
8603        LOP(OP_UNPACK,XTERM);
8604
8605    case KEY_utime:
8606        LOP(OP_UTIME,XTERM);
8607
8608    case KEY_umask:
8609        UNIDOR(OP_UMASK);
8610
8611    case KEY_unshift:
8612        LOP(OP_UNSHIFT,XTERM);
8613
8614    case KEY_use:
8615        s = tokenize_use(1, s);
8616        TOKEN(USE);
8617
8618    case KEY_values:
8619        UNI(OP_VALUES);
8620
8621    case KEY_vec:
8622        LOP(OP_VEC,XTERM);
8623
8624    case KEY_when:
8625        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8626            return REPORT(0);
8627        pl_yylval.ival = CopLINE(PL_curcop);
8628        Perl_ck_warner_d(aTHX_
8629            packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8630            "when is experimental");
8631        OPERATOR(WHEN);
8632
8633    case KEY_while:
8634        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8635            return REPORT(0);
8636        pl_yylval.ival = CopLINE(PL_curcop);
8637        OPERATOR(WHILE);
8638
8639    case KEY_warn:
8640        PL_hints |= HINT_BLOCK_SCOPE;
8641        LOP(OP_WARN,XTERM);
8642
8643    case KEY_wait:
8644        FUN0(OP_WAIT);
8645
8646    case KEY_waitpid:
8647        LOP(OP_WAITPID,XTERM);
8648
8649    case KEY_wantarray:
8650        FUN0(OP_WANTARRAY);
8651
8652    case KEY_write:
8653        /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8654         * we use the same number on EBCDIC */
8655        gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8656        UNI(OP_ENTERWRITE);
8657
8658    case KEY_x:
8659        if (PL_expect == XOPERATOR) {
8660            if (*s == '=' && !PL_lex_allbrackets
8661                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8662            {
8663                return REPORT(0);
8664            }
8665            Mop(OP_REPEAT);
8666        }
8667        check_uni();
8668        return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8669
8670    case KEY_xor:
8671        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8672            return REPORT(0);
8673        pl_yylval.ival = OP_XOR;
8674        OPERATOR(OROP);
8675    }
8676}
8677
8678static int
8679yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8680{
8681    I32 key = 0;
8682    I32 orig_keyword = 0;
8683    STRLEN olen = len;
8684    char *d = s;
8685    s += 2;
8686    s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8687    if ((*s == ':' && s[1] == ':')
8688        || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8689    {
8690        Copy(PL_bufptr, PL_tokenbuf, olen, char);
8691        return yyl_just_a_word(aTHX_ d, olen, 0, c);
8692    }
8693    if (!key)
8694        Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8695                          UTF8fARG(UTF, len, PL_tokenbuf));
8696    if (key < 0)
8697        key = -key;
8698    else if (key == KEY_require || key == KEY_do
8699          || key == KEY_glob)
8700        /* that's a way to remember we saw "CORE::" */
8701        orig_keyword = key;
8702
8703    /* Known to be a reserved word at this point */
8704    return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8705}
8706
8707static int
8708yyl_keylookup(pTHX_ char *s, GV *gv)
8709{
8710    STRLEN len;
8711    bool anydelim;
8712    I32 key;
8713    struct code c = no_code;
8714    I32 orig_keyword = 0;
8715    char *d;
8716
8717    c.gv = gv;
8718
8719    PL_bufptr = s;
8720    s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8721
8722    /* Some keywords can be followed by any delimiter, including ':' */
8723    anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8724
8725    /* x::* is just a word, unless x is "CORE" */
8726    if (!anydelim && *s == ':' && s[1] == ':') {
8727        if (memEQs(PL_tokenbuf, len, "CORE"))
8728            return yyl_key_core(aTHX_ s, len, c);
8729        return yyl_just_a_word(aTHX_ s, len, 0, c);
8730    }
8731
8732    d = s;
8733    while (d < PL_bufend && isSPACE(*d))
8734            d++;	/* no comments skipped here, or s### is misparsed */
8735
8736    /* Is this a word before a => operator? */
8737    if (*d == '=' && d[1] == '>') {
8738        return yyl_fatcomma(aTHX_ s, len);
8739    }
8740
8741    /* Check for plugged-in keyword */
8742    {
8743        OP *o;
8744        int result;
8745        char *saved_bufptr = PL_bufptr;
8746        PL_bufptr = s;
8747        result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8748        s = PL_bufptr;
8749        if (result == KEYWORD_PLUGIN_DECLINE) {
8750            /* not a plugged-in keyword */
8751            PL_bufptr = saved_bufptr;
8752        } else if (result == KEYWORD_PLUGIN_STMT) {
8753            pl_yylval.opval = o;
8754            CLINE;
8755            if (!PL_nexttoke) PL_expect = XSTATE;
8756            return REPORT(PLUGSTMT);
8757        } else if (result == KEYWORD_PLUGIN_EXPR) {
8758            pl_yylval.opval = o;
8759            CLINE;
8760            if (!PL_nexttoke) PL_expect = XOPERATOR;
8761            return REPORT(PLUGEXPR);
8762        } else {
8763            Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8764        }
8765    }
8766
8767    /* Is this a label? */
8768    if (!anydelim && PL_expect == XSTATE
8769          && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8770        s = d + 1;
8771        pl_yylval.opval =
8772            newSVOP(OP_CONST, 0,
8773                newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8774        CLINE;
8775        TOKEN(LABEL);
8776    }
8777
8778    /* Check for lexical sub */
8779    if (PL_expect != XOPERATOR) {
8780        char tmpbuf[sizeof PL_tokenbuf + 1];
8781        *tmpbuf = '&';
8782        Copy(PL_tokenbuf, tmpbuf+1, len, char);
8783        c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8784        if (c.off != NOT_IN_PAD) {
8785            assert(c.off); /* we assume this is boolean-true below */
8786            if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8787                HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
8788                HEK * const stashname = HvNAME_HEK(stash);
8789                c.sv = newSVhek(stashname);
8790                sv_catpvs(c.sv, "::");
8791                sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8792                                (UTF ? SV_CATUTF8 : SV_CATBYTES));
8793                c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8794                                  SVt_PVCV);
8795                c.off = 0;
8796                if (!c.gv) {
8797                    sv_free(c.sv);
8798                    c.sv = NULL;
8799                    return yyl_just_a_word(aTHX_ s, len, 0, c);
8800                }
8801            }
8802            else {
8803                c.rv2cv_op = newOP(OP_PADANY, 0);
8804                c.rv2cv_op->op_targ = c.off;
8805                c.cv = find_lexical_cv(c.off);
8806            }
8807            c.lex = TRUE;
8808            return yyl_just_a_word(aTHX_ s, len, 0, c);
8809        }
8810        c.off = 0;
8811    }
8812
8813    /* Check for built-in keyword */
8814    key = keyword(PL_tokenbuf, len, 0);
8815
8816    if (key < 0)
8817        key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8818
8819    if (key && key != KEY___DATA__ && key != KEY___END__
8820     && (!anydelim || *s != '#')) {
8821        /* no override, and not s### either; skipspace is safe here
8822         * check for => on following line */
8823        bool arrow;
8824        STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8825        STRLEN   soff = s         - SvPVX(PL_linestr);
8826        s = peekspace(s);
8827        arrow = *s == '=' && s[1] == '>';
8828        PL_bufptr = SvPVX(PL_linestr) + bufoff;
8829        s         = SvPVX(PL_linestr) +   soff;
8830        if (arrow)
8831            return yyl_fatcomma(aTHX_ s, len);
8832    }
8833
8834    return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8835}
8836
8837static int
8838yyl_try(pTHX_ char *s)
8839{
8840    char *d;
8841    GV *gv = NULL;
8842    int tok;
8843
8844  retry:
8845    switch (*s) {
8846    default:
8847        if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8848            if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8849                return tok;
8850            goto retry_bufptr;
8851        }
8852        yyl_croak_unrecognised(aTHX_ s);
8853
8854    case 4:
8855    case 26:
8856        /* emulate EOF on ^D or ^Z */
8857        if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8858            return tok;
8859    retry_bufptr:
8860        s = PL_bufptr;
8861        goto retry;
8862
8863    case 0:
8864        if ((!PL_rsfp || PL_lex_inwhat)
8865         && (!PL_parser->filtered || s+1 < PL_bufend)) {
8866            PL_last_uni = 0;
8867            PL_last_lop = 0;
8868            if (PL_lex_brackets
8869                && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8870            {
8871                yyerror((const char *)
8872                        (PL_lex_formbrack
8873                         ? "Format not terminated"
8874                         : "Missing right curly or square bracket"));
8875            }
8876            DEBUG_T({
8877                PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8878            });
8879            TOKEN(0);
8880        }
8881        if (s++ < PL_bufend)
8882            goto retry;  /* ignore stray nulls */
8883        PL_last_uni = 0;
8884        PL_last_lop = 0;
8885        if (!PL_in_eval && !PL_preambled) {
8886            PL_preambled = TRUE;
8887            if (PL_perldb) {
8888                /* Generate a string of Perl code to load the debugger.
8889                 * If PERL5DB is set, it will return the contents of that,
8890                 * otherwise a compile-time require of perl5db.pl.  */
8891
8892                const char * const pdb = PerlEnv_getenv("PERL5DB");
8893
8894                if (pdb) {
8895                    sv_setpv(PL_linestr, pdb);
8896                    sv_catpvs(PL_linestr,";");
8897                } else {
8898                    SETERRNO(0,SS_NORMAL);
8899                    sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8900                }
8901                PL_parser->preambling = CopLINE(PL_curcop);
8902            } else
8903                SvPVCLEAR(PL_linestr);
8904            if (PL_preambleav) {
8905                SV **svp = AvARRAY(PL_preambleav);
8906                SV **const end = svp + AvFILLp(PL_preambleav);
8907                while(svp <= end) {
8908                    sv_catsv(PL_linestr, *svp);
8909                    ++svp;
8910                    sv_catpvs(PL_linestr, ";");
8911                }
8912                sv_free(MUTABLE_SV(PL_preambleav));
8913                PL_preambleav = NULL;
8914            }
8915            if (PL_minus_E)
8916                sv_catpvs(PL_linestr,
8917                          "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
8918            if (PL_minus_n || PL_minus_p) {
8919                sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8920                if (PL_minus_l)
8921                    sv_catpvs(PL_linestr,"chomp;");
8922                if (PL_minus_a) {
8923                    if (PL_minus_F) {
8924                        if (   (   *PL_splitstr == '/'
8925                                || *PL_splitstr == '\''
8926                                || *PL_splitstr == '"')
8927                            && strchr(PL_splitstr + 1, *PL_splitstr))
8928                        {
8929                            /* strchr is ok, because -F pattern can't contain
8930                             * embeddded NULs */
8931                            Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8932                        }
8933                        else {
8934                            /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8935                               bytes can be used as quoting characters.  :-) */
8936                            const char *splits = PL_splitstr;
8937                            sv_catpvs(PL_linestr, "our @F=split(q\0");
8938                            do {
8939                                /* Need to \ \s  */
8940                                if (*splits == '\\')
8941                                    sv_catpvn(PL_linestr, splits, 1);
8942                                sv_catpvn(PL_linestr, splits, 1);
8943                            } while (*splits++);
8944                            /* This loop will embed the trailing NUL of
8945                               PL_linestr as the last thing it does before
8946                               terminating.  */
8947                            sv_catpvs(PL_linestr, ");");
8948                        }
8949                    }
8950                    else
8951                        sv_catpvs(PL_linestr,"our @F=split(' ');");
8952                }
8953            }
8954            sv_catpvs(PL_linestr, "\n");
8955            PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8956            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8957            PL_last_lop = PL_last_uni = NULL;
8958            if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8959                update_debugger_info(PL_linestr, NULL, 0);
8960            goto retry;
8961        }
8962        if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8963            return tok;
8964        goto retry_bufptr;
8965
8966    case '\r':
8967#ifdef PERL_STRICT_CR
8968        Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8969        Perl_croak(aTHX_
8970      "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8971#endif
8972    case ' ': case '\t': case '\f': case '\v':
8973        s++;
8974        goto retry;
8975
8976    case '#':
8977    case '\n': {
8978        const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8979        if (needs_semicolon)
8980            TOKEN(PERLY_SEMICOLON);
8981        else
8982            goto retry;
8983    }
8984
8985    case '-':
8986        return yyl_hyphen(aTHX_ s);
8987
8988    case '+':
8989        return yyl_plus(aTHX_ s);
8990
8991    case '*':
8992        return yyl_star(aTHX_ s);
8993
8994    case '%':
8995        return yyl_percent(aTHX_ s);
8996
8997    case '^':
8998        return yyl_caret(aTHX_ s);
8999
9000    case '[':
9001        return yyl_leftsquare(aTHX_ s);
9002
9003    case '~':
9004        return yyl_tilde(aTHX_ s);
9005
9006    case ',':
9007        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9008            TOKEN(0);
9009        s++;
9010        OPERATOR(PERLY_COMMA);
9011    case ':':
9012        if (s[1] == ':')
9013            return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9014        return yyl_colon(aTHX_ s + 1);
9015
9016    case '(':
9017        return yyl_leftparen(aTHX_ s + 1);
9018
9019    case ';':
9020        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9021            TOKEN(0);
9022        CLINE;
9023        s++;
9024        PL_expect = XSTATE;
9025        TOKEN(PERLY_SEMICOLON);
9026
9027    case ')':
9028        return yyl_rightparen(aTHX_ s);
9029
9030    case ']':
9031        return yyl_rightsquare(aTHX_ s);
9032
9033    case '{':
9034        return yyl_leftcurly(aTHX_ s + 1, 0);
9035
9036    case '}':
9037        if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9038            TOKEN(0);
9039        return yyl_rightcurly(aTHX_ s, 0);
9040
9041    case '&':
9042        return yyl_ampersand(aTHX_ s);
9043
9044    case '|':
9045        return yyl_verticalbar(aTHX_ s);
9046
9047    case '=':
9048        if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9049            && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9050        {
9051            s = vcs_conflict_marker(s + 7);
9052            goto retry;
9053        }
9054
9055        s++;
9056        {
9057            const char tmp = *s++;
9058            if (tmp == '=') {
9059                if (!PL_lex_allbrackets
9060                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9061                {
9062                    s -= 2;
9063                    TOKEN(0);
9064                }
9065                ChEop(OP_EQ);
9066            }
9067            if (tmp == '>') {
9068                if (!PL_lex_allbrackets
9069                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9070                {
9071                    s -= 2;
9072                    TOKEN(0);
9073                }
9074                OPERATOR(PERLY_COMMA);
9075            }
9076            if (tmp == '~')
9077                PMop(OP_MATCH);
9078            if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9079                && memCHRs("+-*/%.^&|<",tmp))
9080                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9081                            "Reversed %c= operator",(int)tmp);
9082            s--;
9083            if (PL_expect == XSTATE
9084                && isALPHA(tmp)
9085                && (s == PL_linestart+1 || s[-2] == '\n') )
9086            {
9087                if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9088                    || PL_lex_state != LEX_NORMAL)
9089                {
9090                    d = PL_bufend;
9091                    while (s < d) {
9092                        if (*s++ == '\n') {
9093                            incline(s, PL_bufend);
9094                            if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9095                            {
9096                                s = (char *) memchr(s,'\n', d - s);
9097                                if (s)
9098                                    s++;
9099                                else
9100                                    s = d;
9101                                incline(s, PL_bufend);
9102                                goto retry;
9103                            }
9104                        }
9105                    }
9106                    goto retry;
9107                }
9108                s = PL_bufend;
9109                PL_parser->in_pod = 1;
9110                goto retry;
9111            }
9112        }
9113        if (PL_expect == XBLOCK) {
9114            const char *t = s;
9115#ifdef PERL_STRICT_CR
9116            while (SPACE_OR_TAB(*t))
9117#else
9118            while (SPACE_OR_TAB(*t) || *t == '\r')
9119#endif
9120                t++;
9121            if (*t == '\n' || *t == '#') {
9122                ENTER_with_name("lex_format");
9123                SAVEI8(PL_parser->form_lex_state);
9124                SAVEI32(PL_lex_formbrack);
9125                PL_parser->form_lex_state = PL_lex_state;
9126                PL_lex_formbrack = PL_lex_brackets + 1;
9127                PL_parser->sub_error_count = PL_error_count;
9128                return yyl_leftcurly(aTHX_ s, 1);
9129            }
9130        }
9131        if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9132            s--;
9133            TOKEN(0);
9134        }
9135        pl_yylval.ival = 0;
9136        OPERATOR(ASSIGNOP);
9137
9138        case '!':
9139        return yyl_bang(aTHX_ s + 1);
9140
9141    case '<':
9142        if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9143            && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9144        {
9145            s = vcs_conflict_marker(s + 7);
9146            goto retry;
9147        }
9148        return yyl_leftpointy(aTHX_ s);
9149
9150    case '>':
9151        if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9152            && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9153        {
9154            s = vcs_conflict_marker(s + 7);
9155            goto retry;
9156        }
9157        return yyl_rightpointy(aTHX_ s + 1);
9158
9159    case '$':
9160        return yyl_dollar(aTHX_ s);
9161
9162    case '@':
9163        return yyl_snail(aTHX_ s);
9164
9165    case '/':			/* may be division, defined-or, or pattern */
9166        return yyl_slash(aTHX_ s);
9167
9168     case '?':			/* conditional */
9169        s++;
9170        if (!PL_lex_allbrackets
9171            && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9172        {
9173            s--;
9174            TOKEN(0);
9175        }
9176        PL_lex_allbrackets++;
9177        OPERATOR(PERLY_QUESTION_MARK);
9178
9179    case '.':
9180        if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9181#ifdef PERL_STRICT_CR
9182            && s[1] == '\n'
9183#else
9184            && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9185#endif
9186            && (s == PL_linestart || s[-1] == '\n') )
9187        {
9188            PL_expect = XSTATE;
9189            /* formbrack==2 means dot seen where arguments expected */
9190            return yyl_rightcurly(aTHX_ s, 2);
9191        }
9192        if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9193            s += 3;
9194            OPERATOR(YADAYADA);
9195        }
9196        if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9197            char tmp = *s++;
9198            if (*s == tmp) {
9199                if (!PL_lex_allbrackets
9200                    && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9201                {
9202                    s--;
9203                    TOKEN(0);
9204                }
9205                s++;
9206                if (*s == tmp) {
9207                    s++;
9208                    pl_yylval.ival = OPf_SPECIAL;
9209                }
9210                else
9211                    pl_yylval.ival = 0;
9212                OPERATOR(DOTDOT);
9213            }
9214            if (*s == '=' && !PL_lex_allbrackets
9215                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9216            {
9217                s--;
9218                TOKEN(0);
9219            }
9220            Aop(OP_CONCAT);
9221        }
9222        /* FALLTHROUGH */
9223    case '0': case '1': case '2': case '3': case '4':
9224    case '5': case '6': case '7': case '8': case '9':
9225        s = scan_num(s, &pl_yylval);
9226        DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9227        if (PL_expect == XOPERATOR)
9228            no_op("Number",s);
9229        TERM(THING);
9230
9231    case '\'':
9232        return yyl_sglquote(aTHX_ s);
9233
9234    case '"':
9235        return yyl_dblquote(aTHX_ s);
9236
9237    case '`':
9238        return yyl_backtick(aTHX_ s);
9239
9240    case '\\':
9241        return yyl_backslash(aTHX_ s + 1);
9242
9243    case 'v':
9244        if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9245            char *start = s + 2;
9246            while (isDIGIT(*start) || *start == '_')
9247                start++;
9248            if (*start == '.' && isDIGIT(start[1])) {
9249                s = scan_num(s, &pl_yylval);
9250                TERM(THING);
9251            }
9252            else if ((*start == ':' && start[1] == ':')
9253                     || (PL_expect == XSTATE && *start == ':')) {
9254                if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9255                    return tok;
9256                goto retry_bufptr;
9257            }
9258            else if (PL_expect == XSTATE) {
9259                d = start;
9260                while (d < PL_bufend && isSPACE(*d)) d++;
9261                if (*d == ':') {
9262                    if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9263                        return tok;
9264                    goto retry_bufptr;
9265                }
9266            }
9267            /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9268            if (!isALPHA(*start) && (PL_expect == XTERM
9269                        || PL_expect == XREF || PL_expect == XSTATE
9270                        || PL_expect == XTERMORDORDOR)) {
9271                GV *const gv = gv_fetchpvn_flags(s, start - s,
9272                                                    UTF ? SVf_UTF8 : 0, SVt_PVCV);
9273                if (!gv) {
9274                    s = scan_num(s, &pl_yylval);
9275                    TERM(THING);
9276                }
9277            }
9278        }
9279        if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9280            return tok;
9281        goto retry_bufptr;
9282
9283    case 'x':
9284        if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9285            s++;
9286            Mop(OP_REPEAT);
9287        }
9288        if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9289            return tok;
9290        goto retry_bufptr;
9291
9292    case '_':
9293    case 'a': case 'A':
9294    case 'b': case 'B':
9295    case 'c': case 'C':
9296    case 'd': case 'D':
9297    case 'e': case 'E':
9298    case 'f': case 'F':
9299    case 'g': case 'G':
9300    case 'h': case 'H':
9301    case 'i': case 'I':
9302    case 'j': case 'J':
9303    case 'k': case 'K':
9304    case 'l': case 'L':
9305    case 'm': case 'M':
9306    case 'n': case 'N':
9307    case 'o': case 'O':
9308    case 'p': case 'P':
9309    case 'q': case 'Q':
9310    case 'r': case 'R':
9311    case 's': case 'S':
9312    case 't': case 'T':
9313    case 'u': case 'U':
9314              case 'V':
9315    case 'w': case 'W':
9316              case 'X':
9317    case 'y': case 'Y':
9318    case 'z': case 'Z':
9319        if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9320            return tok;
9321        goto retry_bufptr;
9322    }
9323}
9324
9325
9326/*
9327  yylex
9328
9329  Works out what to call the token just pulled out of the input
9330  stream.  The yacc parser takes care of taking the ops we return and
9331  stitching them into a tree.
9332
9333  Returns:
9334    The type of the next token
9335
9336  Structure:
9337      Check if we have already built the token; if so, use it.
9338      Switch based on the current state:
9339          - if we have a case modifier in a string, deal with that
9340          - handle other cases of interpolation inside a string
9341          - scan the next line if we are inside a format
9342      In the normal state, switch on the next character:
9343          - default:
9344            if alphabetic, go to key lookup
9345            unrecognized character - croak
9346          - 0/4/26: handle end-of-line or EOF
9347          - cases for whitespace
9348          - \n and #: handle comments and line numbers
9349          - various operators, brackets and sigils
9350          - numbers
9351          - quotes
9352          - 'v': vstrings (or go to key lookup)
9353          - 'x' repetition operator (or go to key lookup)
9354          - other ASCII alphanumerics (key lookup begins here):
9355              word before => ?
9356              keyword plugin
9357              scan built-in keyword (but do nothing with it yet)
9358              check for statement label
9359              check for lexical subs
9360                  return yyl_just_a_word if there is one
9361              see whether built-in keyword is overridden
9362              switch on keyword number:
9363                  - default: return yyl_just_a_word:
9364                      not a built-in keyword; handle bareword lookup
9365                      disambiguate between method and sub call
9366                      fall back to bareword
9367                  - cases for built-in keywords
9368*/
9369
9370int
9371Perl_yylex(pTHX)
9372{
9373    char *s = PL_bufptr;
9374
9375    if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9376        const U8* first_bad_char_loc;
9377        if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9378                                                        PL_bufend - PL_bufptr,
9379                                                        &first_bad_char_loc)))
9380        {
9381            _force_out_malformed_utf8_message(first_bad_char_loc,
9382                                              (U8 *) PL_bufend,
9383                                              0,
9384                                              1 /* 1 means die */ );
9385            NOT_REACHED; /* NOTREACHED */
9386        }
9387        PL_parser->recheck_utf8_validity = FALSE;
9388    }
9389    DEBUG_T( {
9390        SV* tmp = newSVpvs("");
9391        PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9392            (IV)CopLINE(PL_curcop),
9393            lex_state_names[PL_lex_state],
9394            exp_name[PL_expect],
9395            pv_display(tmp, s, strlen(s), 0, 60));
9396        SvREFCNT_dec(tmp);
9397    } );
9398
9399    /* when we've already built the next token, just pull it out of the queue */
9400    if (PL_nexttoke) {
9401        PL_nexttoke--;
9402        pl_yylval = PL_nextval[PL_nexttoke];
9403        {
9404            I32 next_type;
9405            next_type = PL_nexttype[PL_nexttoke];
9406            if (next_type & (7<<24)) {
9407                if (next_type & (1<<24)) {
9408                    if (PL_lex_brackets > 100)
9409                        Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9410                    PL_lex_brackstack[PL_lex_brackets++] =
9411                        (char) ((U8) (next_type >> 16));
9412                }
9413                if (next_type & (2<<24))
9414                    PL_lex_allbrackets++;
9415                if (next_type & (4<<24))
9416                    PL_lex_allbrackets--;
9417                next_type &= 0xffff;
9418            }
9419            return REPORT(next_type == 'p' ? pending_ident() : next_type);
9420        }
9421    }
9422
9423    switch (PL_lex_state) {
9424    case LEX_NORMAL:
9425    case LEX_INTERPNORMAL:
9426        break;
9427
9428    /* interpolated case modifiers like \L \U, including \Q and \E.
9429       when we get here, PL_bufptr is at the \
9430    */
9431    case LEX_INTERPCASEMOD:
9432        /* handle \E or end of string */
9433        return yyl_interpcasemod(aTHX_ s);
9434
9435    case LEX_INTERPPUSH:
9436        return REPORT(sublex_push());
9437
9438    case LEX_INTERPSTART:
9439        if (PL_bufptr == PL_bufend)
9440            return REPORT(sublex_done());
9441        DEBUG_T({
9442            if(*PL_bufptr != '(')
9443                PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9444        });
9445        PL_expect = XTERM;
9446        /* for /@a/, we leave the joining for the regex engine to do
9447         * (unless we're within \Q etc) */
9448        PL_lex_dojoin = (*PL_bufptr == '@'
9449                            && (!PL_lex_inpat || PL_lex_casemods));
9450        PL_lex_state = LEX_INTERPNORMAL;
9451        if (PL_lex_dojoin) {
9452            NEXTVAL_NEXTTOKE.ival = 0;
9453            force_next(PERLY_COMMA);
9454            force_ident("\"", PERLY_DOLLAR);
9455            NEXTVAL_NEXTTOKE.ival = 0;
9456            force_next(PERLY_DOLLAR);
9457            NEXTVAL_NEXTTOKE.ival = 0;
9458            force_next((2<<24)|PERLY_PAREN_OPEN);
9459            NEXTVAL_NEXTTOKE.ival = OP_JOIN;	/* emulate join($", ...) */
9460            force_next(FUNC);
9461        }
9462        /* Convert (?{...}) and friends to 'do {...}' */
9463        if (PL_lex_inpat && *PL_bufptr == '(') {
9464            PL_parser->lex_shared->re_eval_start = PL_bufptr;
9465            PL_bufptr += 2;
9466            if (*PL_bufptr != '{')
9467                PL_bufptr++;
9468            PL_expect = XTERMBLOCK;
9469            force_next(DO);
9470        }
9471
9472        if (PL_lex_starts++) {
9473            s = PL_bufptr;
9474            /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9475            if (!PL_lex_casemods && PL_lex_inpat)
9476                TOKEN(PERLY_COMMA);
9477            else
9478                AopNOASSIGN(OP_CONCAT);
9479        }
9480        return yylex();
9481
9482    case LEX_INTERPENDMAYBE:
9483        if (intuit_more(PL_bufptr, PL_bufend)) {
9484            PL_lex_state = LEX_INTERPNORMAL;	/* false alarm, more expr */
9485            break;
9486        }
9487        /* FALLTHROUGH */
9488
9489    case LEX_INTERPEND:
9490        if (PL_lex_dojoin) {
9491            const U8 dojoin_was = PL_lex_dojoin;
9492            PL_lex_dojoin = FALSE;
9493            PL_lex_state = LEX_INTERPCONCAT;
9494            PL_lex_allbrackets--;
9495            return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9496        }
9497        if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9498            && SvEVALED(PL_lex_repl))
9499        {
9500            if (PL_bufptr != PL_bufend)
9501                Perl_croak(aTHX_ "Bad evalled substitution pattern");
9502            PL_lex_repl = NULL;
9503        }
9504        /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9505           re_eval_str.  If the here-doc body's length equals the previous
9506           value of re_eval_start, re_eval_start will now be null.  So
9507           check re_eval_str as well. */
9508        if (PL_parser->lex_shared->re_eval_start
9509         || PL_parser->lex_shared->re_eval_str) {
9510            SV *sv;
9511            if (*PL_bufptr != ')')
9512                Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9513            PL_bufptr++;
9514            /* having compiled a (?{..}) expression, return the original
9515             * text too, as a const */
9516            if (PL_parser->lex_shared->re_eval_str) {
9517                sv = PL_parser->lex_shared->re_eval_str;
9518                PL_parser->lex_shared->re_eval_str = NULL;
9519                SvCUR_set(sv,
9520                         PL_bufptr - PL_parser->lex_shared->re_eval_start);
9521                SvPV_shrink_to_cur(sv);
9522            }
9523            else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9524                         PL_bufptr - PL_parser->lex_shared->re_eval_start);
9525            NEXTVAL_NEXTTOKE.opval =
9526                    newSVOP(OP_CONST, 0,
9527                                 sv);
9528            force_next(THING);
9529            PL_parser->lex_shared->re_eval_start = NULL;
9530            PL_expect = XTERM;
9531            return REPORT(PERLY_COMMA);
9532        }
9533
9534        /* FALLTHROUGH */
9535    case LEX_INTERPCONCAT:
9536#ifdef DEBUGGING
9537        if (PL_lex_brackets)
9538            Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9539                       (long) PL_lex_brackets);
9540#endif
9541        if (PL_bufptr == PL_bufend)
9542            return REPORT(sublex_done());
9543
9544        /* m'foo' still needs to be parsed for possible (?{...}) */
9545        if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9546            SV *sv = newSVsv(PL_linestr);
9547            sv = tokeq(sv);
9548            pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9549            s = PL_bufend;
9550        }
9551        else {
9552            int save_error_count = PL_error_count;
9553
9554            s = scan_const(PL_bufptr);
9555
9556            /* Set flag if this was a pattern and there were errors.  op.c will
9557             * refuse to compile a pattern with this flag set.  Otherwise, we
9558             * could get segfaults, etc. */
9559            if (PL_lex_inpat && PL_error_count > save_error_count) {
9560                ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9561            }
9562            if (*s == '\\')
9563                PL_lex_state = LEX_INTERPCASEMOD;
9564            else
9565                PL_lex_state = LEX_INTERPSTART;
9566        }
9567
9568        if (s != PL_bufptr) {
9569            NEXTVAL_NEXTTOKE = pl_yylval;
9570            PL_expect = XTERM;
9571            force_next(THING);
9572            if (PL_lex_starts++) {
9573                /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9574                if (!PL_lex_casemods && PL_lex_inpat)
9575                    TOKEN(PERLY_COMMA);
9576                else
9577                    AopNOASSIGN(OP_CONCAT);
9578            }
9579            else {
9580                PL_bufptr = s;
9581                return yylex();
9582            }
9583        }
9584
9585        return yylex();
9586    case LEX_FORMLINE:
9587        if (PL_parser->sub_error_count != PL_error_count) {
9588            /* There was an error parsing a formline, which tends to
9589               mess up the parser.
9590               Unlike interpolated sub-parsing, we can't treat any of
9591               these as recoverable, so no need to check sub_no_recover.
9592            */
9593            yyquit();
9594        }
9595        assert(PL_lex_formbrack);
9596        s = scan_formline(PL_bufptr);
9597        if (!PL_lex_formbrack)
9598            return yyl_rightcurly(aTHX_ s, 1);
9599        PL_bufptr = s;
9600        return yylex();
9601    }
9602
9603    /* We really do *not* want PL_linestr ever becoming a COW. */
9604    assert (!SvIsCOW(PL_linestr));
9605    s = PL_bufptr;
9606    PL_oldoldbufptr = PL_oldbufptr;
9607    PL_oldbufptr = s;
9608
9609    if (PL_in_my == KEY_sigvar) {
9610        PL_parser->saw_infix_sigil = 0;
9611        return yyl_sigvar(aTHX_ s);
9612    }
9613
9614    {
9615        /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9616           On its return, we then need to set it to indicate whether the token
9617           we just encountered was an infix operator that (if we hadn't been
9618           expecting an operator) have been a sigil.
9619        */
9620        bool expected_operator = (PL_expect == XOPERATOR);
9621        int ret = yyl_try(aTHX_ s);
9622        switch (pl_yylval.ival) {
9623        case OP_BIT_AND:
9624        case OP_MODULO:
9625        case OP_MULTIPLY:
9626        case OP_NBIT_AND:
9627            if (expected_operator) {
9628                PL_parser->saw_infix_sigil = 1;
9629                break;
9630            }
9631            /* FALLTHROUGH */
9632        default:
9633            PL_parser->saw_infix_sigil = 0;
9634        }
9635        return ret;
9636    }
9637}
9638
9639
9640/*
9641  S_pending_ident
9642
9643  Looks up an identifier in the pad or in a package
9644
9645  PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9646  rather than a plain pad var.
9647
9648  Returns:
9649    PRIVATEREF if this is a lexical name.
9650    BAREWORD   if this belongs to a package.
9651
9652  Structure:
9653      if we're in a my declaration
9654          croak if they tried to say my($foo::bar)
9655          build the ops for a my() declaration
9656      if it's an access to a my() variable
9657          build ops for access to a my() variable
9658      if in a dq string, and they've said @foo and we can't find @foo
9659          warn
9660      build ops for a bareword
9661*/
9662
9663static int
9664S_pending_ident(pTHX)
9665{
9666    PADOFFSET tmp = 0;
9667    const char pit = (char)pl_yylval.ival;
9668    const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9669    /* All routes through this function want to know if there is a colon.  */
9670    const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9671
9672    DEBUG_T({ PerlIO_printf(Perl_debug_log,
9673          "### Pending identifier '%s'\n", PL_tokenbuf); });
9674    assert(tokenbuf_len >= 2);
9675
9676    /* if we're in a my(), we can't allow dynamics here.
9677       $foo'bar has already been turned into $foo::bar, so
9678       just check for colons.
9679
9680       if it's a legal name, the OP is a PADANY.
9681    */
9682    if (PL_in_my) {
9683        if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
9684            if (has_colon)
9685                /* diag_listed_as: No package name allowed for variable %s
9686                                   in "our" */
9687                yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9688                                  "%s %s in \"our\"",
9689                                  *PL_tokenbuf=='&' ? "subroutine" : "variable",
9690                                  PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9691            tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9692        }
9693        else {
9694            OP *o;
9695            if (has_colon) {
9696                /* "my" variable %s can't be in a package */
9697                /* PL_no_myglob is constant */
9698                GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9699                yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9700                            PL_in_my == KEY_my ? "my" : "state",
9701                            *PL_tokenbuf == '&' ? "subroutine" : "variable",
9702                            PL_tokenbuf),
9703                            UTF ? SVf_UTF8 : 0);
9704                GCC_DIAG_RESTORE_STMT;
9705            }
9706
9707            if (PL_in_my == KEY_sigvar) {
9708                /* A signature 'padop' needs in addition, an op_first to
9709                 * point to a child sigdefelem, and an extra field to hold
9710                 * the signature index. We can achieve both by using an
9711                 * UNOP_AUX and (ab)using the op_aux field to hold the
9712                 * index. If we ever need more fields, use a real malloced
9713                 * aux strut instead.
9714                 */
9715                o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9716                                    INT2PTR(UNOP_AUX_item *,
9717                                        (PL_parser->sig_elems)));
9718                o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9719                                  : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9720                                  :                         OPpARGELEM_HV);
9721            }
9722            else
9723                o = newOP(OP_PADANY, 0);
9724            o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9725                                                        UTF ? SVf_UTF8 : 0);
9726            if (PL_in_my == KEY_sigvar)
9727                PL_in_my = 0;
9728
9729            pl_yylval.opval = o;
9730            return PRIVATEREF;
9731        }
9732    }
9733
9734    /*
9735       build the ops for accesses to a my() variable.
9736    */
9737
9738    if (!has_colon) {
9739        if (!PL_in_my)
9740            tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9741                                 0);
9742        if (tmp != NOT_IN_PAD) {
9743            /* might be an "our" variable" */
9744            if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9745                /* build ops for a bareword */
9746                HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9747                HEK * const stashname = HvNAME_HEK(stash);
9748                SV *  const sym = newSVhek(stashname);
9749                sv_catpvs(sym, "::");
9750                sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9751                pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9752                pl_yylval.opval->op_private = OPpCONST_ENTERED;
9753                if (pit != '&')
9754                  gv_fetchsv(sym,
9755                    GV_ADDMULTI,
9756                    ((PL_tokenbuf[0] == '$') ? SVt_PV
9757                     : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9758                     : SVt_PVHV));
9759                return BAREWORD;
9760            }
9761
9762            pl_yylval.opval = newOP(OP_PADANY, 0);
9763            pl_yylval.opval->op_targ = tmp;
9764            return PRIVATEREF;
9765        }
9766    }
9767
9768    /*
9769       Whine if they've said @foo or @foo{key} in a doublequoted string,
9770       and @foo (or %foo) isn't a variable we can find in the symbol
9771       table.
9772    */
9773    if (ckWARN(WARN_AMBIGUOUS)
9774        && pit == '@'
9775        && PL_lex_state != LEX_NORMAL
9776        && !PL_lex_brackets)
9777    {
9778        GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9779                                         ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9780                                         SVt_PVAV);
9781        if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9782           )
9783        {
9784            /* Downgraded from fatal to warning 20000522 mjd */
9785            Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9786                        "Possible unintended interpolation of %" UTF8f
9787                        " in string",
9788                        UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9789        }
9790    }
9791
9792    /* build ops for a bareword */
9793    pl_yylval.opval = newSVOP(OP_CONST, 0,
9794                                   newSVpvn_flags(PL_tokenbuf + 1,
9795                                                      tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9796                                                      UTF ? SVf_UTF8 : 0 ));
9797    pl_yylval.opval->op_private = OPpCONST_ENTERED;
9798    if (pit != '&')
9799        gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9800                     (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9801                     | ( UTF ? SVf_UTF8 : 0 ),
9802                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9803                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9804                      : SVt_PVHV));
9805    return BAREWORD;
9806}
9807
9808STATIC void
9809S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9810{
9811    PERL_ARGS_ASSERT_CHECKCOMMA;
9812
9813    if (*s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
9814        if (ckWARN(WARN_SYNTAX)) {
9815            int level = 1;
9816            const char *w;
9817            for (w = s+2; *w && level; w++) {
9818                if (*w == '(')
9819                    ++level;
9820                else if (*w == ')')
9821                    --level;
9822            }
9823            while (isSPACE(*w))
9824                ++w;
9825            /* the list of chars below is for end of statements or
9826             * block / parens, boolean operators (&&, ||, //) and branch
9827             * constructs (or, and, if, until, unless, while, err, for).
9828             * Not a very solid hack... */
9829            if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9830                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9831                            "%s (...) interpreted as function",name);
9832        }
9833    }
9834    while (s < PL_bufend && isSPACE(*s))
9835        s++;
9836    if (*s == '(')
9837        s++;
9838    while (s < PL_bufend && isSPACE(*s))
9839        s++;
9840    if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9841        const char * const w = s;
9842        s += UTF ? UTF8SKIP(s) : 1;
9843        while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9844            s += UTF ? UTF8SKIP(s) : 1;
9845        while (s < PL_bufend && isSPACE(*s))
9846            s++;
9847        if (*s == ',') {
9848            GV* gv;
9849            if (keyword(w, s - w, 0))
9850                return;
9851
9852            gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9853            if (gv && GvCVu(gv))
9854                return;
9855            if (s - w <= 254) {
9856                PADOFFSET off;
9857                char tmpbuf[256];
9858                Copy(w, tmpbuf+1, s - w, char);
9859                *tmpbuf = '&';
9860                off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9861                if (off != NOT_IN_PAD) return;
9862            }
9863            Perl_croak(aTHX_ "No comma allowed after %s", what);
9864        }
9865    }
9866}
9867
9868/* S_new_constant(): do any overload::constant lookup.
9869
9870   Either returns sv, or mortalizes/frees sv and returns a new SV*.
9871   Best used as sv=new_constant(..., sv, ...).
9872   If s, pv are NULL, calls subroutine with one argument,
9873   and <type> is used with error messages only.
9874   <type> is assumed to be well formed UTF-8.
9875
9876   If error_msg is not NULL, *error_msg will be set to any error encountered.
9877   Otherwise yyerror() will be used to output it */
9878
9879STATIC SV *
9880S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9881               SV *sv, SV *pv, const char *type, STRLEN typelen,
9882               const char ** error_msg)
9883{
9884    dSP;
9885    HV * table = GvHV(PL_hintgv);		 /* ^H */
9886    SV *res;
9887    SV *errsv = NULL;
9888    SV **cvp;
9889    SV *cv, *typesv;
9890    const char *why1 = "", *why2 = "", *why3 = "";
9891    const char * optional_colon = ":";  /* Only some messages have a colon */
9892    char *msg;
9893
9894    PERL_ARGS_ASSERT_NEW_CONSTANT;
9895    /* We assume that this is true: */
9896    assert(type || s);
9897
9898    sv_2mortal(sv);			/* Parent created it permanently */
9899
9900    if (   ! table
9901        || ! (PL_hints & HINT_LOCALIZE_HH))
9902    {
9903        why1 = "unknown";
9904        optional_colon = "";
9905        goto report;
9906    }
9907
9908    cvp = hv_fetch(table, key, keylen, FALSE);
9909    if (!cvp || !SvOK(*cvp)) {
9910        why1 = "$^H{";
9911        why2 = key;
9912        why3 = "} is not defined";
9913        goto report;
9914    }
9915
9916    cv = *cvp;
9917    if (!pv && s)
9918        pv = newSVpvn_flags(s, len, SVs_TEMP);
9919    if (type && pv)
9920        typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9921    else
9922        typesv = &PL_sv_undef;
9923
9924    PUSHSTACKi(PERLSI_OVERLOAD);
9925    ENTER ;
9926    SAVETMPS;
9927
9928    PUSHMARK(SP) ;
9929    EXTEND(sp, 3);
9930    if (pv)
9931        PUSHs(pv);
9932    PUSHs(sv);
9933    if (pv)
9934        PUSHs(typesv);
9935    PUTBACK;
9936    call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9937
9938    SPAGAIN ;
9939
9940    /* Check the eval first */
9941    if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9942        STRLEN errlen;
9943        const char * errstr;
9944        sv_catpvs(errsv, "Propagated");
9945        errstr = SvPV_const(errsv, errlen);
9946        yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9947        (void)POPs;
9948        res = SvREFCNT_inc_simple_NN(sv);
9949    }
9950    else {
9951        res = POPs;
9952        SvREFCNT_inc_simple_void_NN(res);
9953    }
9954
9955    PUTBACK ;
9956    FREETMPS ;
9957    LEAVE ;
9958    POPSTACK;
9959
9960    if (SvOK(res)) {
9961        return res;
9962    }
9963
9964    sv = res;
9965    (void)sv_2mortal(sv);
9966
9967    why1 = "Call to &{$^H{";
9968    why2 = key;
9969    why3 = "}} did not return a defined value";
9970
9971  report:
9972
9973    msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9974                        (int)(type ? typelen : len),
9975                        (type ? type: s),
9976                        optional_colon,
9977                        why1, why2, why3);
9978    if (error_msg) {
9979        *error_msg = msg;
9980    }
9981    else {
9982        yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9983    }
9984    return SvREFCNT_inc_simple_NN(sv);
9985}
9986
9987PERL_STATIC_INLINE void
9988S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9989                    bool is_utf8, bool check_dollar, bool tick_warn)
9990{
9991    int saw_tick = 0;
9992    const char *olds = *s;
9993    PERL_ARGS_ASSERT_PARSE_IDENT;
9994
9995    while (*s < PL_bufend) {
9996        if (*d >= e)
9997            Perl_croak(aTHX_ "%s", ident_too_long);
9998        if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9999             /* The UTF-8 case must come first, otherwise things
10000             * like c\N{COMBINING TILDE} would start failing, as the
10001             * isWORDCHAR_A case below would gobble the 'c' up.
10002             */
10003
10004            char *t = *s + UTF8SKIP(*s);
10005            while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10006                t += UTF8SKIP(t);
10007            }
10008            if (*d + (t - *s) > e)
10009                Perl_croak(aTHX_ "%s", ident_too_long);
10010            Copy(*s, *d, t - *s, char);
10011            *d += t - *s;
10012            *s = t;
10013        }
10014        else if ( isWORDCHAR_A(**s) ) {
10015            do {
10016                *(*d)++ = *(*s)++;
10017            } while (isWORDCHAR_A(**s) && *d < e);
10018        }
10019        else if (   allow_package
10020                 && **s == '\''
10021                 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10022        {
10023            *(*d)++ = ':';
10024            *(*d)++ = ':';
10025            (*s)++;
10026            saw_tick++;
10027        }
10028        else if (allow_package && **s == ':' && (*s)[1] == ':'
10029           /* Disallow things like Foo::$bar. For the curious, this is
10030            * the code path that triggers the "Bad name after" warning
10031            * when looking for barewords.
10032            */
10033           && !(check_dollar && (*s)[2] == '$')) {
10034            *(*d)++ = *(*s)++;
10035            *(*d)++ = *(*s)++;
10036        }
10037        else
10038            break;
10039    }
10040    if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
10041              && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
10042        char *this_d;
10043        char *d2;
10044        Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10045        d2 = this_d;
10046        SAVEFREEPV(this_d);
10047        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10048                         "Old package separator used in string");
10049        if (olds[-1] == '#')
10050            *d2++ = olds[-2];
10051        *d2++ = olds[-1];
10052        while (olds < *s) {
10053            if (*olds == '\'') {
10054                *d2++ = '\\';
10055                *d2++ = *olds++;
10056            }
10057            else
10058                *d2++ = *olds++;
10059        }
10060        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10061                         "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10062                          UTF8fARG(is_utf8, d2-this_d, this_d));
10063    }
10064    return;
10065}
10066
10067/* Returns a NUL terminated string, with the length of the string written to
10068   *slp
10069   */
10070char *
10071Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10072{
10073    char *d = dest;
10074    char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10075    bool is_utf8 = cBOOL(UTF);
10076
10077    PERL_ARGS_ASSERT_SCAN_WORD;
10078
10079    parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
10080    *d = '\0';
10081    *slp = d - dest;
10082    return s;
10083}
10084
10085/* Is the byte 'd' a legal single character identifier name?  'u' is true
10086 * iff Unicode semantics are to be used.  The legal ones are any of:
10087 *  a) all ASCII characters except:
10088 *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10089 *          2) '{'
10090 *     The final case currently doesn't get this far in the program, so we
10091 *     don't test for it.  If that were to change, it would be ok to allow it.
10092 *  b) When not under Unicode rules, any upper Latin1 character
10093 *  c) Otherwise, when unicode rules are used, all XIDS characters.
10094 *
10095 *      Because all ASCII characters have the same representation whether
10096 *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10097 *      '{' without knowing if is UTF-8 or not. */
10098#define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
10099    (isGRAPH_A(*(s)) || ((is_utf8)                                          \
10100                         ? isIDFIRST_utf8_safe(s, e)                        \
10101                         : (isGRAPH_L1(*s)                                  \
10102                            && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
10103
10104STATIC char *
10105S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10106{
10107    I32 herelines = PL_parser->herelines;
10108    SSize_t bracket = -1;
10109    char funny = *s++;
10110    char *d = dest;
10111    char * const e = d + destlen - 3;    /* two-character token, ending NUL */
10112    bool is_utf8 = cBOOL(UTF);
10113    I32 orig_copline = 0, tmp_copline = 0;
10114
10115    PERL_ARGS_ASSERT_SCAN_IDENT;
10116
10117    if (isSPACE(*s) || !*s)
10118        s = skipspace(s);
10119    if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10120        bool is_zero= *s == '0' ? TRUE : FALSE;
10121        char *digit_start= d;
10122        *d++ = *s++;
10123        while (s < PL_bufend && isDIGIT(*s)) {
10124            if (d >= e)
10125                Perl_croak(aTHX_ "%s", ident_too_long);
10126            *d++ = *s++;
10127        }
10128        if (is_zero && d - digit_start > 1)
10129            Perl_croak(aTHX_ ident_var_zero_multi_digit);
10130    }
10131    else {  /* See if it is a "normal" identifier */
10132        parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10133    }
10134    *d = '\0';
10135    d = dest;
10136    if (*d) {
10137        /* Either a digit variable, or parse_ident() found an identifier
10138           (anything valid as a bareword), so job done and return.  */
10139        if (PL_lex_state != LEX_NORMAL)
10140            PL_lex_state = LEX_INTERPENDMAYBE;
10141        return s;
10142    }
10143
10144    /* Here, it is not a run-of-the-mill identifier name */
10145
10146    if (*s == '$' && s[1]
10147        && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10148            || isDIGIT_A((U8)s[1])
10149            || s[1] == '$'
10150            || s[1] == '{'
10151            || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10152    {
10153        /* Dereferencing a value in a scalar variable.
10154           The alternatives are different syntaxes for a scalar variable.
10155           Using ' as a leading package separator isn't allowed. :: is.   */
10156        return s;
10157    }
10158    /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
10159    if (*s == '{') {
10160        bracket = s - SvPVX(PL_linestr);
10161        s++;
10162        orig_copline = CopLINE(PL_curcop);
10163        if (s < PL_bufend && isSPACE(*s)) {
10164            s = skipspace(s);
10165        }
10166    }
10167    if ((s <= PL_bufend - ((is_utf8)
10168                          ? UTF8SKIP(s)
10169                          : 1))
10170        && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
10171    {
10172        if (is_utf8) {
10173            const STRLEN skip = UTF8SKIP(s);
10174            STRLEN i;
10175            d[skip] = '\0';
10176            for ( i = 0; i < skip; i++ )
10177                d[i] = *s++;
10178        }
10179        else {
10180            *d = *s++;
10181            /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10182            if (isDIGIT(*d)) {
10183                bool is_zero= *d == '0' ? TRUE : FALSE;
10184                char *digit_start= d;
10185                while (s < PL_bufend && isDIGIT(*s)) {
10186                    d++;
10187                    if (d >= e)
10188                        Perl_croak(aTHX_ "%s", ident_too_long);
10189                    *d= *s++;
10190                }
10191                if (is_zero && d - digit_start > 1)
10192                    Perl_croak(aTHX_ ident_var_zero_multi_digit);
10193            }
10194            d[1] = '\0';
10195        }
10196    }
10197    /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10198    if (*d == '^' && *s && isCONTROLVAR(*s)) {
10199        *d = toCTRL(*s);
10200        s++;
10201    }
10202    /* Warn about ambiguous code after unary operators if {...} notation isn't
10203       used.  There's no difference in ambiguity; it's merely a heuristic
10204       about when not to warn.  */
10205    else if (ck_uni && bracket == -1)
10206        check_uni();
10207    if (bracket != -1) {
10208        bool skip;
10209        char *s2;
10210        /* If we were processing {...} notation then...  */
10211        if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10212            || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10213                 && isWORDCHAR(*s))
10214        ) {
10215            /* note we have to check for a normal identifier first,
10216             * as it handles utf8 symbols, and only after that has
10217             * been ruled out can we look at the caret words */
10218            if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10219                /* if it starts as a valid identifier, assume that it is one.
10220                   (the later check for } being at the expected point will trap
10221                   cases where this doesn't pan out.)  */
10222                d += is_utf8 ? UTF8SKIP(d) : 1;
10223                parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10224                *d = '\0';
10225            }
10226            else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10227                d++;
10228                while (isWORDCHAR(*s) && d < e) {
10229                    *d++ = *s++;
10230                }
10231                if (d >= e)
10232                    Perl_croak(aTHX_ "%s", ident_too_long);
10233                *d = '\0';
10234            }
10235            tmp_copline = CopLINE(PL_curcop);
10236            if (s < PL_bufend && isSPACE(*s)) {
10237                s = skipspace(s);
10238            }
10239            if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10240                /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10241                if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10242                    const char * const brack =
10243                        (const char *)
10244                        ((*s == '[') ? "[...]" : "{...}");
10245                    orig_copline = CopLINE(PL_curcop);
10246                    CopLINE_set(PL_curcop, tmp_copline);
10247   /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10248                    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10249                        "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10250                        funny, dest, brack, funny, dest, brack);
10251                    CopLINE_set(PL_curcop, orig_copline);
10252                }
10253                bracket++;
10254                PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10255                PL_lex_allbrackets++;
10256                return s;
10257            }
10258        }
10259
10260        if ( !tmp_copline )
10261            tmp_copline = CopLINE(PL_curcop);
10262        if ((skip = s < PL_bufend && isSPACE(*s))) {
10263            /* Avoid incrementing line numbers or resetting PL_linestart,
10264               in case we have to back up.  */
10265            STRLEN s_off = s - SvPVX(PL_linestr);
10266            s2 = peekspace(s);
10267            s = SvPVX(PL_linestr) + s_off;
10268        }
10269        else
10270            s2 = s;
10271
10272        /* Expect to find a closing } after consuming any trailing whitespace.
10273         */
10274        if (*s2 == '}') {
10275            /* Now increment line numbers if applicable.  */
10276            if (skip)
10277                s = skipspace(s);
10278            s++;
10279            if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10280                PL_lex_state = LEX_INTERPEND;
10281                PL_expect = XREF;
10282            }
10283            if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10284                if (ckWARN(WARN_AMBIGUOUS)
10285                    && (keyword(dest, d - dest, 0)
10286                        || get_cvn_flags(dest, d - dest, is_utf8
10287                           ? SVf_UTF8
10288                           : 0)))
10289                {
10290                    SV *tmp = newSVpvn_flags( dest, d - dest,
10291                                        SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10292                    if (funny == '#')
10293                        funny = '@';
10294                    orig_copline = CopLINE(PL_curcop);
10295                    CopLINE_set(PL_curcop, tmp_copline);
10296                    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10297                        "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10298                        funny, SVfARG(tmp), funny, SVfARG(tmp));
10299                    CopLINE_set(PL_curcop, orig_copline);
10300                }
10301            }
10302        }
10303        else {
10304            /* Didn't find the closing } at the point we expected, so restore
10305               state such that the next thing to process is the opening { and */
10306            s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10307            CopLINE_set(PL_curcop, orig_copline);
10308            PL_parser->herelines = herelines;
10309            *dest = '\0';
10310            PL_parser->sub_no_recover = TRUE;
10311        }
10312    }
10313    else if (   PL_lex_state == LEX_INTERPNORMAL
10314             && !PL_lex_brackets
10315             && !intuit_more(s, PL_bufend))
10316        PL_lex_state = LEX_INTERPEND;
10317    return s;
10318}
10319
10320static bool
10321S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10322
10323    /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10324     * found in the parse starting at 's', based on the subset that are valid
10325     * in this context input to this routine in 'valid_flags'. Advances s.
10326     * Returns TRUE if the input should be treated as a valid flag, so the next
10327     * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10328     * upon first call on the current regex.  This routine will set it to any
10329     * charset modifier found.  The caller shouldn't change it.  This way,
10330     * another charset modifier encountered in the parse can be detected as an
10331     * error, as we have decided to allow only one */
10332
10333    const char c = **s;
10334    STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10335
10336    if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10337        if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10338            yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10339                       UTF ? SVf_UTF8 : 0);
10340            (*s) += charlen;
10341            /* Pretend that it worked, so will continue processing before
10342             * dieing */
10343            return TRUE;
10344        }
10345        return FALSE;
10346    }
10347
10348    switch (c) {
10349
10350        CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10351        case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10352        case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10353        case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10354        case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10355        case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10356        case LOCALE_PAT_MOD:
10357            if (*charset) {
10358                goto multiple_charsets;
10359            }
10360            set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10361            *charset = c;
10362            break;
10363        case UNICODE_PAT_MOD:
10364            if (*charset) {
10365                goto multiple_charsets;
10366            }
10367            set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10368            *charset = c;
10369            break;
10370        case ASCII_RESTRICT_PAT_MOD:
10371            if (! *charset) {
10372                set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10373            }
10374            else {
10375
10376                /* Error if previous modifier wasn't an 'a', but if it was, see
10377                 * if, and accept, a second occurrence (only) */
10378                if (*charset != 'a'
10379                    || get_regex_charset(*pmfl)
10380                        != REGEX_ASCII_RESTRICTED_CHARSET)
10381                {
10382                        goto multiple_charsets;
10383                }
10384                set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10385            }
10386            *charset = c;
10387            break;
10388        case DEPENDS_PAT_MOD:
10389            if (*charset) {
10390                goto multiple_charsets;
10391            }
10392            set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10393            *charset = c;
10394            break;
10395    }
10396
10397    (*s)++;
10398    return TRUE;
10399
10400    multiple_charsets:
10401        if (*charset != c) {
10402            yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10403        }
10404        else if (c == 'a') {
10405  /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10406            yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10407        }
10408        else {
10409            yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10410        }
10411
10412        /* Pretend that it worked, so will continue processing before dieing */
10413        (*s)++;
10414        return TRUE;
10415}
10416
10417STATIC char *
10418S_scan_pat(pTHX_ char *start, I32 type)
10419{
10420    PMOP *pm;
10421    char *s;
10422    const char * const valid_flags =
10423        (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10424    char charset = '\0';    /* character set modifier */
10425    unsigned int x_mod_count = 0;
10426
10427    PERL_ARGS_ASSERT_SCAN_PAT;
10428
10429    s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10430    if (!s)
10431        Perl_croak(aTHX_ "Search pattern not terminated");
10432
10433    pm = (PMOP*)newPMOP(type, 0);
10434    if (PL_multi_open == '?') {
10435        /* This is the only point in the code that sets PMf_ONCE:  */
10436        pm->op_pmflags |= PMf_ONCE;
10437
10438        /* Hence it's safe to do this bit of PMOP book-keeping here, which
10439           allows us to restrict the list needed by reset to just the ??
10440           matches.  */
10441        assert(type != OP_TRANS);
10442        if (PL_curstash) {
10443            MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10444            U32 elements;
10445            if (!mg) {
10446                mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10447                                 0);
10448            }
10449            elements = mg->mg_len / sizeof(PMOP**);
10450            Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10451            ((PMOP**)mg->mg_ptr) [elements++] = pm;
10452            mg->mg_len = elements * sizeof(PMOP**);
10453            PmopSTASH_set(pm,PL_curstash);
10454        }
10455    }
10456
10457    /* if qr/...(?{..}).../, then need to parse the pattern within a new
10458     * anon CV. False positives like qr/[(?{]/ are harmless */
10459
10460    if (type == OP_QR) {
10461        STRLEN len;
10462        char *e, *p = SvPV(PL_lex_stuff, len);
10463        e = p + len;
10464        for (; p < e; p++) {
10465            if (p[0] == '(' && p[1] == '?'
10466                && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10467            {
10468                pm->op_pmflags |= PMf_HAS_CV;
10469                break;
10470            }
10471        }
10472        pm->op_pmflags |= PMf_IS_QR;
10473    }
10474
10475    while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10476                                &s, &charset, &x_mod_count))
10477    {};
10478    /* issue a warning if /c is specified,but /g is not */
10479    if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10480    {
10481        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10482                       "Use of /c modifier is meaningless without /g" );
10483    }
10484
10485    PL_lex_op = (OP*)pm;
10486    pl_yylval.ival = OP_MATCH;
10487    return s;
10488}
10489
10490STATIC char *
10491S_scan_subst(pTHX_ char *start)
10492{
10493    char *s;
10494    PMOP *pm;
10495    I32 first_start;
10496    line_t first_line;
10497    line_t linediff = 0;
10498    I32 es = 0;
10499    char charset = '\0';    /* character set modifier */
10500    unsigned int x_mod_count = 0;
10501    char *t;
10502
10503    PERL_ARGS_ASSERT_SCAN_SUBST;
10504
10505    pl_yylval.ival = OP_NULL;
10506
10507    s = scan_str(start, TRUE, FALSE, FALSE, &t);
10508
10509    if (!s)
10510        Perl_croak(aTHX_ "Substitution pattern not terminated");
10511
10512    s = t;
10513
10514    first_start = PL_multi_start;
10515    first_line = CopLINE(PL_curcop);
10516    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10517    if (!s) {
10518        SvREFCNT_dec_NN(PL_lex_stuff);
10519        PL_lex_stuff = NULL;
10520        Perl_croak(aTHX_ "Substitution replacement not terminated");
10521    }
10522    PL_multi_start = first_start;	/* so whole substitution is taken together */
10523
10524    pm = (PMOP*)newPMOP(OP_SUBST, 0);
10525
10526
10527    while (*s) {
10528        if (*s == EXEC_PAT_MOD) {
10529            s++;
10530            es++;
10531        }
10532        else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10533                                  &s, &charset, &x_mod_count))
10534        {
10535            break;
10536        }
10537    }
10538
10539    if ((pm->op_pmflags & PMf_CONTINUE)) {
10540        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10541    }
10542
10543    if (es) {
10544        SV * const repl = newSVpvs("");
10545
10546        PL_multi_end = 0;
10547        pm->op_pmflags |= PMf_EVAL;
10548        for (; es > 1; es--) {
10549            sv_catpvs(repl, "eval ");
10550        }
10551        sv_catpvs(repl, "do {");
10552        sv_catsv(repl, PL_parser->lex_sub_repl);
10553        sv_catpvs(repl, "}");
10554        SvREFCNT_dec(PL_parser->lex_sub_repl);
10555        PL_parser->lex_sub_repl = repl;
10556    }
10557
10558
10559    linediff = CopLINE(PL_curcop) - first_line;
10560    if (linediff)
10561        CopLINE_set(PL_curcop, first_line);
10562
10563    if (linediff || es) {
10564        /* the IVX field indicates that the replacement string is a s///e;
10565         * the NVX field indicates how many src code lines the replacement
10566         * spreads over */
10567        sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10568        ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10569        ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10570                                                                    cBOOL(es);
10571    }
10572
10573    PL_lex_op = (OP*)pm;
10574    pl_yylval.ival = OP_SUBST;
10575    return s;
10576}
10577
10578STATIC char *
10579S_scan_trans(pTHX_ char *start)
10580{
10581    char* s;
10582    OP *o;
10583    U8 squash;
10584    U8 del;
10585    U8 complement;
10586    bool nondestruct = 0;
10587    char *t;
10588
10589    PERL_ARGS_ASSERT_SCAN_TRANS;
10590
10591    pl_yylval.ival = OP_NULL;
10592
10593    s = scan_str(start,FALSE,FALSE,FALSE,&t);
10594    if (!s)
10595        Perl_croak(aTHX_ "Transliteration pattern not terminated");
10596
10597    s = t;
10598
10599    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10600    if (!s) {
10601        SvREFCNT_dec_NN(PL_lex_stuff);
10602        PL_lex_stuff = NULL;
10603        Perl_croak(aTHX_ "Transliteration replacement not terminated");
10604    }
10605
10606    complement = del = squash = 0;
10607    while (1) {
10608        switch (*s) {
10609        case 'c':
10610            complement = OPpTRANS_COMPLEMENT;
10611            break;
10612        case 'd':
10613            del = OPpTRANS_DELETE;
10614            break;
10615        case 's':
10616            squash = OPpTRANS_SQUASH;
10617            break;
10618        case 'r':
10619            nondestruct = 1;
10620            break;
10621        default:
10622            goto no_more;
10623        }
10624        s++;
10625    }
10626  no_more:
10627
10628    o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10629    o->op_private &= ~OPpTRANS_ALL;
10630    o->op_private |= del|squash|complement;
10631
10632    PL_lex_op = o;
10633    pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10634
10635
10636    return s;
10637}
10638
10639/* scan_heredoc
10640   Takes a pointer to the first < in <<FOO.
10641   Returns a pointer to the byte following <<FOO.
10642
10643   This function scans a heredoc, which involves different methods
10644   depending on whether we are in a string eval, quoted construct, etc.
10645   This is because PL_linestr could containing a single line of input, or
10646   a whole string being evalled, or the contents of the current quote-
10647   like operator.
10648
10649   The two basic methods are:
10650    - Steal lines from the input stream
10651    - Scan the heredoc in PL_linestr and remove it therefrom
10652
10653   In a file scope or filtered eval, the first method is used; in a
10654   string eval, the second.
10655
10656   In a quote-like operator, we have to choose between the two,
10657   depending on where we can find a newline.  We peek into outer lex-
10658   ing scopes until we find one with a newline in it.  If we reach the
10659   outermost lexing scope and it is a file, we use the stream method.
10660   Otherwise it is treated as an eval.
10661*/
10662
10663STATIC char *
10664S_scan_heredoc(pTHX_ char *s)
10665{
10666    I32 op_type = OP_SCALAR;
10667    I32 len;
10668    SV *tmpstr;
10669    char term;
10670    char *d;
10671    char *e;
10672    char *peek;
10673    char *indent = 0;
10674    I32 indent_len = 0;
10675    bool indented = FALSE;
10676    const bool infile = PL_rsfp || PL_parser->filtered;
10677    const line_t origline = CopLINE(PL_curcop);
10678    LEXSHARED *shared = PL_parser->lex_shared;
10679
10680    PERL_ARGS_ASSERT_SCAN_HEREDOC;
10681
10682    s += 2;
10683    d = PL_tokenbuf + 1;
10684    e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10685    *PL_tokenbuf = '\n';
10686    peek = s;
10687
10688    if (*peek == '~') {
10689        indented = TRUE;
10690        peek++; s++;
10691    }
10692
10693    while (SPACE_OR_TAB(*peek))
10694        peek++;
10695
10696    if (*peek == '`' || *peek == '\'' || *peek =='"') {
10697        s = peek;
10698        term = *s++;
10699        s = delimcpy(d, e, s, PL_bufend, term, &len);
10700        if (s == PL_bufend)
10701            Perl_croak(aTHX_ "Unterminated delimiter for here document");
10702        d += len;
10703        s++;
10704    }
10705    else {
10706        if (*s == '\\')
10707            /* <<\FOO is equivalent to <<'FOO' */
10708            s++, term = '\'';
10709        else
10710            term = '"';
10711
10712        if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10713            Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10714
10715        peek = s;
10716
10717        while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10718            peek += UTF ? UTF8SKIP(peek) : 1;
10719        }
10720
10721        len = (peek - s >= e - d) ? (e - d) : (peek - s);
10722        Copy(s, d, len, char);
10723        s += len;
10724        d += len;
10725    }
10726
10727    if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10728        Perl_croak(aTHX_ "Delimiter for here document is too long");
10729
10730    *d++ = '\n';
10731    *d = '\0';
10732    len = d - PL_tokenbuf;
10733
10734#ifndef PERL_STRICT_CR
10735    d = (char *) memchr(s, '\r', PL_bufend - s);
10736    if (d) {
10737        char * const olds = s;
10738        s = d;
10739        while (s < PL_bufend) {
10740            if (*s == '\r') {
10741                *d++ = '\n';
10742                if (*++s == '\n')
10743                    s++;
10744            }
10745            else if (*s == '\n' && s[1] == '\r') {	/* \015\013 on a mac? */
10746                *d++ = *s++;
10747                s++;
10748            }
10749            else
10750                *d++ = *s++;
10751        }
10752        *d = '\0';
10753        PL_bufend = d;
10754        SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10755        s = olds;
10756    }
10757#endif
10758
10759    tmpstr = newSV_type(SVt_PVIV);
10760    SvGROW(tmpstr, 80);
10761    if (term == '\'') {
10762        op_type = OP_CONST;
10763        SvIV_set(tmpstr, -1);
10764    }
10765    else if (term == '`') {
10766        op_type = OP_BACKTICK;
10767        SvIV_set(tmpstr, '\\');
10768    }
10769
10770    PL_multi_start = origline + 1 + PL_parser->herelines;
10771    PL_multi_open = PL_multi_close = '<';
10772
10773    /* inside a string eval or quote-like operator */
10774    if (!infile || PL_lex_inwhat) {
10775        SV *linestr;
10776        char *bufend;
10777        char * const olds = s;
10778        PERL_CONTEXT * const cx = CX_CUR();
10779        /* These two fields are not set until an inner lexing scope is
10780           entered.  But we need them set here. */
10781        shared->ls_bufptr  = s;
10782        shared->ls_linestr = PL_linestr;
10783
10784        if (PL_lex_inwhat) {
10785            /* Look for a newline.  If the current buffer does not have one,
10786             peek into the line buffer of the parent lexing scope, going
10787             up as many levels as necessary to find one with a newline
10788             after bufptr.
10789            */
10790            while (!(s = (char *)memchr(
10791                                (void *)shared->ls_bufptr, '\n',
10792                                SvEND(shared->ls_linestr)-shared->ls_bufptr
10793                )))
10794            {
10795                shared = shared->ls_prev;
10796                /* shared is only null if we have gone beyond the outermost
10797                   lexing scope.  In a file, we will have broken out of the
10798                   loop in the previous iteration.  In an eval, the string buf-
10799                   fer ends with "\n;", so the while condition above will have
10800                   evaluated to false.  So shared can never be null.  Or so you
10801                   might think.  Odd syntax errors like s;@{<<; can gobble up
10802                   the implicit semicolon at the end of a flie, causing the
10803                   file handle to be closed even when we are not in a string
10804                   eval.  So shared may be null in that case.
10805                   (Closing '>>}' here to balance the earlier open brace for
10806                   editors that look for matched pairs.) */
10807                if (UNLIKELY(!shared))
10808                    goto interminable;
10809                /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10810                   most lexing scope.  In a file, shared->ls_linestr at that
10811                   level is just one line, so there is no body to steal. */
10812                if (infile && !shared->ls_prev) {
10813                    s = olds;
10814                    goto streaming;
10815                }
10816            }
10817        }
10818        else {	/* eval or we've already hit EOF */
10819            s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10820            if (!s)
10821                goto interminable;
10822        }
10823
10824        linestr = shared->ls_linestr;
10825        bufend = SvEND(linestr);
10826        d = s;
10827        if (indented) {
10828            char *myolds = s;
10829
10830            while (s < bufend - len + 1) {
10831                if (*s++ == '\n')
10832                    ++PL_parser->herelines;
10833
10834                if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10835                    char *backup = s;
10836                    indent_len = 0;
10837
10838                    /* Only valid if it's preceded by whitespace only */
10839                    while (backup != myolds && --backup >= myolds) {
10840                        if (! SPACE_OR_TAB(*backup)) {
10841                            break;
10842                        }
10843                        indent_len++;
10844                    }
10845
10846                    /* No whitespace or all! */
10847                    if (backup == s || *backup == '\n') {
10848                        Newx(indent, indent_len + 1, char);
10849                        memcpy(indent, backup + 1, indent_len);
10850                        indent[indent_len] = 0;
10851                        s--; /* before our delimiter */
10852                        PL_parser->herelines--; /* this line doesn't count */
10853                        break;
10854                    }
10855                }
10856            }
10857        }
10858        else {
10859            while (s < bufend - len + 1
10860                   && memNE(s,PL_tokenbuf,len) )
10861            {
10862                if (*s++ == '\n')
10863                    ++PL_parser->herelines;
10864            }
10865        }
10866
10867        if (s >= bufend - len + 1) {
10868            goto interminable;
10869        }
10870
10871        sv_setpvn(tmpstr,d+1,s-d);
10872        s += len - 1;
10873        /* the preceding stmt passes a newline */
10874        PL_parser->herelines++;
10875
10876        /* s now points to the newline after the heredoc terminator.
10877           d points to the newline before the body of the heredoc.
10878         */
10879
10880        /* We are going to modify linestr in place here, so set
10881           aside copies of the string if necessary for re-evals or
10882           (caller $n)[6]. */
10883        /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10884           check shared->re_eval_str. */
10885        if (shared->re_eval_start || shared->re_eval_str) {
10886            /* Set aside the rest of the regexp */
10887            if (!shared->re_eval_str)
10888                shared->re_eval_str =
10889                       newSVpvn(shared->re_eval_start,
10890                                bufend - shared->re_eval_start);
10891            shared->re_eval_start -= s-d;
10892        }
10893
10894        if (cxstack_ix >= 0
10895            && CxTYPE(cx) == CXt_EVAL
10896            && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10897            && cx->blk_eval.cur_text == linestr)
10898        {
10899            cx->blk_eval.cur_text = newSVsv(linestr);
10900            cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10901        }
10902
10903        /* Copy everything from s onwards back to d. */
10904        Move(s,d,bufend-s + 1,char);
10905        SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10906        /* Setting PL_bufend only applies when we have not dug deeper
10907           into other scopes, because sublex_done sets PL_bufend to
10908           SvEND(PL_linestr). */
10909        if (shared == PL_parser->lex_shared)
10910            PL_bufend = SvEND(linestr);
10911        s = olds;
10912    }
10913    else {
10914        SV *linestr_save;
10915        char *oldbufptr_save;
10916        char *oldoldbufptr_save;
10917      streaming:
10918        SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10919        term = PL_tokenbuf[1];
10920        len--;
10921        linestr_save = PL_linestr; /* must restore this afterwards */
10922        d = s;			 /* and this */
10923        oldbufptr_save = PL_oldbufptr;
10924        oldoldbufptr_save = PL_oldoldbufptr;
10925        PL_linestr = newSVpvs("");
10926        PL_bufend = SvPVX(PL_linestr);
10927
10928        while (1) {
10929            PL_bufptr = PL_bufend;
10930            CopLINE_set(PL_curcop,
10931                        origline + 1 + PL_parser->herelines);
10932
10933            if (   !lex_next_chunk(LEX_NO_TERM)
10934                && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10935            {
10936                /* Simply freeing linestr_save might seem simpler here, as it
10937                   does not matter what PL_linestr points to, since we are
10938                   about to croak; but in a quote-like op, linestr_save
10939                   will have been prospectively freed already, via
10940                   SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
10941                   restore PL_linestr. */
10942                SvREFCNT_dec_NN(PL_linestr);
10943                PL_linestr = linestr_save;
10944                PL_oldbufptr = oldbufptr_save;
10945                PL_oldoldbufptr = oldoldbufptr_save;
10946                goto interminable;
10947            }
10948
10949            CopLINE_set(PL_curcop, origline);
10950
10951            if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10952                s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10953                /* ^That should be enough to avoid this needing to grow:  */
10954                sv_catpvs(PL_linestr, "\n\0");
10955                assert(s == SvPVX(PL_linestr));
10956                PL_bufend = SvEND(PL_linestr);
10957            }
10958
10959            s = PL_bufptr;
10960            PL_parser->herelines++;
10961            PL_last_lop = PL_last_uni = NULL;
10962
10963#ifndef PERL_STRICT_CR
10964            if (PL_bufend - PL_linestart >= 2) {
10965                if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10966                    || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10967                {
10968                    PL_bufend[-2] = '\n';
10969                    PL_bufend--;
10970                    SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10971                }
10972                else if (PL_bufend[-1] == '\r')
10973                    PL_bufend[-1] = '\n';
10974            }
10975            else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10976                PL_bufend[-1] = '\n';
10977#endif
10978
10979            if (indented && (PL_bufend-s) >= len) {
10980                char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10981
10982                if (found) {
10983                    char *backup = found;
10984                    indent_len = 0;
10985
10986                    /* Only valid if it's preceded by whitespace only */
10987                    while (backup != s && --backup >= s) {
10988                        if (! SPACE_OR_TAB(*backup)) {
10989                            break;
10990                        }
10991                        indent_len++;
10992                    }
10993
10994                    /* All whitespace or none! */
10995                    if (backup == found || SPACE_OR_TAB(*backup)) {
10996                        Newx(indent, indent_len + 1, char);
10997                        memcpy(indent, backup, indent_len);
10998                        indent[indent_len] = 0;
10999                        SvREFCNT_dec(PL_linestr);
11000                        PL_linestr = linestr_save;
11001                        PL_linestart = SvPVX(linestr_save);
11002                        PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11003                        PL_oldbufptr = oldbufptr_save;
11004                        PL_oldoldbufptr = oldoldbufptr_save;
11005                        s = d;
11006                        break;
11007                    }
11008                }
11009
11010                /* Didn't find it */
11011                sv_catsv(tmpstr,PL_linestr);
11012            }
11013            else {
11014                if (*s == term && PL_bufend-s >= len
11015                    && memEQ(s,PL_tokenbuf + 1,len))
11016                {
11017                    SvREFCNT_dec(PL_linestr);
11018                    PL_linestr = linestr_save;
11019                    PL_linestart = SvPVX(linestr_save);
11020                    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11021                    PL_oldbufptr = oldbufptr_save;
11022                    PL_oldoldbufptr = oldoldbufptr_save;
11023                    s = d;
11024                    break;
11025                }
11026                else {
11027                    sv_catsv(tmpstr,PL_linestr);
11028                }
11029            }
11030        } /* while (1) */
11031    }
11032
11033    PL_multi_end = origline + PL_parser->herelines;
11034
11035    if (indented && indent) {
11036        STRLEN linecount = 1;
11037        STRLEN herelen = SvCUR(tmpstr);
11038        char *ss = SvPVX(tmpstr);
11039        char *se = ss + herelen;
11040        SV *newstr = newSV(herelen+1);
11041        SvPOK_on(newstr);
11042
11043        /* Trim leading whitespace */
11044        while (ss < se) {
11045            /* newline only? Copy and move on */
11046            if (*ss == '\n') {
11047                sv_catpvs(newstr,"\n");
11048                ss++;
11049                linecount++;
11050
11051            /* Found our indentation? Strip it */
11052            }
11053            else if (se - ss >= indent_len
11054                       && memEQ(ss, indent, indent_len))
11055            {
11056                STRLEN le = 0;
11057                ss += indent_len;
11058
11059                while ((ss + le) < se && *(ss + le) != '\n')
11060                    le++;
11061
11062                sv_catpvn(newstr, ss, le);
11063                ss += le;
11064
11065            /* Line doesn't begin with our indentation? Croak */
11066            }
11067            else {
11068                Safefree(indent);
11069                Perl_croak(aTHX_
11070                    "Indentation on line %d of here-doc doesn't match delimiter",
11071                    (int)linecount
11072                );
11073            }
11074        } /* while */
11075
11076        /* avoid sv_setsv() as we dont wan't to COW here */
11077        sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11078        Safefree(indent);
11079        SvREFCNT_dec_NN(newstr);
11080    }
11081
11082    if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11083        SvPV_shrink_to_cur(tmpstr);
11084    }
11085
11086    if (!IN_BYTES) {
11087        if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11088            SvUTF8_on(tmpstr);
11089    }
11090
11091    PL_lex_stuff = tmpstr;
11092    pl_yylval.ival = op_type;
11093    return s;
11094
11095  interminable:
11096    if (indent)
11097        Safefree(indent);
11098    SvREFCNT_dec(tmpstr);
11099    CopLINE_set(PL_curcop, origline);
11100    missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11101}
11102
11103
11104/* scan_inputsymbol
11105   takes: position of first '<' in input buffer
11106   returns: position of first char following the matching '>' in
11107            input buffer
11108   side-effects: pl_yylval and lex_op are set.
11109
11110   This code handles:
11111
11112   <>		read from ARGV
11113   <<>>		read from ARGV without magic open
11114   <FH> 	read from filehandle
11115   <pkg::FH>	read from package qualified filehandle
11116   <pkg'FH>	read from package qualified filehandle
11117   <$fh>	read from filehandle in $fh
11118   <*.h>	filename glob
11119
11120*/
11121
11122STATIC char *
11123S_scan_inputsymbol(pTHX_ char *start)
11124{
11125    char *s = start;		/* current position in buffer */
11126    char *end;
11127    I32 len;
11128    bool nomagicopen = FALSE;
11129    char *d = PL_tokenbuf;					/* start of temp holding space */
11130    const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;	/* end of temp holding space */
11131
11132    PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11133
11134    end = (char *) memchr(s, '\n', PL_bufend - s);
11135    if (!end)
11136        end = PL_bufend;
11137    if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11138        nomagicopen = TRUE;
11139        *d = '\0';
11140        len = 0;
11141        s += 3;
11142    }
11143    else
11144        s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
11145
11146    /* die if we didn't have space for the contents of the <>,
11147       or if it didn't end, or if we see a newline
11148    */
11149
11150    if (len >= (I32)sizeof PL_tokenbuf)
11151        Perl_croak(aTHX_ "Excessively long <> operator");
11152    if (s >= end)
11153        Perl_croak(aTHX_ "Unterminated <> operator");
11154
11155    s++;
11156
11157    /* check for <$fh>
11158       Remember, only scalar variables are interpreted as filehandles by
11159       this code.  Anything more complex (e.g., <$fh{$num}>) will be
11160       treated as a glob() call.
11161       This code makes use of the fact that except for the $ at the front,
11162       a scalar variable and a filehandle look the same.
11163    */
11164    if (*d == '$' && d[1]) d++;
11165
11166    /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11167    while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11168        d += UTF ? UTF8SKIP(d) : 1;
11169    }
11170
11171    /* If we've tried to read what we allow filehandles to look like, and
11172       there's still text left, then it must be a glob() and not a getline.
11173       Use scan_str to pull out the stuff between the <> and treat it
11174       as nothing more than a string.
11175    */
11176
11177    if (d - PL_tokenbuf != len) {
11178        pl_yylval.ival = OP_GLOB;
11179        s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11180        if (!s)
11181           Perl_croak(aTHX_ "Glob not terminated");
11182        return s;
11183    }
11184    else {
11185        bool readline_overriden = FALSE;
11186        GV *gv_readline;
11187        /* we're in a filehandle read situation */
11188        d = PL_tokenbuf;
11189
11190        /* turn <> into <ARGV> */
11191        if (!len)
11192            Copy("ARGV",d,5,char);
11193
11194        /* Check whether readline() is overriden */
11195        if ((gv_readline = gv_override("readline",8)))
11196            readline_overriden = TRUE;
11197
11198        /* if <$fh>, create the ops to turn the variable into a
11199           filehandle
11200        */
11201        if (*d == '$') {
11202            /* try to find it in the pad for this block, otherwise find
11203               add symbol table ops
11204            */
11205            const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11206            if (tmp != NOT_IN_PAD) {
11207                if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11208                    HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11209                    HEK * const stashname = HvNAME_HEK(stash);
11210                    SV * const sym = sv_2mortal(newSVhek(stashname));
11211                    sv_catpvs(sym, "::");
11212                    sv_catpv(sym, d+1);
11213                    d = SvPVX(sym);
11214                    goto intro_sym;
11215                }
11216                else {
11217                    OP * const o = newOP(OP_PADSV, 0);
11218                    o->op_targ = tmp;
11219                    PL_lex_op = readline_overriden
11220                        ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11221                                op_append_elem(OP_LIST, o,
11222                                    newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11223                        : newUNOP(OP_READLINE, 0, o);
11224                }
11225            }
11226            else {
11227                GV *gv;
11228                ++d;
11229              intro_sym:
11230                gv = gv_fetchpv(d,
11231                                GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11232                                SVt_PV);
11233                PL_lex_op = readline_overriden
11234                    ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11235                            op_append_elem(OP_LIST,
11236                                newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11237                                newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11238                    : newUNOP(OP_READLINE, 0,
11239                            newUNOP(OP_RV2SV, 0,
11240                                newGVOP(OP_GV, 0, gv)));
11241            }
11242            /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11243            pl_yylval.ival = OP_NULL;
11244        }
11245
11246        /* If it's none of the above, it must be a literal filehandle
11247           (<Foo::BAR> or <FOO>) so build a simple readline OP */
11248        else {
11249            GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11250            PL_lex_op = readline_overriden
11251                ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11252                        op_append_elem(OP_LIST,
11253                            newGVOP(OP_GV, 0, gv),
11254                            newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11255                : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11256            pl_yylval.ival = OP_NULL;
11257
11258            /* leave the token generation above to avoid confusing the parser */
11259            if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11260                no_bareword_filehandle(d);
11261            }
11262        }
11263    }
11264
11265    return s;
11266}
11267
11268
11269/* scan_str
11270   takes:
11271        start			position in buffer
11272        keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11273                                only if they are of the open/close form
11274        keep_delims		preserve the delimiters around the string
11275        re_reparse		compiling a run-time /(?{})/:
11276                                   collapse // to /,  and skip encoding src
11277        delimp			if non-null, this is set to the position of
11278                                the closing delimiter, or just after it if
11279                                the closing and opening delimiters differ
11280                                (i.e., the opening delimiter of a substitu-
11281                                tion replacement)
11282   returns: position to continue reading from buffer
11283   side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11284        updates the read buffer.
11285
11286   This subroutine pulls a string out of the input.  It is called for:
11287        q		single quotes		q(literal text)
11288        '		single quotes		'literal text'
11289        qq		double quotes		qq(interpolate $here please)
11290        "		double quotes		"interpolate $here please"
11291        qx		backticks		qx(/bin/ls -l)
11292        `		backticks		`/bin/ls -l`
11293        qw		quote words		@EXPORT_OK = qw( func() $spam )
11294        m//		regexp match		m/this/
11295        s///		regexp substitute	s/this/that/
11296        tr///		string transliterate	tr/this/that/
11297        y///		string transliterate	y/this/that/
11298        ($*@)		sub prototypes		sub foo ($)
11299        (stuff)		sub attr parameters	sub foo : attr(stuff)
11300        <>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
11301
11302   In most of these cases (all but <>, patterns and transliterate)
11303   yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11304   calls scan_str().  s/// makes yylex() call scan_subst() which calls
11305   scan_str().  tr/// and y/// make yylex() call scan_trans() which
11306   calls scan_str().
11307
11308   It skips whitespace before the string starts, and treats the first
11309   character as the delimiter.  If the delimiter is one of ([{< then
11310   the corresponding "close" character )]}> is used as the closing
11311   delimiter.  It allows quoting of delimiters, and if the string has
11312   balanced delimiters ([{<>}]) it allows nesting.
11313
11314   On success, the SV with the resulting string is put into lex_stuff or,
11315   if that is already non-NULL, into lex_repl. The second case occurs only
11316   when parsing the RHS of the special constructs s/// and tr/// (y///).
11317   For convenience, the terminating delimiter character is stuffed into
11318   SvIVX of the SV.
11319*/
11320
11321char *
11322Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11323                 char **delimp
11324    )
11325{
11326    SV *sv;			/* scalar value: string */
11327    char *s = start;		/* current position in the buffer */
11328    char *to;			/* current position in the sv's data */
11329    int brackets = 1;		/* bracket nesting level */
11330    bool d_is_utf8 = FALSE;	/* is there any utf8 content? */
11331    UV open_delim_code;         /* code point */
11332    char open_delim_str[UTF8_MAXBYTES+1];
11333    STRLEN delim_byte_len;      /* each delimiter currently is the same number
11334                                   of bytes */
11335    line_t herelines;
11336
11337    /* The only non-UTF character that isn't a stand alone grapheme is
11338     * white-space, hence can't be a delimiter. */
11339    const char * non_grapheme_msg = "Use of unassigned code point or"
11340                                    " non-standalone grapheme for a delimiter"
11341                                    " is not allowed";
11342    PERL_ARGS_ASSERT_SCAN_STR;
11343
11344    /* skip space before the delimiter */
11345    if (isSPACE(*s)) {  /* skipspace can change the buffer 's' is in, so
11346                           'start' also has to change */
11347        s = start = skipspace(s);
11348    }
11349
11350    /* mark where we are, in case we need to report errors */
11351    CLINE;
11352
11353    /* after skipping whitespace, the next character is the delimiter */
11354    if (! UTF || UTF8_IS_INVARIANT(*s)) {
11355        open_delim_code   = (U8) *s;
11356        open_delim_str[0] =      *s;
11357        delim_byte_len = 1;
11358    }
11359    else {
11360        open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
11361                                            &delim_byte_len);
11362        if (UNLIKELY(! is_grapheme((U8 *) start,
11363                                   (U8 *) s,
11364                                   (U8 *) PL_bufend,
11365                                   open_delim_code)))
11366        {
11367            yyerror(non_grapheme_msg);
11368        }
11369
11370        Copy(s, open_delim_str, delim_byte_len, char);
11371    }
11372    open_delim_str[delim_byte_len] = '\0';  /* Only for safety */
11373
11374
11375    /* mark where we are */
11376    PL_multi_start = CopLINE(PL_curcop);
11377    PL_multi_open = open_delim_code;
11378    herelines = PL_parser->herelines;
11379
11380    const char * legal_paired_opening_delims;
11381    const char * legal_paired_closing_delims;
11382    const char * deprecated_opening_delims;
11383    if (FEATURE_MORE_DELIMS_IS_ENABLED) {
11384        if (UTF) {
11385            legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
11386            legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
11387
11388            /* We are deprecating using a closing delimiter as the opening, in
11389             * case we want in the future to accept them reversed.  The string
11390             * may include ones that are legal, but the code below won't look
11391             * at this string unless it didn't find a legal opening one */
11392            deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
11393        }
11394        else {
11395            legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
11396            legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
11397            deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11398        }
11399    }
11400    else {
11401        legal_paired_opening_delims = "([{<";
11402        legal_paired_closing_delims = ")]}>";
11403        deprecated_opening_delims = (UTF)
11404                                    ? DEPRECATED_OPENING_UTF8_BRACKETS
11405                                    : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11406    }
11407
11408    const char * legal_paired_opening_delims_end = legal_paired_opening_delims
11409                                          + strlen(legal_paired_opening_delims);
11410    const char * deprecated_delims_end = deprecated_opening_delims
11411                                + strlen(deprecated_opening_delims);
11412
11413    const char * close_delim_str = open_delim_str;
11414    UV close_delim_code = open_delim_code;
11415
11416    /* If the delimiter has a mirror-image closing one, get it */
11417    const char *tmps = ninstr(legal_paired_opening_delims,
11418                              legal_paired_opening_delims_end,
11419                              open_delim_str, open_delim_str + delim_byte_len);
11420    if (tmps) {
11421        /* Here, there is a paired delimiter, and tmps points to its position
11422           in the string of the accepted opening paired delimiters.  The
11423           corresponding position in the string of closing ones is the
11424           beginning of the paired mate.  Both contain the same number of
11425           bytes. */
11426        close_delim_str = legal_paired_closing_delims
11427                        + (tmps - legal_paired_opening_delims);
11428
11429        /* The list of paired delimiters contains all the ASCII ones that have
11430         * always been legal, and no other ASCIIs.  Don't raise a message if
11431         * using one of these */
11432        if (! isASCII(open_delim_code)) {
11433            Perl_ck_warner_d(aTHX_
11434                             packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
11435                             "Use of '%" UTF8f "' is experimental as a string delimiter",
11436                             UTF8fARG(UTF, delim_byte_len, open_delim_str));
11437        }
11438
11439        close_delim_code = (UTF)
11440                           ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
11441                           : * (U8 *) close_delim_str;
11442    }
11443    else {  /* Here, the delimiter isn't paired, hence the close is the same as
11444               the open; and has aready been set up.  But make sure it isn't
11445               deprecated to use this particular delimiter, as we plan
11446               eventually to make it paired. */
11447        if (ninstr(deprecated_opening_delims, deprecated_delims_end,
11448                   open_delim_str, open_delim_str + delim_byte_len))
11449        {
11450            Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11451                             "Use of '%" UTF8f "' is deprecated as a string delimiter",
11452                             UTF8fARG(UTF, delim_byte_len, open_delim_str));
11453        }
11454
11455        /* Note that a NUL may be used as a delimiter, and this happens when
11456         * delimitting an empty string, and no special handling for it is
11457         * needed, as ninstr() calls are used */
11458    }
11459
11460    PL_multi_close = close_delim_code;
11461
11462    if (PL_multi_open == PL_multi_close) {
11463        keep_bracketed_quoted = FALSE;
11464    }
11465
11466    /* create a new SV to hold the contents.  79 is the SV's initial length.
11467       What a random number. */
11468    sv = newSV_type(SVt_PVIV);
11469    SvGROW(sv, 79);
11470    SvIV_set(sv, close_delim_code);
11471    (void)SvPOK_only(sv);		/* validate pointer */
11472
11473    /* move past delimiter and try to read a complete string */
11474    if (keep_delims)
11475        sv_catpvn(sv, s, delim_byte_len);
11476    s += delim_byte_len;
11477    for (;;) {
11478        /* extend sv if need be */
11479        SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11480        /* set 'to' to the next character in the sv's string */
11481        to = SvPVX(sv)+SvCUR(sv);
11482
11483        /* read until we run out of string, or we find the closing delimiter */
11484        while (s < PL_bufend) {
11485            /* embedded newlines increment the line count */
11486            if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11487                COPLINE_INC_WITH_HERELINES;
11488
11489            /* backslashes can escape the closing delimiter */
11490            if (   *s == '\\' && s < PL_bufend - delim_byte_len
11491
11492                   /* ... but not if the delimiter itself is a backslash */
11493                && close_delim_code != '\\')
11494            {
11495                /* Here, we have an escaping backslash.  If we're supposed to
11496                 * discard those that escape the closing delimiter, just
11497                 * discard this one */
11498                if (   !  keep_bracketed_quoted
11499                    &&   (    memEQ(s + 1,  open_delim_str, delim_byte_len)
11500                          ||  (   PL_multi_open == PL_multi_close
11501                               && re_reparse && s[1] == '\\')
11502                          ||  memEQ(s + 1, close_delim_str, delim_byte_len)))
11503                {
11504                    s++;
11505                }
11506                else /* any other escapes are simply copied straight through */
11507                    *to++ = *s++;
11508            }
11509            else if (   s < PL_bufend - (delim_byte_len - 1)
11510                     && memEQ(s, close_delim_str, delim_byte_len)
11511                     && --brackets <= 0)
11512            {
11513                /* Found unescaped closing delimiter, unnested if we care about
11514                 * that; so are done.
11515                 *
11516                 * In the case of the opening and closing delimiters being
11517                 * different, we have to deal with nesting; the conditional
11518                 * above makes sure we don't get here until the nesting level,
11519                 * 'brackets', is back down to zero.  In the other case,
11520                 * nesting isn't an issue, and 'brackets' never can get
11521                 * incremented above 0, so will come here at the first closing
11522                 * delimiter.
11523                 *
11524                 * Only grapheme delimiters are legal. */
11525                if (   UTF  /* All Non-UTF-8's are graphemes */
11526                    && UNLIKELY(! is_grapheme((U8 *) start,
11527                                              (U8 *) s,
11528                                              (U8 *) PL_bufend,
11529                                              close_delim_code)))
11530                {
11531                    yyerror(non_grapheme_msg);
11532                }
11533
11534                break;
11535            }
11536                        /* No nesting if open eq close */
11537            else if (   PL_multi_open != PL_multi_close
11538                     && s < PL_bufend - (delim_byte_len - 1)
11539                     && memEQ(s, open_delim_str, delim_byte_len))
11540            {
11541                brackets++;
11542            }
11543
11544            /* Here, still in the middle of the string; copy this character */
11545            if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
11546                *to++ = *s++;
11547            }
11548            else {
11549                size_t this_char_len = UTF8SKIP(s);
11550                Copy(s, to, this_char_len, char);
11551                s  += this_char_len;
11552                to += this_char_len;
11553
11554                d_is_utf8 = TRUE;
11555            }
11556        } /* End of loop through buffer */
11557
11558        /* Here, found end of the string, OR ran out of buffer: terminate the
11559         * copied string and update the sv's end-of-string */
11560        *to = '\0';
11561        SvCUR_set(sv, to - SvPVX_const(sv));
11562
11563        /*
11564         * this next chunk reads more into the buffer if we're not done yet
11565         */
11566
11567        if (s < PL_bufend)
11568            break;		/* handle case where we are done yet :-) */
11569
11570#ifndef PERL_STRICT_CR
11571        if (to - SvPVX_const(sv) >= 2) {
11572            if (   (to[-2] == '\r' && to[-1] == '\n')
11573                || (to[-2] == '\n' && to[-1] == '\r'))
11574            {
11575                to[-2] = '\n';
11576                to--;
11577                SvCUR_set(sv, to - SvPVX_const(sv));
11578            }
11579            else if (to[-1] == '\r')
11580                to[-1] = '\n';
11581        }
11582        else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11583            to[-1] = '\n';
11584#endif
11585
11586        /* if we're out of file, or a read fails, bail and reset the current
11587           line marker so we can report where the unterminated string began
11588        */
11589        COPLINE_INC_WITH_HERELINES;
11590        PL_bufptr = PL_bufend;
11591        if (!lex_next_chunk(0)) {
11592            sv_free(sv);
11593            CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11594            return NULL;
11595        }
11596        s = start = PL_bufptr;
11597    } /* End of infinite loop */
11598
11599    /* at this point, we have successfully read the delimited string */
11600
11601    if (keep_delims)
11602            sv_catpvn(sv, s, delim_byte_len);
11603    s += delim_byte_len;
11604
11605    if (d_is_utf8)
11606        SvUTF8_on(sv);
11607
11608    PL_multi_end = CopLINE(PL_curcop);
11609    CopLINE_set(PL_curcop, PL_multi_start);
11610    PL_parser->herelines = herelines;
11611
11612    /* if we allocated too much space, give some back */
11613    if (SvCUR(sv) + 5 < SvLEN(sv)) {
11614        SvLEN_set(sv, SvCUR(sv) + 1);
11615        SvPV_shrink_to_cur(sv);
11616    }
11617
11618    /* decide whether this is the first or second quoted string we've read
11619       for this op
11620    */
11621
11622    if (PL_lex_stuff)
11623        PL_parser->lex_sub_repl = sv;
11624    else
11625        PL_lex_stuff = sv;
11626    if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
11627    return s;
11628}
11629
11630/*
11631  scan_num
11632  takes: pointer to position in buffer
11633  returns: pointer to new position in buffer
11634  side-effects: builds ops for the constant in pl_yylval.op
11635
11636  Read a number in any of the formats that Perl accepts:
11637
11638  \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)	12 12.34 12.
11639  \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)			.34
11640  0b[01](_?[01])*                                       binary integers
11641  0o?[0-7](_?[0-7])*                                    octal integers
11642  0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
11643  0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
11644
11645  Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11646  thing it reads.
11647
11648  If it reads a number without a decimal point or an exponent, it will
11649  try converting the number to an integer and see if it can do so
11650  without loss of precision.
11651*/
11652
11653char *
11654Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11655{
11656    const char *s = start;	/* current position in buffer */
11657    char *d;			/* destination in temp buffer */
11658    char *e;			/* end of temp buffer */
11659    NV nv;				/* number read, as a double */
11660    SV *sv = NULL;			/* place to put the converted number */
11661    bool floatit;			/* boolean: int or float? */
11662    const char *lastub = NULL;		/* position of last underbar */
11663    static const char* const number_too_long = "Number too long";
11664    bool warned_about_underscore = 0;
11665    I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11666#define WARN_ABOUT_UNDERSCORE() \
11667        do { \
11668            if (!warned_about_underscore) { \
11669                warned_about_underscore = 1; \
11670                Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11671                               "Misplaced _ in number"); \
11672            } \
11673        } while(0)
11674    /* Hexadecimal floating point.
11675     *
11676     * In many places (where we have quads and NV is IEEE 754 double)
11677     * we can fit the mantissa bits of a NV into an unsigned quad.
11678     * (Note that UVs might not be quads even when we have quads.)
11679     * This will not work everywhere, though (either no quads, or
11680     * using long doubles), in which case we have to resort to NV,
11681     * which will probably mean horrible loss of precision due to
11682     * multiple fp operations. */
11683    bool hexfp = FALSE;
11684    int total_bits = 0;
11685    int significant_bits = 0;
11686#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11687#  define HEXFP_UQUAD
11688    Uquad_t hexfp_uquad = 0;
11689    int hexfp_frac_bits = 0;
11690#else
11691#  define HEXFP_NV
11692    NV hexfp_nv = 0.0;
11693#endif
11694    NV hexfp_mult = 1.0;
11695    UV high_non_zero = 0; /* highest digit */
11696    int non_zero_integer_digits = 0;
11697    bool new_octal = FALSE;     /* octal with "0o" prefix */
11698
11699    PERL_ARGS_ASSERT_SCAN_NUM;
11700
11701    /* We use the first character to decide what type of number this is */
11702
11703    switch (*s) {
11704    default:
11705        Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11706
11707    /* if it starts with a 0, it could be an octal number, a decimal in
11708       0.13 disguise, or a hexadecimal number, or a binary number. */
11709    case '0':
11710        {
11711          /* variables:
11712             u		holds the "number so far"
11713             overflowed	was the number more than we can hold?
11714
11715             Shift is used when we add a digit.  It also serves as an "are
11716             we in octal/hex/binary?" indicator to disallow hex characters
11717             when in octal mode.
11718           */
11719            NV n = 0.0;
11720            UV u = 0;
11721            bool overflowed = FALSE;
11722            bool just_zero  = TRUE;	/* just plain 0 or binary number? */
11723            bool has_digs = FALSE;
11724            static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11725            static const char* const bases[5] =
11726              { "", "binary", "", "octal", "hexadecimal" };
11727            static const char* const Bases[5] =
11728              { "", "Binary", "", "Octal", "Hexadecimal" };
11729            static const char* const maxima[5] =
11730              { "",
11731                "0b11111111111111111111111111111111",
11732                "",
11733                "037777777777",
11734                "0xffffffff" };
11735
11736            /* check for hex */
11737            if (isALPHA_FOLD_EQ(s[1], 'x')) {
11738                shift = 4;
11739                s += 2;
11740                just_zero = FALSE;
11741            } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11742                shift = 1;
11743                s += 2;
11744                just_zero = FALSE;
11745            }
11746            /* check for a decimal in disguise */
11747            else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11748                goto decimal;
11749            /* so it must be octal */
11750            else {
11751                shift = 3;
11752                s++;
11753                if (isALPHA_FOLD_EQ(*s, 'o')) {
11754                    s++;
11755                    just_zero = FALSE;
11756                    new_octal = TRUE;
11757                }
11758            }
11759
11760            if (*s == '_') {
11761                WARN_ABOUT_UNDERSCORE();
11762               lastub = s++;
11763            }
11764
11765            /* read the rest of the number */
11766            for (;;) {
11767                /* x is used in the overflow test,
11768                   b is the digit we're adding on. */
11769                UV x, b;
11770
11771                switch (*s) {
11772
11773                /* if we don't mention it, we're done */
11774                default:
11775                    goto out;
11776
11777                /* _ are ignored -- but warned about if consecutive */
11778                case '_':
11779                    if (lastub && s == lastub + 1)
11780                        WARN_ABOUT_UNDERSCORE();
11781                    lastub = s++;
11782                    break;
11783
11784                /* 8 and 9 are not octal */
11785                case '8': case '9':
11786                    if (shift == 3)
11787                        yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11788                    /* FALLTHROUGH */
11789
11790                /* octal digits */
11791                case '2': case '3': case '4':
11792                case '5': case '6': case '7':
11793                    if (shift == 1)
11794                        yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11795                    /* FALLTHROUGH */
11796
11797                case '0': case '1':
11798                    b = *s++ & 15;		/* ASCII digit -> value of digit */
11799                    goto digit;
11800
11801                /* hex digits */
11802                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11803                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11804                    /* make sure they said 0x */
11805                    if (shift != 4)
11806                        goto out;
11807                    b = (*s++ & 7) + 9;
11808
11809                    /* Prepare to put the digit we have onto the end
11810                       of the number so far.  We check for overflows.
11811                    */
11812
11813                  digit:
11814                    just_zero = FALSE;
11815                    has_digs = TRUE;
11816                    if (!overflowed) {
11817                        assert(shift >= 0);
11818                        x = u << shift;	/* make room for the digit */
11819
11820                        total_bits += shift;
11821
11822                        if ((x >> shift) != u
11823                            && !(PL_hints & HINT_NEW_BINARY)) {
11824                            overflowed = TRUE;
11825                            n = (NV) u;
11826                            Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11827                                             "Integer overflow in %s number",
11828                                             bases[shift]);
11829                        } else
11830                            u = x | b;		/* add the digit to the end */
11831                    }
11832                    if (overflowed) {
11833                        n *= nvshift[shift];
11834                        /* If an NV has not enough bits in its
11835                         * mantissa to represent an UV this summing of
11836                         * small low-order numbers is a waste of time
11837                         * (because the NV cannot preserve the
11838                         * low-order bits anyway): we could just
11839                         * remember when did we overflow and in the
11840                         * end just multiply n by the right
11841                         * amount. */
11842                        n += (NV) b;
11843                    }
11844
11845                    if (high_non_zero == 0 && b > 0)
11846                        high_non_zero = b;
11847
11848                    if (high_non_zero)
11849                        non_zero_integer_digits++;
11850
11851                    /* this could be hexfp, but peek ahead
11852                     * to avoid matching ".." */
11853                    if (UNLIKELY(HEXFP_PEEK(s))) {
11854                        goto out;
11855                    }
11856
11857                    break;
11858                }
11859            }
11860
11861          /* if we get here, we had success: make a scalar value from
11862             the number.
11863          */
11864          out:
11865
11866            /* final misplaced underbar check */
11867            if (s[-1] == '_')
11868                WARN_ABOUT_UNDERSCORE();
11869
11870            if (UNLIKELY(HEXFP_PEEK(s))) {
11871                /* Do sloppy (on the underbars) but quick detection
11872                 * (and value construction) for hexfp, the decimal
11873                 * detection will shortly be more thorough with the
11874                 * underbar checks. */
11875                const char* h = s;
11876                significant_bits = non_zero_integer_digits * shift;
11877#ifdef HEXFP_UQUAD
11878                hexfp_uquad = u;
11879#else /* HEXFP_NV */
11880                hexfp_nv = u;
11881#endif
11882                /* Ignore the leading zero bits of
11883                 * the high (first) non-zero digit. */
11884                if (high_non_zero) {
11885                    if (high_non_zero < 0x8)
11886                        significant_bits--;
11887                    if (high_non_zero < 0x4)
11888                        significant_bits--;
11889                    if (high_non_zero < 0x2)
11890                        significant_bits--;
11891                }
11892
11893                if (*h == '.') {
11894#ifdef HEXFP_NV
11895                    NV nv_mult = 1.0;
11896#endif
11897                    bool accumulate = TRUE;
11898                    U8 b;
11899                    int lim = 1 << shift;
11900                    for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11901                               *h == '_'); h++) {
11902                        if (isXDIGIT(*h)) {
11903                            significant_bits += shift;
11904#ifdef HEXFP_UQUAD
11905                            if (accumulate) {
11906                                if (significant_bits < NV_MANT_DIG) {
11907                                    /* We are in the long "run" of xdigits,
11908                                     * accumulate the full four bits. */
11909                                    assert(shift >= 0);
11910                                    hexfp_uquad <<= shift;
11911                                    hexfp_uquad |= b;
11912                                    hexfp_frac_bits += shift;
11913                                } else if (significant_bits - shift < NV_MANT_DIG) {
11914                                    /* We are at a hexdigit either at,
11915                                     * or straddling, the edge of mantissa.
11916                                     * We will try grabbing as many as
11917                                     * possible bits. */
11918                                    int tail =
11919                                      significant_bits - NV_MANT_DIG;
11920                                    if (tail <= 0)
11921                                       tail += shift;
11922                                    assert(tail >= 0);
11923                                    hexfp_uquad <<= tail;
11924                                    assert((shift - tail) >= 0);
11925                                    hexfp_uquad |= b >> (shift - tail);
11926                                    hexfp_frac_bits += tail;
11927
11928                                    /* Ignore the trailing zero bits
11929                                     * of the last non-zero xdigit.
11930                                     *
11931                                     * The assumption here is that if
11932                                     * one has input of e.g. the xdigit
11933                                     * eight (0x8), there is only one
11934                                     * bit being input, not the full
11935                                     * four bits.  Conversely, if one
11936                                     * specifies a zero xdigit, the
11937                                     * assumption is that one really
11938                                     * wants all those bits to be zero. */
11939                                    if (b) {
11940                                        if ((b & 0x1) == 0x0) {
11941                                            significant_bits--;
11942                                            if ((b & 0x2) == 0x0) {
11943                                                significant_bits--;
11944                                                if ((b & 0x4) == 0x0) {
11945                                                    significant_bits--;
11946                                                }
11947                                            }
11948                                        }
11949                                    }
11950
11951                                    accumulate = FALSE;
11952                                }
11953                            } else {
11954                                /* Keep skipping the xdigits, and
11955                                 * accumulating the significant bits,
11956                                 * but do not shift the uquad
11957                                 * (which would catastrophically drop
11958                                 * high-order bits) or accumulate the
11959                                 * xdigits anymore. */
11960                            }
11961#else /* HEXFP_NV */
11962                            if (accumulate) {
11963                                nv_mult /= nvshift[shift];
11964                                if (nv_mult > 0.0)
11965                                    hexfp_nv += b * nv_mult;
11966                                else
11967                                    accumulate = FALSE;
11968                            }
11969#endif
11970                        }
11971                        if (significant_bits >= NV_MANT_DIG)
11972                            accumulate = FALSE;
11973                    }
11974                }
11975
11976                if ((total_bits > 0 || significant_bits > 0) &&
11977                    isALPHA_FOLD_EQ(*h, 'p')) {
11978                    bool negexp = FALSE;
11979                    h++;
11980                    if (*h == '+')
11981                        h++;
11982                    else if (*h == '-') {
11983                        negexp = TRUE;
11984                        h++;
11985                    }
11986                    if (isDIGIT(*h)) {
11987                        I32 hexfp_exp = 0;
11988                        while (isDIGIT(*h) || *h == '_') {
11989                            if (isDIGIT(*h)) {
11990                                hexfp_exp *= 10;
11991                                hexfp_exp += *h - '0';
11992#ifdef NV_MIN_EXP
11993                                if (negexp
11994                                    && -hexfp_exp < NV_MIN_EXP - 1) {
11995                                    /* NOTE: this means that the exponent
11996                                     * underflow warning happens for
11997                                     * the IEEE 754 subnormals (denormals),
11998                                     * because DBL_MIN_EXP etc are the lowest
11999                                     * possible binary (or, rather, DBL_RADIX-base)
12000                                     * exponent for normals, not subnormals.
12001                                     *
12002                                     * This may or may not be a good thing. */
12003                                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12004                                                   "Hexadecimal float: exponent underflow");
12005                                    break;
12006                                }
12007#endif
12008#ifdef NV_MAX_EXP
12009                                if (!negexp
12010                                    && hexfp_exp > NV_MAX_EXP - 1) {
12011                                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12012                                                   "Hexadecimal float: exponent overflow");
12013                                    break;
12014                                }
12015#endif
12016                            }
12017                            h++;
12018                        }
12019                        if (negexp)
12020                            hexfp_exp = -hexfp_exp;
12021#ifdef HEXFP_UQUAD
12022                        hexfp_exp -= hexfp_frac_bits;
12023#endif
12024                        hexfp_mult = Perl_pow(2.0, hexfp_exp);
12025                        hexfp = TRUE;
12026                        goto decimal;
12027                    }
12028                }
12029            }
12030
12031            if (!just_zero && !has_digs) {
12032                /* 0x, 0o or 0b with no digits, treat it as an error.
12033                   Originally this backed up the parse before the b or
12034                   x, but that has the potential for silent changes in
12035                   behaviour, like for: "0x.3" and "0x+$foo".
12036                */
12037                const char *d = s;
12038                char *oldbp = PL_bufptr;
12039                if (*d) ++d; /* so the user sees the bad non-digit */
12040                PL_bufptr = (char *)d; /* so yyerror reports the context */
12041                yyerror(Perl_form(aTHX_ "No digits found for %s literal",
12042                                  bases[shift]));
12043                PL_bufptr = oldbp;
12044            }
12045
12046            if (overflowed) {
12047                if (n > 4294967295.0)
12048                    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12049                                   "%s number > %s non-portable",
12050                                   Bases[shift],
12051                                   new_octal ? "0o37777777777" : maxima[shift]);
12052                sv = newSVnv(n);
12053            }
12054            else {
12055#if UVSIZE > 4
12056                if (u > 0xffffffff)
12057                    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12058                                   "%s number > %s non-portable",
12059                                   Bases[shift],
12060                                   new_octal ? "0o37777777777" : maxima[shift]);
12061#endif
12062                sv = newSVuv(u);
12063            }
12064            if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12065                sv = new_constant(start, s - start, "integer",
12066                                  sv, NULL, NULL, 0, NULL);
12067            else if (PL_hints & HINT_NEW_BINARY)
12068                sv = new_constant(start, s - start, "binary",
12069                                  sv, NULL, NULL, 0, NULL);
12070        }
12071        break;
12072
12073    /*
12074      handle decimal numbers.
12075      we're also sent here when we read a 0 as the first digit
12076    */
12077    case '1': case '2': case '3': case '4': case '5':
12078    case '6': case '7': case '8': case '9': case '.':
12079      decimal:
12080        d = PL_tokenbuf;
12081        e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12082        floatit = FALSE;
12083        if (hexfp) {
12084            floatit = TRUE;
12085            *d++ = '0';
12086            switch (shift) {
12087            case 4:
12088                *d++ = 'x';
12089                s = start + 2;
12090                break;
12091            case 3:
12092                if (new_octal) {
12093                    *d++ = 'o';
12094                    s = start + 2;
12095                    break;
12096                }
12097                s = start + 1;
12098                break;
12099            case 1:
12100                *d++ = 'b';
12101                s = start + 2;
12102                break;
12103            default:
12104                NOT_REACHED; /* NOTREACHED */
12105            }
12106        }
12107
12108        /* read next group of digits and _ and copy into d */
12109        while (isDIGIT(*s)
12110               || *s == '_'
12111               || UNLIKELY(hexfp && isXDIGIT(*s)))
12112        {
12113            /* skip underscores, checking for misplaced ones
12114               if -w is on
12115            */
12116            if (*s == '_') {
12117                if (lastub && s == lastub + 1)
12118                    WARN_ABOUT_UNDERSCORE();
12119                lastub = s++;
12120            }
12121            else {
12122                /* check for end of fixed-length buffer */
12123                if (d >= e)
12124                    Perl_croak(aTHX_ "%s", number_too_long);
12125                /* if we're ok, copy the character */
12126                *d++ = *s++;
12127            }
12128        }
12129
12130        /* final misplaced underbar check */
12131        if (lastub && s == lastub + 1)
12132            WARN_ABOUT_UNDERSCORE();
12133
12134        /* read a decimal portion if there is one.  avoid
12135           3..5 being interpreted as the number 3. followed
12136           by .5
12137        */
12138        if (*s == '.' && s[1] != '.') {
12139            floatit = TRUE;
12140            *d++ = *s++;
12141
12142            if (*s == '_') {
12143                WARN_ABOUT_UNDERSCORE();
12144                lastub = s;
12145            }
12146
12147            /* copy, ignoring underbars, until we run out of digits.
12148            */
12149            for (; isDIGIT(*s)
12150                   || *s == '_'
12151                   || UNLIKELY(hexfp && isXDIGIT(*s));
12152                 s++)
12153            {
12154                /* fixed length buffer check */
12155                if (d >= e)
12156                    Perl_croak(aTHX_ "%s", number_too_long);
12157                if (*s == '_') {
12158                   if (lastub && s == lastub + 1)
12159                        WARN_ABOUT_UNDERSCORE();
12160                   lastub = s;
12161                }
12162                else
12163                    *d++ = *s;
12164            }
12165            /* fractional part ending in underbar? */
12166            if (s[-1] == '_')
12167                WARN_ABOUT_UNDERSCORE();
12168            if (*s == '.' && isDIGIT(s[1])) {
12169                /* oops, it's really a v-string, but without the "v" */
12170                s = start;
12171                goto vstring;
12172            }
12173        }
12174
12175        /* read exponent part, if present */
12176        if ((isALPHA_FOLD_EQ(*s, 'e')
12177              || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12178            && memCHRs("+-0123456789_", s[1]))
12179        {
12180            int exp_digits = 0;
12181            const char *save_s = s;
12182            char * save_d = d;
12183
12184            /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12185               ditto for p (hexfloats) */
12186            if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12187                /* At least some Mach atof()s don't grok 'E' */
12188                *d++ = 'e';
12189            }
12190            else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12191                *d++ = 'p';
12192            }
12193
12194            s++;
12195
12196
12197            /* stray preinitial _ */
12198            if (*s == '_') {
12199                WARN_ABOUT_UNDERSCORE();
12200                lastub = s++;
12201            }
12202
12203            /* allow positive or negative exponent */
12204            if (*s == '+' || *s == '-')
12205                *d++ = *s++;
12206
12207            /* stray initial _ */
12208            if (*s == '_') {
12209                WARN_ABOUT_UNDERSCORE();
12210                lastub = s++;
12211            }
12212
12213            /* read digits of exponent */
12214            while (isDIGIT(*s) || *s == '_') {
12215                if (isDIGIT(*s)) {
12216                    ++exp_digits;
12217                    if (d >= e)
12218                        Perl_croak(aTHX_ "%s", number_too_long);
12219                    *d++ = *s++;
12220                }
12221                else {
12222                   if (((lastub && s == lastub + 1)
12223                        || (!isDIGIT(s[1]) && s[1] != '_')))
12224                        WARN_ABOUT_UNDERSCORE();
12225                   lastub = s++;
12226                }
12227            }
12228
12229            if (!exp_digits) {
12230                /* no exponent digits, the [eEpP] could be for something else,
12231                 * though in practice we don't get here for p since that's preparsed
12232                 * earlier, and results in only the 0xX being consumed, so behave similarly
12233                 * for decimal floats and consume only the D.DD, leaving the [eE] to the
12234                 * next token.
12235                 */
12236                s = save_s;
12237                d = save_d;
12238            }
12239            else {
12240                floatit = TRUE;
12241            }
12242        }
12243
12244
12245        /*
12246           We try to do an integer conversion first if no characters
12247           indicating "float" have been found.
12248         */
12249
12250        if (!floatit) {
12251            UV uv;
12252            const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12253
12254            if (flags == IS_NUMBER_IN_UV) {
12255              if (uv <= IV_MAX)
12256                sv = newSViv(uv); /* Prefer IVs over UVs. */
12257              else
12258                sv = newSVuv(uv);
12259            } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12260              if (uv <= (UV) IV_MIN)
12261                sv = newSViv(-(IV)uv);
12262              else
12263                floatit = TRUE;
12264            } else
12265              floatit = TRUE;
12266        }
12267        if (floatit) {
12268            /* terminate the string */
12269            *d = '\0';
12270            if (UNLIKELY(hexfp)) {
12271#  ifdef NV_MANT_DIG
12272                if (significant_bits > NV_MANT_DIG)
12273                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12274                                   "Hexadecimal float: mantissa overflow");
12275#  endif
12276#ifdef HEXFP_UQUAD
12277                nv = hexfp_uquad * hexfp_mult;
12278#else /* HEXFP_NV */
12279                nv = hexfp_nv * hexfp_mult;
12280#endif
12281            } else {
12282                nv = Atof(PL_tokenbuf);
12283            }
12284            sv = newSVnv(nv);
12285        }
12286
12287        if ( floatit
12288             ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12289            const char *const key = floatit ? "float" : "integer";
12290            const STRLEN keylen = floatit ? 5 : 7;
12291            sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12292                                key, keylen, sv, NULL, NULL, 0, NULL);
12293        }
12294        break;
12295
12296    /* if it starts with a v, it could be a v-string */
12297    case 'v':
12298    vstring:
12299                sv = newSV(5); /* preallocate storage space */
12300                ENTER_with_name("scan_vstring");
12301                SAVEFREESV(sv);
12302                s = scan_vstring(s, PL_bufend, sv);
12303                SvREFCNT_inc_simple_void_NN(sv);
12304                LEAVE_with_name("scan_vstring");
12305        break;
12306    }
12307
12308    /* make the op for the constant and return */
12309
12310    if (sv)
12311        lvalp->opval = newSVOP(OP_CONST, 0, sv);
12312    else
12313        lvalp->opval = NULL;
12314
12315    return (char *)s;
12316}
12317
12318STATIC char *
12319S_scan_formline(pTHX_ char *s)
12320{
12321    SV * const stuff = newSVpvs("");
12322    bool needargs = FALSE;
12323    bool eofmt = FALSE;
12324
12325    PERL_ARGS_ASSERT_SCAN_FORMLINE;
12326
12327    while (!needargs) {
12328        char *eol;
12329        if (*s == '.') {
12330            char *t = s+1;
12331#ifdef PERL_STRICT_CR
12332            while (SPACE_OR_TAB(*t))
12333                t++;
12334#else
12335            while (SPACE_OR_TAB(*t) || *t == '\r')
12336                t++;
12337#endif
12338            if (*t == '\n' || t == PL_bufend) {
12339                eofmt = TRUE;
12340                break;
12341            }
12342        }
12343        eol = (char *) memchr(s,'\n',PL_bufend-s);
12344        if (!eol++)
12345                eol = PL_bufend;
12346        if (*s != '#') {
12347            char *t;
12348            for (t = s; t < eol; t++) {
12349                if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12350                    needargs = FALSE;
12351                    goto enough;	/* ~~ must be first line in formline */
12352                }
12353                if (*t == '@' || *t == '^')
12354                    needargs = TRUE;
12355            }
12356            if (eol > s) {
12357                sv_catpvn(stuff, s, eol-s);
12358#ifndef PERL_STRICT_CR
12359                if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12360                    char *end = SvPVX(stuff) + SvCUR(stuff);
12361                    end[-2] = '\n';
12362                    end[-1] = '\0';
12363                    SvCUR_set(stuff, SvCUR(stuff) - 1);
12364                }
12365#endif
12366            }
12367            else
12368              break;
12369        }
12370        s = (char*)eol;
12371        if ((PL_rsfp || PL_parser->filtered)
12372         && PL_parser->form_lex_state == LEX_NORMAL) {
12373            bool got_some;
12374            PL_bufptr = PL_bufend;
12375            COPLINE_INC_WITH_HERELINES;
12376            got_some = lex_next_chunk(0);
12377            CopLINE_dec(PL_curcop);
12378            s = PL_bufptr;
12379            if (!got_some)
12380                break;
12381        }
12382        incline(s, PL_bufend);
12383    }
12384  enough:
12385    if (!SvCUR(stuff) || needargs)
12386        PL_lex_state = PL_parser->form_lex_state;
12387    if (SvCUR(stuff)) {
12388        PL_expect = XSTATE;
12389        if (needargs) {
12390            const char *s2 = s;
12391            while (isSPACE(*s2) && *s2 != '\n')
12392                s2++;
12393            if (*s2 == '{') {
12394                PL_expect = XTERMBLOCK;
12395                NEXTVAL_NEXTTOKE.ival = 0;
12396                force_next(DO);
12397            }
12398            NEXTVAL_NEXTTOKE.ival = 0;
12399            force_next(FORMLBRACK);
12400        }
12401        if (!IN_BYTES) {
12402            if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12403                SvUTF8_on(stuff);
12404        }
12405        NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12406        force_next(THING);
12407    }
12408    else {
12409        SvREFCNT_dec(stuff);
12410        if (eofmt)
12411            PL_lex_formbrack = 0;
12412    }
12413    return s;
12414}
12415
12416I32
12417Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12418{
12419    const I32 oldsavestack_ix = PL_savestack_ix;
12420    CV* const outsidecv = PL_compcv;
12421
12422    SAVEI32(PL_subline);
12423    save_item(PL_subname);
12424    SAVESPTR(PL_compcv);
12425
12426    PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12427    CvFLAGS(PL_compcv) |= flags;
12428
12429    PL_subline = CopLINE(PL_curcop);
12430    CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12431    CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12432    CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12433    if (outsidecv && CvPADLIST(outsidecv))
12434        CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12435
12436    return oldsavestack_ix;
12437}
12438
12439
12440/* Do extra initialisation of a CV (typically one just created by
12441 * start_subparse()) if that CV is for a named sub
12442 */
12443
12444void
12445Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12446{
12447    PERL_ARGS_ASSERT_INIT_NAMED_CV;
12448
12449    if (nameop->op_type == OP_CONST) {
12450        const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12451        if (   strEQ(name, "BEGIN")
12452            || strEQ(name, "END")
12453            || strEQ(name, "INIT")
12454            || strEQ(name, "CHECK")
12455            || strEQ(name, "UNITCHECK")
12456        )
12457          CvSPECIAL_on(cv);
12458    }
12459    else
12460    /* State subs inside anonymous subs need to be
12461     clonable themselves. */
12462    if (   CvANON(CvOUTSIDE(cv))
12463        || CvCLONE(CvOUTSIDE(cv))
12464        || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12465                        CvOUTSIDE(cv)
12466                     ))[nameop->op_targ])
12467    )
12468      CvCLONE_on(cv);
12469}
12470
12471
12472static int
12473S_yywarn(pTHX_ const char *const s, U32 flags)
12474{
12475    PERL_ARGS_ASSERT_YYWARN;
12476
12477    PL_in_eval |= EVAL_WARNONLY;
12478    yyerror_pv(s, flags);
12479    return 0;
12480}
12481
12482void
12483Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12484{
12485    PERL_ARGS_ASSERT_ABORT_EXECUTION;
12486
12487    if (PL_minus_c)
12488        Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12489    else {
12490        Perl_croak(aTHX_
12491                "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12492    }
12493    NOT_REACHED; /* NOTREACHED */
12494}
12495
12496void
12497Perl_yyquit(pTHX)
12498{
12499    /* Called, after at least one error has been found, to abort the parse now,
12500     * instead of trying to forge ahead */
12501
12502    yyerror_pvn(NULL, 0, 0);
12503}
12504
12505int
12506Perl_yyerror(pTHX_ const char *const s)
12507{
12508    PERL_ARGS_ASSERT_YYERROR;
12509    return yyerror_pvn(s, strlen(s), 0);
12510}
12511
12512int
12513Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12514{
12515    PERL_ARGS_ASSERT_YYERROR_PV;
12516    return yyerror_pvn(s, strlen(s), flags);
12517}
12518
12519int
12520Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12521{
12522    const char *context = NULL;
12523    int contlen = -1;
12524    SV *msg;
12525    SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12526    int yychar  = PL_parser->yychar;
12527
12528    /* Output error message 's' with length 'len'.  'flags' are SV flags that
12529     * apply.  If the number of errors found is large enough, it abandons
12530     * parsing.  If 's' is NULL, there is no message, and it abandons
12531     * processing unconditionally */
12532
12533    if (s != NULL) {
12534        if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
12535            sv_catpvs(where_sv, "at EOF");
12536        else if (   PL_oldoldbufptr
12537                 && PL_bufptr > PL_oldoldbufptr
12538                 && PL_bufptr - PL_oldoldbufptr < 200
12539                 && PL_oldoldbufptr != PL_oldbufptr
12540                 && PL_oldbufptr != PL_bufptr)
12541        {
12542            while (isSPACE(*PL_oldoldbufptr))
12543                PL_oldoldbufptr++;
12544            context = PL_oldoldbufptr;
12545            contlen = PL_bufptr - PL_oldoldbufptr;
12546        }
12547        else if (  PL_oldbufptr
12548                && PL_bufptr > PL_oldbufptr
12549                && PL_bufptr - PL_oldbufptr < 200
12550                && PL_oldbufptr != PL_bufptr)
12551        {
12552            while (isSPACE(*PL_oldbufptr))
12553                PL_oldbufptr++;
12554            context = PL_oldbufptr;
12555            contlen = PL_bufptr - PL_oldbufptr;
12556        }
12557        else if (yychar > 255)
12558            sv_catpvs(where_sv, "next token ???");
12559        else if (yychar == YYEMPTY) {
12560            if (PL_lex_state == LEX_NORMAL)
12561                sv_catpvs(where_sv, "at end of line");
12562            else if (PL_lex_inpat)
12563                sv_catpvs(where_sv, "within pattern");
12564            else
12565                sv_catpvs(where_sv, "within string");
12566        }
12567        else {
12568            sv_catpvs(where_sv, "next char ");
12569            if (yychar < 32)
12570                Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12571            else if (isPRINT_LC(yychar)) {
12572                const char string = yychar;
12573                sv_catpvn(where_sv, &string, 1);
12574            }
12575            else
12576                Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12577        }
12578        msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12579        Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12580            OutCopFILE(PL_curcop),
12581            (IV)(PL_parser->preambling == NOLINE
12582                   ? CopLINE(PL_curcop)
12583                   : PL_parser->preambling));
12584        if (context)
12585            Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12586                                 UTF8fARG(UTF, contlen, context));
12587        else
12588            Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12589        if (   PL_multi_start < PL_multi_end
12590            && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12591        {
12592            Perl_sv_catpvf(aTHX_ msg,
12593            "  (Might be a runaway multi-line %c%c string starting on"
12594            " line %" IVdf ")\n",
12595                    (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12596            PL_multi_end = 0;
12597        }
12598        if (PL_in_eval & EVAL_WARNONLY) {
12599            PL_in_eval &= ~EVAL_WARNONLY;
12600            Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12601        }
12602        else {
12603            qerror(msg);
12604        }
12605    }
12606    if (s == NULL || PL_error_count >= 10) {
12607        const char * msg = "";
12608        const char * const name = OutCopFILE(PL_curcop);
12609
12610        if (PL_in_eval) {
12611            SV * errsv = ERRSV;
12612            if (SvCUR(errsv)) {
12613                msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12614            }
12615        }
12616
12617        if (s == NULL) {
12618            abort_execution(msg, name);
12619        }
12620        else {
12621            Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12622        }
12623    }
12624    PL_in_my = 0;
12625    PL_in_my_stash = NULL;
12626    return 0;
12627}
12628
12629STATIC char*
12630S_swallow_bom(pTHX_ U8 *s)
12631{
12632    const STRLEN slen = SvCUR(PL_linestr);
12633
12634    PERL_ARGS_ASSERT_SWALLOW_BOM;
12635
12636    switch (s[0]) {
12637    case 0xFF:
12638        if (s[1] == 0xFE) {
12639            /* UTF-16 little-endian? (or UTF-32LE?) */
12640            if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12641                /* diag_listed_as: Unsupported script encoding %s */
12642                Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12643#ifndef PERL_NO_UTF16_FILTER
12644#ifdef DEBUGGING
12645            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12646#endif
12647            s += 2;
12648            if (PL_bufend > (char*)s) {
12649                s = add_utf16_textfilter(s, TRUE);
12650            }
12651#else
12652            /* diag_listed_as: Unsupported script encoding %s */
12653            Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12654#endif
12655        }
12656        break;
12657    case 0xFE:
12658        if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12659#ifndef PERL_NO_UTF16_FILTER
12660#ifdef DEBUGGING
12661            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12662#endif
12663            s += 2;
12664            if (PL_bufend > (char *)s) {
12665                s = add_utf16_textfilter(s, FALSE);
12666            }
12667#else
12668            /* diag_listed_as: Unsupported script encoding %s */
12669            Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12670#endif
12671        }
12672        break;
12673    case BOM_UTF8_FIRST_BYTE: {
12674        if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12675#ifdef DEBUGGING
12676            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12677#endif
12678            s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
12679        }
12680        break;
12681    }
12682    case 0:
12683        if (slen > 3) {
12684             if (s[1] == 0) {
12685                  if (s[2] == 0xFE && s[3] == 0xFF) {
12686                       /* UTF-32 big-endian */
12687                       /* diag_listed_as: Unsupported script encoding %s */
12688                       Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12689                  }
12690             }
12691             else if (s[2] == 0 && s[3] != 0) {
12692                  /* Leading bytes
12693                   * 00 xx 00 xx
12694                   * are a good indicator of UTF-16BE. */
12695#ifndef PERL_NO_UTF16_FILTER
12696#ifdef DEBUGGING
12697                  if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12698#endif
12699                  s = add_utf16_textfilter(s, FALSE);
12700#else
12701                  /* diag_listed_as: Unsupported script encoding %s */
12702                  Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12703#endif
12704             }
12705        }
12706        break;
12707
12708    default:
12709         if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12710                  /* Leading bytes
12711                   * xx 00 xx 00
12712                   * are a good indicator of UTF-16LE. */
12713#ifndef PERL_NO_UTF16_FILTER
12714#ifdef DEBUGGING
12715              if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12716#endif
12717              s = add_utf16_textfilter(s, TRUE);
12718#else
12719              /* diag_listed_as: Unsupported script encoding %s */
12720              Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12721#endif
12722         }
12723    }
12724    return (char*)s;
12725}
12726
12727
12728#ifndef PERL_NO_UTF16_FILTER
12729static I32
12730S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12731{
12732    SV *const filter = FILTER_DATA(idx);
12733    /* We re-use this each time round, throwing the contents away before we
12734       return.  */
12735    SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12736    SV *const utf8_buffer = filter;
12737    IV status = IoPAGE(filter);
12738    const bool reverse = cBOOL(IoLINES(filter));
12739    I32 retval;
12740
12741    PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12742
12743    /* As we're automatically added, at the lowest level, and hence only called
12744       from this file, we can be sure that we're not called in block mode. Hence
12745       don't bother writing code to deal with block mode.  */
12746    if (maxlen) {
12747        Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12748    }
12749    if (status < 0) {
12750        Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12751    }
12752    DEBUG_P(PerlIO_printf(Perl_debug_log,
12753                          "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12754                          FPTR2DPTR(void *, S_utf16_textfilter),
12755                          reverse ? 'l' : 'b', idx, maxlen, status,
12756                          (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12757
12758    while (1) {
12759        STRLEN chars;
12760        STRLEN have;
12761        Size_t newlen;
12762        U8 *end;
12763        /* First, look in our buffer of existing UTF-8 data:  */
12764        char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12765
12766        if (nl) {
12767            ++nl;
12768        } else if (status == 0) {
12769            /* EOF */
12770            IoPAGE(filter) = 0;
12771            nl = SvEND(utf8_buffer);
12772        }
12773        if (nl) {
12774            STRLEN got = nl - SvPVX(utf8_buffer);
12775            /* Did we have anything to append?  */
12776            retval = got != 0;
12777            sv_catpvn(sv, SvPVX(utf8_buffer), got);
12778            /* Everything else in this code works just fine if SVp_POK isn't
12779               set.  This, however, needs it, and we need it to work, else
12780               we loop infinitely because the buffer is never consumed.  */
12781            sv_chop(utf8_buffer, nl);
12782            break;
12783        }
12784
12785        /* OK, not a complete line there, so need to read some more UTF-16.
12786           Read an extra octect if the buffer currently has an odd number. */
12787        while (1) {
12788            if (status <= 0)
12789                break;
12790            if (SvCUR(utf16_buffer) >= 2) {
12791                /* Location of the high octet of the last complete code point.
12792                   Gosh, UTF-16 is a pain. All the benefits of variable length,
12793                   *coupled* with all the benefits of partial reads and
12794                   endianness.  */
12795                const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12796                    + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12797
12798                if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12799                    break;
12800                }
12801
12802                /* We have the first half of a surrogate. Read more.  */
12803                DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12804            }
12805
12806            status = FILTER_READ(idx + 1, utf16_buffer,
12807                                 160 + (SvCUR(utf16_buffer) & 1));
12808            DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12809            DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12810            if (status < 0) {
12811                /* Error */
12812                IoPAGE(filter) = status;
12813                return status;
12814            }
12815        }
12816
12817        /* 'chars' isn't quite the right name, as code points above 0xFFFF
12818         * require 4 bytes per char */
12819        chars = SvCUR(utf16_buffer) >> 1;
12820        have = SvCUR(utf8_buffer);
12821
12822        /* Assume the worst case size as noted by the functions: twice the
12823         * number of input bytes */
12824        SvGROW(utf8_buffer, have + chars * 4 + 1);
12825
12826        if (reverse) {
12827            end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12828                                         (U8*)SvPVX_const(utf8_buffer) + have,
12829                                         chars * 2, &newlen);
12830        } else {
12831            end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12832                                (U8*)SvPVX_const(utf8_buffer) + have,
12833                                chars * 2, &newlen);
12834        }
12835        SvCUR_set(utf8_buffer, have + newlen);
12836        *end = '\0';
12837
12838        /* No need to keep this SV "well-formed" with a '\0' after the end, as
12839           it's private to us, and utf16_to_utf8{,reversed} take a
12840           (pointer,length) pair, rather than a NUL-terminated string.  */
12841        if(SvCUR(utf16_buffer) & 1) {
12842            *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12843            SvCUR_set(utf16_buffer, 1);
12844        } else {
12845            SvCUR_set(utf16_buffer, 0);
12846        }
12847    }
12848    DEBUG_P(PerlIO_printf(Perl_debug_log,
12849                          "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12850                          status,
12851                          (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12852    DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12853    return retval;
12854}
12855
12856static U8 *
12857S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12858{
12859    SV *filter = filter_add(S_utf16_textfilter, NULL);
12860
12861    PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12862
12863    IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12864    SvPVCLEAR(filter);
12865    IoLINES(filter) = reversed;
12866    IoPAGE(filter) = 1; /* Not EOF */
12867
12868    /* Sadly, we have to return a valid pointer, come what may, so we have to
12869       ignore any error return from this.  */
12870    SvCUR_set(PL_linestr, 0);
12871    if (FILTER_READ(0, PL_linestr, 0)) {
12872        SvUTF8_on(PL_linestr);
12873    } else {
12874        SvUTF8_on(PL_linestr);
12875    }
12876    PL_bufend = SvEND(PL_linestr);
12877    return (U8*)SvPVX(PL_linestr);
12878}
12879#endif
12880
12881/*
12882=for apidoc scan_vstring
12883
12884Returns a pointer to the next character after the parsed
12885vstring, as well as updating the passed in sv.
12886
12887Function must be called like
12888
12889        sv = sv_2mortal(newSV(5));
12890        s = scan_vstring(s,e,sv);
12891
12892where s and e are the start and end of the string.
12893The sv should already be large enough to store the vstring
12894passed in, for performance reasons.
12895
12896This function may croak if fatal warnings are enabled in the
12897calling scope, hence the sv_2mortal in the example (to prevent
12898a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12899sv_2mortal.
12900
12901=cut
12902*/
12903
12904char *
12905Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12906{
12907    const char *pos = s;
12908    const char *start = s;
12909
12910    PERL_ARGS_ASSERT_SCAN_VSTRING;
12911
12912    if (*pos == 'v') pos++;  /* get past 'v' */
12913    while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12914        pos++;
12915    if ( *pos != '.') {
12916        /* this may not be a v-string if followed by => */
12917        const char *next = pos;
12918        while (next < e && isSPACE(*next))
12919            ++next;
12920        if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12921            /* return string not v-string */
12922            sv_setpvn(sv,(char *)s,pos-s);
12923            return (char *)pos;
12924        }
12925    }
12926
12927    if (!isALPHA(*pos)) {
12928        U8 tmpbuf[UTF8_MAXBYTES+1];
12929
12930        if (*s == 'v')
12931            s++;  /* get past 'v' */
12932
12933        SvPVCLEAR(sv);
12934
12935        for (;;) {
12936            /* this is atoi() that tolerates underscores */
12937            U8 *tmpend;
12938            UV rev = 0;
12939            const char *end = pos;
12940            UV mult = 1;
12941            while (--end >= s) {
12942                if (*end != '_') {
12943                    const UV orev = rev;
12944                    rev += (*end - '0') * mult;
12945                    mult *= 10;
12946                    if (orev > rev)
12947                        /* diag_listed_as: Integer overflow in %s number */
12948                        Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12949                                         "Integer overflow in decimal number");
12950                }
12951            }
12952
12953            /* Append native character for the rev point */
12954            tmpend = uvchr_to_utf8(tmpbuf, rev);
12955            sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12956            if (!UVCHR_IS_INVARIANT(rev))
12957                 SvUTF8_on(sv);
12958            if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12959                 s = ++pos;
12960            else {
12961                 s = pos;
12962                 break;
12963            }
12964            while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12965                 pos++;
12966        }
12967        SvPOK_on(sv);
12968        sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12969        SvRMAGICAL_on(sv);
12970    }
12971    return (char *)s;
12972}
12973
12974int
12975Perl_keyword_plugin_standard(pTHX_
12976        char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12977{
12978    PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12979    PERL_UNUSED_CONTEXT;
12980    PERL_UNUSED_ARG(keyword_ptr);
12981    PERL_UNUSED_ARG(keyword_len);
12982    PERL_UNUSED_ARG(op_ptr);
12983    return KEYWORD_PLUGIN_DECLINE;
12984}
12985
12986/*
12987=for apidoc_section $lexer
12988=for apidoc wrap_keyword_plugin
12989
12990Puts a C function into the chain of keyword plugins.  This is the
12991preferred way to manipulate the L</PL_keyword_plugin> variable.
12992C<new_plugin> is a pointer to the C function that is to be added to the
12993keyword plugin chain, and C<old_plugin_p> points to the storage location
12994where a pointer to the next function in the chain will be stored.  The
12995value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12996while the value previously stored there is written to C<*old_plugin_p>.
12997
12998L</PL_keyword_plugin> is global to an entire process, and a module wishing
12999to hook keyword parsing may find itself invoked more than once per
13000process, typically in different threads.  To handle that situation, this
13001function is idempotent.  The location C<*old_plugin_p> must initially
13002(once per process) contain a null pointer.  A C variable of static
13003duration (declared at file scope, typically also marked C<static> to give
13004it internal linkage) will be implicitly initialised appropriately, if it
13005does not have an explicit initialiser.  This function will only actually
13006modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
13007function is also thread safe on the small scale.  It uses appropriate
13008locking to avoid race conditions in accessing L</PL_keyword_plugin>.
13009
13010When this function is called, the function referenced by C<new_plugin>
13011must be ready to be called, except for C<*old_plugin_p> being unfilled.
13012In a threading situation, C<new_plugin> may be called immediately, even
13013before this function has returned.  C<*old_plugin_p> will always be
13014appropriately set before C<new_plugin> is called.  If C<new_plugin>
13015decides not to do anything special with the identifier that it is given
13016(which is the usual case for most calls to a keyword plugin), it must
13017chain the plugin function referenced by C<*old_plugin_p>.
13018
13019Taken all together, XS code to install a keyword plugin should typically
13020look something like this:
13021
13022    static Perl_keyword_plugin_t next_keyword_plugin;
13023    static OP *my_keyword_plugin(pTHX_
13024        char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13025    {
13026        if (memEQs(keyword_ptr, keyword_len,
13027                   "my_new_keyword")) {
13028            ...
13029        } else {
13030            return next_keyword_plugin(aTHX_
13031                keyword_ptr, keyword_len, op_ptr);
13032        }
13033    }
13034    BOOT:
13035        wrap_keyword_plugin(my_keyword_plugin,
13036                            &next_keyword_plugin);
13037
13038Direct access to L</PL_keyword_plugin> should be avoided.
13039
13040=cut
13041*/
13042
13043void
13044Perl_wrap_keyword_plugin(pTHX_
13045    Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
13046{
13047
13048    PERL_UNUSED_CONTEXT;
13049    PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
13050    if (*old_plugin_p) return;
13051    KEYWORD_PLUGIN_MUTEX_LOCK;
13052    if (!*old_plugin_p) {
13053        *old_plugin_p = PL_keyword_plugin;
13054        PL_keyword_plugin = new_plugin;
13055    }
13056    KEYWORD_PLUGIN_MUTEX_UNLOCK;
13057}
13058
13059#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
13060static void
13061S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
13062{
13063    SAVEI32(PL_lex_brackets);
13064    if (PL_lex_brackets > 100)
13065        Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13066    PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13067    SAVEI32(PL_lex_allbrackets);
13068    PL_lex_allbrackets = 0;
13069    SAVEI8(PL_lex_fakeeof);
13070    PL_lex_fakeeof = (U8)fakeeof;
13071    if(yyparse(gramtype) && !PL_parser->error_count)
13072        qerror(Perl_mess(aTHX_ "Parse error"));
13073}
13074
13075#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
13076static OP *
13077S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13078{
13079    OP *o;
13080    ENTER;
13081    SAVEVPTR(PL_eval_root);
13082    PL_eval_root = NULL;
13083    parse_recdescent(gramtype, fakeeof);
13084    o = PL_eval_root;
13085    LEAVE;
13086    return o;
13087}
13088
13089#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13090static OP *
13091S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13092{
13093    OP *exprop;
13094    if (flags & ~PARSE_OPTIONAL)
13095        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13096    exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13097    if (!exprop && !(flags & PARSE_OPTIONAL)) {
13098        if (!PL_parser->error_count)
13099            qerror(Perl_mess(aTHX_ "Parse error"));
13100        exprop = newOP(OP_NULL, 0);
13101    }
13102    return exprop;
13103}
13104
13105/*
13106=for apidoc parse_arithexpr
13107
13108Parse a Perl arithmetic expression.  This may contain operators of precedence
13109down to the bit shift operators.  The expression must be followed (and thus
13110terminated) either by a comparison or lower-precedence operator or by
13111something that would normally terminate an expression such as semicolon.
13112If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13113otherwise it is mandatory.  It is up to the caller to ensure that the
13114dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13115the source of the code to be parsed and the lexical context for the
13116expression.
13117
13118The op tree representing the expression is returned.  If an optional
13119expression is absent, a null pointer is returned, otherwise the pointer
13120will be non-null.
13121
13122If an error occurs in parsing or compilation, in most cases a valid op
13123tree is returned anyway.  The error is reflected in the parser state,
13124normally resulting in a single exception at the top level of parsing
13125which covers all the compilation errors that occurred.  Some compilation
13126errors, however, will throw an exception immediately.
13127
13128=for apidoc Amnh||PARSE_OPTIONAL
13129
13130=cut
13131
13132*/
13133
13134OP *
13135Perl_parse_arithexpr(pTHX_ U32 flags)
13136{
13137    return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13138}
13139
13140/*
13141=for apidoc parse_termexpr
13142
13143Parse a Perl term expression.  This may contain operators of precedence
13144down to the assignment operators.  The expression must be followed (and thus
13145terminated) either by a comma or lower-precedence operator or by
13146something that would normally terminate an expression such as semicolon.
13147If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13148otherwise it is mandatory.  It is up to the caller to ensure that the
13149dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13150the source of the code to be parsed and the lexical context for the
13151expression.
13152
13153The op tree representing the expression is returned.  If an optional
13154expression is absent, a null pointer is returned, otherwise the pointer
13155will be non-null.
13156
13157If an error occurs in parsing or compilation, in most cases a valid op
13158tree is returned anyway.  The error is reflected in the parser state,
13159normally resulting in a single exception at the top level of parsing
13160which covers all the compilation errors that occurred.  Some compilation
13161errors, however, will throw an exception immediately.
13162
13163=cut
13164*/
13165
13166OP *
13167Perl_parse_termexpr(pTHX_ U32 flags)
13168{
13169    return parse_expr(LEX_FAKEEOF_COMMA, flags);
13170}
13171
13172/*
13173=for apidoc parse_listexpr
13174
13175Parse a Perl list expression.  This may contain operators of precedence
13176down to the comma operator.  The expression must be followed (and thus
13177terminated) either by a low-precedence logic operator such as C<or> or by
13178something that would normally terminate an expression such as semicolon.
13179If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13180otherwise it is mandatory.  It is up to the caller to ensure that the
13181dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13182the source of the code to be parsed and the lexical context for the
13183expression.
13184
13185The op tree representing the expression is returned.  If an optional
13186expression is absent, a null pointer is returned, otherwise the pointer
13187will be non-null.
13188
13189If an error occurs in parsing or compilation, in most cases a valid op
13190tree is returned anyway.  The error is reflected in the parser state,
13191normally resulting in a single exception at the top level of parsing
13192which covers all the compilation errors that occurred.  Some compilation
13193errors, however, will throw an exception immediately.
13194
13195=cut
13196*/
13197
13198OP *
13199Perl_parse_listexpr(pTHX_ U32 flags)
13200{
13201    return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13202}
13203
13204/*
13205=for apidoc parse_fullexpr
13206
13207Parse a single complete Perl expression.  This allows the full
13208expression grammar, including the lowest-precedence operators such
13209as C<or>.  The expression must be followed (and thus terminated) by a
13210token that an expression would normally be terminated by: end-of-file,
13211closing bracketing punctuation, semicolon, or one of the keywords that
13212signals a postfix expression-statement modifier.  If C<flags> has the
13213C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13214mandatory.  It is up to the caller to ensure that the dynamic parser
13215state (L</PL_parser> et al) is correctly set to reflect the source of
13216the code to be parsed and the lexical context for the expression.
13217
13218The op tree representing the expression is returned.  If an optional
13219expression is absent, a null pointer is returned, otherwise the pointer
13220will be non-null.
13221
13222If an error occurs in parsing or compilation, in most cases a valid op
13223tree is returned anyway.  The error is reflected in the parser state,
13224normally resulting in a single exception at the top level of parsing
13225which covers all the compilation errors that occurred.  Some compilation
13226errors, however, will throw an exception immediately.
13227
13228=cut
13229*/
13230
13231OP *
13232Perl_parse_fullexpr(pTHX_ U32 flags)
13233{
13234    return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13235}
13236
13237/*
13238=for apidoc parse_block
13239
13240Parse a single complete Perl code block.  This consists of an opening
13241brace, a sequence of statements, and a closing brace.  The block
13242constitutes a lexical scope, so C<my> variables and various compile-time
13243effects can be contained within it.  It is up to the caller to ensure
13244that the dynamic parser state (L</PL_parser> et al) is correctly set to
13245reflect the source of the code to be parsed and the lexical context for
13246the statement.
13247
13248The op tree representing the code block is returned.  This is always a
13249real op, never a null pointer.  It will normally be a C<lineseq> list,
13250including C<nextstate> or equivalent ops.  No ops to construct any kind
13251of runtime scope are included by virtue of it being a block.
13252
13253If an error occurs in parsing or compilation, in most cases a valid op
13254tree (most likely null) is returned anyway.  The error is reflected in
13255the parser state, normally resulting in a single exception at the top
13256level of parsing which covers all the compilation errors that occurred.
13257Some compilation errors, however, will throw an exception immediately.
13258
13259The C<flags> parameter is reserved for future use, and must always
13260be zero.
13261
13262=cut
13263*/
13264
13265OP *
13266Perl_parse_block(pTHX_ U32 flags)
13267{
13268    if (flags)
13269        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13270    return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13271}
13272
13273/*
13274=for apidoc parse_barestmt
13275
13276Parse a single unadorned Perl statement.  This may be a normal imperative
13277statement or a declaration that has compile-time effect.  It does not
13278include any label or other affixture.  It is up to the caller to ensure
13279that the dynamic parser state (L</PL_parser> et al) is correctly set to
13280reflect the source of the code to be parsed and the lexical context for
13281the statement.
13282
13283The op tree representing the statement is returned.  This may be a
13284null pointer if the statement is null, for example if it was actually
13285a subroutine definition (which has compile-time side effects).  If not
13286null, it will be ops directly implementing the statement, suitable to
13287pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13288equivalent op (except for those embedded in a scope contained entirely
13289within the statement).
13290
13291If an error occurs in parsing or compilation, in most cases a valid op
13292tree (most likely null) is returned anyway.  The error is reflected in
13293the parser state, normally resulting in a single exception at the top
13294level of parsing which covers all the compilation errors that occurred.
13295Some compilation errors, however, will throw an exception immediately.
13296
13297The C<flags> parameter is reserved for future use, and must always
13298be zero.
13299
13300=cut
13301*/
13302
13303OP *
13304Perl_parse_barestmt(pTHX_ U32 flags)
13305{
13306    if (flags)
13307        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13308    return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13309}
13310
13311/*
13312=for apidoc parse_label
13313
13314Parse a single label, possibly optional, of the type that may prefix a
13315Perl statement.  It is up to the caller to ensure that the dynamic parser
13316state (L</PL_parser> et al) is correctly set to reflect the source of
13317the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13318label is optional, otherwise it is mandatory.
13319
13320The name of the label is returned in the form of a fresh scalar.  If an
13321optional label is absent, a null pointer is returned.
13322
13323If an error occurs in parsing, which can only occur if the label is
13324mandatory, a valid label is returned anyway.  The error is reflected in
13325the parser state, normally resulting in a single exception at the top
13326level of parsing which covers all the compilation errors that occurred.
13327
13328=cut
13329*/
13330
13331SV *
13332Perl_parse_label(pTHX_ U32 flags)
13333{
13334    if (flags & ~PARSE_OPTIONAL)
13335        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13336    if (PL_nexttoke) {
13337        PL_parser->yychar = yylex();
13338        if (PL_parser->yychar == LABEL) {
13339            SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13340            PL_parser->yychar = YYEMPTY;
13341            cSVOPx(pl_yylval.opval)->op_sv = NULL;
13342            op_free(pl_yylval.opval);
13343            return labelsv;
13344        } else {
13345            yyunlex();
13346            goto no_label;
13347        }
13348    } else {
13349        char *s, *t;
13350        STRLEN wlen, bufptr_pos;
13351        lex_read_space(0);
13352        t = s = PL_bufptr;
13353        if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13354            goto no_label;
13355        t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13356        if (word_takes_any_delimiter(s, wlen))
13357            goto no_label;
13358        bufptr_pos = s - SvPVX(PL_linestr);
13359        PL_bufptr = t;
13360        lex_read_space(LEX_KEEP_PREVIOUS);
13361        t = PL_bufptr;
13362        s = SvPVX(PL_linestr) + bufptr_pos;
13363        if (t[0] == ':' && t[1] != ':') {
13364            PL_oldoldbufptr = PL_oldbufptr;
13365            PL_oldbufptr = s;
13366            PL_bufptr = t+1;
13367            return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13368        } else {
13369            PL_bufptr = s;
13370            no_label:
13371            if (flags & PARSE_OPTIONAL) {
13372                return NULL;
13373            } else {
13374                qerror(Perl_mess(aTHX_ "Parse error"));
13375                return newSVpvs("x");
13376            }
13377        }
13378    }
13379}
13380
13381/*
13382=for apidoc parse_fullstmt
13383
13384Parse a single complete Perl statement.  This may be a normal imperative
13385statement or a declaration that has compile-time effect, and may include
13386optional labels.  It is up to the caller to ensure that the dynamic
13387parser state (L</PL_parser> et al) is correctly set to reflect the source
13388of the code to be parsed and the lexical context for the statement.
13389
13390The op tree representing the statement is returned.  This may be a
13391null pointer if the statement is null, for example if it was actually
13392a subroutine definition (which has compile-time side effects).  If not
13393null, it will be the result of a L</newSTATEOP> call, normally including
13394a C<nextstate> or equivalent op.
13395
13396If an error occurs in parsing or compilation, in most cases a valid op
13397tree (most likely null) is returned anyway.  The error is reflected in
13398the parser state, normally resulting in a single exception at the top
13399level of parsing which covers all the compilation errors that occurred.
13400Some compilation errors, however, will throw an exception immediately.
13401
13402The C<flags> parameter is reserved for future use, and must always
13403be zero.
13404
13405=cut
13406*/
13407
13408OP *
13409Perl_parse_fullstmt(pTHX_ U32 flags)
13410{
13411    if (flags)
13412        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13413    return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13414}
13415
13416/*
13417=for apidoc parse_stmtseq
13418
13419Parse a sequence of zero or more Perl statements.  These may be normal
13420imperative statements, including optional labels, or declarations
13421that have compile-time effect, or any mixture thereof.  The statement
13422sequence ends when a closing brace or end-of-file is encountered in a
13423place where a new statement could have validly started.  It is up to
13424the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13425is correctly set to reflect the source of the code to be parsed and the
13426lexical context for the statements.
13427
13428The op tree representing the statement sequence is returned.  This may
13429be a null pointer if the statements were all null, for example if there
13430were no statements or if there were only subroutine definitions (which
13431have compile-time side effects).  If not null, it will be a C<lineseq>
13432list, normally including C<nextstate> or equivalent ops.
13433
13434If an error occurs in parsing or compilation, in most cases a valid op
13435tree is returned anyway.  The error is reflected in the parser state,
13436normally resulting in a single exception at the top level of parsing
13437which covers all the compilation errors that occurred.  Some compilation
13438errors, however, will throw an exception immediately.
13439
13440The C<flags> parameter is reserved for future use, and must always
13441be zero.
13442
13443=cut
13444*/
13445
13446OP *
13447Perl_parse_stmtseq(pTHX_ U32 flags)
13448{
13449    OP *stmtseqop;
13450    I32 c;
13451    if (flags)
13452        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13453    stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13454    c = lex_peek_unichar(0);
13455    if (c != -1 && c != /*{*/'}')
13456        qerror(Perl_mess(aTHX_ "Parse error"));
13457    return stmtseqop;
13458}
13459
13460/*
13461=for apidoc parse_subsignature
13462
13463Parse a subroutine signature declaration. This is the contents of the
13464parentheses following a named or anonymous subroutine declaration when the
13465C<signatures> feature is enabled. Note that this function neither expects
13466nor consumes the opening and closing parentheses around the signature; it
13467is the caller's job to handle these.
13468
13469This function must only be called during parsing of a subroutine; after
13470L</start_subparse> has been called. It might allocate lexical variables on
13471the pad for the current subroutine.
13472
13473The op tree to unpack the arguments from the stack at runtime is returned.
13474This op tree should appear at the beginning of the compiled function. The
13475caller may wish to use L</op_append_list> to build their function body
13476after it, or splice it together with the body before calling L</newATTRSUB>.
13477
13478The C<flags> parameter is reserved for future use, and must always
13479be zero.
13480
13481=cut
13482*/
13483
13484OP *
13485Perl_parse_subsignature(pTHX_ U32 flags)
13486{
13487    if (flags)
13488        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13489    return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13490}
13491
13492/*
13493 * ex: set ts=8 sts=4 sw=4 et:
13494 */
13495