toke.c revision 1.22
1/*    toke.c
2 *
3 *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 *    You may distribute under the terms of either the GNU General Public
7 *    License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 *  'It all comes from here, the stench and the peril.'    --Frodo
13 *
14 *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15 */
16
17/*
18 * This file is the lexer for Perl.  It's closely linked to the
19 * parser, perly.y.
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
24/*
25=head1 Lexer interface
26This is the lower layer of the Perl parser, managing characters and tokens.
27
28=for apidoc AmU|yy_parser *|PL_parser
29
30Pointer to a structure encapsulating the state of the parsing operation
31currently in progress.  The pointer can be locally changed to perform
32a nested parse without interfering with the state of an outer parse.
33Individual members of C<PL_parser> have their own documentation.
34
35=cut
36*/
37
38#include "EXTERN.h"
39#define PERL_IN_TOKE_C
40#include "perl.h"
41#include "dquote_inline.h"
42
43#define new_constant(a,b,c,d,e,f,g)	\
44	S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
46#define pl_yylval	(PL_parser->yylval)
47
48/* XXX temporary backwards compatibility */
49#define PL_lex_brackets		(PL_parser->lex_brackets)
50#define PL_lex_allbrackets	(PL_parser->lex_allbrackets)
51#define PL_lex_fakeeof		(PL_parser->lex_fakeeof)
52#define PL_lex_brackstack	(PL_parser->lex_brackstack)
53#define PL_lex_casemods		(PL_parser->lex_casemods)
54#define PL_lex_casestack        (PL_parser->lex_casestack)
55#define PL_lex_defer		(PL_parser->lex_defer)
56#define PL_lex_dojoin		(PL_parser->lex_dojoin)
57#define PL_lex_formbrack        (PL_parser->lex_formbrack)
58#define PL_lex_inpat		(PL_parser->lex_inpat)
59#define PL_lex_inwhat		(PL_parser->lex_inwhat)
60#define PL_lex_op		(PL_parser->lex_op)
61#define PL_lex_repl		(PL_parser->lex_repl)
62#define PL_lex_starts		(PL_parser->lex_starts)
63#define PL_lex_stuff		(PL_parser->lex_stuff)
64#define PL_multi_start		(PL_parser->multi_start)
65#define PL_multi_open		(PL_parser->multi_open)
66#define PL_multi_close		(PL_parser->multi_close)
67#define PL_preambled		(PL_parser->preambled)
68#define PL_sublex_info		(PL_parser->sublex_info)
69#define PL_linestr		(PL_parser->linestr)
70#define PL_expect		(PL_parser->expect)
71#define PL_copline		(PL_parser->copline)
72#define PL_bufptr		(PL_parser->bufptr)
73#define PL_oldbufptr		(PL_parser->oldbufptr)
74#define PL_oldoldbufptr		(PL_parser->oldoldbufptr)
75#define PL_linestart		(PL_parser->linestart)
76#define PL_bufend		(PL_parser->bufend)
77#define PL_last_uni		(PL_parser->last_uni)
78#define PL_last_lop		(PL_parser->last_lop)
79#define PL_last_lop_op		(PL_parser->last_lop_op)
80#define PL_lex_state		(PL_parser->lex_state)
81#define PL_rsfp			(PL_parser->rsfp)
82#define PL_rsfp_filters		(PL_parser->rsfp_filters)
83#define PL_in_my		(PL_parser->in_my)
84#define PL_in_my_stash		(PL_parser->in_my_stash)
85#define PL_tokenbuf		(PL_parser->tokenbuf)
86#define PL_multi_end		(PL_parser->multi_end)
87#define PL_error_count		(PL_parser->error_count)
88
89#  define PL_nexttoke		(PL_parser->nexttoke)
90#  define PL_nexttype		(PL_parser->nexttype)
91#  define PL_nextval		(PL_parser->nextval)
92
93static const char* const ident_too_long = "Identifier too long";
94
95#  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
96
97#define XENUMMASK  0x3f
98#define XFAKEEOF   0x40
99#define XFAKEBRACK 0x80
100
101#ifdef USE_UTF8_SCRIPTS
102#   define UTF cBOOL(!IN_BYTES)
103#else
104#   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
105#endif
106
107/* The maximum number of characters preceding the unrecognized one to display */
108#define UNRECOGNIZED_PRECEDE_COUNT 10
109
110/* In variables named $^X, these are the legal values for X.
111 * 1999-02-27 mjd-perl-patch@plover.com */
112#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
113
114#define SPACE_OR_TAB(c) isBLANK_A(c)
115
116#define HEXFP_PEEK(s)     \
117    (((s[0] == '.') && \
118      (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
119     isALPHA_FOLD_EQ(s[0], 'p'))
120
121/* LEX_* are values for PL_lex_state, the state of the lexer.
122 * They are arranged oddly so that the guard on the switch statement
123 * can get by with a single comparison (if the compiler is smart enough).
124 *
125 * These values refer to the various states within a sublex parse,
126 * i.e. within a double quotish string
127 */
128
129/* #define LEX_NOTPARSING		11 is done in perl.h. */
130
131#define LEX_NORMAL		10 /* normal code (ie not within "...")     */
132#define LEX_INTERPNORMAL	 9 /* code within a string, eg "$foo[$x+1]" */
133#define LEX_INTERPCASEMOD	 8 /* expecting a \U, \Q or \E etc          */
134#define LEX_INTERPPUSH		 7 /* starting a new sublex parse level     */
135#define LEX_INTERPSTART		 6 /* expecting the start of a $var         */
136
137				   /* at end of code, eg "$x" followed by:  */
138#define LEX_INTERPEND		 5 /* ... eg not one of [, { or ->          */
139#define LEX_INTERPENDMAYBE	 4 /* ... eg one of [, { or ->              */
140
141#define LEX_INTERPCONCAT	 3 /* expecting anything, eg at start of
142				        string or after \E, $foo, etc       */
143#define LEX_INTERPCONST		 2 /* NOT USED */
144#define LEX_FORMLINE		 1 /* expecting a format line               */
145#define LEX_KNOWNEXT		 0 /* next token known; just return it      */
146
147
148#ifdef DEBUGGING
149static const char* const lex_state_names[] = {
150    "KNOWNEXT",
151    "FORMLINE",
152    "INTERPCONST",
153    "INTERPCONCAT",
154    "INTERPENDMAYBE",
155    "INTERPEND",
156    "INTERPSTART",
157    "INTERPPUSH",
158    "INTERPCASEMOD",
159    "INTERPNORMAL",
160    "NORMAL"
161};
162#endif
163
164#include "keywords.h"
165
166/* CLINE is a macro that ensures PL_copline has a sane value */
167
168#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
169
170/*
171 * Convenience functions to return different tokens and prime the
172 * lexer for the next token.  They all take an argument.
173 *
174 * TOKEN        : generic token (used for '(', DOLSHARP, etc)
175 * OPERATOR     : generic operator
176 * AOPERATOR    : assignment operator
177 * PREBLOCK     : beginning the block after an if, while, foreach, ...
178 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
179 * PREREF       : *EXPR where EXPR is not a simple identifier
180 * TERM         : expression term
181 * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
182 * LOOPX        : loop exiting command (goto, last, dump, etc)
183 * FTST         : file test operator
184 * FUN0         : zero-argument function
185 * FUN0OP       : zero-argument function, with its op created in this file
186 * FUN1         : not used, except for not, which isn't a UNIOP
187 * BOop         : bitwise or or xor
188 * BAop         : bitwise and
189 * BCop         : bitwise complement
190 * SHop         : shift operator
191 * PWop         : power operator
192 * PMop         : pattern-matching operator
193 * Aop          : addition-level operator
194 * AopNOASSIGN  : addition-level operator that is never part of .=
195 * Mop          : multiplication-level operator
196 * Eop          : equality-testing operator
197 * Rop          : relational operator <= != gt
198 *
199 * Also see LOP and lop() below.
200 */
201
202#ifdef DEBUGGING /* Serve -DT. */
203#   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
204#else
205#   define REPORT(retval) (retval)
206#endif
207
208#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
209#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
210#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
211#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
212#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
213#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
214#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
215#define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
216#define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
217			 pl_yylval.ival=f, \
218			 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
219			 REPORT((int)LOOPEX))
220#define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
221#define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
222#define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
223#define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
224#define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
225#define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
226#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
227		       REPORT('~')
228#define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
229#define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
230#define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
231#define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
232#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
233#define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
234#define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
235#define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
236
237/* This bit of chicanery makes a unary function followed by
238 * a parenthesis into a function with one argument, highest precedence.
239 * The UNIDOR macro is for unary functions that can be followed by the //
240 * operator (such as C<shift // 0>).
241 */
242#define UNI3(f,x,have_x) { \
243	pl_yylval.ival = f; \
244	if (have_x) PL_expect = x; \
245	PL_bufptr = s; \
246	PL_last_uni = PL_oldbufptr; \
247	PL_last_lop_op = (f) < 0 ? -(f) : (f); \
248	if (*s == '(') \
249	    return REPORT( (int)FUNC1 ); \
250	s = skipspace(s); \
251	return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
252	}
253#define UNI(f)    UNI3(f,XTERM,1)
254#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
255#define UNIPROTO(f,optional) { \
256	if (optional) PL_last_uni = PL_oldbufptr; \
257	OPERATOR(f); \
258	}
259
260#define UNIBRACK(f) UNI3(f,0,0)
261
262/* grandfather return to old style */
263#define OLDLOP(f) \
264	do { \
265	    if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
266		PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
267	    pl_yylval.ival = (f); \
268	    PL_expect = XTERM; \
269	    PL_bufptr = s; \
270	    return (int)LSTOP; \
271	} while(0)
272
273#define COPLINE_INC_WITH_HERELINES		    \
274    STMT_START {				     \
275	CopLINE_inc(PL_curcop);			      \
276	if (PL_parser->herelines)		       \
277	    CopLINE(PL_curcop) += PL_parser->herelines, \
278	    PL_parser->herelines = 0;			 \
279    } STMT_END
280/* Called after scan_str to update CopLINE(PL_curcop), but only when there
281 * is no sublex_push to follow. */
282#define COPLINE_SET_FROM_MULTI_END	      \
283    STMT_START {			       \
284	CopLINE_set(PL_curcop, PL_multi_end);	\
285	if (PL_multi_end != PL_multi_start)	 \
286	    PL_parser->herelines = 0;		  \
287    } STMT_END
288
289
290#ifdef DEBUGGING
291
292/* how to interpret the pl_yylval associated with the token */
293enum token_type {
294    TOKENTYPE_NONE,
295    TOKENTYPE_IVAL,
296    TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
297    TOKENTYPE_PVAL,
298    TOKENTYPE_OPVAL
299};
300
301static struct debug_tokens {
302    const int token;
303    enum token_type type;
304    const char *name;
305} const debug_tokens[] =
306{
307    { ADDOP,		TOKENTYPE_OPNUM,	"ADDOP" },
308    { ANDAND,		TOKENTYPE_NONE,		"ANDAND" },
309    { ANDOP,		TOKENTYPE_NONE,		"ANDOP" },
310    { ANONSUB,		TOKENTYPE_IVAL,		"ANONSUB" },
311    { ARROW,		TOKENTYPE_NONE,		"ARROW" },
312    { ASSIGNOP,		TOKENTYPE_OPNUM,	"ASSIGNOP" },
313    { BITANDOP,		TOKENTYPE_OPNUM,	"BITANDOP" },
314    { BITOROP,		TOKENTYPE_OPNUM,	"BITOROP" },
315    { COLONATTR,	TOKENTYPE_NONE,		"COLONATTR" },
316    { CONTINUE,		TOKENTYPE_NONE,		"CONTINUE" },
317    { DEFAULT,		TOKENTYPE_NONE,		"DEFAULT" },
318    { DO,		TOKENTYPE_NONE,		"DO" },
319    { DOLSHARP,		TOKENTYPE_NONE,		"DOLSHARP" },
320    { DORDOR,		TOKENTYPE_NONE,		"DORDOR" },
321    { DOROP,		TOKENTYPE_OPNUM,	"DOROP" },
322    { DOTDOT,		TOKENTYPE_IVAL,		"DOTDOT" },
323    { ELSE,		TOKENTYPE_NONE,		"ELSE" },
324    { ELSIF,		TOKENTYPE_IVAL,		"ELSIF" },
325    { EQOP,		TOKENTYPE_OPNUM,	"EQOP" },
326    { FOR,		TOKENTYPE_IVAL,		"FOR" },
327    { FORMAT,		TOKENTYPE_NONE,		"FORMAT" },
328    { FORMLBRACK,	TOKENTYPE_NONE,		"FORMLBRACK" },
329    { FORMRBRACK,	TOKENTYPE_NONE,		"FORMRBRACK" },
330    { FUNC,		TOKENTYPE_OPNUM,	"FUNC" },
331    { FUNC0,		TOKENTYPE_OPNUM,	"FUNC0" },
332    { FUNC0OP,		TOKENTYPE_OPVAL,	"FUNC0OP" },
333    { FUNC0SUB,		TOKENTYPE_OPVAL,	"FUNC0SUB" },
334    { FUNC1,		TOKENTYPE_OPNUM,	"FUNC1" },
335    { FUNCMETH,		TOKENTYPE_OPVAL,	"FUNCMETH" },
336    { GIVEN,		TOKENTYPE_IVAL,		"GIVEN" },
337    { HASHBRACK,	TOKENTYPE_NONE,		"HASHBRACK" },
338    { IF,		TOKENTYPE_IVAL,		"IF" },
339    { LABEL,		TOKENTYPE_PVAL,		"LABEL" },
340    { LOCAL,		TOKENTYPE_IVAL,		"LOCAL" },
341    { LOOPEX,		TOKENTYPE_OPNUM,	"LOOPEX" },
342    { LSTOP,		TOKENTYPE_OPNUM,	"LSTOP" },
343    { LSTOPSUB,		TOKENTYPE_OPVAL,	"LSTOPSUB" },
344    { MATCHOP,		TOKENTYPE_OPNUM,	"MATCHOP" },
345    { METHOD,		TOKENTYPE_OPVAL,	"METHOD" },
346    { MULOP,		TOKENTYPE_OPNUM,	"MULOP" },
347    { MY,		TOKENTYPE_IVAL,		"MY" },
348    { NOAMP,		TOKENTYPE_NONE,		"NOAMP" },
349    { NOTOP,		TOKENTYPE_NONE,		"NOTOP" },
350    { OROP,		TOKENTYPE_IVAL,		"OROP" },
351    { OROR,		TOKENTYPE_NONE,		"OROR" },
352    { PACKAGE,		TOKENTYPE_NONE,		"PACKAGE" },
353    { PLUGEXPR,		TOKENTYPE_OPVAL,	"PLUGEXPR" },
354    { PLUGSTMT,		TOKENTYPE_OPVAL,	"PLUGSTMT" },
355    { PMFUNC,		TOKENTYPE_OPVAL,	"PMFUNC" },
356    { POSTJOIN,		TOKENTYPE_NONE,		"POSTJOIN" },
357    { POSTDEC,		TOKENTYPE_NONE,		"POSTDEC" },
358    { POSTINC,		TOKENTYPE_NONE,		"POSTINC" },
359    { POWOP,		TOKENTYPE_OPNUM,	"POWOP" },
360    { PREDEC,		TOKENTYPE_NONE,		"PREDEC" },
361    { PREINC,		TOKENTYPE_NONE,		"PREINC" },
362    { PRIVATEREF,	TOKENTYPE_OPVAL,	"PRIVATEREF" },
363    { QWLIST,		TOKENTYPE_OPVAL,	"QWLIST" },
364    { REFGEN,		TOKENTYPE_NONE,		"REFGEN" },
365    { RELOP,		TOKENTYPE_OPNUM,	"RELOP" },
366    { REQUIRE,		TOKENTYPE_NONE,		"REQUIRE" },
367    { SHIFTOP,		TOKENTYPE_OPNUM,	"SHIFTOP" },
368    { SUB,		TOKENTYPE_NONE,		"SUB" },
369    { THING,		TOKENTYPE_OPVAL,	"THING" },
370    { UMINUS,		TOKENTYPE_NONE,		"UMINUS" },
371    { UNIOP,		TOKENTYPE_OPNUM,	"UNIOP" },
372    { UNIOPSUB,		TOKENTYPE_OPVAL,	"UNIOPSUB" },
373    { UNLESS,		TOKENTYPE_IVAL,		"UNLESS" },
374    { UNTIL,		TOKENTYPE_IVAL,		"UNTIL" },
375    { USE,		TOKENTYPE_IVAL,		"USE" },
376    { WHEN,		TOKENTYPE_IVAL,		"WHEN" },
377    { WHILE,		TOKENTYPE_IVAL,		"WHILE" },
378    { WORD,		TOKENTYPE_OPVAL,	"WORD" },
379    { YADAYADA,		TOKENTYPE_IVAL,		"YADAYADA" },
380    { 0,		TOKENTYPE_NONE,		NULL }
381};
382
383/* dump the returned token in rv, plus any optional arg in pl_yylval */
384
385STATIC int
386S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
387{
388    PERL_ARGS_ASSERT_TOKEREPORT;
389
390    if (DEBUG_T_TEST) {
391	const char *name = NULL;
392	enum token_type type = TOKENTYPE_NONE;
393	const struct debug_tokens *p;
394	SV* const report = newSVpvs("<== ");
395
396	for (p = debug_tokens; p->token; p++) {
397	    if (p->token == (int)rv) {
398		name = p->name;
399		type = p->type;
400		break;
401	    }
402	}
403	if (name)
404	    Perl_sv_catpv(aTHX_ report, name);
405	else if (isGRAPH(rv))
406	{
407	    Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
408	    if ((char)rv == 'p')
409		sv_catpvs(report, " (pending identifier)");
410	}
411	else if (!rv)
412	    sv_catpvs(report, "EOF");
413	else
414	    Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
415	switch (type) {
416	case TOKENTYPE_NONE:
417	    break;
418	case TOKENTYPE_IVAL:
419	    Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
420	    break;
421	case TOKENTYPE_OPNUM:
422	    Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
423				    PL_op_name[lvalp->ival]);
424	    break;
425	case TOKENTYPE_PVAL:
426	    Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
427	    break;
428	case TOKENTYPE_OPVAL:
429	    if (lvalp->opval) {
430		Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
431				    PL_op_name[lvalp->opval->op_type]);
432		if (lvalp->opval->op_type == OP_CONST) {
433		    Perl_sv_catpvf(aTHX_ report, " %s",
434			SvPEEK(cSVOPx_sv(lvalp->opval)));
435		}
436
437	    }
438	    else
439		sv_catpvs(report, "(opval=null)");
440	    break;
441	}
442        PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
443    };
444    return (int)rv;
445}
446
447
448/* print the buffer with suitable escapes */
449
450STATIC void
451S_printbuf(pTHX_ const char *const fmt, const char *const s)
452{
453    SV* const tmp = newSVpvs("");
454
455    PERL_ARGS_ASSERT_PRINTBUF;
456
457    GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
458    PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
459    GCC_DIAG_RESTORE;
460    SvREFCNT_dec(tmp);
461}
462
463#endif
464
465static int
466S_deprecate_commaless_var_list(pTHX) {
467    PL_expect = XTERM;
468    deprecate("comma-less variable list");
469    return REPORT(','); /* grandfather non-comma-format format */
470}
471
472/*
473 * S_ao
474 *
475 * This subroutine looks for an '=' next to the operator that has just been
476 * parsed and turns it into an ASSIGNOP if it finds one.
477 */
478
479STATIC int
480S_ao(pTHX_ int toketype)
481{
482    if (*PL_bufptr == '=') {
483	PL_bufptr++;
484	if (toketype == ANDAND)
485	    pl_yylval.ival = OP_ANDASSIGN;
486	else if (toketype == OROR)
487	    pl_yylval.ival = OP_ORASSIGN;
488	else if (toketype == DORDOR)
489	    pl_yylval.ival = OP_DORASSIGN;
490	toketype = ASSIGNOP;
491    }
492    return REPORT(toketype);
493}
494
495/*
496 * S_no_op
497 * When Perl expects an operator and finds something else, no_op
498 * prints the warning.  It always prints "<something> found where
499 * operator expected.  It prints "Missing semicolon on previous line?"
500 * if the surprise occurs at the start of the line.  "do you need to
501 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
502 * where the compiler doesn't know if foo is a method call or a function.
503 * It prints "Missing operator before end of line" if there's nothing
504 * after the missing operator, or "... before <...>" if there is something
505 * after the missing operator.
506 *
507 * PL_bufptr is expected to point to the start of the thing that was found,
508 * and s after the next token or partial token.
509 */
510
511STATIC void
512S_no_op(pTHX_ const char *const what, char *s)
513{
514    char * const oldbp = PL_bufptr;
515    const bool is_first = (PL_oldbufptr == PL_linestart);
516
517    PERL_ARGS_ASSERT_NO_OP;
518
519    if (!s)
520	s = oldbp;
521    else
522	PL_bufptr = s;
523    yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
524    if (ckWARN_d(WARN_SYNTAX)) {
525	if (is_first)
526	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
527		    "\t(Missing semicolon on previous line?)\n");
528	else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
529	    const char *t;
530	    for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
531                                                            t += UTF ? UTF8SKIP(t) : 1)
532		NOOP;
533	    if (t < PL_bufptr && isSPACE(*t))
534		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535			"\t(Do you need to predeclare %"UTF8f"?)\n",
536		      UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
537	}
538	else {
539	    assert(s >= oldbp);
540	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
541		    "\t(Missing operator before %"UTF8f"?)\n",
542		     UTF8fARG(UTF, s - oldbp, oldbp));
543	}
544    }
545    PL_bufptr = oldbp;
546}
547
548/*
549 * S_missingterm
550 * Complain about missing quote/regexp/heredoc terminator.
551 * If it's called with NULL then it cauterizes the line buffer.
552 * If we're in a delimited string and the delimiter is a control
553 * character, it's reformatted into a two-char sequence like ^C.
554 * This is fatal.
555 */
556
557STATIC void
558S_missingterm(pTHX_ char *s)
559{
560    char tmpbuf[3];
561    char q;
562    if (s) {
563	char * const nl = strrchr(s,'\n');
564	if (nl)
565	    *nl = '\0';
566    }
567    else if ((U8) PL_multi_close < 32) {
568	*tmpbuf = '^';
569	tmpbuf[1] = (char)toCTRL(PL_multi_close);
570	tmpbuf[2] = '\0';
571	s = tmpbuf;
572    }
573    else {
574	*tmpbuf = (char)PL_multi_close;
575	tmpbuf[1] = '\0';
576	s = tmpbuf;
577    }
578    q = strchr(s,'"') ? '\'' : '"';
579    Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
580}
581
582#include "feature.h"
583
584/*
585 * Check whether the named feature is enabled.
586 */
587bool
588Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
589{
590    char he_name[8 + MAX_FEATURE_LEN] = "feature_";
591
592    PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
593
594    assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
595
596    if (namelen > MAX_FEATURE_LEN)
597	return FALSE;
598    memcpy(&he_name[8], name, namelen);
599
600    return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
601				     REFCOUNTED_HE_EXISTS));
602}
603
604/*
605 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
606 * utf16-to-utf8-reversed.
607 */
608
609#ifdef PERL_CR_FILTER
610static void
611strip_return(SV *sv)
612{
613    const char *s = SvPVX_const(sv);
614    const char * const e = s + SvCUR(sv);
615
616    PERL_ARGS_ASSERT_STRIP_RETURN;
617
618    /* outer loop optimized to do nothing if there are no CR-LFs */
619    while (s < e) {
620	if (*s++ == '\r' && *s == '\n') {
621	    /* hit a CR-LF, need to copy the rest */
622	    char *d = s - 1;
623	    *d++ = *s++;
624	    while (s < e) {
625		if (*s == '\r' && s[1] == '\n')
626		    s++;
627		*d++ = *s++;
628	    }
629	    SvCUR(sv) -= s - d;
630	    return;
631	}
632    }
633}
634
635STATIC I32
636S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
637{
638    const I32 count = FILTER_READ(idx+1, sv, maxlen);
639    if (count > 0 && !maxlen)
640	strip_return(sv);
641    return count;
642}
643#endif
644
645/*
646=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
647
648Creates and initialises a new lexer/parser state object, supplying
649a context in which to lex and parse from a new source of Perl code.
650A pointer to the new state object is placed in L</PL_parser>.  An entry
651is made on the save stack so that upon unwinding the new state object
652will be destroyed and the former value of L</PL_parser> will be restored.
653Nothing else need be done to clean up the parsing context.
654
655The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
656non-null, provides a string (in SV form) containing code to be parsed.
657A copy of the string is made, so subsequent modification of C<line>
658does not affect parsing.  C<rsfp>, if non-null, provides an input stream
659from which code will be read to be parsed.  If both are non-null, the
660code in C<line> comes first and must consist of complete lines of input,
661and C<rsfp> supplies the remainder of the source.
662
663The C<flags> parameter is reserved for future use.  Currently it is only
664used by perl internally, so extensions should always pass zero.
665
666=cut
667*/
668
669/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
670   can share filters with the current parser.
671   LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
672   caller, hence isn't owned by the parser, so shouldn't be closed on parser
673   destruction. This is used to handle the case of defaulting to reading the
674   script from the standard input because no filename was given on the command
675   line (without getting confused by situation where STDIN has been closed, so
676   the script handle is opened on fd 0)  */
677
678void
679Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
680{
681    const char *s = NULL;
682    yy_parser *parser, *oparser;
683    if (flags && flags & ~LEX_START_FLAGS)
684	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
685
686    /* create and initialise a parser */
687
688    Newxz(parser, 1, yy_parser);
689    parser->old_parser = oparser = PL_parser;
690    PL_parser = parser;
691
692    parser->stack = NULL;
693    parser->ps = NULL;
694    parser->stack_size = 0;
695
696    /* on scope exit, free this parser and restore any outer one */
697    SAVEPARSER(parser);
698    parser->saved_curcop = PL_curcop;
699
700    /* initialise lexer state */
701
702    parser->nexttoke = 0;
703    parser->error_count = oparser ? oparser->error_count : 0;
704    parser->copline = parser->preambling = NOLINE;
705    parser->lex_state = LEX_NORMAL;
706    parser->expect = XSTATE;
707    parser->rsfp = rsfp;
708    parser->rsfp_filters =
709      !(flags & LEX_START_SAME_FILTER) || !oparser
710        ? NULL
711        : MUTABLE_AV(SvREFCNT_inc(
712            oparser->rsfp_filters
713             ? oparser->rsfp_filters
714             : (oparser->rsfp_filters = newAV())
715          ));
716
717    Newx(parser->lex_brackstack, 120, char);
718    Newx(parser->lex_casestack, 12, char);
719    *parser->lex_casestack = '\0';
720    Newxz(parser->lex_shared, 1, LEXSHARED);
721
722    if (line) {
723	STRLEN len;
724	s = SvPV_const(line, len);
725	parser->linestr = flags & LEX_START_COPIED
726			    ? SvREFCNT_inc_simple_NN(line)
727			    : newSVpvn_flags(s, len, SvUTF8(line));
728	if (!rsfp)
729	    sv_catpvs(parser->linestr, "\n;");
730    } else {
731	parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
732    }
733    parser->oldoldbufptr =
734	parser->oldbufptr =
735	parser->bufptr =
736	parser->linestart = SvPVX(parser->linestr);
737    parser->bufend = parser->bufptr + SvCUR(parser->linestr);
738    parser->last_lop = parser->last_uni = NULL;
739
740    STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
741                                                        |LEX_DONT_CLOSE_RSFP));
742    parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
743                                                        |LEX_DONT_CLOSE_RSFP));
744
745    parser->in_pod = parser->filtered = 0;
746}
747
748
749/* delete a parser object */
750
751void
752Perl_parser_free(pTHX_  const yy_parser *parser)
753{
754    PERL_ARGS_ASSERT_PARSER_FREE;
755
756    PL_curcop = parser->saved_curcop;
757    SvREFCNT_dec(parser->linestr);
758
759    if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
760	PerlIO_clearerr(parser->rsfp);
761    else if (parser->rsfp && (!parser->old_parser
762          || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
763	PerlIO_close(parser->rsfp);
764    SvREFCNT_dec(parser->rsfp_filters);
765    SvREFCNT_dec(parser->lex_stuff);
766    SvREFCNT_dec(parser->sublex_info.repl);
767
768    Safefree(parser->lex_brackstack);
769    Safefree(parser->lex_casestack);
770    Safefree(parser->lex_shared);
771    PL_parser = parser->old_parser;
772    Safefree(parser);
773}
774
775void
776Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
777{
778    I32 nexttoke = parser->nexttoke;
779    PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
780    while (nexttoke--) {
781	if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
782	 && parser->nextval[nexttoke].opval
783	 && parser->nextval[nexttoke].opval->op_slabbed
784	 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
785	    op_free(parser->nextval[nexttoke].opval);
786	    parser->nextval[nexttoke].opval = NULL;
787	}
788    }
789}
790
791
792/*
793=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
794
795Buffer scalar containing the chunk currently under consideration of the
796text currently being lexed.  This is always a plain string scalar (for
797which C<SvPOK> is true).  It is not intended to be used as a scalar by
798normal scalar means; instead refer to the buffer directly by the pointer
799variables described below.
800
801The lexer maintains various C<char*> pointers to things in the
802C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
803reallocated, all of these pointers must be updated.  Don't attempt to
804do this manually, but rather use L</lex_grow_linestr> if you need to
805reallocate the buffer.
806
807The content of the text chunk in the buffer is commonly exactly one
808complete line of input, up to and including a newline terminator,
809but there are situations where it is otherwise.  The octets of the
810buffer may be intended to be interpreted as either UTF-8 or Latin-1.
811The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
812flag on this scalar, which may disagree with it.
813
814For direct examination of the buffer, the variable
815L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
816lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
817of these pointers is usually preferable to examination of the scalar
818through normal scalar means.
819
820=for apidoc AmxU|char *|PL_parser-E<gt>bufend
821
822Direct pointer to the end of the chunk of text currently being lexed, the
823end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
824+ SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
825always located at the end of the buffer, and does not count as part of
826the buffer's contents.
827
828=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
829
830Points to the current position of lexing inside the lexer buffer.
831Characters around this point may be freely examined, within
832the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
833L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
834interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
835
836Lexing code (whether in the Perl core or not) moves this pointer past
837the characters that it consumes.  It is also expected to perform some
838bookkeeping whenever a newline character is consumed.  This movement
839can be more conveniently performed by the function L</lex_read_to>,
840which handles newlines appropriately.
841
842Interpretation of the buffer's octets can be abstracted out by
843using the slightly higher-level functions L</lex_peek_unichar> and
844L</lex_read_unichar>.
845
846=for apidoc AmxU|char *|PL_parser-E<gt>linestart
847
848Points to the start of the current line inside the lexer buffer.
849This is useful for indicating at which column an error occurred, and
850not much else.  This must be updated by any lexing code that consumes
851a newline; the function L</lex_read_to> handles this detail.
852
853=cut
854*/
855
856/*
857=for apidoc Amx|bool|lex_bufutf8
858
859Indicates whether the octets in the lexer buffer
860(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
861of Unicode characters.  If not, they should be interpreted as Latin-1
862characters.  This is analogous to the C<SvUTF8> flag for scalars.
863
864In UTF-8 mode, it is not guaranteed that the lexer buffer actually
865contains valid UTF-8.  Lexing code must be robust in the face of invalid
866encoding.
867
868The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
869is significant, but not the whole story regarding the input character
870encoding.  Normally, when a file is being read, the scalar contains octets
871and its C<SvUTF8> flag is off, but the octets should be interpreted as
872UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
873however, the scalar may have the C<SvUTF8> flag on, and in this case its
874octets should be interpreted as UTF-8 unless the C<use bytes> pragma
875is in effect.  This logic may change in the future; use this function
876instead of implementing the logic yourself.
877
878=cut
879*/
880
881bool
882Perl_lex_bufutf8(pTHX)
883{
884    return UTF;
885}
886
887/*
888=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
889
890Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
891at least C<len> octets (including terminating C<NUL>).  Returns a
892pointer to the reallocated buffer.  This is necessary before making
893any direct modification of the buffer that would increase its length.
894L</lex_stuff_pvn> provides a more convenient way to insert text into
895the buffer.
896
897Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
898this function updates all of the lexer's variables that point directly
899into the buffer.
900
901=cut
902*/
903
904char *
905Perl_lex_grow_linestr(pTHX_ STRLEN len)
906{
907    SV *linestr;
908    char *buf;
909    STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
910    STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
911    linestr = PL_parser->linestr;
912    buf = SvPVX(linestr);
913    if (len <= SvLEN(linestr))
914	return buf;
915    bufend_pos = PL_parser->bufend - buf;
916    bufptr_pos = PL_parser->bufptr - buf;
917    oldbufptr_pos = PL_parser->oldbufptr - buf;
918    oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
919    linestart_pos = PL_parser->linestart - buf;
920    last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
921    last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
922    re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
923                            PL_parser->lex_shared->re_eval_start - buf : 0;
924
925    buf = sv_grow(linestr, len);
926
927    PL_parser->bufend = buf + bufend_pos;
928    PL_parser->bufptr = buf + bufptr_pos;
929    PL_parser->oldbufptr = buf + oldbufptr_pos;
930    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
931    PL_parser->linestart = buf + linestart_pos;
932    if (PL_parser->last_uni)
933	PL_parser->last_uni = buf + last_uni_pos;
934    if (PL_parser->last_lop)
935	PL_parser->last_lop = buf + last_lop_pos;
936    if (PL_parser->lex_shared->re_eval_start)
937        PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
938    return buf;
939}
940
941/*
942=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
943
944Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
945immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
946reallocating the buffer if necessary.  This means that lexing code that
947runs later will see the characters as if they had appeared in the input.
948It is not recommended to do this as part of normal parsing, and most
949uses of this facility run the risk of the inserted characters being
950interpreted in an unintended manner.
951
952The string to be inserted is represented by C<len> octets starting
953at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
954according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
955The characters are recoded for the lexer buffer, according to how the
956buffer is currently being interpreted (L</lex_bufutf8>).  If a string
957to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
958function is more convenient.
959
960=cut
961*/
962
963void
964Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
965{
966    dVAR;
967    char *bufptr;
968    PERL_ARGS_ASSERT_LEX_STUFF_PVN;
969    if (flags & ~(LEX_STUFF_UTF8))
970	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
971    if (UTF) {
972	if (flags & LEX_STUFF_UTF8) {
973	    goto plain_copy;
974	} else {
975	    STRLEN highhalf = 0;    /* Count of variants */
976	    const char *p, *e = pv+len;
977	    for (p = pv; p != e; p++) {
978		if (! UTF8_IS_INVARIANT(*p)) {
979                    highhalf++;
980                }
981            }
982	    if (!highhalf)
983		goto plain_copy;
984	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
985	    bufptr = PL_parser->bufptr;
986	    Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
987	    SvCUR_set(PL_parser->linestr,
988	    	SvCUR(PL_parser->linestr) + len+highhalf);
989	    PL_parser->bufend += len+highhalf;
990	    for (p = pv; p != e; p++) {
991		U8 c = (U8)*p;
992		if (! UTF8_IS_INVARIANT(c)) {
993		    *bufptr++ = UTF8_TWO_BYTE_HI(c);
994		    *bufptr++ = UTF8_TWO_BYTE_LO(c);
995		} else {
996		    *bufptr++ = (char)c;
997		}
998	    }
999	}
1000    } else {
1001	if (flags & LEX_STUFF_UTF8) {
1002	    STRLEN highhalf = 0;
1003	    const char *p, *e = pv+len;
1004	    for (p = pv; p != e; p++) {
1005		U8 c = (U8)*p;
1006		if (UTF8_IS_ABOVE_LATIN1(c)) {
1007		    Perl_croak(aTHX_ "Lexing code attempted to stuff "
1008				"non-Latin-1 character into Latin-1 input");
1009		} else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1010		    p++;
1011		    highhalf++;
1012		} else if (! UTF8_IS_INVARIANT(c)) {
1013		    /* malformed UTF-8 */
1014		    ENTER;
1015		    SAVESPTR(PL_warnhook);
1016		    PL_warnhook = PERL_WARNHOOK_FATAL;
1017		    utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1018		    LEAVE;
1019		}
1020	    }
1021	    if (!highhalf)
1022		goto plain_copy;
1023	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1024	    bufptr = PL_parser->bufptr;
1025	    Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1026	    SvCUR_set(PL_parser->linestr,
1027	    	SvCUR(PL_parser->linestr) + len-highhalf);
1028	    PL_parser->bufend += len-highhalf;
1029	    p = pv;
1030	    while (p < e) {
1031		if (UTF8_IS_INVARIANT(*p)) {
1032		    *bufptr++ = *p;
1033                    p++;
1034		}
1035		else {
1036                    assert(p < e -1 );
1037		    *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1038		    p += 2;
1039                }
1040	    }
1041	} else {
1042	  plain_copy:
1043	    lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1044	    bufptr = PL_parser->bufptr;
1045	    Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1046	    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1047	    PL_parser->bufend += len;
1048	    Copy(pv, bufptr, len, char);
1049	}
1050    }
1051}
1052
1053/*
1054=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1055
1056Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1057immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1058reallocating the buffer if necessary.  This means that lexing code that
1059runs later will see the characters as if they had appeared in the input.
1060It is not recommended to do this as part of normal parsing, and most
1061uses of this facility run the risk of the inserted characters being
1062interpreted in an unintended manner.
1063
1064The string to be inserted is represented by octets starting at C<pv>
1065and continuing to the first nul.  These octets are interpreted as either
1066UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1067in C<flags>.  The characters are recoded for the lexer buffer, according
1068to how the buffer is currently being interpreted (L</lex_bufutf8>).
1069If it is not convenient to nul-terminate a string to be inserted, the
1070L</lex_stuff_pvn> function is more appropriate.
1071
1072=cut
1073*/
1074
1075void
1076Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1077{
1078    PERL_ARGS_ASSERT_LEX_STUFF_PV;
1079    lex_stuff_pvn(pv, strlen(pv), flags);
1080}
1081
1082/*
1083=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1084
1085Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1086immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1087reallocating the buffer if necessary.  This means that lexing code that
1088runs later will see the characters as if they had appeared in the input.
1089It is not recommended to do this as part of normal parsing, and most
1090uses of this facility run the risk of the inserted characters being
1091interpreted in an unintended manner.
1092
1093The string to be inserted is the string value of C<sv>.  The characters
1094are recoded for the lexer buffer, according to how the buffer is currently
1095being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1096not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1097need to construct a scalar.
1098
1099=cut
1100*/
1101
1102void
1103Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1104{
1105    char *pv;
1106    STRLEN len;
1107    PERL_ARGS_ASSERT_LEX_STUFF_SV;
1108    if (flags)
1109	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1110    pv = SvPV(sv, len);
1111    lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1112}
1113
1114/*
1115=for apidoc Amx|void|lex_unstuff|char *ptr
1116
1117Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1118C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1119This hides the discarded text from any lexing code that runs later,
1120as if the text had never appeared.
1121
1122This is not the normal way to consume lexed text.  For that, use
1123L</lex_read_to>.
1124
1125=cut
1126*/
1127
1128void
1129Perl_lex_unstuff(pTHX_ char *ptr)
1130{
1131    char *buf, *bufend;
1132    STRLEN unstuff_len;
1133    PERL_ARGS_ASSERT_LEX_UNSTUFF;
1134    buf = PL_parser->bufptr;
1135    if (ptr < buf)
1136	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1137    if (ptr == buf)
1138	return;
1139    bufend = PL_parser->bufend;
1140    if (ptr > bufend)
1141	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1142    unstuff_len = ptr - buf;
1143    Move(ptr, buf, bufend+1-ptr, char);
1144    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1145    PL_parser->bufend = bufend - unstuff_len;
1146}
1147
1148/*
1149=for apidoc Amx|void|lex_read_to|char *ptr
1150
1151Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1152to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1153performing the correct bookkeeping whenever a newline character is passed.
1154This is the normal way to consume lexed text.
1155
1156Interpretation of the buffer's octets can be abstracted out by
1157using the slightly higher-level functions L</lex_peek_unichar> and
1158L</lex_read_unichar>.
1159
1160=cut
1161*/
1162
1163void
1164Perl_lex_read_to(pTHX_ char *ptr)
1165{
1166    char *s;
1167    PERL_ARGS_ASSERT_LEX_READ_TO;
1168    s = PL_parser->bufptr;
1169    if (ptr < s || ptr > PL_parser->bufend)
1170	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1171    for (; s != ptr; s++)
1172	if (*s == '\n') {
1173	    COPLINE_INC_WITH_HERELINES;
1174	    PL_parser->linestart = s+1;
1175	}
1176    PL_parser->bufptr = ptr;
1177}
1178
1179/*
1180=for apidoc Amx|void|lex_discard_to|char *ptr
1181
1182Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1183up to C<ptr>.  The remaining content of the buffer will be moved, and
1184all pointers into the buffer updated appropriately.  C<ptr> must not
1185be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1186it is not permitted to discard text that has yet to be lexed.
1187
1188Normally it is not necessarily to do this directly, because it suffices to
1189use the implicit discarding behaviour of L</lex_next_chunk> and things
1190based on it.  However, if a token stretches across multiple lines,
1191and the lexing code has kept multiple lines of text in the buffer for
1192that purpose, then after completion of the token it would be wise to
1193explicitly discard the now-unneeded earlier lines, to avoid future
1194multi-line tokens growing the buffer without bound.
1195
1196=cut
1197*/
1198
1199void
1200Perl_lex_discard_to(pTHX_ char *ptr)
1201{
1202    char *buf;
1203    STRLEN discard_len;
1204    PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1205    buf = SvPVX(PL_parser->linestr);
1206    if (ptr < buf)
1207	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1208    if (ptr == buf)
1209	return;
1210    if (ptr > PL_parser->bufptr)
1211	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1212    discard_len = ptr - buf;
1213    if (PL_parser->oldbufptr < ptr)
1214	PL_parser->oldbufptr = ptr;
1215    if (PL_parser->oldoldbufptr < ptr)
1216	PL_parser->oldoldbufptr = ptr;
1217    if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1218	PL_parser->last_uni = NULL;
1219    if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1220	PL_parser->last_lop = NULL;
1221    Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1222    SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1223    PL_parser->bufend -= discard_len;
1224    PL_parser->bufptr -= discard_len;
1225    PL_parser->oldbufptr -= discard_len;
1226    PL_parser->oldoldbufptr -= discard_len;
1227    if (PL_parser->last_uni)
1228	PL_parser->last_uni -= discard_len;
1229    if (PL_parser->last_lop)
1230	PL_parser->last_lop -= discard_len;
1231}
1232
1233/*
1234=for apidoc Amx|bool|lex_next_chunk|U32 flags
1235
1236Reads in the next chunk of text to be lexed, appending it to
1237L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1238looked to the end of the current chunk and wants to know more.  It is
1239usual, but not necessary, for lexing to have consumed the entirety of
1240the current chunk at this time.
1241
1242If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1243chunk (i.e., the current chunk has been entirely consumed), normally the
1244current chunk will be discarded at the same time that the new chunk is
1245read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1246will not be discarded.  If the current chunk has not been entirely
1247consumed, then it will not be discarded regardless of the flag.
1248
1249Returns true if some new text was added to the buffer, or false if the
1250buffer has reached the end of the input text.
1251
1252=cut
1253*/
1254
1255#define LEX_FAKE_EOF 0x80000000
1256#define LEX_NO_TERM  0x40000000 /* here-doc */
1257
1258bool
1259Perl_lex_next_chunk(pTHX_ U32 flags)
1260{
1261    SV *linestr;
1262    char *buf;
1263    STRLEN old_bufend_pos, new_bufend_pos;
1264    STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1265    STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1266    bool got_some_for_debugger = 0;
1267    bool got_some;
1268    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1269	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1270    if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1271	return FALSE;
1272    linestr = PL_parser->linestr;
1273    buf = SvPVX(linestr);
1274    if (!(flags & LEX_KEEP_PREVIOUS)
1275          && PL_parser->bufptr == PL_parser->bufend)
1276    {
1277	old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1278	linestart_pos = 0;
1279	if (PL_parser->last_uni != PL_parser->bufend)
1280	    PL_parser->last_uni = NULL;
1281	if (PL_parser->last_lop != PL_parser->bufend)
1282	    PL_parser->last_lop = NULL;
1283	last_uni_pos = last_lop_pos = 0;
1284	*buf = 0;
1285	SvCUR(linestr) = 0;
1286    } else {
1287	old_bufend_pos = PL_parser->bufend - buf;
1288	bufptr_pos = PL_parser->bufptr - buf;
1289	oldbufptr_pos = PL_parser->oldbufptr - buf;
1290	oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1291	linestart_pos = PL_parser->linestart - buf;
1292	last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1293	last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1294    }
1295    if (flags & LEX_FAKE_EOF) {
1296	goto eof;
1297    } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1298	got_some = 0;
1299    } else if (filter_gets(linestr, old_bufend_pos)) {
1300	got_some = 1;
1301	got_some_for_debugger = 1;
1302    } else if (flags & LEX_NO_TERM) {
1303	got_some = 0;
1304    } else {
1305	if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1306	    sv_setpvs(linestr, "");
1307	eof:
1308	/* End of real input.  Close filehandle (unless it was STDIN),
1309	 * then add implicit termination.
1310	 */
1311	if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1312	    PerlIO_clearerr(PL_parser->rsfp);
1313	else if (PL_parser->rsfp)
1314	    (void)PerlIO_close(PL_parser->rsfp);
1315	PL_parser->rsfp = NULL;
1316	PL_parser->in_pod = PL_parser->filtered = 0;
1317	if (!PL_in_eval && PL_minus_p) {
1318	    sv_catpvs(linestr,
1319		/*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1320	    PL_minus_n = PL_minus_p = 0;
1321	} else if (!PL_in_eval && PL_minus_n) {
1322	    sv_catpvs(linestr, /*{*/";}");
1323	    PL_minus_n = 0;
1324	} else
1325	    sv_catpvs(linestr, ";");
1326	got_some = 1;
1327    }
1328    buf = SvPVX(linestr);
1329    new_bufend_pos = SvCUR(linestr);
1330    PL_parser->bufend = buf + new_bufend_pos;
1331    PL_parser->bufptr = buf + bufptr_pos;
1332    PL_parser->oldbufptr = buf + oldbufptr_pos;
1333    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1334    PL_parser->linestart = buf + linestart_pos;
1335    if (PL_parser->last_uni)
1336	PL_parser->last_uni = buf + last_uni_pos;
1337    if (PL_parser->last_lop)
1338	PL_parser->last_lop = buf + last_lop_pos;
1339    if (PL_parser->preambling != NOLINE) {
1340	CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1341	PL_parser->preambling = NOLINE;
1342    }
1343    if (   got_some_for_debugger
1344        && PERLDB_LINE_OR_SAVESRC
1345        && PL_curstash != PL_debstash)
1346    {
1347	/* debugger active and we're not compiling the debugger code,
1348	 * so store the line into the debugger's array of lines
1349	 */
1350	update_debugger_info(NULL, buf+old_bufend_pos,
1351	    new_bufend_pos-old_bufend_pos);
1352    }
1353    return got_some;
1354}
1355
1356/*
1357=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1358
1359Looks ahead one (Unicode) character in the text currently being lexed.
1360Returns the codepoint (unsigned integer value) of the next character,
1361or -1 if lexing has reached the end of the input text.  To consume the
1362peeked character, use L</lex_read_unichar>.
1363
1364If the next character is in (or extends into) the next chunk of input
1365text, the next chunk will be read in.  Normally the current chunk will be
1366discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1367bit set, then the current chunk will not be discarded.
1368
1369If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1370is encountered, an exception is generated.
1371
1372=cut
1373*/
1374
1375I32
1376Perl_lex_peek_unichar(pTHX_ U32 flags)
1377{
1378    dVAR;
1379    char *s, *bufend;
1380    if (flags & ~(LEX_KEEP_PREVIOUS))
1381	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1382    s = PL_parser->bufptr;
1383    bufend = PL_parser->bufend;
1384    if (UTF) {
1385	U8 head;
1386	I32 unichar;
1387	STRLEN len, retlen;
1388	if (s == bufend) {
1389	    if (!lex_next_chunk(flags))
1390		return -1;
1391	    s = PL_parser->bufptr;
1392	    bufend = PL_parser->bufend;
1393	}
1394	head = (U8)*s;
1395	if (UTF8_IS_INVARIANT(head))
1396	    return head;
1397	if (UTF8_IS_START(head)) {
1398	    len = UTF8SKIP(&head);
1399	    while ((STRLEN)(bufend-s) < len) {
1400		if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1401		    break;
1402		s = PL_parser->bufptr;
1403		bufend = PL_parser->bufend;
1404	    }
1405	}
1406	unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1407	if (retlen == (STRLEN)-1) {
1408	    /* malformed UTF-8 */
1409	    ENTER;
1410	    SAVESPTR(PL_warnhook);
1411	    PL_warnhook = PERL_WARNHOOK_FATAL;
1412	    utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1413	    LEAVE;
1414	}
1415	return unichar;
1416    } else {
1417	if (s == bufend) {
1418	    if (!lex_next_chunk(flags))
1419		return -1;
1420	    s = PL_parser->bufptr;
1421	}
1422	return (U8)*s;
1423    }
1424}
1425
1426/*
1427=for apidoc Amx|I32|lex_read_unichar|U32 flags
1428
1429Reads the next (Unicode) character in the text currently being lexed.
1430Returns the codepoint (unsigned integer value) of the character read,
1431and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1432if lexing has reached the end of the input text.  To non-destructively
1433examine the next character, use L</lex_peek_unichar> instead.
1434
1435If the next character is in (or extends into) the next chunk of input
1436text, the next chunk will be read in.  Normally the current chunk will be
1437discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1438bit set, then the current chunk will not be discarded.
1439
1440If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1441is encountered, an exception is generated.
1442
1443=cut
1444*/
1445
1446I32
1447Perl_lex_read_unichar(pTHX_ U32 flags)
1448{
1449    I32 c;
1450    if (flags & ~(LEX_KEEP_PREVIOUS))
1451	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1452    c = lex_peek_unichar(flags);
1453    if (c != -1) {
1454	if (c == '\n')
1455	    COPLINE_INC_WITH_HERELINES;
1456	if (UTF)
1457	    PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1458	else
1459	    ++(PL_parser->bufptr);
1460    }
1461    return c;
1462}
1463
1464/*
1465=for apidoc Amx|void|lex_read_space|U32 flags
1466
1467Reads optional spaces, in Perl style, in the text currently being
1468lexed.  The spaces may include ordinary whitespace characters and
1469Perl-style comments.  C<#line> directives are processed if encountered.
1470L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1471at a non-space character (or the end of the input text).
1472
1473If spaces extend into the next chunk of input text, the next chunk will
1474be read in.  Normally the current chunk will be discarded at the same
1475time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1476chunk will not be discarded.
1477
1478=cut
1479*/
1480
1481#define LEX_NO_INCLINE    0x40000000
1482#define LEX_NO_NEXT_CHUNK 0x80000000
1483
1484void
1485Perl_lex_read_space(pTHX_ U32 flags)
1486{
1487    char *s, *bufend;
1488    const bool can_incline = !(flags & LEX_NO_INCLINE);
1489    bool need_incline = 0;
1490    if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1491	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1492    s = PL_parser->bufptr;
1493    bufend = PL_parser->bufend;
1494    while (1) {
1495	char c = *s;
1496	if (c == '#') {
1497	    do {
1498		c = *++s;
1499	    } while (!(c == '\n' || (c == 0 && s == bufend)));
1500	} else if (c == '\n') {
1501	    s++;
1502	    if (can_incline) {
1503		PL_parser->linestart = s;
1504		if (s == bufend)
1505		    need_incline = 1;
1506		else
1507		    incline(s);
1508	    }
1509	} else if (isSPACE(c)) {
1510	    s++;
1511	} else if (c == 0 && s == bufend) {
1512	    bool got_more;
1513	    line_t l;
1514	    if (flags & LEX_NO_NEXT_CHUNK)
1515		break;
1516	    PL_parser->bufptr = s;
1517	    l = CopLINE(PL_curcop);
1518	    CopLINE(PL_curcop) += PL_parser->herelines + 1;
1519	    got_more = lex_next_chunk(flags);
1520	    CopLINE_set(PL_curcop, l);
1521	    s = PL_parser->bufptr;
1522	    bufend = PL_parser->bufend;
1523	    if (!got_more)
1524		break;
1525	    if (can_incline && need_incline && PL_parser->rsfp) {
1526		incline(s);
1527		need_incline = 0;
1528	    }
1529	} else if (!c) {
1530	    s++;
1531	} else {
1532	    break;
1533	}
1534    }
1535    PL_parser->bufptr = s;
1536}
1537
1538/*
1539
1540=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1541
1542This function performs syntax checking on a prototype, C<proto>.
1543If C<warn> is true, any illegal characters or mismatched brackets
1544will trigger illegalproto warnings, declaring that they were
1545detected in the prototype for C<name>.
1546
1547The return value is C<true> if this is a valid prototype, and
1548C<false> if it is not, regardless of whether C<warn> was C<true> or
1549C<false>.
1550
1551Note that C<NULL> is a valid C<proto> and will always return C<true>.
1552
1553=cut
1554
1555 */
1556
1557bool
1558Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1559{
1560    STRLEN len, origlen;
1561    char *p = proto ? SvPV(proto, len) : NULL;
1562    bool bad_proto = FALSE;
1563    bool in_brackets = FALSE;
1564    bool after_slash = FALSE;
1565    char greedy_proto = ' ';
1566    bool proto_after_greedy_proto = FALSE;
1567    bool must_be_last = FALSE;
1568    bool underscore = FALSE;
1569    bool bad_proto_after_underscore = FALSE;
1570
1571    PERL_ARGS_ASSERT_VALIDATE_PROTO;
1572
1573    if (!proto)
1574	return TRUE;
1575
1576    origlen = len;
1577    for (; len--; p++) {
1578	if (!isSPACE(*p)) {
1579	    if (must_be_last)
1580		proto_after_greedy_proto = TRUE;
1581	    if (underscore) {
1582		if (!strchr(";@%", *p))
1583		    bad_proto_after_underscore = TRUE;
1584		underscore = FALSE;
1585	    }
1586	    if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1587		bad_proto = TRUE;
1588	    }
1589	    else {
1590		if (*p == '[')
1591		    in_brackets = TRUE;
1592		else if (*p == ']')
1593		    in_brackets = FALSE;
1594		else if ((*p == '@' || *p == '%')
1595                         && !after_slash
1596                         && !in_brackets )
1597                {
1598		    must_be_last = TRUE;
1599		    greedy_proto = *p;
1600		}
1601		else if (*p == '_')
1602		    underscore = TRUE;
1603	    }
1604	    if (*p == '\\')
1605		after_slash = TRUE;
1606	    else
1607		after_slash = FALSE;
1608	}
1609    }
1610
1611    if (warn) {
1612	SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1613	p -= origlen;
1614	p = SvUTF8(proto)
1615	    ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1616	                     origlen, UNI_DISPLAY_ISPRINT)
1617	    : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1618
1619	if (proto_after_greedy_proto)
1620	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1621			"Prototype after '%c' for %"SVf" : %s",
1622			greedy_proto, SVfARG(name), p);
1623	if (in_brackets)
1624	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1625			"Missing ']' in prototype for %"SVf" : %s",
1626			SVfARG(name), p);
1627	if (bad_proto)
1628	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1629			"Illegal character in prototype for %"SVf" : %s",
1630			SVfARG(name), p);
1631	if (bad_proto_after_underscore)
1632	    Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1633			"Illegal character after '_' in prototype for %"SVf" : %s",
1634			SVfARG(name), p);
1635    }
1636
1637    return (! (proto_after_greedy_proto || bad_proto) );
1638}
1639
1640/*
1641 * S_incline
1642 * This subroutine has nothing to do with tilting, whether at windmills
1643 * or pinball tables.  Its name is short for "increment line".  It
1644 * increments the current line number in CopLINE(PL_curcop) and checks
1645 * to see whether the line starts with a comment of the form
1646 *    # line 500 "foo.pm"
1647 * If so, it sets the current line number and file to the values in the comment.
1648 */
1649
1650STATIC void
1651S_incline(pTHX_ const char *s)
1652{
1653    const char *t;
1654    const char *n;
1655    const char *e;
1656    line_t line_num;
1657    UV uv;
1658
1659    PERL_ARGS_ASSERT_INCLINE;
1660
1661    COPLINE_INC_WITH_HERELINES;
1662    if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1663     && s+1 == PL_bufend && *s == ';') {
1664	/* fake newline in string eval */
1665	CopLINE_dec(PL_curcop);
1666	return;
1667    }
1668    if (*s++ != '#')
1669	return;
1670    while (SPACE_OR_TAB(*s))
1671	s++;
1672    if (strnEQ(s, "line", 4))
1673	s += 4;
1674    else
1675	return;
1676    if (SPACE_OR_TAB(*s))
1677	s++;
1678    else
1679	return;
1680    while (SPACE_OR_TAB(*s))
1681	s++;
1682    if (!isDIGIT(*s))
1683	return;
1684
1685    n = s;
1686    while (isDIGIT(*s))
1687	s++;
1688    if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1689	return;
1690    while (SPACE_OR_TAB(*s))
1691	s++;
1692    if (*s == '"' && (t = strchr(s+1, '"'))) {
1693	s++;
1694	e = t + 1;
1695    }
1696    else {
1697	t = s;
1698	while (*t && !isSPACE(*t))
1699	    t++;
1700	e = t;
1701    }
1702    while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1703	e++;
1704    if (*e != '\n' && *e != '\0')
1705	return;		/* false alarm */
1706
1707    if (!grok_atoUV(n, &uv, &e))
1708        return;
1709    line_num = ((line_t)uv) - 1;
1710
1711    if (t - s > 0) {
1712	const STRLEN len = t - s;
1713
1714	if (!PL_rsfp && !PL_parser->filtered) {
1715	    /* must copy *{"::_<(eval N)[oldfilename:L]"}
1716	     * to *{"::_<newfilename"} */
1717	    /* However, the long form of evals is only turned on by the
1718	       debugger - usually they're "(eval %lu)" */
1719	    GV * const cfgv = CopFILEGV(PL_curcop);
1720	    if (cfgv) {
1721		char smallbuf[128];
1722		STRLEN tmplen2 = len;
1723		char *tmpbuf2;
1724		GV *gv2;
1725
1726		if (tmplen2 + 2 <= sizeof smallbuf)
1727		    tmpbuf2 = smallbuf;
1728		else
1729		    Newx(tmpbuf2, tmplen2 + 2, char);
1730
1731		tmpbuf2[0] = '_';
1732		tmpbuf2[1] = '<';
1733
1734		memcpy(tmpbuf2 + 2, s, tmplen2);
1735		tmplen2 += 2;
1736
1737		gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1738		if (!isGV(gv2)) {
1739		    gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1740		    /* adjust ${"::_<newfilename"} to store the new file name */
1741		    GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1742		    /* The line number may differ. If that is the case,
1743		       alias the saved lines that are in the array.
1744		       Otherwise alias the whole array. */
1745		    if (CopLINE(PL_curcop) == line_num) {
1746			GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1747			GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1748		    }
1749		    else if (GvAV(cfgv)) {
1750			AV * const av = GvAV(cfgv);
1751			const I32 start = CopLINE(PL_curcop)+1;
1752			I32 items = AvFILLp(av) - start;
1753			if (items > 0) {
1754			    AV * const av2 = GvAVn(gv2);
1755			    SV **svp = AvARRAY(av) + start;
1756			    I32 l = (I32)line_num+1;
1757			    while (items--)
1758				av_store(av2, l++, SvREFCNT_inc(*svp++));
1759			}
1760		    }
1761		}
1762
1763		if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1764	    }
1765	}
1766	CopFILE_free(PL_curcop);
1767	CopFILE_setn(PL_curcop, s, len);
1768    }
1769    CopLINE_set(PL_curcop, line_num);
1770}
1771
1772#define skipspace(s) skipspace_flags(s, 0)
1773
1774
1775STATIC void
1776S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1777{
1778    AV *av = CopFILEAVx(PL_curcop);
1779    if (av) {
1780	SV * sv;
1781	if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1782	else {
1783	    sv = *av_fetch(av, 0, 1);
1784	    SvUPGRADE(sv, SVt_PVMG);
1785	}
1786	if (!SvPOK(sv)) sv_setpvs(sv,"");
1787	if (orig_sv)
1788	    sv_catsv(sv, orig_sv);
1789	else
1790	    sv_catpvn(sv, buf, len);
1791	if (!SvIOK(sv)) {
1792	    (void)SvIOK_on(sv);
1793	    SvIV_set(sv, 0);
1794	}
1795	if (PL_parser->preambling == NOLINE)
1796	    av_store(av, CopLINE(PL_curcop), sv);
1797    }
1798}
1799
1800/*
1801 * S_skipspace
1802 * Called to gobble the appropriate amount and type of whitespace.
1803 * Skips comments as well.
1804 */
1805
1806STATIC char *
1807S_skipspace_flags(pTHX_ char *s, U32 flags)
1808{
1809    PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1810    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1811	while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1812	    s++;
1813    } else {
1814	STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1815	PL_bufptr = s;
1816	lex_read_space(flags | LEX_KEEP_PREVIOUS |
1817		(PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1818		    LEX_NO_NEXT_CHUNK : 0));
1819	s = PL_bufptr;
1820	PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1821	if (PL_linestart > PL_bufptr)
1822	    PL_bufptr = PL_linestart;
1823	return s;
1824    }
1825    return s;
1826}
1827
1828/*
1829 * S_check_uni
1830 * Check the unary operators to ensure there's no ambiguity in how they're
1831 * used.  An ambiguous piece of code would be:
1832 *     rand + 5
1833 * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1834 * the +5 is its argument.
1835 */
1836
1837STATIC void
1838S_check_uni(pTHX)
1839{
1840    const char *s;
1841    const char *t;
1842
1843    if (PL_oldoldbufptr != PL_last_uni)
1844	return;
1845    while (isSPACE(*PL_last_uni))
1846	PL_last_uni++;
1847    s = PL_last_uni;
1848    while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1849	s += UTF ? UTF8SKIP(s) : 1;
1850    if ((t = strchr(s, '(')) && t < PL_bufptr)
1851	return;
1852
1853    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1854		     "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
1855		     UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1856}
1857
1858/*
1859 * LOP : macro to build a list operator.  Its behaviour has been replaced
1860 * with a subroutine, S_lop() for which LOP is just another name.
1861 */
1862
1863#define LOP(f,x) return lop(f,x,s)
1864
1865/*
1866 * S_lop
1867 * Build a list operator (or something that might be one).  The rules:
1868 *  - if we have a next token, then it's a list operator (no parens) for
1869 *    which the next token has already been parsed; e.g.,
1870 *       sort foo @args
1871 *       sort foo (@args)
1872 *  - if the next thing is an opening paren, then it's a function
1873 *  - else it's a list operator
1874 */
1875
1876STATIC I32
1877S_lop(pTHX_ I32 f, int x, char *s)
1878{
1879    PERL_ARGS_ASSERT_LOP;
1880
1881    pl_yylval.ival = f;
1882    CLINE;
1883    PL_bufptr = s;
1884    PL_last_lop = PL_oldbufptr;
1885    PL_last_lop_op = (OPCODE)f;
1886    if (PL_nexttoke)
1887	goto lstop;
1888    PL_expect = x;
1889    if (*s == '(')
1890	return REPORT(FUNC);
1891    s = skipspace(s);
1892    if (*s == '(')
1893	return REPORT(FUNC);
1894    else {
1895	lstop:
1896	if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1897	    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1898	return REPORT(LSTOP);
1899    }
1900}
1901
1902/*
1903 * S_force_next
1904 * When the lexer realizes it knows the next token (for instance,
1905 * it is reordering tokens for the parser) then it can call S_force_next
1906 * to know what token to return the next time the lexer is called.  Caller
1907 * will need to set PL_nextval[] and possibly PL_expect to ensure
1908 * the lexer handles the token correctly.
1909 */
1910
1911STATIC void
1912S_force_next(pTHX_ I32 type)
1913{
1914#ifdef DEBUGGING
1915    if (DEBUG_T_TEST) {
1916        PerlIO_printf(Perl_debug_log, "### forced token:\n");
1917	tokereport(type, &NEXTVAL_NEXTTOKE);
1918    }
1919#endif
1920    assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1921    PL_nexttype[PL_nexttoke] = type;
1922    PL_nexttoke++;
1923    if (PL_lex_state != LEX_KNOWNEXT) {
1924	PL_lex_defer = PL_lex_state;
1925	PL_lex_state = LEX_KNOWNEXT;
1926    }
1927}
1928
1929/*
1930 * S_postderef
1931 *
1932 * This subroutine handles postfix deref syntax after the arrow has already
1933 * been emitted.  @* $* etc. are emitted as two separate token right here.
1934 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1935 * only the first, leaving yylex to find the next.
1936 */
1937
1938static int
1939S_postderef(pTHX_ int const funny, char const next)
1940{
1941    assert(funny == DOLSHARP || strchr("$@%&*", funny));
1942    if (next == '*') {
1943	PL_expect = XOPERATOR;
1944	if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1945	    assert('@' == funny || '$' == funny || DOLSHARP == funny);
1946	    PL_lex_state = LEX_INTERPEND;
1947	    force_next(POSTJOIN);
1948	}
1949	force_next(next);
1950	PL_bufptr+=2;
1951    }
1952    else {
1953	if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1954	 && !PL_lex_brackets)
1955	    PL_lex_dojoin = 2;
1956	PL_expect = XOPERATOR;
1957	PL_bufptr++;
1958    }
1959    return funny;
1960}
1961
1962void
1963Perl_yyunlex(pTHX)
1964{
1965    int yyc = PL_parser->yychar;
1966    if (yyc != YYEMPTY) {
1967	if (yyc) {
1968	    NEXTVAL_NEXTTOKE = PL_parser->yylval;
1969	    if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1970		PL_lex_allbrackets--;
1971		PL_lex_brackets--;
1972		yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1973	    } else if (yyc == '('/*)*/) {
1974		PL_lex_allbrackets--;
1975		yyc |= (2<<24);
1976	    }
1977	    force_next(yyc);
1978	}
1979	PL_parser->yychar = YYEMPTY;
1980    }
1981}
1982
1983STATIC SV *
1984S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1985{
1986    SV * const sv = newSVpvn_utf8(start, len,
1987				  !IN_BYTES
1988				  && UTF
1989				  && !is_invariant_string((const U8*)start, len)
1990				  && is_utf8_string((const U8*)start, len));
1991    return sv;
1992}
1993
1994/*
1995 * S_force_word
1996 * When the lexer knows the next thing is a word (for instance, it has
1997 * just seen -> and it knows that the next char is a word char, then
1998 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1999 * lookahead.
2000 *
2001 * Arguments:
2002 *   char *start : buffer position (must be within PL_linestr)
2003 *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2004 *   int check_keyword : if true, Perl checks to make sure the word isn't
2005 *       a keyword (do this if the word is a label, e.g. goto FOO)
2006 *   int allow_pack : if true, : characters will also be allowed (require,
2007 *       use, etc. do this)
2008 */
2009
2010STATIC char *
2011S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2012{
2013    char *s;
2014    STRLEN len;
2015
2016    PERL_ARGS_ASSERT_FORCE_WORD;
2017
2018    start = skipspace(start);
2019    s = start;
2020    if (isIDFIRST_lazy_if(s,UTF)
2021        || (allow_pack && *s == ':') )
2022    {
2023	s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2024	if (check_keyword) {
2025	  char *s2 = PL_tokenbuf;
2026	  STRLEN len2 = len;
2027	  if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2028	    s2 += 6, len2 -= 6;
2029	  if (keyword(s2, len2, 0))
2030	    return start;
2031	}
2032	if (token == METHOD) {
2033	    s = skipspace(s);
2034	    if (*s == '(')
2035		PL_expect = XTERM;
2036	    else {
2037		PL_expect = XOPERATOR;
2038	    }
2039	}
2040	NEXTVAL_NEXTTOKE.opval
2041	    = (OP*)newSVOP(OP_CONST,0,
2042			   S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2043	NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2044	force_next(token);
2045    }
2046    return s;
2047}
2048
2049/*
2050 * S_force_ident
2051 * Called when the lexer wants $foo *foo &foo etc, but the program
2052 * text only contains the "foo" portion.  The first argument is a pointer
2053 * to the "foo", and the second argument is the type symbol to prefix.
2054 * Forces the next token to be a "WORD".
2055 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2056 */
2057
2058STATIC void
2059S_force_ident(pTHX_ const char *s, int kind)
2060{
2061    PERL_ARGS_ASSERT_FORCE_IDENT;
2062
2063    if (s[0]) {
2064	const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2065	OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2066                                                                UTF ? SVf_UTF8 : 0));
2067	NEXTVAL_NEXTTOKE.opval = o;
2068	force_next(WORD);
2069	if (kind) {
2070	    o->op_private = OPpCONST_ENTERED;
2071	    /* XXX see note in pp_entereval() for why we forgo typo
2072	       warnings if the symbol must be introduced in an eval.
2073	       GSAR 96-10-12 */
2074	    gv_fetchpvn_flags(s, len,
2075			      (PL_in_eval ? GV_ADDMULTI
2076			      : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2077			      kind == '$' ? SVt_PV :
2078			      kind == '@' ? SVt_PVAV :
2079			      kind == '%' ? SVt_PVHV :
2080			      SVt_PVGV
2081			      );
2082	}
2083    }
2084}
2085
2086static void
2087S_force_ident_maybe_lex(pTHX_ char pit)
2088{
2089    NEXTVAL_NEXTTOKE.ival = pit;
2090    force_next('p');
2091}
2092
2093NV
2094Perl_str_to_version(pTHX_ SV *sv)
2095{
2096    NV retval = 0.0;
2097    NV nshift = 1.0;
2098    STRLEN len;
2099    const char *start = SvPV_const(sv,len);
2100    const char * const end = start + len;
2101    const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2102
2103    PERL_ARGS_ASSERT_STR_TO_VERSION;
2104
2105    while (start < end) {
2106	STRLEN skip;
2107	UV n;
2108	if (utf)
2109	    n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2110	else {
2111	    n = *(U8*)start;
2112	    skip = 1;
2113	}
2114	retval += ((NV)n)/nshift;
2115	start += skip;
2116	nshift *= 1000;
2117    }
2118    return retval;
2119}
2120
2121/*
2122 * S_force_version
2123 * Forces the next token to be a version number.
2124 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2125 * and if "guessing" is TRUE, then no new token is created (and the caller
2126 * must use an alternative parsing method).
2127 */
2128
2129STATIC char *
2130S_force_version(pTHX_ char *s, int guessing)
2131{
2132    OP *version = NULL;
2133    char *d;
2134
2135    PERL_ARGS_ASSERT_FORCE_VERSION;
2136
2137    s = skipspace(s);
2138
2139    d = s;
2140    if (*d == 'v')
2141	d++;
2142    if (isDIGIT(*d)) {
2143	while (isDIGIT(*d) || *d == '_' || *d == '.')
2144	    d++;
2145        if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2146	    SV *ver;
2147            s = scan_num(s, &pl_yylval);
2148            version = pl_yylval.opval;
2149	    ver = cSVOPx(version)->op_sv;
2150	    if (SvPOK(ver) && !SvNIOK(ver)) {
2151		SvUPGRADE(ver, SVt_PVNV);
2152		SvNV_set(ver, str_to_version(ver));
2153		SvNOK_on(ver);		/* hint that it is a version */
2154	    }
2155        }
2156	else if (guessing) {
2157	    return s;
2158	}
2159    }
2160
2161    /* NOTE: The parser sees the package name and the VERSION swapped */
2162    NEXTVAL_NEXTTOKE.opval = version;
2163    force_next(WORD);
2164
2165    return s;
2166}
2167
2168/*
2169 * S_force_strict_version
2170 * Forces the next token to be a version number using strict syntax rules.
2171 */
2172
2173STATIC char *
2174S_force_strict_version(pTHX_ char *s)
2175{
2176    OP *version = NULL;
2177    const char *errstr = NULL;
2178
2179    PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2180
2181    while (isSPACE(*s)) /* leading whitespace */
2182	s++;
2183
2184    if (is_STRICT_VERSION(s,&errstr)) {
2185	SV *ver = newSV(0);
2186	s = (char *)scan_version(s, ver, 0);
2187	version = newSVOP(OP_CONST, 0, ver);
2188    }
2189    else if ((*s != ';' && *s != '{' && *s != '}' )
2190             && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2191    {
2192	PL_bufptr = s;
2193	if (errstr)
2194	    yyerror(errstr); /* version required */
2195	return s;
2196    }
2197
2198    /* NOTE: The parser sees the package name and the VERSION swapped */
2199    NEXTVAL_NEXTTOKE.opval = version;
2200    force_next(WORD);
2201
2202    return s;
2203}
2204
2205/*
2206 * S_tokeq
2207 * Tokenize a quoted string passed in as an SV.  It finds the next
2208 * chunk, up to end of string or a backslash.  It may make a new
2209 * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2210 * turns \\ into \.
2211 */
2212
2213STATIC SV *
2214S_tokeq(pTHX_ SV *sv)
2215{
2216    char *s;
2217    char *send;
2218    char *d;
2219    SV *pv = sv;
2220
2221    PERL_ARGS_ASSERT_TOKEQ;
2222
2223    assert (SvPOK(sv));
2224    assert (SvLEN(sv));
2225    assert (!SvIsCOW(sv));
2226    if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2227	goto finish;
2228    s = SvPVX(sv);
2229    send = SvEND(sv);
2230    /* This is relying on the SV being "well formed" with a trailing '\0'  */
2231    while (s < send && !(*s == '\\' && s[1] == '\\'))
2232	s++;
2233    if (s == send)
2234	goto finish;
2235    d = s;
2236    if ( PL_hints & HINT_NEW_STRING ) {
2237	pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2238			    SVs_TEMP | SvUTF8(sv));
2239    }
2240    while (s < send) {
2241	if (*s == '\\') {
2242	    if (s + 1 < send && (s[1] == '\\'))
2243		s++;		/* all that, just for this */
2244	}
2245	*d++ = *s++;
2246    }
2247    *d = '\0';
2248    SvCUR_set(sv, d - SvPVX_const(sv));
2249  finish:
2250    if ( PL_hints & HINT_NEW_STRING )
2251       return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2252    return sv;
2253}
2254
2255/*
2256 * Now come three functions related to double-quote context,
2257 * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2258 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2259 * interact with PL_lex_state, and create fake ( ... ) argument lists
2260 * to handle functions and concatenation.
2261 * For example,
2262 *   "foo\lbar"
2263 * is tokenised as
2264 *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2265 */
2266
2267/*
2268 * S_sublex_start
2269 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2270 *
2271 * Pattern matching will set PL_lex_op to the pattern-matching op to
2272 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2273 *
2274 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2275 *
2276 * Everything else becomes a FUNC.
2277 *
2278 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2279 * had an OP_CONST or OP_READLINE).  This just sets us up for a
2280 * call to S_sublex_push().
2281 */
2282
2283STATIC I32
2284S_sublex_start(pTHX)
2285{
2286    const I32 op_type = pl_yylval.ival;
2287
2288    if (op_type == OP_NULL) {
2289	pl_yylval.opval = PL_lex_op;
2290	PL_lex_op = NULL;
2291	return THING;
2292    }
2293    if (op_type == OP_CONST) {
2294	SV *sv = PL_lex_stuff;
2295	PL_lex_stuff = NULL;
2296	sv = tokeq(sv);
2297
2298	if (SvTYPE(sv) == SVt_PVIV) {
2299	    /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2300	    STRLEN len;
2301	    const char * const p = SvPV_const(sv, len);
2302	    SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2303	    SvREFCNT_dec(sv);
2304	    sv = nsv;
2305	}
2306	pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2307	return THING;
2308    }
2309
2310    PL_sublex_info.super_state = PL_lex_state;
2311    PL_sublex_info.sub_inwhat = (U16)op_type;
2312    PL_sublex_info.sub_op = PL_lex_op;
2313    PL_lex_state = LEX_INTERPPUSH;
2314
2315    PL_expect = XTERM;
2316    if (PL_lex_op) {
2317	pl_yylval.opval = PL_lex_op;
2318	PL_lex_op = NULL;
2319	return PMFUNC;
2320    }
2321    else
2322	return FUNC;
2323}
2324
2325/*
2326 * S_sublex_push
2327 * Create a new scope to save the lexing state.  The scope will be
2328 * ended in S_sublex_done.  Returns a '(', starting the function arguments
2329 * to the uc, lc, etc. found before.
2330 * Sets PL_lex_state to LEX_INTERPCONCAT.
2331 */
2332
2333STATIC I32
2334S_sublex_push(pTHX)
2335{
2336    LEXSHARED *shared;
2337    const bool is_heredoc = PL_multi_close == '<';
2338    ENTER;
2339
2340    PL_lex_state = PL_sublex_info.super_state;
2341    SAVEI8(PL_lex_dojoin);
2342    SAVEI32(PL_lex_brackets);
2343    SAVEI32(PL_lex_allbrackets);
2344    SAVEI32(PL_lex_formbrack);
2345    SAVEI8(PL_lex_fakeeof);
2346    SAVEI32(PL_lex_casemods);
2347    SAVEI32(PL_lex_starts);
2348    SAVEI8(PL_lex_state);
2349    SAVEI8(PL_lex_defer);
2350    SAVESPTR(PL_lex_repl);
2351    SAVEVPTR(PL_lex_inpat);
2352    SAVEI16(PL_lex_inwhat);
2353    if (is_heredoc)
2354    {
2355	SAVECOPLINE(PL_curcop);
2356	SAVEI32(PL_multi_end);
2357	SAVEI32(PL_parser->herelines);
2358	PL_parser->herelines = 0;
2359    }
2360    SAVEI8(PL_multi_close);
2361    SAVEPPTR(PL_bufptr);
2362    SAVEPPTR(PL_bufend);
2363    SAVEPPTR(PL_oldbufptr);
2364    SAVEPPTR(PL_oldoldbufptr);
2365    SAVEPPTR(PL_last_lop);
2366    SAVEPPTR(PL_last_uni);
2367    SAVEPPTR(PL_linestart);
2368    SAVESPTR(PL_linestr);
2369    SAVEGENERICPV(PL_lex_brackstack);
2370    SAVEGENERICPV(PL_lex_casestack);
2371    SAVEGENERICPV(PL_parser->lex_shared);
2372    SAVEBOOL(PL_parser->lex_re_reparsing);
2373    SAVEI32(PL_copline);
2374
2375    /* The here-doc parser needs to be able to peek into outer lexing
2376       scopes to find the body of the here-doc.  So we put PL_linestr and
2377       PL_bufptr into lex_shared, to ���share��� those values.
2378     */
2379    PL_parser->lex_shared->ls_linestr = PL_linestr;
2380    PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2381
2382    PL_linestr = PL_lex_stuff;
2383    PL_lex_repl = PL_sublex_info.repl;
2384    PL_lex_stuff = NULL;
2385    PL_sublex_info.repl = NULL;
2386
2387    /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2388       set for an inner quote-like operator and then an error causes scope-
2389       popping.  We must not have a PL_lex_stuff value left dangling, as
2390       that breaks assumptions elsewhere.  See bug #123617.  */
2391    SAVEGENERICSV(PL_lex_stuff);
2392    SAVEGENERICSV(PL_sublex_info.repl);
2393
2394    PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2395	= SvPVX(PL_linestr);
2396    PL_bufend += SvCUR(PL_linestr);
2397    PL_last_lop = PL_last_uni = NULL;
2398    SAVEFREESV(PL_linestr);
2399    if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2400
2401    PL_lex_dojoin = FALSE;
2402    PL_lex_brackets = PL_lex_formbrack = 0;
2403    PL_lex_allbrackets = 0;
2404    PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2405    Newx(PL_lex_brackstack, 120, char);
2406    Newx(PL_lex_casestack, 12, char);
2407    PL_lex_casemods = 0;
2408    *PL_lex_casestack = '\0';
2409    PL_lex_starts = 0;
2410    PL_lex_state = LEX_INTERPCONCAT;
2411    if (is_heredoc)
2412	CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2413    PL_copline = NOLINE;
2414
2415    Newxz(shared, 1, LEXSHARED);
2416    shared->ls_prev = PL_parser->lex_shared;
2417    PL_parser->lex_shared = shared;
2418
2419    PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2420    if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2421    if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2422	PL_lex_inpat = PL_sublex_info.sub_op;
2423    else
2424	PL_lex_inpat = NULL;
2425
2426    PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2427    PL_in_eval &= ~EVAL_RE_REPARSING;
2428
2429    return '(';
2430}
2431
2432/*
2433 * S_sublex_done
2434 * Restores lexer state after a S_sublex_push.
2435 */
2436
2437STATIC I32
2438S_sublex_done(pTHX)
2439{
2440    if (!PL_lex_starts++) {
2441	SV * const sv = newSVpvs("");
2442	if (SvUTF8(PL_linestr))
2443	    SvUTF8_on(sv);
2444	PL_expect = XOPERATOR;
2445	pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2446	return THING;
2447    }
2448
2449    if (PL_lex_casemods) {		/* oops, we've got some unbalanced parens */
2450	PL_lex_state = LEX_INTERPCASEMOD;
2451	return yylex();
2452    }
2453
2454    /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2455    assert(PL_lex_inwhat != OP_TRANSR);
2456    if (PL_lex_repl) {
2457	assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2458	PL_linestr = PL_lex_repl;
2459	PL_lex_inpat = 0;
2460	PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2461	PL_bufend += SvCUR(PL_linestr);
2462	PL_last_lop = PL_last_uni = NULL;
2463	PL_lex_dojoin = FALSE;
2464	PL_lex_brackets = 0;
2465	PL_lex_allbrackets = 0;
2466	PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2467	PL_lex_casemods = 0;
2468	*PL_lex_casestack = '\0';
2469	PL_lex_starts = 0;
2470	if (SvEVALED(PL_lex_repl)) {
2471	    PL_lex_state = LEX_INTERPNORMAL;
2472	    PL_lex_starts++;
2473	    /*	we don't clear PL_lex_repl here, so that we can check later
2474		whether this is an evalled subst; that means we rely on the
2475		logic to ensure sublex_done() is called again only via the
2476		branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2477	}
2478	else {
2479	    PL_lex_state = LEX_INTERPCONCAT;
2480	    PL_lex_repl = NULL;
2481	}
2482	if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2483	    CopLINE(PL_curcop) +=
2484		((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2485		 + PL_parser->herelines;
2486	    PL_parser->herelines = 0;
2487	}
2488	return '/';
2489    }
2490    else {
2491	const line_t l = CopLINE(PL_curcop);
2492	LEAVE;
2493	if (PL_multi_close == '<')
2494	    PL_parser->herelines += l - PL_multi_end;
2495	PL_bufend = SvPVX(PL_linestr);
2496	PL_bufend += SvCUR(PL_linestr);
2497	PL_expect = XOPERATOR;
2498	return ')';
2499    }
2500}
2501
2502PERL_STATIC_INLINE SV*
2503S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2504{
2505    /* <s> points to first character of interior of \N{}, <e> to one beyond the
2506     * interior, hence to the "}".  Finds what the name resolves to, returning
2507     * an SV* containing it; NULL if no valid one found */
2508
2509    SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2510
2511    HV * table;
2512    SV **cvp;
2513    SV *cv;
2514    SV *rv;
2515    HV *stash;
2516    const U8* first_bad_char_loc;
2517    const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2518
2519    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2520
2521    if (!SvCUR(res)) {
2522        Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2523                       "Unknown charname '' is deprecated");
2524        return res;
2525    }
2526
2527    if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2528                                     e - backslash_ptr,
2529                                     &first_bad_char_loc))
2530    {
2531        /* If warnings are on, this will print a more detailed analysis of what
2532         * is wrong than the error message below */
2533        utf8n_to_uvchr(first_bad_char_loc,
2534                       e - ((char *) first_bad_char_loc),
2535                       NULL, 0);
2536
2537        /* We deliberately don't try to print the malformed character, which
2538         * might not print very well; it also may be just the first of many
2539         * malformations, so don't print what comes after it */
2540        yyerror_pv(Perl_form(aTHX_
2541            "Malformed UTF-8 character immediately after '%.*s'",
2542            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
2543                   SVf_UTF8);
2544	return NULL;
2545    }
2546
2547    res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2548                        /* include the <}> */
2549                        e - backslash_ptr + 1);
2550    if (! SvPOK(res)) {
2551        SvREFCNT_dec_NN(res);
2552        return NULL;
2553    }
2554
2555    /* See if the charnames handler is the Perl core's, and if so, we can skip
2556     * the validation needed for a user-supplied one, as Perl's does its own
2557     * validation. */
2558    table = GvHV(PL_hintgv);		 /* ^H */
2559    cvp = hv_fetchs(table, "charnames", FALSE);
2560    if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2561        SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2562    {
2563        const char * const name = HvNAME(stash);
2564        if (HvNAMELEN(stash) == sizeof("_charnames")-1
2565         && strEQ(name, "_charnames")) {
2566           return res;
2567       }
2568    }
2569
2570    /* Here, it isn't Perl's charname handler.  We can't rely on a
2571     * user-supplied handler to validate the input name.  For non-ut8 input,
2572     * look to see that the first character is legal.  Then loop through the
2573     * rest checking that each is a continuation */
2574
2575    /* This code makes the reasonable assumption that the only Latin1-range
2576     * characters that begin a character name alias are alphabetic, otherwise
2577     * would have to create a isCHARNAME_BEGIN macro */
2578
2579    if (! UTF) {
2580        if (! isALPHAU(*s)) {
2581            goto bad_charname;
2582        }
2583        s++;
2584        while (s < e) {
2585            if (! isCHARNAME_CONT(*s)) {
2586                goto bad_charname;
2587            }
2588	    if (*s == ' ' && *(s-1) == ' ') {
2589                goto multi_spaces;
2590            }
2591	    if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2592                Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2593                           "NO-BREAK SPACE in a charnames "
2594                           "alias definition is deprecated");
2595            }
2596            s++;
2597        }
2598    }
2599    else {
2600        /* Similarly for utf8.  For invariants can check directly; for other
2601         * Latin1, can calculate their code point and check; otherwise  use a
2602         * swash */
2603        if (UTF8_IS_INVARIANT(*s)) {
2604            if (! isALPHAU(*s)) {
2605                goto bad_charname;
2606            }
2607            s++;
2608        } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2609            if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2610                goto bad_charname;
2611            }
2612            s += 2;
2613        }
2614        else {
2615            if (! PL_utf8_charname_begin) {
2616                U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2617                PL_utf8_charname_begin = _core_swash_init("utf8",
2618                                                        "_Perl_Charname_Begin",
2619                                                        &PL_sv_undef,
2620                                                        1, 0, NULL, &flags);
2621            }
2622            if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2623                goto bad_charname;
2624            }
2625            s += UTF8SKIP(s);
2626        }
2627
2628        while (s < e) {
2629            if (UTF8_IS_INVARIANT(*s)) {
2630                if (! isCHARNAME_CONT(*s)) {
2631                    goto bad_charname;
2632                }
2633                if (*s == ' ' && *(s-1) == ' ') {
2634                    goto multi_spaces;
2635                }
2636                s++;
2637            }
2638            else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2639                if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2640                {
2641                    goto bad_charname;
2642                }
2643                if (*s == *NBSP_UTF8
2644                    && *(s+1) == *(NBSP_UTF8+1)
2645                    && ckWARN_d(WARN_DEPRECATED))
2646                {
2647                    Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2648                                "NO-BREAK SPACE in a charnames "
2649                                "alias definition is deprecated");
2650                }
2651                s += 2;
2652            }
2653            else {
2654                if (! PL_utf8_charname_continue) {
2655                    U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2656                    PL_utf8_charname_continue = _core_swash_init("utf8",
2657                                                "_Perl_Charname_Continue",
2658                                                &PL_sv_undef,
2659                                                1, 0, NULL, &flags);
2660                }
2661                if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2662                    goto bad_charname;
2663                }
2664                s += UTF8SKIP(s);
2665            }
2666        }
2667    }
2668    if (*(s-1) == ' ') {
2669        yyerror_pv(
2670            Perl_form(aTHX_
2671            "charnames alias definitions may not contain trailing "
2672            "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2673            (int)(s - backslash_ptr + 1), backslash_ptr,
2674            (int)(e - s + 1), s + 1
2675            ),
2676        UTF ? SVf_UTF8 : 0);
2677        return NULL;
2678    }
2679
2680    if (SvUTF8(res)) { /* Don't accept malformed input */
2681        const U8* first_bad_char_loc;
2682        STRLEN len;
2683        const char* const str = SvPV_const(res, len);
2684        if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2685            /* If warnings are on, this will print a more detailed analysis of
2686             * what is wrong than the error message below */
2687            utf8n_to_uvchr(first_bad_char_loc,
2688                           (char *) first_bad_char_loc - str,
2689                           NULL, 0);
2690
2691            /* We deliberately don't try to print the malformed character,
2692             * which might not print very well; it also may be just the first
2693             * of many malformations, so don't print what comes after it */
2694            yyerror_pv(
2695              Perl_form(aTHX_
2696                "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2697                 (int) (e - backslash_ptr + 1), backslash_ptr,
2698                 (int) ((char *) first_bad_char_loc - str), str
2699              ),
2700              SVf_UTF8);
2701            return NULL;
2702        }
2703    }
2704
2705    return res;
2706
2707  bad_charname: {
2708
2709        /* The final %.*s makes sure that should the trailing NUL be missing
2710         * that this print won't run off the end of the string */
2711        yyerror_pv(
2712          Perl_form(aTHX_
2713            "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2714            (int)(s - backslash_ptr + 1), backslash_ptr,
2715            (int)(e - s + 1), s + 1
2716          ),
2717          UTF ? SVf_UTF8 : 0);
2718        return NULL;
2719    }
2720
2721  multi_spaces:
2722        yyerror_pv(
2723          Perl_form(aTHX_
2724            "charnames alias definitions may not contain a sequence of "
2725            "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2726            (int)(s - backslash_ptr + 1), backslash_ptr,
2727            (int)(e - s + 1), s + 1
2728          ),
2729          UTF ? SVf_UTF8 : 0);
2730        return NULL;
2731}
2732
2733/*
2734  scan_const
2735
2736  Extracts the next constant part of a pattern, double-quoted string,
2737  or transliteration.  This is terrifying code.
2738
2739  For example, in parsing the double-quoted string "ab\x63$d", it would
2740  stop at the '$' and return an OP_CONST containing 'abc'.
2741
2742  It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2743  processing a pattern (PL_lex_inpat is true), a transliteration
2744  (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2745
2746  Returns a pointer to the character scanned up to. If this is
2747  advanced from the start pointer supplied (i.e. if anything was
2748  successfully parsed), will leave an OP_CONST for the substring scanned
2749  in pl_yylval. Caller must intuit reason for not parsing further
2750  by looking at the next characters herself.
2751
2752  In patterns:
2753    expand:
2754      \N{FOO}  => \N{U+hex_for_character_FOO}
2755      (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2756
2757    pass through:
2758	all other \-char, including \N and \N{ apart from \N{ABC}
2759
2760    stops on:
2761	@ and $ where it appears to be a var, but not for $ as tail anchor
2762        \l \L \u \U \Q \E
2763	(?{  or  (??{
2764
2765  In transliterations:
2766    characters are VERY literal, except for - not at the start or end
2767    of the string, which indicates a range. If the range is in bytes,
2768    scan_const expands the range to the full set of intermediate
2769    characters. If the range is in utf8, the hyphen is replaced with
2770    a certain range mark which will be handled by pmtrans() in op.c.
2771
2772  In double-quoted strings:
2773    backslashes:
2774      double-quoted style: \r and \n
2775      constants: \x31, etc.
2776      deprecated backrefs: \1 (in substitution replacements)
2777      case and quoting: \U \Q \E
2778    stops on @ and $
2779
2780  scan_const does *not* construct ops to handle interpolated strings.
2781  It stops processing as soon as it finds an embedded $ or @ variable
2782  and leaves it to the caller to work out what's going on.
2783
2784  embedded arrays (whether in pattern or not) could be:
2785      @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2786
2787  $ in double-quoted strings must be the symbol of an embedded scalar.
2788
2789  $ in pattern could be $foo or could be tail anchor.  Assumption:
2790  it's a tail anchor if $ is the last thing in the string, or if it's
2791  followed by one of "()| \r\n\t"
2792
2793  \1 (backreferences) are turned into $1 in substitutions
2794
2795  The structure of the code is
2796      while (there's a character to process) {
2797	  handle transliteration ranges
2798	  skip regexp comments /(?#comment)/ and codes /(?{code})/
2799	  skip #-initiated comments in //x patterns
2800	  check for embedded arrays
2801	  check for embedded scalars
2802	  if (backslash) {
2803	      deprecate \1 in substitution replacements
2804	      handle string-changing backslashes \l \U \Q \E, etc.
2805	      switch (what was escaped) {
2806		  handle \- in a transliteration (becomes a literal -)
2807		  if a pattern and not \N{, go treat as regular character
2808		  handle \132 (octal characters)
2809		  handle \x15 and \x{1234} (hex characters)
2810		  handle \N{name} (named characters, also \N{3,5} in a pattern)
2811		  handle \cV (control characters)
2812		  handle printf-style backslashes (\f, \r, \n, etc)
2813	      } (end switch)
2814	      continue
2815	  } (end if backslash)
2816          handle regular character
2817    } (end while character to read)
2818
2819*/
2820
2821STATIC char *
2822S_scan_const(pTHX_ char *start)
2823{
2824    char *send = PL_bufend;		/* end of the constant */
2825    SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2826                                           on sizing. */
2827    char *s = start;			/* start of the constant */
2828    char *d = SvPVX(sv);		/* destination for copies */
2829    bool dorange = FALSE;               /* are we in a translit range? */
2830    bool didrange = FALSE;              /* did we just finish a range? */
2831    bool in_charclass = FALSE;          /* within /[...]/ */
2832    bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2833    bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2834                                           UTF8?  But, this can show as true
2835                                           when the source isn't utf8, as for
2836                                           example when it is entirely composed
2837                                           of hex constants */
2838    SV *res;		                /* result from charnames */
2839    STRLEN offset_to_max;   /* The offset in the output to where the range
2840                               high-end character is temporarily placed */
2841
2842    /* Note on sizing:  The scanned constant is placed into sv, which is
2843     * initialized by newSV() assuming one byte of output for every byte of
2844     * input.  This routine expects newSV() to allocate an extra byte for a
2845     * trailing NUL, which this routine will append if it gets to the end of
2846     * the input.  There may be more bytes of input than output (eg., \N{LATIN
2847     * CAPITAL LETTER A}), or more output than input if the constant ends up
2848     * recoded to utf8, but each time a construct is found that might increase
2849     * the needed size, SvGROW() is called.  Its size parameter each time is
2850     * based on the best guess estimate at the time, namely the length used so
2851     * far, plus the length the current construct will occupy, plus room for
2852     * the trailing NUL, plus one byte for every input byte still unscanned */
2853
2854    UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2855                       before set */
2856#ifdef EBCDIC
2857    int backslash_N = 0;            /* ? was the character from \N{} */
2858    int non_portable_endpoint = 0;  /* ? In a range is an endpoint
2859                                       platform-specific like \x65 */
2860#endif
2861
2862    PERL_ARGS_ASSERT_SCAN_CONST;
2863
2864    assert(PL_lex_inwhat != OP_TRANSR);
2865    if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2866	/* If we are doing a trans and we know we want UTF8 set expectation */
2867	has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2868	this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2869    }
2870
2871    /* Protect sv from errors and fatal warnings. */
2872    ENTER_with_name("scan_const");
2873    SAVEFREESV(sv);
2874
2875    while (s < send
2876           || dorange   /* Handle tr/// range at right edge of input */
2877    ) {
2878
2879        /* get transliterations out of the way (they're most literal) */
2880	if (PL_lex_inwhat == OP_TRANS) {
2881
2882            /* But there isn't any special handling necessary unless there is a
2883             * range, so for most cases we just drop down and handle the value
2884             * as any other.  There are two exceptions.
2885             *
2886             * 1.  A minus sign indicates that we are actually going to have
2887             *     a range.  In this case, skip the '-', set a flag, then drop
2888             *     down to handle what should be the end range value.
2889             * 2.  After we've handled that value, the next time through, that
2890             *     flag is set and we fix up the range.
2891             *
2892             * Ranges entirely within Latin1 are expanded out entirely, in
2893             * order to avoid the significant overhead of making a swash.
2894             * Ranges that extend above Latin1 have to have a swash, so there
2895             * is no advantage to abbreviate them here, so they are stored here
2896             * as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte signifies a
2897             * hyphen without any possible ambiguity.  On EBCDIC machines, if
2898             * the range is expressed as Unicode, the Latin1 portion is
2899             * expanded out even if the entire range extends above Latin1.
2900             * This is because each code point in it has to be processed here
2901             * individually to get its native translation */
2902
2903	    if (! dorange) {
2904
2905                /* Here, we don't think we're in a range.  If we've processed
2906                 * at least one character, then see if this next one is a '-',
2907                 * indicating the previous one was the start of a range.  But
2908                 * don't bother if we're too close to the end for the minus to
2909                 * mean that. */
2910                if (*s != '-' || s >= send - 1 || s == start) {
2911
2912                    /* A regular character.  Process like any other, but first
2913                     * clear any flags */
2914                    didrange = FALSE;
2915                    dorange = FALSE;
2916#ifdef EBCDIC
2917                    non_portable_endpoint = 0;
2918                    backslash_N = 0;
2919#endif
2920                    /* Drops down to generic code to process current byte */
2921                }
2922                else {
2923                    if (didrange) { /* Something like y/A-C-Z// */
2924                        Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2925                    }
2926
2927                    dorange = TRUE;
2928
2929                    s++;    /* Skip past the minus */
2930
2931                    /* d now points to where the end-range character will be
2932                     * placed.  Save it so won't have to go finding it later,
2933                     * and drop down to get that character.  (Actually we
2934                     * instead save the offset, to handle the case where a
2935                     * realloc in the meantime could change the actual
2936                     * pointer).  We'll finish processing the range the next
2937                     * time through the loop */
2938                    offset_to_max = d - SvPVX_const(sv);
2939                }
2940            }  /* End of not a range */
2941            else {
2942                /* Here we have parsed a range.  Now must handle it.  At this
2943                 * point:
2944                 * 'sv' is a SV* that contains the output string we are
2945                 *      constructing.  The final two characters in that string
2946                 *      are the range start and range end, in order.
2947                 * 'd'  points to just beyond the range end in the 'sv' string,
2948                 *      where we would next place something
2949                 * 'offset_to_max' is the offset in 'sv' at which the character
2950                 *      before 'd' begins.
2951                 */
2952                const char * max_ptr = SvPVX_const(sv) + offset_to_max;
2953                const char * min_ptr;
2954                IV range_min;
2955		IV range_max;	/* last character in range */
2956                STRLEN save_offset;
2957                STRLEN grow;
2958#ifndef EBCDIC  /* Not meaningful except in EBCDIC, so initialize to false */
2959                const bool convert_unicode = FALSE;
2960                const IV real_range_max = 0;
2961#else
2962                bool convert_unicode;
2963                IV real_range_max = 0;
2964#endif
2965
2966                /* Get the range-ends code point values. */
2967                if (has_utf8) {
2968                    /* We know the utf8 is valid, because we just constructed
2969                     * it ourselves in previous loop iterations */
2970                    min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
2971                    range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
2972                    range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
2973                }
2974                else {
2975                    min_ptr = max_ptr - 1;
2976                    range_min = * (U8*) min_ptr;
2977                    range_max = * (U8*) max_ptr;
2978                }
2979
2980#ifdef EBCDIC
2981                /* On EBCDIC platforms, we may have to deal with portable
2982                 * ranges.  These happen if at least one range endpoint is a
2983                 * Unicode value (\N{...}), or if the range is a subset of
2984                 * [A-Z] or [a-z], and both ends are literal characters,
2985                 * like 'A', and not like \x{C1} */
2986                if ((convert_unicode
2987                     = cBOOL(backslash_N)   /* \N{} forces Unicode, hence
2988                                               portable range */
2989                      || (   ! non_portable_endpoint
2990                          && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
2991                             || (isUPPER_A(range_min) && isUPPER_A(range_max))))
2992                )) {
2993
2994                    /* Special handling is needed for these portable ranges.
2995                     * They are defined to all be in Unicode terms, which
2996                     * include all Unicode code points between the end points.
2997                     * Convert to Unicode to get the Unicode range.  Later we
2998                     * will convert each code point in the range back to
2999                     * native.  */
3000                    range_min = NATIVE_TO_UNI(range_min);
3001                    range_max = NATIVE_TO_UNI(range_max);
3002                }
3003#endif
3004
3005                if (range_min > range_max) {
3006                    if (convert_unicode) {
3007                        /* Need to convert back to native for meaningful
3008                         * messages for this platform */
3009                        range_min = UNI_TO_NATIVE(range_min);
3010                        range_max = UNI_TO_NATIVE(range_max);
3011                    }
3012
3013                    /* Use the characters themselves for the error message if
3014                     * ASCII printables; otherwise some visible representation
3015                     * of them */
3016                    if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3017                        Perl_croak(aTHX_
3018			 "Invalid range \"%c-%c\" in transliteration operator",
3019			 (char)range_min, (char)range_max);
3020                    }
3021                    else if (convert_unicode) {
3022                        /* diag_listed_as: Invalid range "%s" in transliteration operator */
3023                        Perl_croak(aTHX_
3024			       "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
3025                               " in transliteration operator",
3026			       range_min, range_max);
3027                    }
3028                    else {
3029                        /* diag_listed_as: Invalid range "%s" in transliteration operator */
3030                        Perl_croak(aTHX_
3031			       "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
3032                               " in transliteration operator",
3033			       range_min, range_max);
3034                    }
3035                }
3036
3037		if (has_utf8) {
3038
3039                    /* We try to avoid creating a swash.  If the upper end of
3040                     * this range is below 256, this range won't force a swash;
3041                     * otherwise it does force a swash, and as long as we have
3042                     * to have one, we might as well not expand things out.
3043                     * But if it's EBCDIC, we may have to look at each
3044                     * character below 256 if we have to convert to/from
3045                     * Unicode values */
3046                    if (range_max > 255
3047#ifdef EBCDIC
3048		        && (range_min > 255 || ! convert_unicode)
3049#endif
3050                    ) {
3051                        /* Move the high character one byte to the right; then
3052                         * insert between it and the range begin, an illegal
3053                         * byte which serves to indicate this is a range (using
3054                         * a '-' could be ambiguous). */
3055                        char *e = d++;
3056                        while (e-- > max_ptr) {
3057                            *(e + 1) = *e;
3058                        }
3059                        *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3060                        goto range_done;
3061                    }
3062
3063                    /* Here, we're going to expand out the range.  For EBCDIC
3064                     * the range can extend above 255 (not so in ASCII), so
3065                     * for EBCDIC, split it into the parts above and below
3066                     * 255/256 */
3067#ifdef EBCDIC
3068                    if (range_max > 255) {
3069                        real_range_max = range_max;
3070                        range_max = 255;
3071                    }
3072#endif
3073		}
3074
3075                /* Here we need to expand out the string to contain each
3076                 * character in the range.  Grow the output to handle this */
3077
3078                save_offset  = min_ptr - SvPVX_const(sv);
3079
3080                /* The base growth is the number of code points in the range */
3081                grow = range_max - range_min + 1;
3082                if (has_utf8) {
3083
3084                    /* But if the output is UTF-8, some of those characters may
3085                     * need two bytes (since the maximum range value here is
3086                     * 255, the max bytes per character is two).  On ASCII
3087                     * platforms, it's not much trouble to get an accurate
3088                     * count of what's needed.  But on EBCDIC, the ones that
3089                     * need 2 bytes are scattered around, so just use a worst
3090                     * case value instead of calculating for that platform.  */
3091#ifdef EBCDIC
3092                    grow *= 2;
3093#else
3094                    /* Only those above 127 require 2 bytes.  This may be
3095                     * everything in the range, or not */
3096                    if (range_min > 127) {
3097                        grow *= 2;
3098                    }
3099                    else if (range_max > 127) {
3100                        grow += range_max - 127;
3101                    }
3102#endif
3103                }
3104
3105                /* Subtract 3 for the bytes that were already accounted for
3106                 * (min, max, and the hyphen) */
3107                SvGROW(sv, SvLEN(sv) + grow - 3);
3108		d = SvPVX(sv) + save_offset;	/* refresh d after realloc */
3109
3110                /* Here, we expand out the range.  On ASCII platforms, the
3111                 * compiler should optimize out the 'convert_unicode==TRUE'
3112                 * portion of this */
3113                if (convert_unicode) {
3114                    IV i;
3115
3116                    /* Recall that the min and max are now in Unicode terms, so
3117                     * we have to convert each character to its native
3118                     * equivalent */
3119                    if (has_utf8) {
3120                        for (i = range_min; i <= range_max; i++) {
3121                            append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
3122                                                         (U8 **) &d);
3123                        }
3124                    }
3125                    else {
3126                        for (i = range_min; i <= range_max; i++) {
3127                            *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3128                        }
3129		    }
3130		}
3131                else {
3132                    IV i;
3133
3134                    /* Here, no conversions are necessary, which means that the
3135                     * first character in the range is already in 'd' and
3136                     * valid, so we can skip overwriting it */
3137                    if (has_utf8) {
3138                        d += UTF8SKIP(d);
3139                        for (i = range_min + 1; i <= range_max; i++) {
3140                            append_utf8_from_native_byte((U8) i, (U8 **) &d);
3141                        }
3142                    }
3143                    else {
3144                        d++;
3145                        for (i = range_min + 1; i <= range_max; i++) {
3146                            *d++ = (char)i;
3147                        }
3148		    }
3149		}
3150
3151                /* (Compilers should optimize this out for non-EBCDIC).  If the
3152                 * original range extended above 255, add in that portion */
3153                if (real_range_max) {
3154                    *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3155                    *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3156                    if (real_range_max > 0x101)
3157                        *d++ = (char) ILLEGAL_UTF8_BYTE;
3158                    if (real_range_max > 0x100)
3159                        d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3160                }
3161
3162              range_done:
3163		/* mark the range as done, and continue */
3164		didrange = TRUE;
3165		dorange = FALSE;
3166#ifdef EBCDIC
3167		non_portable_endpoint = 0;
3168                backslash_N = 0;
3169#endif
3170		continue;
3171	    } /* End of is a range */
3172        } /* End of transliteration.  Joins main code after these else's */
3173	else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3174	    char *s1 = s-1;
3175	    int esc = 0;
3176	    while (s1 >= start && *s1-- == '\\')
3177		esc = !esc;
3178	    if (!esc)
3179		in_charclass = TRUE;
3180	}
3181
3182	else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3183	    char *s1 = s-1;
3184	    int esc = 0;
3185	    while (s1 >= start && *s1-- == '\\')
3186		esc = !esc;
3187	    if (!esc)
3188		in_charclass = FALSE;
3189	}
3190
3191	/* skip for regexp comments /(?#comment)/, except for the last
3192	 * char, which will be done separately.
3193	 * Stop on (?{..}) and friends */
3194
3195	else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3196	    if (s[2] == '#') {
3197		while (s+1 < send && *s != ')')
3198		    *d++ = *s++;
3199	    }
3200	    else if (!PL_lex_casemods
3201                     && (    s[2] == '{' /* This should match regcomp.c */
3202		         || (s[2] == '?' && s[3] == '{')))
3203	    {
3204		break;
3205	    }
3206	}
3207
3208	/* likewise skip #-initiated comments in //x patterns */
3209	else if (*s == '#'
3210                 && PL_lex_inpat
3211                 && !in_charclass
3212                 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3213        {
3214	    while (s+1 < send && *s != '\n')
3215		*d++ = *s++;
3216	}
3217
3218	/* no further processing of single-quoted regex */
3219	else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3220	    goto default_action;
3221
3222	/* check for embedded arrays
3223	   (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3224	   */
3225	else if (*s == '@' && s[1]) {
3226	    if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
3227		break;
3228	    if (strchr(":'{$", s[1]))
3229		break;
3230	    if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3231		break; /* in regexp, neither @+ nor @- are interpolated */
3232	}
3233
3234	/* check for embedded scalars.  only stop if we're sure it's a
3235	   variable.
3236        */
3237	else if (*s == '$') {
3238	    if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
3239		break;
3240	    if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3241		if (s[1] == '\\') {
3242		    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3243				   "Possible unintended interpolation of $\\ in regex");
3244		}
3245		break;		/* in regexp, $ might be tail anchor */
3246            }
3247	}
3248
3249	/* End of else if chain - OP_TRANS rejoin rest */
3250
3251	/* backslashes */
3252	if (*s == '\\' && s+1 < send) {
3253	    char* e;	/* Can be used for ending '}', etc. */
3254
3255	    s++;
3256
3257	    /* warn on \1 - \9 in substitution replacements, but note that \11
3258	     * is an octal; and \19 is \1 followed by '9' */
3259	    if (PL_lex_inwhat == OP_SUBST
3260                && !PL_lex_inpat
3261                && isDIGIT(*s)
3262                && *s != '0'
3263                && !isDIGIT(s[1]))
3264	    {
3265		/* diag_listed_as: \%d better written as $%d */
3266		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3267		*--s = '$';
3268		break;
3269	    }
3270
3271	    /* string-change backslash escapes */
3272	    if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3273		--s;
3274		break;
3275	    }
3276	    /* In a pattern, process \N, but skip any other backslash escapes.
3277	     * This is because we don't want to translate an escape sequence
3278	     * into a meta symbol and have the regex compiler use the meta
3279	     * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3280	     * in spite of this, we do have to process \N here while the proper
3281	     * charnames handler is in scope.  See bugs #56444 and #62056.
3282             *
3283	     * There is a complication because \N in a pattern may also stand
3284	     * for 'match a non-nl', and not mean a charname, in which case its
3285	     * processing should be deferred to the regex compiler.  To be a
3286	     * charname it must be followed immediately by a '{', and not look
3287	     * like \N followed by a curly quantifier, i.e., not something like
3288	     * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3289	     * quantifier */
3290	    else if (PL_lex_inpat
3291		    && (*s != 'N'
3292			|| s[1] != '{'
3293			|| regcurly(s + 1)))
3294	    {
3295		*d++ = '\\';
3296		goto default_action;
3297	    }
3298
3299	    switch (*s) {
3300	    default:
3301	        {
3302		    if ((isALPHANUMERIC(*s)))
3303			Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3304				       "Unrecognized escape \\%c passed through",
3305				       *s);
3306		    /* default action is to copy the quoted character */
3307		    goto default_action;
3308		}
3309
3310	    /* eg. \132 indicates the octal constant 0132 */
3311	    case '0': case '1': case '2': case '3':
3312	    case '4': case '5': case '6': case '7':
3313		{
3314                    I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3315                    STRLEN len = 3;
3316		    uv = grok_oct(s, &len, &flags, NULL);
3317		    s += len;
3318                    if (len < 3 && s < send && isDIGIT(*s)
3319                        && ckWARN(WARN_MISC))
3320                    {
3321                        Perl_warner(aTHX_ packWARN(WARN_MISC),
3322                                    "%s", form_short_octal_warning(s, len));
3323                    }
3324		}
3325		goto NUM_ESCAPE_INSERT;
3326
3327	    /* eg. \o{24} indicates the octal constant \024 */
3328	    case 'o':
3329		{
3330		    const char* error;
3331
3332		    bool valid = grok_bslash_o(&s, &uv, &error,
3333                                               TRUE, /* Output warning */
3334                                               FALSE, /* Not strict */
3335                                               TRUE, /* Output warnings for
3336                                                         non-portables */
3337                                               UTF);
3338		    if (! valid) {
3339			yyerror(error);
3340			continue;
3341		    }
3342		    goto NUM_ESCAPE_INSERT;
3343		}
3344
3345	    /* eg. \x24 indicates the hex constant 0x24 */
3346	    case 'x':
3347		{
3348		    const char* error;
3349
3350		    bool valid = grok_bslash_x(&s, &uv, &error,
3351                                               TRUE, /* Output warning */
3352                                               FALSE, /* Not strict */
3353                                               TRUE,  /* Output warnings for
3354                                                         non-portables */
3355                                               UTF);
3356		    if (! valid) {
3357			yyerror(error);
3358			continue;
3359		    }
3360		}
3361
3362	      NUM_ESCAPE_INSERT:
3363		/* Insert oct or hex escaped character. */
3364
3365		/* Here uv is the ordinal of the next character being added */
3366		if (UVCHR_IS_INVARIANT(uv)) {
3367		    *d++ = (char) uv;
3368		}
3369		else {
3370		    if (!has_utf8 && uv > 255) {
3371			/* Might need to recode whatever we have accumulated so
3372			 * far if it contains any chars variant in utf8 or
3373			 * utf-ebcdic. */
3374
3375			SvCUR_set(sv, d - SvPVX_const(sv));
3376			SvPOK_on(sv);
3377			*d = '\0';
3378			/* See Note on sizing above.  */
3379			sv_utf8_upgrade_flags_grow(
3380                                       sv,
3381                                       SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3382                                                  /* Above-latin1 in string
3383                                                   * implies no encoding */
3384                                                  |SV_UTF8_NO_ENCODING,
3385                                       UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
3386			d = SvPVX(sv) + SvCUR(sv);
3387			has_utf8 = TRUE;
3388                    }
3389
3390                    if (has_utf8) {
3391                       /* Usually, there will already be enough room in 'sv'
3392                        * since such escapes are likely longer than any UTF-8
3393                        * sequence they can end up as.  This isn't the case on
3394                        * EBCDIC where \x{40000000} contains 12 bytes, and the
3395                        * UTF-8 for it contains 14.  And, we have to allow for
3396                        * a trailing NUL.  It probably can't happen on ASCII
3397                        * platforms, but be safe */
3398                        const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
3399                                            + 1;
3400                        if (UNLIKELY(needed > SvLEN(sv))) {
3401                            SvCUR_set(sv, d - SvPVX_const(sv));
3402                            d = sv_grow(sv, needed) + SvCUR(sv);
3403                        }
3404
3405		        d = (char*)uvchr_to_utf8((U8*)d, uv);
3406			if (PL_lex_inwhat == OP_TRANS
3407                            && PL_sublex_info.sub_op)
3408                        {
3409			    PL_sublex_info.sub_op->op_private |=
3410				(PL_lex_repl ? OPpTRANS_FROM_UTF
3411					     : OPpTRANS_TO_UTF);
3412			}
3413                    }
3414		    else {
3415		        *d++ = (char)uv;
3416		    }
3417		}
3418#ifdef EBCDIC
3419                non_portable_endpoint++;
3420#endif
3421		continue;
3422
3423 	    case 'N':
3424                /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3425                 * named character, like \N{LATIN SMALL LETTER A}, or a named
3426                 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3427                 * GRAVE} (except y/// can't handle the latter, croaking).  For
3428                 * convenience all three forms are referred to as "named
3429                 * characters" below.
3430                 *
3431                 * For patterns, \N also can mean to match a non-newline.  Code
3432                 * before this 'switch' statement should already have handled
3433                 * this situation, and hence this code only has to deal with
3434                 * the named character cases.
3435                 *
3436                 * For non-patterns, the named characters are converted to
3437                 * their string equivalents.  In patterns, named characters are
3438                 * not converted to their ultimate forms for the same reasons
3439                 * that other escapes aren't.  Instead, they are converted to
3440                 * the \N{U+...} form to get the value from the charnames that
3441                 * is in effect right now, while preserving the fact that it
3442                 * was a named character, so that the regex compiler knows
3443                 * this.
3444                 *
3445		 * The structure of this section of code (besides checking for
3446		 * errors and upgrading to utf8) is:
3447                 *    If the named character is of the form \N{U+...}, pass it
3448                 *      through if a pattern; otherwise convert the code point
3449                 *      to utf8
3450                 *    Otherwise must be some \N{NAME}: convert to
3451                 *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3452                 *
3453                 * Transliteration is an exception.  The conversion to utf8 is
3454                 * only done if the code point requires it to be representable.
3455                 *
3456                 * Here, 's' points to the 'N'; the test below is guaranteed to
3457		 * succeed if we are being called on a pattern, as we already
3458                 * know from a test above that the next character is a '{'.  A
3459                 * non-pattern \N must mean 'named character', which requires
3460                 * braces */
3461		s++;
3462		if (*s != '{') {
3463		    yyerror("Missing braces on \\N{}");
3464		    continue;
3465		}
3466		s++;
3467
3468		/* If there is no matching '}', it is an error. */
3469		if (! (e = strchr(s, '}'))) {
3470		    if (! PL_lex_inpat) {
3471			yyerror("Missing right brace on \\N{}");
3472		    } else {
3473			yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3474		    }
3475		    continue;
3476		}
3477
3478		/* Here it looks like a named character */
3479
3480		if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3481		    s += 2;	    /* Skip to next char after the 'U+' */
3482		    if (PL_lex_inpat) {
3483
3484                        /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3485                        /* Check the syntax.  */
3486                        const char *orig_s;
3487                        orig_s = s - 5;
3488                        if (!isXDIGIT(*s)) {
3489                          bad_NU:
3490                            yyerror(
3491                                "Invalid hexadecimal number in \\N{U+...}"
3492                            );
3493                            s = e + 1;
3494                            continue;
3495                        }
3496                        while (++s < e) {
3497                            if (isXDIGIT(*s))
3498                                continue;
3499                            else if ((*s == '.' || *s == '_')
3500                                  && isXDIGIT(s[1]))
3501                                continue;
3502                            goto bad_NU;
3503                        }
3504
3505                        /* Pass everything through unchanged.
3506                         * +1 is for the '}' */
3507                        Copy(orig_s, d, e - orig_s + 1, char);
3508                        d += e - orig_s + 1;
3509		    }
3510		    else {  /* Not a pattern: convert the hex to string */
3511                        I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3512				| PERL_SCAN_SILENT_ILLDIGIT
3513				| PERL_SCAN_DISALLOW_PREFIX;
3514                        STRLEN len = e - s;
3515                        uv = grok_hex(s, &len, &flags, NULL);
3516                        if (len == 0 || (len != (STRLEN)(e - s)))
3517                            goto bad_NU;
3518
3519                         /* For non-tr///, if the destination is not in utf8,
3520                          * unconditionally recode it to be so.  This is
3521                          * because \N{} implies Unicode semantics, and scalars
3522                          * have to be in utf8 to guarantee those semantics.
3523                          * tr/// doesn't care about Unicode rules, so no need
3524                          * there to upgrade to UTF-8 for small enough code
3525                          * points */
3526			if (! has_utf8 && (   uv > 0xFF
3527                                           || PL_lex_inwhat != OP_TRANS))
3528                        {
3529			    SvCUR_set(sv, d - SvPVX_const(sv));
3530			    SvPOK_on(sv);
3531			    *d = '\0';
3532			    /* See Note on sizing above.  */
3533			    sv_utf8_upgrade_flags_grow(
3534                                    sv,
3535                                    SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3536				    UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1);
3537			    d = SvPVX(sv) + SvCUR(sv);
3538			    has_utf8 = TRUE;
3539			}
3540
3541                        /* Add the (Unicode) code point to the output. */
3542			if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3543			    *d++ = (char) LATIN1_TO_NATIVE(uv);
3544			}
3545			else {
3546                            d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3547                        }
3548		    }
3549		}
3550		else /* Here is \N{NAME} but not \N{U+...}. */
3551                     if ((res = get_and_check_backslash_N_name(s, e)))
3552                {
3553                    STRLEN len;
3554                    const char *str = SvPV_const(res, len);
3555                    if (PL_lex_inpat) {
3556
3557			if (! len) { /* The name resolved to an empty string */
3558			    Copy("\\N{}", d, 4, char);
3559			    d += 4;
3560			}
3561			else {
3562			    /* In order to not lose information for the regex
3563			    * compiler, pass the result in the specially made
3564			    * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3565			    * the code points in hex of each character
3566			    * returned by charnames */
3567
3568			    const char *str_end = str + len;
3569			    const STRLEN off = d - SvPVX_const(sv);
3570
3571                            if (! SvUTF8(res)) {
3572                                /* For the non-UTF-8 case, we can determine the
3573                                 * exact length needed without having to parse
3574                                 * through the string.  Each character takes up
3575                                 * 2 hex digits plus either a trailing dot or
3576                                 * the "}" */
3577                                const char initial_text[] = "\\N{U+";
3578                                const STRLEN initial_len = sizeof(initial_text)
3579                                                           - 1;
3580                                d = off + SvGROW(sv, off
3581                                                    + 3 * len
3582
3583                                                    /* +1 for trailing NUL */
3584                                                    + initial_len + 1
3585
3586                                                    + (STRLEN)(send - e));
3587                                Copy(initial_text, d, initial_len, char);
3588                                d += initial_len;
3589                                while (str < str_end) {
3590                                    char hex_string[4];
3591                                    int len =
3592                                        my_snprintf(hex_string,
3593                                                  sizeof(hex_string),
3594                                                  "%02X.",
3595
3596                                                  /* The regex compiler is
3597                                                   * expecting Unicode, not
3598                                                   * native */
3599                                                  NATIVE_TO_LATIN1(*str));
3600                                    PERL_MY_SNPRINTF_POST_GUARD(len,
3601                                                           sizeof(hex_string));
3602                                    Copy(hex_string, d, 3, char);
3603                                    d += 3;
3604                                    str++;
3605                                }
3606                                d--;    /* Below, we will overwrite the final
3607                                           dot with a right brace */
3608                            }
3609                            else {
3610                                STRLEN char_length; /* cur char's byte length */
3611
3612                                /* and the number of bytes after this is
3613                                 * translated into hex digits */
3614                                STRLEN output_length;
3615
3616                                /* 2 hex per byte; 2 chars for '\N'; 2 chars
3617                                 * for max('U+', '.'); and 1 for NUL */
3618                                char hex_string[2 * UTF8_MAXBYTES + 5];
3619
3620                                /* Get the first character of the result. */
3621                                U32 uv = utf8n_to_uvchr((U8 *) str,
3622                                                        len,
3623                                                        &char_length,
3624                                                        UTF8_ALLOW_ANYUV);
3625                                /* Convert first code point to Unicode hex,
3626                                 * including the boiler plate before it. */
3627                                output_length =
3628                                    my_snprintf(hex_string, sizeof(hex_string),
3629                                             "\\N{U+%X",
3630                                             (unsigned int) NATIVE_TO_UNI(uv));
3631
3632                                /* Make sure there is enough space to hold it */
3633                                d = off + SvGROW(sv, off
3634                                                    + output_length
3635                                                    + (STRLEN)(send - e)
3636                                                    + 2);	/* '}' + NUL */
3637                                /* And output it */
3638                                Copy(hex_string, d, output_length, char);
3639                                d += output_length;
3640
3641                                /* For each subsequent character, append dot and
3642                                * its Unicode code point in hex */
3643                                while ((str += char_length) < str_end) {
3644                                    const STRLEN off = d - SvPVX_const(sv);
3645                                    U32 uv = utf8n_to_uvchr((U8 *) str,
3646                                                            str_end - str,
3647                                                            &char_length,
3648                                                            UTF8_ALLOW_ANYUV);
3649                                    output_length =
3650                                        my_snprintf(hex_string,
3651                                             sizeof(hex_string),
3652                                             ".%X",
3653                                             (unsigned int) NATIVE_TO_UNI(uv));
3654
3655                                    d = off + SvGROW(sv, off
3656                                                        + output_length
3657                                                        + (STRLEN)(send - e)
3658                                                        + 2);	/* '}' +  NUL */
3659                                    Copy(hex_string, d, output_length, char);
3660                                    d += output_length;
3661                                }
3662			    }
3663
3664			    *d++ = '}';	/* Done.  Add the trailing brace */
3665			}
3666		    }
3667		    else { /* Here, not in a pattern.  Convert the name to a
3668			    * string. */
3669
3670                        if (PL_lex_inwhat == OP_TRANS) {
3671                            str = SvPV_const(res, len);
3672                            if (len > ((SvUTF8(res))
3673                                       ? UTF8SKIP(str)
3674                                       : 1U))
3675                            {
3676                                yyerror(Perl_form(aTHX_
3677                                    "%.*s must not be a named sequence"
3678                                    " in transliteration operator",
3679                                        /*  +1 to include the "}" */
3680                                    (int) (e + 1 - start), start));
3681                                goto end_backslash_N;
3682                            }
3683                        }
3684                        else if (! SvUTF8(res)) {
3685                            /* Make sure \N{} return is UTF-8.  This is because
3686                            * \N{} implies Unicode semantics, and scalars have to
3687                            * be in utf8 to guarantee those semantics; but not
3688                            * needed in tr/// */
3689                            sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3690                            str = SvPV_const(res, len);
3691                        }
3692
3693                         /* Upgrade destination to be utf8 if this new
3694                          * component is */
3695			if (! has_utf8 && SvUTF8(res)) {
3696			    SvCUR_set(sv, d - SvPVX_const(sv));
3697			    SvPOK_on(sv);
3698			    *d = '\0';
3699			    /* See Note on sizing above.  */
3700			    sv_utf8_upgrade_flags_grow(sv,
3701						SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3702						len + (STRLEN)(send - s) + 1);
3703			    d = SvPVX(sv) + SvCUR(sv);
3704			    has_utf8 = TRUE;
3705			} else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3706
3707			    /* See Note on sizing above.  (NOTE: SvCUR() is not
3708			     * set correctly here). */
3709			    const STRLEN off = d - SvPVX_const(sv);
3710			    d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3711			}
3712			Copy(str, d, len, char);
3713			d += len;
3714		    }
3715
3716		    SvREFCNT_dec(res);
3717
3718		} /* End \N{NAME} */
3719
3720              end_backslash_N:
3721#ifdef EBCDIC
3722                backslash_N++; /* \N{} is defined to be Unicode */
3723#endif
3724		s = e + 1;  /* Point to just after the '}' */
3725		continue;
3726
3727	    /* \c is a control character */
3728	    case 'c':
3729		s++;
3730		if (s < send) {
3731		    *d++ = grok_bslash_c(*s++, 1);
3732		}
3733		else {
3734		    yyerror("Missing control char name in \\c");
3735		}
3736#ifdef EBCDIC
3737                non_portable_endpoint++;
3738#endif
3739		continue;
3740
3741	    /* printf-style backslashes, formfeeds, newlines, etc */
3742	    case 'b':
3743		*d++ = '\b';
3744		break;
3745	    case 'n':
3746		*d++ = '\n';
3747		break;
3748	    case 'r':
3749		*d++ = '\r';
3750		break;
3751	    case 'f':
3752		*d++ = '\f';
3753		break;
3754	    case 't':
3755		*d++ = '\t';
3756		break;
3757	    case 'e':
3758		*d++ = ESC_NATIVE;
3759		break;
3760	    case 'a':
3761		*d++ = '\a';
3762		break;
3763	    } /* end switch */
3764
3765	    s++;
3766	    continue;
3767	} /* end if (backslash) */
3768
3769    default_action:
3770	/* If we started with encoded form, or already know we want it,
3771	   then encode the next character */
3772	if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3773	    STRLEN len  = 1;
3774
3775	    /* One might think that it is wasted effort in the case of the
3776	     * source being utf8 (this_utf8 == TRUE) to take the next character
3777	     * in the source, convert it to an unsigned value, and then convert
3778	     * it back again.  But the source has not been validated here.  The
3779	     * routine that does the conversion checks for errors like
3780	     * malformed utf8 */
3781
3782	    const UV nextuv   = (this_utf8)
3783                                ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3784                                : (UV) ((U8) *s);
3785	    const STRLEN need = UVCHR_SKIP(nextuv);
3786	    if (!has_utf8) {
3787		SvCUR_set(sv, d - SvPVX_const(sv));
3788		SvPOK_on(sv);
3789		*d = '\0';
3790		/* See Note on sizing above.  */
3791		sv_utf8_upgrade_flags_grow(sv,
3792					SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3793					need + (STRLEN)(send - s) + 1);
3794		d = SvPVX(sv) + SvCUR(sv);
3795		has_utf8 = TRUE;
3796	    } else if (need > len) {
3797		/* encoded value larger than old, may need extra space (NOTE:
3798		 * SvCUR() is not set correctly here).   See Note on sizing
3799		 * above.  */
3800		const STRLEN off = d - SvPVX_const(sv);
3801		d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3802	    }
3803	    s += len;
3804
3805	    d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3806	}
3807	else {
3808	    *d++ = *s++;
3809	}
3810    } /* while loop to process each character */
3811
3812    /* terminate the string and set up the sv */
3813    *d = '\0';
3814    SvCUR_set(sv, d - SvPVX_const(sv));
3815    if (SvCUR(sv) >= SvLEN(sv))
3816	Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3817		   " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3818
3819    SvPOK_on(sv);
3820    if (IN_ENCODING && !has_utf8) {
3821	sv_recode_to_utf8(sv, _get_encoding());
3822	if (SvUTF8(sv))
3823	    has_utf8 = TRUE;
3824    }
3825    if (has_utf8) {
3826	SvUTF8_on(sv);
3827	if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3828	    PL_sublex_info.sub_op->op_private |=
3829		    (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3830	}
3831    }
3832
3833    /* shrink the sv if we allocated more than we used */
3834    if (SvCUR(sv) + 5 < SvLEN(sv)) {
3835	SvPV_shrink_to_cur(sv);
3836    }
3837
3838    /* return the substring (via pl_yylval) only if we parsed anything */
3839    if (s > start) {
3840	char *s2 = start;
3841	for (; s2 < s; s2++) {
3842	    if (*s2 == '\n')
3843		COPLINE_INC_WITH_HERELINES;
3844	}
3845	SvREFCNT_inc_simple_void_NN(sv);
3846	if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3847            && ! PL_parser->lex_re_reparsing)
3848        {
3849	    const char *const key = PL_lex_inpat ? "qr" : "q";
3850	    const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3851	    const char *type;
3852	    STRLEN typelen;
3853
3854	    if (PL_lex_inwhat == OP_TRANS) {
3855		type = "tr";
3856		typelen = 2;
3857	    } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3858		type = "s";
3859		typelen = 1;
3860	    } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3861		type = "q";
3862		typelen = 1;
3863	    } else  {
3864		type = "qq";
3865		typelen = 2;
3866	    }
3867
3868	    sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3869				type, typelen);
3870	}
3871	pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3872    }
3873    LEAVE_with_name("scan_const");
3874    return s;
3875}
3876
3877/* S_intuit_more
3878 * Returns TRUE if there's more to the expression (e.g., a subscript),
3879 * FALSE otherwise.
3880 *
3881 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3882 *
3883 * ->[ and ->{ return TRUE
3884 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3885 * { and [ outside a pattern are always subscripts, so return TRUE
3886 * if we're outside a pattern and it's not { or [, then return FALSE
3887 * if we're in a pattern and the first char is a {
3888 *   {4,5} (any digits around the comma) returns FALSE
3889 * if we're in a pattern and the first char is a [
3890 *   [] returns FALSE
3891 *   [SOMETHING] has a funky algorithm to decide whether it's a
3892 *      character class or not.  It has to deal with things like
3893 *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3894 * anything else returns TRUE
3895 */
3896
3897/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3898
3899STATIC int
3900S_intuit_more(pTHX_ char *s)
3901{
3902    PERL_ARGS_ASSERT_INTUIT_MORE;
3903
3904    if (PL_lex_brackets)
3905	return TRUE;
3906    if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3907	return TRUE;
3908    if (*s == '-' && s[1] == '>'
3909     && FEATURE_POSTDEREF_QQ_IS_ENABLED
3910     && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3911	||(s[2] == '@' && strchr("*[{",s[3])) ))
3912	return TRUE;
3913    if (*s != '{' && *s != '[')
3914	return FALSE;
3915    if (!PL_lex_inpat)
3916	return TRUE;
3917
3918    /* In a pattern, so maybe we have {n,m}. */
3919    if (*s == '{') {
3920	if (regcurly(s)) {
3921	    return FALSE;
3922	}
3923	return TRUE;
3924    }
3925
3926    /* On the other hand, maybe we have a character class */
3927
3928    s++;
3929    if (*s == ']' || *s == '^')
3930	return FALSE;
3931    else {
3932        /* this is terrifying, and it works */
3933	int weight;
3934	char seen[256];
3935	const char * const send = strchr(s,']');
3936	unsigned char un_char, last_un_char;
3937	char tmpbuf[sizeof PL_tokenbuf * 4];
3938
3939	if (!send)		/* has to be an expression */
3940	    return TRUE;
3941	weight = 2;		/* let's weigh the evidence */
3942
3943	if (*s == '$')
3944	    weight -= 3;
3945	else if (isDIGIT(*s)) {
3946	    if (s[1] != ']') {
3947		if (isDIGIT(s[1]) && s[2] == ']')
3948		    weight -= 10;
3949	    }
3950	    else
3951		weight -= 100;
3952	}
3953	Zero(seen,256,char);
3954	un_char = 255;
3955	for (; s < send; s++) {
3956	    last_un_char = un_char;
3957	    un_char = (unsigned char)*s;
3958	    switch (*s) {
3959	    case '@':
3960	    case '&':
3961	    case '$':
3962		weight -= seen[un_char] * 10;
3963		if (isWORDCHAR_lazy_if(s+1,UTF)) {
3964		    int len;
3965                    char *tmp = PL_bufend;
3966                    PL_bufend = (char*)send;
3967                    scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3968                    PL_bufend = tmp;
3969		    len = (int)strlen(tmpbuf);
3970		    if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3971                                                    UTF ? SVf_UTF8 : 0, SVt_PV))
3972			weight -= 100;
3973		    else
3974			weight -= 10;
3975		}
3976		else if (*s == '$'
3977                         && s[1]
3978                         && strchr("[#!%*<>()-=",s[1]))
3979                {
3980		    if (/*{*/ strchr("])} =",s[2]))
3981			weight -= 10;
3982		    else
3983			weight -= 1;
3984		}
3985		break;
3986	    case '\\':
3987		un_char = 254;
3988		if (s[1]) {
3989		    if (strchr("wds]",s[1]))
3990			weight += 100;
3991		    else if (seen[(U8)'\''] || seen[(U8)'"'])
3992			weight += 1;
3993		    else if (strchr("rnftbxcav",s[1]))
3994			weight += 40;
3995		    else if (isDIGIT(s[1])) {
3996			weight += 40;
3997			while (s[1] && isDIGIT(s[1]))
3998			    s++;
3999		    }
4000		}
4001		else
4002		    weight += 100;
4003		break;
4004	    case '-':
4005		if (s[1] == '\\')
4006		    weight += 50;
4007		if (strchr("aA01! ",last_un_char))
4008		    weight += 30;
4009		if (strchr("zZ79~",s[1]))
4010		    weight += 30;
4011		if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4012		    weight -= 5;	/* cope with negative subscript */
4013		break;
4014	    default:
4015		if (!isWORDCHAR(last_un_char)
4016		    && !(last_un_char == '$' || last_un_char == '@'
4017			 || last_un_char == '&')
4018		    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4019		    char *d = s;
4020		    while (isALPHA(*s))
4021			s++;
4022		    if (keyword(d, s - d, 0))
4023			weight -= 150;
4024		}
4025		if (un_char == last_un_char + 1)
4026		    weight += 5;
4027		weight -= seen[un_char];
4028		break;
4029	    }
4030	    seen[un_char]++;
4031	}
4032	if (weight >= 0)	/* probably a character class */
4033	    return FALSE;
4034    }
4035
4036    return TRUE;
4037}
4038
4039/*
4040 * S_intuit_method
4041 *
4042 * Does all the checking to disambiguate
4043 *   foo bar
4044 * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4045 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4046 *
4047 * First argument is the stuff after the first token, e.g. "bar".
4048 *
4049 * Not a method if foo is a filehandle.
4050 * Not a method if foo is a subroutine prototyped to take a filehandle.
4051 * Not a method if it's really "Foo $bar"
4052 * Method if it's "foo $bar"
4053 * Not a method if it's really "print foo $bar"
4054 * Method if it's really "foo package::" (interpreted as package->foo)
4055 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4056 * Not a method if bar is a filehandle or package, but is quoted with
4057 *   =>
4058 */
4059
4060STATIC int
4061S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4062{
4063    char *s = start + (*start == '$');
4064    char tmpbuf[sizeof PL_tokenbuf];
4065    STRLEN len;
4066    GV* indirgv;
4067	/* Mustn't actually add anything to a symbol table.
4068	   But also don't want to "initialise" any placeholder
4069	   constants that might already be there into full
4070	   blown PVGVs with attached PVCV.  */
4071    GV * const gv =
4072	ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4073
4074    PERL_ARGS_ASSERT_INTUIT_METHOD;
4075
4076    if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4077	    return 0;
4078    if (cv && SvPOK(cv)) {
4079	const char *proto = CvPROTO(cv);
4080	if (proto) {
4081	    while (*proto && (isSPACE(*proto) || *proto == ';'))
4082		proto++;
4083	    if (*proto == '*')
4084		return 0;
4085	}
4086    }
4087
4088    if (*start == '$') {
4089	if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4090            || isUPPER(*PL_tokenbuf))
4091	    return 0;
4092	s = skipspace(s);
4093	PL_bufptr = start;
4094	PL_expect = XREF;
4095	return *s == '(' ? FUNCMETH : METHOD;
4096    }
4097
4098    s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4099    /* start is the beginning of the possible filehandle/object,
4100     * and s is the end of it
4101     * tmpbuf is a copy of it (but with single quotes as double colons)
4102     */
4103
4104    if (!keyword(tmpbuf, len, 0)) {
4105	if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4106	    len -= 2;
4107	    tmpbuf[len] = '\0';
4108	    goto bare_package;
4109	}
4110	indirgv = gv_fetchpvn_flags(tmpbuf, len,
4111				    GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4112				    SVt_PVCV);
4113	if (indirgv && SvTYPE(indirgv) != SVt_NULL
4114	 && (!isGV(indirgv) || GvCVu(indirgv)))
4115	    return 0;
4116	/* filehandle or package name makes it a method */
4117	if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4118	    s = skipspace(s);
4119	    if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4120		return 0;	/* no assumptions -- "=>" quotes bareword */
4121      bare_package:
4122	    NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4123						  S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4124	    NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4125	    PL_expect = XTERM;
4126	    force_next(WORD);
4127	    PL_bufptr = s;
4128	    return *s == '(' ? FUNCMETH : METHOD;
4129	}
4130    }
4131    return 0;
4132}
4133
4134/* Encoded script support. filter_add() effectively inserts a
4135 * 'pre-processing' function into the current source input stream.
4136 * Note that the filter function only applies to the current source file
4137 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4138 *
4139 * The datasv parameter (which may be NULL) can be used to pass
4140 * private data to this instance of the filter. The filter function
4141 * can recover the SV using the FILTER_DATA macro and use it to
4142 * store private buffers and state information.
4143 *
4144 * The supplied datasv parameter is upgraded to a PVIO type
4145 * and the IoDIRP/IoANY field is used to store the function pointer,
4146 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4147 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4148 * private use must be set using malloc'd pointers.
4149 */
4150
4151SV *
4152Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4153{
4154    if (!funcp)
4155	return NULL;
4156
4157    if (!PL_parser)
4158	return NULL;
4159
4160    if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4161	Perl_croak(aTHX_ "Source filters apply only to byte streams");
4162
4163    if (!PL_rsfp_filters)
4164	PL_rsfp_filters = newAV();
4165    if (!datasv)
4166	datasv = newSV(0);
4167    SvUPGRADE(datasv, SVt_PVIO);
4168    IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4169    IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4170    DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4171			  FPTR2DPTR(void *, IoANY(datasv)),
4172			  SvPV_nolen(datasv)));
4173    av_unshift(PL_rsfp_filters, 1);
4174    av_store(PL_rsfp_filters, 0, datasv) ;
4175    if (
4176	!PL_parser->filtered
4177     && PL_parser->lex_flags & LEX_EVALBYTES
4178     && PL_bufptr < PL_bufend
4179    ) {
4180	const char *s = PL_bufptr;
4181	while (s < PL_bufend) {
4182	    if (*s == '\n') {
4183		SV *linestr = PL_parser->linestr;
4184		char *buf = SvPVX(linestr);
4185		STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4186		STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4187		STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4188		STRLEN const linestart_pos = PL_parser->linestart - buf;
4189		STRLEN const last_uni_pos =
4190		    PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4191		STRLEN const last_lop_pos =
4192		    PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4193		av_push(PL_rsfp_filters, linestr);
4194		PL_parser->linestr =
4195		    newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4196		buf = SvPVX(PL_parser->linestr);
4197		PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4198		PL_parser->bufptr = buf + bufptr_pos;
4199		PL_parser->oldbufptr = buf + oldbufptr_pos;
4200		PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4201		PL_parser->linestart = buf + linestart_pos;
4202		if (PL_parser->last_uni)
4203		    PL_parser->last_uni = buf + last_uni_pos;
4204		if (PL_parser->last_lop)
4205		    PL_parser->last_lop = buf + last_lop_pos;
4206		SvLEN(linestr) = SvCUR(linestr);
4207		SvCUR(linestr) = s-SvPVX(linestr);
4208		PL_parser->filtered = 1;
4209		break;
4210	    }
4211	    s++;
4212	}
4213    }
4214    return(datasv);
4215}
4216
4217
4218/* Delete most recently added instance of this filter function.	*/
4219void
4220Perl_filter_del(pTHX_ filter_t funcp)
4221{
4222    SV *datasv;
4223
4224    PERL_ARGS_ASSERT_FILTER_DEL;
4225
4226#ifdef DEBUGGING
4227    DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4228			  FPTR2DPTR(void*, funcp)));
4229#endif
4230    if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4231	return;
4232    /* if filter is on top of stack (usual case) just pop it off */
4233    datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4234    if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4235	sv_free(av_pop(PL_rsfp_filters));
4236
4237        return;
4238    }
4239    /* we need to search for the correct entry and clear it	*/
4240    Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4241}
4242
4243
4244/* Invoke the idxth filter function for the current rsfp.	 */
4245/* maxlen 0 = read one text line */
4246I32
4247Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4248{
4249    filter_t funcp;
4250    SV *datasv = NULL;
4251    /* This API is bad. It should have been using unsigned int for maxlen.
4252       Not sure if we want to change the API, but if not we should sanity
4253       check the value here.  */
4254    unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4255
4256    PERL_ARGS_ASSERT_FILTER_READ;
4257
4258    if (!PL_parser || !PL_rsfp_filters)
4259	return -1;
4260    if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?	*/
4261	/* Provide a default input filter to make life easy.	*/
4262	/* Note that we append to the line. This is handy.	*/
4263	DEBUG_P(PerlIO_printf(Perl_debug_log,
4264			      "filter_read %d: from rsfp\n", idx));
4265	if (correct_length) {
4266 	    /* Want a block */
4267	    int len ;
4268	    const int old_len = SvCUR(buf_sv);
4269
4270	    /* ensure buf_sv is large enough */
4271	    SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4272	    if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4273				   correct_length)) <= 0) {
4274		if (PerlIO_error(PL_rsfp))
4275	            return -1;		/* error */
4276	        else
4277		    return 0 ;		/* end of file */
4278	    }
4279	    SvCUR_set(buf_sv, old_len + len) ;
4280	    SvPVX(buf_sv)[old_len + len] = '\0';
4281	} else {
4282	    /* Want a line */
4283            if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4284		if (PerlIO_error(PL_rsfp))
4285	            return -1;		/* error */
4286	        else
4287		    return 0 ;		/* end of file */
4288	    }
4289	}
4290	return SvCUR(buf_sv);
4291    }
4292    /* Skip this filter slot if filter has been deleted	*/
4293    if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4294	DEBUG_P(PerlIO_printf(Perl_debug_log,
4295			      "filter_read %d: skipped (filter deleted)\n",
4296			      idx));
4297	return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4298    }
4299    if (SvTYPE(datasv) != SVt_PVIO) {
4300	if (correct_length) {
4301 	    /* Want a block */
4302	    const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4303	    if (!remainder) return 0; /* eof */
4304	    if (correct_length > remainder) correct_length = remainder;
4305	    sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4306	    SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4307	} else {
4308	    /* Want a line */
4309	    const char *s = SvEND(datasv);
4310	    const char *send = SvPVX(datasv) + SvLEN(datasv);
4311	    while (s < send) {
4312		if (*s == '\n') {
4313		    s++;
4314		    break;
4315		}
4316		s++;
4317	    }
4318	    if (s == send) return 0; /* eof */
4319	    sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4320	    SvCUR_set(datasv, s-SvPVX(datasv));
4321	}
4322	return SvCUR(buf_sv);
4323    }
4324    /* Get function pointer hidden within datasv	*/
4325    funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4326    DEBUG_P(PerlIO_printf(Perl_debug_log,
4327			  "filter_read %d: via function %p (%s)\n",
4328			  idx, (void*)datasv, SvPV_nolen_const(datasv)));
4329    /* Call function. The function is expected to 	*/
4330    /* call "FILTER_READ(idx+1, buf_sv)" first.		*/
4331    /* Return: <0:error, =0:eof, >0:not eof 		*/
4332    return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4333}
4334
4335STATIC char *
4336S_filter_gets(pTHX_ SV *sv, STRLEN append)
4337{
4338    PERL_ARGS_ASSERT_FILTER_GETS;
4339
4340#ifdef PERL_CR_FILTER
4341    if (!PL_rsfp_filters) {
4342	filter_add(S_cr_textfilter,NULL);
4343    }
4344#endif
4345    if (PL_rsfp_filters) {
4346	if (!append)
4347            SvCUR_set(sv, 0);	/* start with empty line	*/
4348        if (FILTER_READ(0, sv, 0) > 0)
4349            return ( SvPVX(sv) ) ;
4350        else
4351	    return NULL ;
4352    }
4353    else
4354        return (sv_gets(sv, PL_rsfp, append));
4355}
4356
4357STATIC HV *
4358S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4359{
4360    GV *gv;
4361
4362    PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4363
4364    if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4365        return PL_curstash;
4366
4367    if (len > 2
4368        && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4369        && (gv = gv_fetchpvn_flags(pkgname,
4370                                   len,
4371                                   ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4372    {
4373        return GvHV(gv);			/* Foo:: */
4374    }
4375
4376    /* use constant CLASS => 'MyClass' */
4377    gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4378    if (gv && GvCV(gv)) {
4379	SV * const sv = cv_const_sv(GvCV(gv));
4380	if (sv)
4381	    return gv_stashsv(sv, 0);
4382    }
4383
4384    return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4385}
4386
4387
4388STATIC char *
4389S_tokenize_use(pTHX_ int is_use, char *s) {
4390    PERL_ARGS_ASSERT_TOKENIZE_USE;
4391
4392    if (PL_expect != XSTATE)
4393	yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4394		    is_use ? "use" : "no"));
4395    PL_expect = XTERM;
4396    s = skipspace(s);
4397    if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4398	s = force_version(s, TRUE);
4399	if (*s == ';' || *s == '}'
4400		|| (s = skipspace(s), (*s == ';' || *s == '}'))) {
4401	    NEXTVAL_NEXTTOKE.opval = NULL;
4402	    force_next(WORD);
4403	}
4404	else if (*s == 'v') {
4405	    s = force_word(s,WORD,FALSE,TRUE);
4406	    s = force_version(s, FALSE);
4407	}
4408    }
4409    else {
4410	s = force_word(s,WORD,FALSE,TRUE);
4411	s = force_version(s, FALSE);
4412    }
4413    pl_yylval.ival = is_use;
4414    return s;
4415}
4416#ifdef DEBUGGING
4417    static const char* const exp_name[] =
4418	{ "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4419	  "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4420	  "TERMORDORDOR"
4421	};
4422#endif
4423
4424#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4425STATIC bool
4426S_word_takes_any_delimeter(char *p, STRLEN len)
4427{
4428    return (len == 1 && strchr("msyq", p[0]))
4429            || (len == 2
4430                && ((p[0] == 't' && p[1] == 'r')
4431                    || (p[0] == 'q' && strchr("qwxr", p[1]))));
4432}
4433
4434static void
4435S_check_scalar_slice(pTHX_ char *s)
4436{
4437    s++;
4438    while (*s == ' ' || *s == '\t') s++;
4439    if (*s == 'q' && s[1] == 'w'
4440     && !isWORDCHAR_lazy_if(s+2,UTF))
4441	return;
4442    while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4443	s += UTF ? UTF8SKIP(s) : 1;
4444    if (*s == '}' || *s == ']')
4445	pl_yylval.ival = OPpSLICEWARNING;
4446}
4447
4448/*
4449  yylex
4450
4451  Works out what to call the token just pulled out of the input
4452  stream.  The yacc parser takes care of taking the ops we return and
4453  stitching them into a tree.
4454
4455  Returns:
4456    The type of the next token
4457
4458  Structure:
4459      Switch based on the current state:
4460	  - if we already built the token before, use it
4461	  - if we have a case modifier in a string, deal with that
4462	  - handle other cases of interpolation inside a string
4463	  - scan the next line if we are inside a format
4464      In the normal state switch on the next character:
4465	  - default:
4466	    if alphabetic, go to key lookup
4467	    unrecoginized character - croak
4468	  - 0/4/26: handle end-of-line or EOF
4469	  - cases for whitespace
4470	  - \n and #: handle comments and line numbers
4471	  - various operators, brackets and sigils
4472	  - numbers
4473	  - quotes
4474	  - 'v': vstrings (or go to key lookup)
4475	  - 'x' repetition operator (or go to key lookup)
4476	  - other ASCII alphanumerics (key lookup begins here):
4477	      word before => ?
4478	      keyword plugin
4479	      scan built-in keyword (but do nothing with it yet)
4480	      check for statement label
4481	      check for lexical subs
4482		  goto just_a_word if there is one
4483	      see whether built-in keyword is overridden
4484	      switch on keyword number:
4485		  - default: just_a_word:
4486		      not a built-in keyword; handle bareword lookup
4487		      disambiguate between method and sub call
4488		      fall back to bareword
4489		  - cases for built-in keywords
4490*/
4491
4492
4493int
4494Perl_yylex(pTHX)
4495{
4496    dVAR;
4497    char *s = PL_bufptr;
4498    char *d;
4499    STRLEN len;
4500    bool bof = FALSE;
4501    const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4502    U8 formbrack = 0;
4503    U32 fake_eof = 0;
4504
4505    /* orig_keyword, gvp, and gv are initialized here because
4506     * jump to the label just_a_word_zero can bypass their
4507     * initialization later. */
4508    I32 orig_keyword = 0;
4509    GV *gv = NULL;
4510    GV **gvp = NULL;
4511
4512    DEBUG_T( {
4513	SV* tmp = newSVpvs("");
4514	PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4515	    (IV)CopLINE(PL_curcop),
4516	    lex_state_names[PL_lex_state],
4517	    exp_name[PL_expect],
4518	    pv_display(tmp, s, strlen(s), 0, 60));
4519	SvREFCNT_dec(tmp);
4520    } );
4521
4522    /* when we've already built the next token, just pull it out of the queue */
4523    if (PL_nexttoke) {
4524	PL_nexttoke--;
4525	pl_yylval = PL_nextval[PL_nexttoke];
4526	if (!PL_nexttoke) {
4527	    PL_lex_state = PL_lex_defer;
4528	    PL_lex_defer = LEX_NORMAL;
4529	}
4530	{
4531	    I32 next_type;
4532	    next_type = PL_nexttype[PL_nexttoke];
4533	    if (next_type & (7<<24)) {
4534		if (next_type & (1<<24)) {
4535		    if (PL_lex_brackets > 100)
4536			Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4537		    PL_lex_brackstack[PL_lex_brackets++] =
4538			(char) ((next_type >> 16) & 0xff);
4539		}
4540		if (next_type & (2<<24))
4541		    PL_lex_allbrackets++;
4542		if (next_type & (4<<24))
4543		    PL_lex_allbrackets--;
4544		next_type &= 0xffff;
4545	    }
4546	    return REPORT(next_type == 'p' ? pending_ident() : next_type);
4547	}
4548    }
4549
4550    switch (PL_lex_state) {
4551    case LEX_NORMAL:
4552    case LEX_INTERPNORMAL:
4553	break;
4554
4555    /* interpolated case modifiers like \L \U, including \Q and \E.
4556       when we get here, PL_bufptr is at the \
4557    */
4558    case LEX_INTERPCASEMOD:
4559#ifdef DEBUGGING
4560	if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4561	    Perl_croak(aTHX_
4562		       "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4563		       PL_bufptr, PL_bufend, *PL_bufptr);
4564#endif
4565	/* handle \E or end of string */
4566       	if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4567	    /* if at a \E */
4568	    if (PL_lex_casemods) {
4569		const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4570		PL_lex_casestack[PL_lex_casemods] = '\0';
4571
4572		if (PL_bufptr != PL_bufend
4573		    && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4574                        || oldmod == 'F')) {
4575		    PL_bufptr += 2;
4576		    PL_lex_state = LEX_INTERPCONCAT;
4577		}
4578		PL_lex_allbrackets--;
4579		return REPORT(')');
4580	    }
4581            else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4582               /* Got an unpaired \E */
4583               Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4584                        "Useless use of \\E");
4585            }
4586	    if (PL_bufptr != PL_bufend)
4587		PL_bufptr += 2;
4588	    PL_lex_state = LEX_INTERPCONCAT;
4589	    return yylex();
4590	}
4591	else {
4592	    DEBUG_T({ PerlIO_printf(Perl_debug_log,
4593              "### Saw case modifier\n"); });
4594	    s = PL_bufptr + 1;
4595	    if (s[1] == '\\' && s[2] == 'E') {
4596	        PL_bufptr = s + 3;
4597		PL_lex_state = LEX_INTERPCONCAT;
4598		return yylex();
4599	    }
4600	    else {
4601		I32 tmp;
4602                if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4603                    tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
4604		if ((*s == 'L' || *s == 'U' || *s == 'F')
4605                    && (strchr(PL_lex_casestack, 'L')
4606                        || strchr(PL_lex_casestack, 'U')
4607                        || strchr(PL_lex_casestack, 'F')))
4608                {
4609		    PL_lex_casestack[--PL_lex_casemods] = '\0';
4610		    PL_lex_allbrackets--;
4611		    return REPORT(')');
4612		}
4613		if (PL_lex_casemods > 10)
4614		    Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4615		PL_lex_casestack[PL_lex_casemods++] = *s;
4616		PL_lex_casestack[PL_lex_casemods] = '\0';
4617		PL_lex_state = LEX_INTERPCONCAT;
4618		NEXTVAL_NEXTTOKE.ival = 0;
4619		force_next((2<<24)|'(');
4620		if (*s == 'l')
4621		    NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4622		else if (*s == 'u')
4623		    NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4624		else if (*s == 'L')
4625		    NEXTVAL_NEXTTOKE.ival = OP_LC;
4626		else if (*s == 'U')
4627		    NEXTVAL_NEXTTOKE.ival = OP_UC;
4628		else if (*s == 'Q')
4629		    NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4630                else if (*s == 'F')
4631		    NEXTVAL_NEXTTOKE.ival = OP_FC;
4632		else
4633		    Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4634		PL_bufptr = s + 1;
4635	    }
4636	    force_next(FUNC);
4637	    if (PL_lex_starts) {
4638		s = PL_bufptr;
4639		PL_lex_starts = 0;
4640		/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4641		if (PL_lex_casemods == 1 && PL_lex_inpat)
4642		    TOKEN(',');
4643		else
4644		    AopNOASSIGN(OP_CONCAT);
4645	    }
4646	    else
4647		return yylex();
4648	}
4649
4650    case LEX_INTERPPUSH:
4651        return REPORT(sublex_push());
4652
4653    case LEX_INTERPSTART:
4654	if (PL_bufptr == PL_bufend)
4655	    return REPORT(sublex_done());
4656	DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4657              "### Interpolated variable\n"); });
4658	PL_expect = XTERM;
4659        /* for /@a/, we leave the joining for the regex engine to do
4660         * (unless we're within \Q etc) */
4661	PL_lex_dojoin = (*PL_bufptr == '@'
4662                            && (!PL_lex_inpat || PL_lex_casemods));
4663	PL_lex_state = LEX_INTERPNORMAL;
4664	if (PL_lex_dojoin) {
4665	    NEXTVAL_NEXTTOKE.ival = 0;
4666	    force_next(',');
4667	    force_ident("\"", '$');
4668	    NEXTVAL_NEXTTOKE.ival = 0;
4669	    force_next('$');
4670	    NEXTVAL_NEXTTOKE.ival = 0;
4671	    force_next((2<<24)|'(');
4672	    NEXTVAL_NEXTTOKE.ival = OP_JOIN;	/* emulate join($", ...) */
4673	    force_next(FUNC);
4674	}
4675	/* Convert (?{...}) and friends to 'do {...}' */
4676	if (PL_lex_inpat && *PL_bufptr == '(') {
4677	    PL_parser->lex_shared->re_eval_start = PL_bufptr;
4678	    PL_bufptr += 2;
4679	    if (*PL_bufptr != '{')
4680		PL_bufptr++;
4681	    PL_expect = XTERMBLOCK;
4682	    force_next(DO);
4683	}
4684
4685	if (PL_lex_starts++) {
4686	    s = PL_bufptr;
4687	    /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4688	    if (!PL_lex_casemods && PL_lex_inpat)
4689		TOKEN(',');
4690	    else
4691		AopNOASSIGN(OP_CONCAT);
4692	}
4693	return yylex();
4694
4695    case LEX_INTERPENDMAYBE:
4696	if (intuit_more(PL_bufptr)) {
4697	    PL_lex_state = LEX_INTERPNORMAL;	/* false alarm, more expr */
4698	    break;
4699	}
4700	/* FALLTHROUGH */
4701
4702    case LEX_INTERPEND:
4703	/* Treat state as LEX_NORMAL if we have no inner lexing scope.
4704	   XXX This hack can be removed if we stop setting PL_lex_state to
4705	   LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below.  */
4706	if (UNLIKELY(!PL_lex_inwhat)) {
4707	    PL_lex_state = LEX_NORMAL;
4708	    break;
4709	}
4710
4711	if (PL_lex_dojoin) {
4712	    const U8 dojoin_was = PL_lex_dojoin;
4713	    PL_lex_dojoin = FALSE;
4714	    PL_lex_state = LEX_INTERPCONCAT;
4715	    PL_lex_allbrackets--;
4716	    return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4717	}
4718	if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4719	    && SvEVALED(PL_lex_repl))
4720	{
4721	    if (PL_bufptr != PL_bufend)
4722		Perl_croak(aTHX_ "Bad evalled substitution pattern");
4723	    PL_lex_repl = NULL;
4724	}
4725	/* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4726	   re_eval_str.  If the here-doc body���s length equals the previous
4727	   value of re_eval_start, re_eval_start will now be null.  So
4728	   check re_eval_str as well. */
4729	if (PL_parser->lex_shared->re_eval_start
4730	 || PL_parser->lex_shared->re_eval_str) {
4731	    SV *sv;
4732	    if (*PL_bufptr != ')')
4733		Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4734	    PL_bufptr++;
4735	    /* having compiled a (?{..}) expression, return the original
4736	     * text too, as a const */
4737	    if (PL_parser->lex_shared->re_eval_str) {
4738		sv = PL_parser->lex_shared->re_eval_str;
4739		PL_parser->lex_shared->re_eval_str = NULL;
4740		SvCUR_set(sv,
4741			 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4742		SvPV_shrink_to_cur(sv);
4743	    }
4744	    else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4745			 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4746	    NEXTVAL_NEXTTOKE.opval =
4747		    (OP*)newSVOP(OP_CONST, 0,
4748				 sv);
4749	    force_next(THING);
4750	    PL_parser->lex_shared->re_eval_start = NULL;
4751	    PL_expect = XTERM;
4752	    return REPORT(',');
4753	}
4754
4755	/* FALLTHROUGH */
4756    case LEX_INTERPCONCAT:
4757#ifdef DEBUGGING
4758	if (PL_lex_brackets)
4759	    Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4760		       (long) PL_lex_brackets);
4761#endif
4762	/* Treat state as LEX_NORMAL when not in an inner lexing scope.
4763	   XXX This hack can be removed if we stop setting PL_lex_state to
4764	   LEX_KNOWNEXT.  */
4765	if (UNLIKELY(!PL_lex_inwhat)) {
4766	    PL_lex_state = LEX_NORMAL;
4767	    break;
4768	}
4769
4770	if (PL_bufptr == PL_bufend)
4771	    return REPORT(sublex_done());
4772
4773	/* m'foo' still needs to be parsed for possible (?{...}) */
4774	if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4775	    SV *sv = newSVsv(PL_linestr);
4776	    sv = tokeq(sv);
4777	    pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4778	    s = PL_bufend;
4779	}
4780	else {
4781	    s = scan_const(PL_bufptr);
4782	    if (*s == '\\')
4783		PL_lex_state = LEX_INTERPCASEMOD;
4784	    else
4785		PL_lex_state = LEX_INTERPSTART;
4786	}
4787
4788	if (s != PL_bufptr) {
4789	    NEXTVAL_NEXTTOKE = pl_yylval;
4790	    PL_expect = XTERM;
4791	    force_next(THING);
4792	    if (PL_lex_starts++) {
4793		/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4794		if (!PL_lex_casemods && PL_lex_inpat)
4795		    TOKEN(',');
4796		else
4797		    AopNOASSIGN(OP_CONCAT);
4798	    }
4799	    else {
4800		PL_bufptr = s;
4801		return yylex();
4802	    }
4803	}
4804
4805	return yylex();
4806    case LEX_FORMLINE:
4807	s = scan_formline(PL_bufptr);
4808	if (!PL_lex_formbrack)
4809	{
4810	    formbrack = 1;
4811	    goto rightbracket;
4812	}
4813	PL_bufptr = s;
4814	return yylex();
4815    }
4816
4817    /* We really do *not* want PL_linestr ever becoming a COW. */
4818    assert (!SvIsCOW(PL_linestr));
4819    s = PL_bufptr;
4820    PL_oldoldbufptr = PL_oldbufptr;
4821    PL_oldbufptr = s;
4822    PL_parser->saw_infix_sigil = 0;
4823
4824  retry:
4825    switch (*s) {
4826    default:
4827	if (UTF) {
4828            if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
4829                ENTER;
4830                SAVESPTR(PL_warnhook);
4831                PL_warnhook = PERL_WARNHOOK_FATAL;
4832                utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
4833                LEAVE;
4834            }
4835            if (isIDFIRST_utf8((U8*)s)) {
4836                goto keylookup;
4837            }
4838        }
4839        else if (isALNUMC(*s)) {
4840	    goto keylookup;
4841	}
4842    {
4843        SV *dsv = newSVpvs_flags("", SVs_TEMP);
4844        const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4845                                                    UTF8SKIP(s),
4846                                                    SVs_TEMP | SVf_UTF8),
4847                                            10, UNI_DISPLAY_ISPRINT)
4848                            : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4849        len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4850        if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4851            d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4852        } else {
4853            d = PL_linestart;
4854        }
4855        Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4856                          UTF8fARG(UTF, (s - d), d),
4857                         (int) len + 1);
4858    }
4859    case 4:
4860    case 26:
4861	goto fake_eof;			/* emulate EOF on ^D or ^Z */
4862    case 0:
4863	if ((!PL_rsfp || PL_lex_inwhat)
4864	 && (!PL_parser->filtered || s+1 < PL_bufend)) {
4865	    PL_last_uni = 0;
4866	    PL_last_lop = 0;
4867	    if (PL_lex_brackets
4868                && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
4869            {
4870		yyerror((const char *)
4871			(PL_lex_formbrack
4872			 ? "Format not terminated"
4873			 : "Missing right curly or square bracket"));
4874	    }
4875            DEBUG_T( { PerlIO_printf(Perl_debug_log,
4876                        "### Tokener got EOF\n");
4877            } );
4878	    TOKEN(0);
4879	}
4880	if (s++ < PL_bufend)
4881	    goto retry;			/* ignore stray nulls */
4882	PL_last_uni = 0;
4883	PL_last_lop = 0;
4884	if (!PL_in_eval && !PL_preambled) {
4885	    PL_preambled = TRUE;
4886	    if (PL_perldb) {
4887		/* Generate a string of Perl code to load the debugger.
4888		 * If PERL5DB is set, it will return the contents of that,
4889		 * otherwise a compile-time require of perl5db.pl.  */
4890
4891		const char * const pdb = PerlEnv_getenv("PERL5DB");
4892
4893		if (pdb) {
4894		    sv_setpv(PL_linestr, pdb);
4895		    sv_catpvs(PL_linestr,";");
4896		} else {
4897		    SETERRNO(0,SS_NORMAL);
4898		    sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4899		}
4900		PL_parser->preambling = CopLINE(PL_curcop);
4901	    } else
4902		sv_setpvs(PL_linestr,"");
4903	    if (PL_preambleav) {
4904		SV **svp = AvARRAY(PL_preambleav);
4905		SV **const end = svp + AvFILLp(PL_preambleav);
4906		while(svp <= end) {
4907		    sv_catsv(PL_linestr, *svp);
4908		    ++svp;
4909		    sv_catpvs(PL_linestr, ";");
4910		}
4911		sv_free(MUTABLE_SV(PL_preambleav));
4912		PL_preambleav = NULL;
4913	    }
4914	    if (PL_minus_E)
4915		sv_catpvs(PL_linestr,
4916			  "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4917	    if (PL_minus_n || PL_minus_p) {
4918		sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4919		if (PL_minus_l)
4920		    sv_catpvs(PL_linestr,"chomp;");
4921		if (PL_minus_a) {
4922		    if (PL_minus_F) {
4923			if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4924			     || *PL_splitstr == '"')
4925			      && strchr(PL_splitstr + 1, *PL_splitstr))
4926			    Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4927			else {
4928			    /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4929			       bytes can be used as quoting characters.  :-) */
4930			    const char *splits = PL_splitstr;
4931			    sv_catpvs(PL_linestr, "our @F=split(q\0");
4932			    do {
4933				/* Need to \ \s  */
4934				if (*splits == '\\')
4935				    sv_catpvn(PL_linestr, splits, 1);
4936				sv_catpvn(PL_linestr, splits, 1);
4937			    } while (*splits++);
4938			    /* This loop will embed the trailing NUL of
4939			       PL_linestr as the last thing it does before
4940			       terminating.  */
4941			    sv_catpvs(PL_linestr, ");");
4942			}
4943		    }
4944		    else
4945		        sv_catpvs(PL_linestr,"our @F=split(' ');");
4946		}
4947	    }
4948	    sv_catpvs(PL_linestr, "\n");
4949	    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4950	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4951	    PL_last_lop = PL_last_uni = NULL;
4952	    if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
4953		update_debugger_info(PL_linestr, NULL, 0);
4954	    goto retry;
4955	}
4956	do {
4957	    fake_eof = 0;
4958	    bof = PL_rsfp ? TRUE : FALSE;
4959	    if (0) {
4960	      fake_eof:
4961		fake_eof = LEX_FAKE_EOF;
4962	    }
4963	    PL_bufptr = PL_bufend;
4964	    COPLINE_INC_WITH_HERELINES;
4965	    if (!lex_next_chunk(fake_eof)) {
4966		CopLINE_dec(PL_curcop);
4967		s = PL_bufptr;
4968		TOKEN(';');	/* not infinite loop because rsfp is NULL now */
4969	    }
4970	    CopLINE_dec(PL_curcop);
4971	    s = PL_bufptr;
4972	    /* If it looks like the start of a BOM or raw UTF-16,
4973	     * check if it in fact is. */
4974	    if (bof && PL_rsfp
4975                && (*s == 0
4976                    || *(U8*)s == BOM_UTF8_FIRST_BYTE
4977                        || *(U8*)s >= 0xFE
4978                        || s[1] == 0))
4979            {
4980		Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4981		bof = (offset == (Off_t)SvCUR(PL_linestr));
4982#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4983		/* offset may include swallowed CR */
4984		if (!bof)
4985		    bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4986#endif
4987		if (bof) {
4988		    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4989		    s = swallow_bom((U8*)s);
4990		}
4991	    }
4992	    if (PL_parser->in_pod) {
4993		/* Incest with pod. */
4994		if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4995		    sv_setpvs(PL_linestr, "");
4996		    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4997		    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4998		    PL_last_lop = PL_last_uni = NULL;
4999		    PL_parser->in_pod = 0;
5000		}
5001	    }
5002	    if (PL_rsfp || PL_parser->filtered)
5003		incline(s);
5004	} while (PL_parser->in_pod);
5005	PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5006	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5007	PL_last_lop = PL_last_uni = NULL;
5008	if (CopLINE(PL_curcop) == 1) {
5009	    while (s < PL_bufend && isSPACE(*s))
5010		s++;
5011	    if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5012		s++;
5013	    d = NULL;
5014	    if (!PL_in_eval) {
5015		if (*s == '#' && *(s+1) == '!')
5016		    d = s + 2;
5017#ifdef ALTERNATE_SHEBANG
5018		else {
5019		    static char const as[] = ALTERNATE_SHEBANG;
5020		    if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5021			d = s + (sizeof(as) - 1);
5022		}
5023#endif /* ALTERNATE_SHEBANG */
5024	    }
5025	    if (d) {
5026		char *ipath;
5027		char *ipathend;
5028
5029		while (isSPACE(*d))
5030		    d++;
5031		ipath = d;
5032		while (*d && !isSPACE(*d))
5033		    d++;
5034		ipathend = d;
5035
5036#ifdef ARG_ZERO_IS_SCRIPT
5037		if (ipathend > ipath) {
5038		    /*
5039		     * HP-UX (at least) sets argv[0] to the script name,
5040		     * which makes $^X incorrect.  And Digital UNIX and Linux,
5041		     * at least, set argv[0] to the basename of the Perl
5042		     * interpreter. So, having found "#!", we'll set it right.
5043		     */
5044                    SV* copfilesv = CopFILESV(PL_curcop);
5045                    if (copfilesv) {
5046                        SV * const x =
5047                            GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5048                                             SVt_PV)); /* $^X */
5049                        assert(SvPOK(x) || SvGMAGICAL(x));
5050                        if (sv_eq(x, copfilesv)) {
5051                            sv_setpvn(x, ipath, ipathend - ipath);
5052                            SvSETMAGIC(x);
5053                        }
5054                        else {
5055                            STRLEN blen;
5056                            STRLEN llen;
5057                            const char *bstart = SvPV_const(copfilesv, blen);
5058                            const char * const lstart = SvPV_const(x, llen);
5059                            if (llen < blen) {
5060                                bstart += blen - llen;
5061                                if (strnEQ(bstart, lstart, llen) &&	bstart[-1] == '/') {
5062                                    sv_setpvn(x, ipath, ipathend - ipath);
5063                                    SvSETMAGIC(x);
5064                                }
5065                            }
5066			}
5067                    }
5068                    else {
5069                        /* Anything to do if no copfilesv? */
5070		    }
5071		    TAINT_NOT;	/* $^X is always tainted, but that's OK */
5072		}
5073#endif /* ARG_ZERO_IS_SCRIPT */
5074
5075		/*
5076		 * Look for options.
5077		 */
5078		d = instr(s,"perl -");
5079		if (!d) {
5080		    d = instr(s,"perl");
5081#if defined(DOSISH)
5082		    /* avoid getting into infinite loops when shebang
5083		     * line contains "Perl" rather than "perl" */
5084		    if (!d) {
5085			for (d = ipathend-4; d >= ipath; --d) {
5086			    if (isALPHA_FOLD_EQ(*d, 'p')
5087				&& !ibcmp(d, "perl", 4))
5088			    {
5089				break;
5090			    }
5091			}
5092			if (d < ipath)
5093			    d = NULL;
5094		    }
5095#endif
5096		}
5097#ifdef ALTERNATE_SHEBANG
5098		/*
5099		 * If the ALTERNATE_SHEBANG on this system starts with a
5100		 * character that can be part of a Perl expression, then if
5101		 * we see it but not "perl", we're probably looking at the
5102		 * start of Perl code, not a request to hand off to some
5103		 * other interpreter.  Similarly, if "perl" is there, but
5104		 * not in the first 'word' of the line, we assume the line
5105		 * contains the start of the Perl program.
5106		 */
5107		if (d && *s != '#') {
5108		    const char *c = ipath;
5109		    while (*c && !strchr("; \t\r\n\f\v#", *c))
5110			c++;
5111		    if (c < d)
5112			d = NULL;	/* "perl" not in first word; ignore */
5113		    else
5114			*s = '#';	/* Don't try to parse shebang line */
5115		}
5116#endif /* ALTERNATE_SHEBANG */
5117		if (!d
5118                    && *s == '#'
5119                    && ipathend > ipath
5120                    && !PL_minus_c
5121                    && !instr(s,"indir")
5122                    && instr(PL_origargv[0],"perl"))
5123		{
5124		    dVAR;
5125		    char **newargv;
5126
5127		    *ipathend = '\0';
5128		    s = ipathend + 1;
5129		    while (s < PL_bufend && isSPACE(*s))
5130			s++;
5131		    if (s < PL_bufend) {
5132			Newx(newargv,PL_origargc+3,char*);
5133			newargv[1] = s;
5134			while (s < PL_bufend && !isSPACE(*s))
5135			    s++;
5136			*s = '\0';
5137			Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5138		    }
5139		    else
5140			newargv = PL_origargv;
5141		    newargv[0] = ipath;
5142		    PERL_FPU_PRE_EXEC
5143		    PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5144		    PERL_FPU_POST_EXEC
5145		    Perl_croak(aTHX_ "Can't exec %s", ipath);
5146		}
5147		if (d) {
5148		    while (*d && !isSPACE(*d))
5149			d++;
5150		    while (SPACE_OR_TAB(*d))
5151			d++;
5152
5153		    if (*d++ == '-') {
5154			const bool switches_done = PL_doswitches;
5155			const U32 oldpdb = PL_perldb;
5156			const bool oldn = PL_minus_n;
5157			const bool oldp = PL_minus_p;
5158			const char *d1 = d;
5159
5160			do {
5161			    bool baduni = FALSE;
5162			    if (*d1 == 'C') {
5163				const char *d2 = d1 + 1;
5164				if (parse_unicode_opts((const char **)&d2)
5165				    != PL_unicode)
5166				    baduni = TRUE;
5167			    }
5168			    if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5169				const char * const m = d1;
5170				while (*d1 && !isSPACE(*d1))
5171				    d1++;
5172				Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5173				      (int)(d1 - m), m);
5174			    }
5175			    d1 = moreswitches(d1);
5176			} while (d1);
5177			if (PL_doswitches && !switches_done) {
5178			    int argc = PL_origargc;
5179			    char **argv = PL_origargv;
5180			    do {
5181				argc--,argv++;
5182			    } while (argc && argv[0][0] == '-' && argv[0][1]);
5183			    init_argv_symbols(argc,argv);
5184			}
5185			if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5186                            || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5187			      /* if we have already added "LINE: while (<>) {",
5188			         we must not do it again */
5189			{
5190			    sv_setpvs(PL_linestr, "");
5191			    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5192			    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5193			    PL_last_lop = PL_last_uni = NULL;
5194			    PL_preambled = FALSE;
5195			    if (PERLDB_LINE_OR_SAVESRC)
5196				(void)gv_fetchfile(PL_origfilename);
5197			    goto retry;
5198			}
5199		    }
5200		}
5201	    }
5202	}
5203	if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5204	    PL_lex_state = LEX_FORMLINE;
5205	    force_next(FORMRBRACK);
5206	    TOKEN(';');
5207	}
5208	goto retry;
5209    case '\r':
5210#ifdef PERL_STRICT_CR
5211	Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5212	Perl_croak(aTHX_
5213      "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5214#endif
5215    case ' ': case '\t': case '\f': case '\v':
5216	s++;
5217	goto retry;
5218    case '#':
5219    case '\n':
5220	if (PL_lex_state != LEX_NORMAL
5221            || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5222        {
5223            const bool in_comment = *s == '#';
5224	    if (*s == '#' && s == PL_linestart && PL_in_eval
5225	     && !PL_rsfp && !PL_parser->filtered) {
5226		/* handle eval qq[#line 1 "foo"\n ...] */
5227		CopLINE_dec(PL_curcop);
5228		incline(s);
5229	    }
5230            d = s;
5231            while (d < PL_bufend && *d != '\n')
5232                d++;
5233            if (d < PL_bufend)
5234                d++;
5235            else if (d > PL_bufend)
5236                /* Found by Ilya: feed random input to Perl. */
5237                Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5238                           d, PL_bufend);
5239            s = d;
5240            if (in_comment && d == PL_bufend
5241                && PL_lex_state == LEX_INTERPNORMAL
5242                && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5243                && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5244            else
5245                incline(s);
5246	    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5247		PL_lex_state = LEX_FORMLINE;
5248		force_next(FORMRBRACK);
5249		TOKEN(';');
5250	    }
5251	}
5252	else {
5253            while (s < PL_bufend && *s != '\n')
5254                s++;
5255            if (s < PL_bufend)
5256                {
5257                    s++;
5258                    if (s < PL_bufend)
5259                        incline(s);
5260                }
5261            else if (s > PL_bufend)
5262                /* Found by Ilya: feed random input to Perl. */
5263                Perl_croak(aTHX_ "panic: input overflow");
5264	}
5265	goto retry;
5266    case '-':
5267	if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5268	    I32 ftst = 0;
5269	    char tmp;
5270
5271	    s++;
5272	    PL_bufptr = s;
5273	    tmp = *s++;
5274
5275	    while (s < PL_bufend && SPACE_OR_TAB(*s))
5276		s++;
5277
5278	    if (strnEQ(s,"=>",2)) {
5279		s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5280		DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5281		OPERATOR('-');		/* unary minus */
5282	    }
5283	    switch (tmp) {
5284	    case 'r': ftst = OP_FTEREAD;	break;
5285	    case 'w': ftst = OP_FTEWRITE;	break;
5286	    case 'x': ftst = OP_FTEEXEC;	break;
5287	    case 'o': ftst = OP_FTEOWNED;	break;
5288	    case 'R': ftst = OP_FTRREAD;	break;
5289	    case 'W': ftst = OP_FTRWRITE;	break;
5290	    case 'X': ftst = OP_FTREXEC;	break;
5291	    case 'O': ftst = OP_FTROWNED;	break;
5292	    case 'e': ftst = OP_FTIS;		break;
5293	    case 'z': ftst = OP_FTZERO;		break;
5294	    case 's': ftst = OP_FTSIZE;		break;
5295	    case 'f': ftst = OP_FTFILE;		break;
5296	    case 'd': ftst = OP_FTDIR;		break;
5297	    case 'l': ftst = OP_FTLINK;		break;
5298	    case 'p': ftst = OP_FTPIPE;		break;
5299	    case 'S': ftst = OP_FTSOCK;		break;
5300	    case 'u': ftst = OP_FTSUID;		break;
5301	    case 'g': ftst = OP_FTSGID;		break;
5302	    case 'k': ftst = OP_FTSVTX;		break;
5303	    case 'b': ftst = OP_FTBLK;		break;
5304	    case 'c': ftst = OP_FTCHR;		break;
5305	    case 't': ftst = OP_FTTTY;		break;
5306	    case 'T': ftst = OP_FTTEXT;		break;
5307	    case 'B': ftst = OP_FTBINARY;	break;
5308	    case 'M': case 'A': case 'C':
5309		gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5310		switch (tmp) {
5311		case 'M': ftst = OP_FTMTIME;	break;
5312		case 'A': ftst = OP_FTATIME;	break;
5313		case 'C': ftst = OP_FTCTIME;	break;
5314		default:			break;
5315		}
5316		break;
5317	    default:
5318		break;
5319	    }
5320	    if (ftst) {
5321                PL_last_uni = PL_oldbufptr;
5322		PL_last_lop_op = (OPCODE)ftst;
5323		DEBUG_T( { PerlIO_printf(Perl_debug_log,
5324                        "### Saw file test %c\n", (int)tmp);
5325		} );
5326		FTST(ftst);
5327	    }
5328	    else {
5329		/* Assume it was a minus followed by a one-letter named
5330		 * subroutine call (or a -bareword), then. */
5331		DEBUG_T( { PerlIO_printf(Perl_debug_log,
5332			"### '-%c' looked like a file test but was not\n",
5333			(int) tmp);
5334		} );
5335		s = --PL_bufptr;
5336	    }
5337	}
5338	{
5339	    const char tmp = *s++;
5340	    if (*s == tmp) {
5341		s++;
5342		if (PL_expect == XOPERATOR)
5343		    TERM(POSTDEC);
5344		else
5345		    OPERATOR(PREDEC);
5346	    }
5347	    else if (*s == '>') {
5348		s++;
5349		s = skipspace(s);
5350		if (((*s == '$' || *s == '&') && s[1] == '*')
5351		  ||(*s == '$' && s[1] == '#' && s[2] == '*')
5352		  ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5353		  ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5354		 )
5355		{
5356		    PL_expect = XPOSTDEREF;
5357		    TOKEN(ARROW);
5358		}
5359		if (isIDFIRST_lazy_if(s,UTF)) {
5360		    s = force_word(s,METHOD,FALSE,TRUE);
5361		    TOKEN(ARROW);
5362		}
5363		else if (*s == '$')
5364		    OPERATOR(ARROW);
5365		else
5366		    TERM(ARROW);
5367	    }
5368	    if (PL_expect == XOPERATOR) {
5369		if (*s == '='
5370                    && !PL_lex_allbrackets
5371                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5372                {
5373		    s--;
5374		    TOKEN(0);
5375		}
5376		Aop(OP_SUBTRACT);
5377	    }
5378	    else {
5379		if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5380		    check_uni();
5381		OPERATOR('-');		/* unary minus */
5382	    }
5383	}
5384
5385    case '+':
5386	{
5387	    const char tmp = *s++;
5388	    if (*s == tmp) {
5389		s++;
5390		if (PL_expect == XOPERATOR)
5391		    TERM(POSTINC);
5392		else
5393		    OPERATOR(PREINC);
5394	    }
5395	    if (PL_expect == XOPERATOR) {
5396		if (*s == '='
5397                    && !PL_lex_allbrackets
5398                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5399                {
5400		    s--;
5401		    TOKEN(0);
5402		}
5403		Aop(OP_ADD);
5404	    }
5405	    else {
5406		if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5407		    check_uni();
5408		OPERATOR('+');
5409	    }
5410	}
5411
5412    case '*':
5413	if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5414	if (PL_expect != XOPERATOR) {
5415	    s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5416	    PL_expect = XOPERATOR;
5417	    force_ident(PL_tokenbuf, '*');
5418	    if (!*PL_tokenbuf)
5419		PREREF('*');
5420	    TERM('*');
5421	}
5422	s++;
5423	if (*s == '*') {
5424	    s++;
5425	    if (*s == '=' && !PL_lex_allbrackets
5426                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5427            {
5428		s -= 2;
5429		TOKEN(0);
5430	    }
5431	    PWop(OP_POW);
5432	}
5433	if (*s == '='
5434            && !PL_lex_allbrackets
5435            && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5436        {
5437	    s--;
5438	    TOKEN(0);
5439	}
5440	PL_parser->saw_infix_sigil = 1;
5441	Mop(OP_MULTIPLY);
5442
5443    case '%':
5444    {
5445	if (PL_expect == XOPERATOR) {
5446	    if (s[1] == '='
5447                && !PL_lex_allbrackets
5448                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5449            {
5450		TOKEN(0);
5451            }
5452	    ++s;
5453	    PL_parser->saw_infix_sigil = 1;
5454	    Mop(OP_MODULO);
5455	}
5456	else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5457	PL_tokenbuf[0] = '%';
5458	s = scan_ident(s, PL_tokenbuf + 1,
5459		sizeof PL_tokenbuf - 1, FALSE);
5460	pl_yylval.ival = 0;
5461	if (!PL_tokenbuf[1]) {
5462	    PREREF('%');
5463	}
5464	if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5465	    if (*s == '[')
5466		PL_tokenbuf[0] = '@';
5467	}
5468	PL_expect = XOPERATOR;
5469	force_ident_maybe_lex('%');
5470	TERM('%');
5471    }
5472    case '^':
5473	d = s;
5474	bof = FEATURE_BITWISE_IS_ENABLED;
5475	if (bof && s[1] == '.')
5476	    s++;
5477	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5478		(s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5479	{
5480	    s = d;
5481	    TOKEN(0);
5482	}
5483	s++;
5484	BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5485    case '[':
5486	if (PL_lex_brackets > 100)
5487	    Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5488	PL_lex_brackstack[PL_lex_brackets++] = 0;
5489	PL_lex_allbrackets++;
5490	{
5491	    const char tmp = *s++;
5492	    OPERATOR(tmp);
5493	}
5494    case '~':
5495	if (s[1] == '~'
5496	    && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5497	{
5498	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5499		TOKEN(0);
5500	    s += 2;
5501            Perl_ck_warner_d(aTHX_
5502                packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5503                "Smartmatch is experimental");
5504	    Eop(OP_SMARTMATCH);
5505	}
5506	s++;
5507	if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5508	    s++;
5509	    BCop(OP_SCOMPLEMENT);
5510	}
5511	BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5512    case ',':
5513	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5514	    TOKEN(0);
5515	s++;
5516	OPERATOR(',');
5517    case ':':
5518	if (s[1] == ':') {
5519	    len = 0;
5520	    goto just_a_word_zero_gv;
5521	}
5522	s++;
5523        {
5524        OP *attrs;
5525
5526	switch (PL_expect) {
5527	case XOPERATOR:
5528	    if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5529		break;
5530	    PL_bufptr = s;	/* update in case we back off */
5531	    if (*s == '=') {
5532		Perl_croak(aTHX_
5533			   "Use of := for an empty attribute list is not allowed");
5534	    }
5535	    goto grabattrs;
5536	case XATTRBLOCK:
5537	    PL_expect = XBLOCK;
5538	    goto grabattrs;
5539	case XATTRTERM:
5540	    PL_expect = XTERMBLOCK;
5541	 grabattrs:
5542	    s = skipspace(s);
5543	    attrs = NULL;
5544	    while (isIDFIRST_lazy_if(s,UTF)) {
5545		I32 tmp;
5546		SV *sv;
5547		d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5548		if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5549		    if (tmp < 0) tmp = -tmp;
5550		    switch (tmp) {
5551		    case KEY_or:
5552		    case KEY_and:
5553		    case KEY_for:
5554		    case KEY_foreach:
5555		    case KEY_unless:
5556		    case KEY_if:
5557		    case KEY_while:
5558		    case KEY_until:
5559			goto got_attrs;
5560		    default:
5561			break;
5562		    }
5563		}
5564		sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5565		if (*d == '(') {
5566		    d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5567		    COPLINE_SET_FROM_MULTI_END;
5568		    if (!d) {
5569			/* MUST advance bufptr here to avoid bogus
5570			   "at end of line" context messages from yyerror().
5571			 */
5572			PL_bufptr = s + len;
5573			yyerror("Unterminated attribute parameter in attribute list");
5574			if (attrs)
5575			    op_free(attrs);
5576			sv_free(sv);
5577			return REPORT(0);	/* EOF indicator */
5578		    }
5579		}
5580		if (PL_lex_stuff) {
5581		    sv_catsv(sv, PL_lex_stuff);
5582		    attrs = op_append_elem(OP_LIST, attrs,
5583					newSVOP(OP_CONST, 0, sv));
5584		    SvREFCNT_dec_NN(PL_lex_stuff);
5585		    PL_lex_stuff = NULL;
5586		}
5587		else {
5588		    if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5589			sv_free(sv);
5590			if (PL_in_my == KEY_our) {
5591			    deprecate(":unique");
5592			}
5593			else
5594			    Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5595		    }
5596
5597		    /* NOTE: any CV attrs applied here need to be part of
5598		       the CVf_BUILTIN_ATTRS define in cv.h! */
5599		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5600			sv_free(sv);
5601			CvLVALUE_on(PL_compcv);
5602		    }
5603		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5604			sv_free(sv);
5605			deprecate(":locked");
5606		    }
5607		    else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5608			sv_free(sv);
5609			CvMETHOD_on(PL_compcv);
5610		    }
5611		    else if (!PL_in_my && len == 5
5612			  && strnEQ(SvPVX(sv), "const", len))
5613		    {
5614			sv_free(sv);
5615			Perl_ck_warner_d(aTHX_
5616			    packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5617			   ":const is experimental"
5618			);
5619			CvANONCONST_on(PL_compcv);
5620			if (!CvANON(PL_compcv))
5621			    yyerror(":const is not permitted on named "
5622				    "subroutines");
5623		    }
5624		    /* After we've set the flags, it could be argued that
5625		       we don't need to do the attributes.pm-based setting
5626		       process, and shouldn't bother appending recognized
5627		       flags.  To experiment with that, uncomment the
5628		       following "else".  (Note that's already been
5629		       uncommented.  That keeps the above-applied built-in
5630		       attributes from being intercepted (and possibly
5631		       rejected) by a package's attribute routines, but is
5632		       justified by the performance win for the common case
5633		       of applying only built-in attributes.) */
5634		    else
5635		        attrs = op_append_elem(OP_LIST, attrs,
5636					    newSVOP(OP_CONST, 0,
5637					      	    sv));
5638		}
5639		s = skipspace(d);
5640		if (*s == ':' && s[1] != ':')
5641		    s = skipspace(s+1);
5642		else if (s == d)
5643		    break;	/* require real whitespace or :'s */
5644		/* XXX losing whitespace on sequential attributes here */
5645	    }
5646	    {
5647		if (*s != ';'
5648                    && *s != '}'
5649                    && !(PL_expect == XOPERATOR
5650			 ? (*s == '=' ||  *s == ')')
5651			 : (*s == '{' ||  *s == '(')))
5652                {
5653		    const char q = ((*s == '\'') ? '"' : '\'');
5654		    /* If here for an expression, and parsed no attrs, back
5655		       off. */
5656		    if (PL_expect == XOPERATOR && !attrs) {
5657			s = PL_bufptr;
5658			break;
5659		    }
5660		    /* MUST advance bufptr here to avoid bogus "at end of line"
5661		       context messages from yyerror().
5662		    */
5663		    PL_bufptr = s;
5664		    yyerror( (const char *)
5665			     (*s
5666			      ? Perl_form(aTHX_ "Invalid separator character "
5667					  "%c%c%c in attribute list", q, *s, q)
5668			      : "Unterminated attribute list" ) );
5669		    if (attrs)
5670			op_free(attrs);
5671		    OPERATOR(':');
5672		}
5673	    }
5674	got_attrs:
5675	    if (attrs) {
5676		NEXTVAL_NEXTTOKE.opval = attrs;
5677		force_next(THING);
5678	    }
5679	    TOKEN(COLONATTR);
5680	}
5681	}
5682	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5683	    s--;
5684	    TOKEN(0);
5685	}
5686	PL_lex_allbrackets--;
5687	OPERATOR(':');
5688    case '(':
5689	s++;
5690	if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5691	    PL_oldbufptr = PL_oldoldbufptr;		/* allow print(STDOUT 123) */
5692	else
5693	    PL_expect = XTERM;
5694	s = skipspace(s);
5695	PL_lex_allbrackets++;
5696	TOKEN('(');
5697    case ';':
5698	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5699	    TOKEN(0);
5700	CLINE;
5701	s++;
5702	PL_expect = XSTATE;
5703	TOKEN(';');
5704    case ')':
5705	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5706	    TOKEN(0);
5707	s++;
5708	PL_lex_allbrackets--;
5709	s = skipspace(s);
5710	if (*s == '{')
5711	    PREBLOCK(')');
5712	TERM(')');
5713    case ']':
5714	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5715	    TOKEN(0);
5716	s++;
5717	if (PL_lex_brackets <= 0)
5718	    /* diag_listed_as: Unmatched right %s bracket */
5719	    yyerror("Unmatched right square bracket");
5720	else
5721	    --PL_lex_brackets;
5722	PL_lex_allbrackets--;
5723	if (PL_lex_state == LEX_INTERPNORMAL) {
5724	    if (PL_lex_brackets == 0) {
5725		if (*s == '-' && s[1] == '>')
5726		    PL_lex_state = LEX_INTERPENDMAYBE;
5727		else if (*s != '[' && *s != '{')
5728		    PL_lex_state = LEX_INTERPEND;
5729	    }
5730	}
5731	TERM(']');
5732    case '{':
5733	s++;
5734      leftbracket:
5735	if (PL_lex_brackets > 100) {
5736	    Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5737	}
5738	switch (PL_expect) {
5739	case XTERM:
5740	case XTERMORDORDOR:
5741	    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5742	    PL_lex_allbrackets++;
5743	    OPERATOR(HASHBRACK);
5744	case XOPERATOR:
5745	    while (s < PL_bufend && SPACE_OR_TAB(*s))
5746		s++;
5747	    d = s;
5748	    PL_tokenbuf[0] = '\0';
5749	    if (d < PL_bufend && *d == '-') {
5750		PL_tokenbuf[0] = '-';
5751		d++;
5752		while (d < PL_bufend && SPACE_OR_TAB(*d))
5753		    d++;
5754	    }
5755	    if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5756		d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5757			      FALSE, &len);
5758		while (d < PL_bufend && SPACE_OR_TAB(*d))
5759		    d++;
5760		if (*d == '}') {
5761		    const char minus = (PL_tokenbuf[0] == '-');
5762		    s = force_word(s + minus, WORD, FALSE, TRUE);
5763		    if (minus)
5764			force_next('-');
5765		}
5766	    }
5767	    /* FALLTHROUGH */
5768	case XATTRTERM:
5769	case XTERMBLOCK:
5770	    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5771	    PL_lex_allbrackets++;
5772	    PL_expect = XSTATE;
5773	    break;
5774	case XATTRBLOCK:
5775	case XBLOCK:
5776	    PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5777	    PL_lex_allbrackets++;
5778	    PL_expect = XSTATE;
5779	    break;
5780	case XBLOCKTERM:
5781	    PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5782	    PL_lex_allbrackets++;
5783	    PL_expect = XSTATE;
5784	    break;
5785	default: {
5786		const char *t;
5787		if (PL_oldoldbufptr == PL_last_lop)
5788		    PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5789		else
5790		    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5791		PL_lex_allbrackets++;
5792		s = skipspace(s);
5793		if (*s == '}') {
5794		    if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5795			PL_expect = XTERM;
5796			/* This hack is to get the ${} in the message. */
5797			PL_bufptr = s+1;
5798			yyerror("syntax error");
5799			break;
5800		    }
5801		    OPERATOR(HASHBRACK);
5802		}
5803		if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5804		    /* ${...} or @{...} etc., but not print {...}
5805		     * Skip the disambiguation and treat this as a block.
5806		     */
5807		    goto block_expectation;
5808		}
5809		/* This hack serves to disambiguate a pair of curlies
5810		 * as being a block or an anon hash.  Normally, expectation
5811		 * determines that, but in cases where we're not in a
5812		 * position to expect anything in particular (like inside
5813		 * eval"") we have to resolve the ambiguity.  This code
5814		 * covers the case where the first term in the curlies is a
5815		 * quoted string.  Most other cases need to be explicitly
5816		 * disambiguated by prepending a "+" before the opening
5817		 * curly in order to force resolution as an anon hash.
5818		 *
5819		 * XXX should probably propagate the outer expectation
5820		 * into eval"" to rely less on this hack, but that could
5821		 * potentially break current behavior of eval"".
5822		 * GSAR 97-07-21
5823		 */
5824		t = s;
5825		if (*s == '\'' || *s == '"' || *s == '`') {
5826		    /* common case: get past first string, handling escapes */
5827		    for (t++; t < PL_bufend && *t != *s;)
5828			if (*t++ == '\\')
5829			    t++;
5830		    t++;
5831		}
5832		else if (*s == 'q') {
5833		    if (++t < PL_bufend
5834			&& (!isWORDCHAR(*t)
5835			    || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5836				&& !isWORDCHAR(*t))))
5837		    {
5838			/* skip q//-like construct */
5839			const char *tmps;
5840			char open, close, term;
5841			I32 brackets = 1;
5842
5843			while (t < PL_bufend && isSPACE(*t))
5844			    t++;
5845			/* check for q => */
5846			if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5847			    OPERATOR(HASHBRACK);
5848			}
5849			term = *t;
5850			open = term;
5851			if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5852			    term = tmps[5];
5853			close = term;
5854			if (open == close)
5855			    for (t++; t < PL_bufend; t++) {
5856				if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5857				    t++;
5858				else if (*t == open)
5859				    break;
5860			    }
5861			else {
5862			    for (t++; t < PL_bufend; t++) {
5863				if (*t == '\\' && t+1 < PL_bufend)
5864				    t++;
5865				else if (*t == close && --brackets <= 0)
5866				    break;
5867				else if (*t == open)
5868				    brackets++;
5869			    }
5870			}
5871			t++;
5872		    }
5873		    else
5874			/* skip plain q word */
5875			while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5876			    t += UTF ? UTF8SKIP(t) : 1;
5877		}
5878		else if (isWORDCHAR_lazy_if(t,UTF)) {
5879		    t += UTF ? UTF8SKIP(t) : 1;
5880		    while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5881			t += UTF ? UTF8SKIP(t) : 1;
5882		}
5883		while (t < PL_bufend && isSPACE(*t))
5884		    t++;
5885		/* if comma follows first term, call it an anon hash */
5886		/* XXX it could be a comma expression with loop modifiers */
5887		if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5888				   || (*t == '=' && t[1] == '>')))
5889		    OPERATOR(HASHBRACK);
5890		if (PL_expect == XREF)
5891		{
5892		  block_expectation:
5893		    /* If there is an opening brace or 'sub:', treat it
5894		       as a term to make ${{...}}{k} and &{sub:attr...}
5895		       dwim.  Otherwise, treat it as a statement, so
5896		       map {no strict; ...} works.
5897		     */
5898		    s = skipspace(s);
5899		    if (*s == '{') {
5900			PL_expect = XTERM;
5901			break;
5902		    }
5903		    if (strnEQ(s, "sub", 3)) {
5904			d = s + 3;
5905			d = skipspace(d);
5906			if (*d == ':') {
5907			    PL_expect = XTERM;
5908			    break;
5909			}
5910		    }
5911		    PL_expect = XSTATE;
5912		}
5913		else {
5914		    PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5915		    PL_expect = XSTATE;
5916		}
5917	    }
5918	    break;
5919	}
5920	pl_yylval.ival = CopLINE(PL_curcop);
5921	PL_copline = NOLINE;   /* invalidate current command line number */
5922	TOKEN(formbrack ? '=' : '{');
5923    case '}':
5924	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5925	    TOKEN(0);
5926      rightbracket:
5927	s++;
5928	if (PL_lex_brackets <= 0)
5929	    /* diag_listed_as: Unmatched right %s bracket */
5930	    yyerror("Unmatched right curly bracket");
5931	else
5932	    PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5933	PL_lex_allbrackets--;
5934	if (PL_lex_state == LEX_INTERPNORMAL) {
5935	    if (PL_lex_brackets == 0) {
5936		if (PL_expect & XFAKEBRACK) {
5937		    PL_expect &= XENUMMASK;
5938		    PL_lex_state = LEX_INTERPEND;
5939		    PL_bufptr = s;
5940		    return yylex();	/* ignore fake brackets */
5941		}
5942		if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5943		 && SvEVALED(PL_lex_repl))
5944		    PL_lex_state = LEX_INTERPEND;
5945		else if (*s == '-' && s[1] == '>')
5946		    PL_lex_state = LEX_INTERPENDMAYBE;
5947		else if (*s != '[' && *s != '{')
5948		    PL_lex_state = LEX_INTERPEND;
5949	    }
5950	}
5951	if (PL_expect & XFAKEBRACK) {
5952	    PL_expect &= XENUMMASK;
5953	    PL_bufptr = s;
5954	    return yylex();		/* ignore fake brackets */
5955	}
5956	force_next(formbrack ? '.' : '}');
5957	if (formbrack) LEAVE;
5958	if (formbrack == 2) { /* means . where arguments were expected */
5959	    force_next(';');
5960	    TOKEN(FORMRBRACK);
5961	}
5962	TOKEN(';');
5963    case '&':
5964	if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5965	s++;
5966	if (*s++ == '&') {
5967	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5968		    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5969		s -= 2;
5970		TOKEN(0);
5971	    }
5972	    AOPERATOR(ANDAND);
5973	}
5974	s--;
5975	if (PL_expect == XOPERATOR) {
5976	    if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5977		&& isIDFIRST_lazy_if(s,UTF))
5978	    {
5979		CopLINE_dec(PL_curcop);
5980		Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5981		CopLINE_inc(PL_curcop);
5982	    }
5983	    d = s;
5984	    if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5985		s++;
5986	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5987		    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5988		s = d;
5989		s--;
5990		TOKEN(0);
5991	    }
5992	    if (d == s) {
5993		PL_parser->saw_infix_sigil = 1;
5994		BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
5995	    }
5996	    else
5997		BAop(OP_SBIT_AND);
5998	}
5999
6000	PL_tokenbuf[0] = '&';
6001	s = scan_ident(s - 1, PL_tokenbuf + 1,
6002		       sizeof PL_tokenbuf - 1, TRUE);
6003	pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6004	if (PL_tokenbuf[1]) {
6005	    force_ident_maybe_lex('&');
6006	}
6007	else
6008	    PREREF('&');
6009	TERM('&');
6010
6011    case '|':
6012	s++;
6013	if (*s++ == '|') {
6014	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6015		    (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6016		s -= 2;
6017		TOKEN(0);
6018	    }
6019	    AOPERATOR(OROR);
6020	}
6021	s--;
6022	d = s;
6023	if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6024	    s++;
6025	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6026		(*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6027	    s = d - 1;
6028	    TOKEN(0);
6029	}
6030	BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6031    case '=':
6032	s++;
6033	{
6034	    const char tmp = *s++;
6035	    if (tmp == '=') {
6036		if (!PL_lex_allbrackets
6037                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6038                {
6039		    s -= 2;
6040		    TOKEN(0);
6041		}
6042		Eop(OP_EQ);
6043	    }
6044	    if (tmp == '>') {
6045		if (!PL_lex_allbrackets
6046                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6047                {
6048		    s -= 2;
6049		    TOKEN(0);
6050		}
6051		OPERATOR(',');
6052	    }
6053	    if (tmp == '~')
6054		PMop(OP_MATCH);
6055	    if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6056		&& strchr("+-*/%.^&|<",tmp))
6057		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6058			    "Reversed %c= operator",(int)tmp);
6059	    s--;
6060	    if (PL_expect == XSTATE
6061                && isALPHA(tmp)
6062                && (s == PL_linestart+1 || s[-2] == '\n') )
6063            {
6064                if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6065                    || PL_lex_state != LEX_NORMAL) {
6066                    d = PL_bufend;
6067                    while (s < d) {
6068                        if (*s++ == '\n') {
6069                            incline(s);
6070                            if (strnEQ(s,"=cut",4)) {
6071                                s = strchr(s,'\n');
6072                                if (s)
6073                                    s++;
6074                                else
6075                                    s = d;
6076                                incline(s);
6077                                goto retry;
6078                            }
6079                        }
6080                    }
6081                    goto retry;
6082                }
6083                s = PL_bufend;
6084                PL_parser->in_pod = 1;
6085                goto retry;
6086            }
6087	}
6088	if (PL_expect == XBLOCK) {
6089	    const char *t = s;
6090#ifdef PERL_STRICT_CR
6091	    while (SPACE_OR_TAB(*t))
6092#else
6093	    while (SPACE_OR_TAB(*t) || *t == '\r')
6094#endif
6095		t++;
6096	    if (*t == '\n' || *t == '#') {
6097		formbrack = 1;
6098		ENTER;
6099		SAVEI8(PL_parser->form_lex_state);
6100		SAVEI32(PL_lex_formbrack);
6101		PL_parser->form_lex_state = PL_lex_state;
6102		PL_lex_formbrack = PL_lex_brackets + 1;
6103		goto leftbracket;
6104	    }
6105	}
6106	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6107	    s--;
6108	    TOKEN(0);
6109	}
6110	pl_yylval.ival = 0;
6111	OPERATOR(ASSIGNOP);
6112    case '!':
6113	s++;
6114	{
6115	    const char tmp = *s++;
6116	    if (tmp == '=') {
6117		/* was this !=~ where !~ was meant?
6118		 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6119
6120		if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6121		    const char *t = s+1;
6122
6123		    while (t < PL_bufend && isSPACE(*t))
6124			++t;
6125
6126		    if (*t == '/' || *t == '?'
6127                        || ((*t == 'm' || *t == 's' || *t == 'y')
6128			    && !isWORDCHAR(t[1]))
6129                        || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6130			Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6131				    "!=~ should be !~");
6132		}
6133		if (!PL_lex_allbrackets
6134                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6135                {
6136		    s -= 2;
6137		    TOKEN(0);
6138		}
6139		Eop(OP_NE);
6140	    }
6141	    if (tmp == '~')
6142		PMop(OP_NOT);
6143	}
6144	s--;
6145	OPERATOR('!');
6146    case '<':
6147	if (PL_expect != XOPERATOR) {
6148	    if (s[1] != '<' && !strchr(s,'>'))
6149		check_uni();
6150	    if (s[1] == '<' && s[2] != '>')
6151		s = scan_heredoc(s);
6152	    else
6153		s = scan_inputsymbol(s);
6154	    PL_expect = XOPERATOR;
6155	    TOKEN(sublex_start());
6156	}
6157	s++;
6158	{
6159	    char tmp = *s++;
6160	    if (tmp == '<') {
6161		if (*s == '=' && !PL_lex_allbrackets
6162                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6163                {
6164		    s -= 2;
6165		    TOKEN(0);
6166		}
6167		SHop(OP_LEFT_SHIFT);
6168	    }
6169	    if (tmp == '=') {
6170		tmp = *s++;
6171		if (tmp == '>') {
6172		    if (!PL_lex_allbrackets
6173                        && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6174                    {
6175			s -= 3;
6176			TOKEN(0);
6177		    }
6178		    Eop(OP_NCMP);
6179		}
6180		s--;
6181		if (!PL_lex_allbrackets
6182                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6183                {
6184		    s -= 2;
6185		    TOKEN(0);
6186		}
6187		Rop(OP_LE);
6188	    }
6189	}
6190	s--;
6191	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6192	    s--;
6193	    TOKEN(0);
6194	}
6195	Rop(OP_LT);
6196    case '>':
6197	s++;
6198	{
6199	    const char tmp = *s++;
6200	    if (tmp == '>') {
6201		if (*s == '=' && !PL_lex_allbrackets
6202                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6203                {
6204		    s -= 2;
6205		    TOKEN(0);
6206		}
6207		SHop(OP_RIGHT_SHIFT);
6208	    }
6209	    else if (tmp == '=') {
6210		if (!PL_lex_allbrackets
6211                    && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6212                {
6213		    s -= 2;
6214		    TOKEN(0);
6215		}
6216		Rop(OP_GE);
6217	    }
6218	}
6219	s--;
6220	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6221	    s--;
6222	    TOKEN(0);
6223	}
6224	Rop(OP_GT);
6225
6226    case '$':
6227	CLINE;
6228
6229	if (PL_expect == XOPERATOR) {
6230	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6231		return deprecate_commaless_var_list();
6232	    }
6233	}
6234	else if (PL_expect == XPOSTDEREF) {
6235	    if (s[1] == '#') {
6236		s++;
6237		POSTDEREF(DOLSHARP);
6238	    }
6239	    POSTDEREF('$');
6240	}
6241
6242	if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6243	    PL_tokenbuf[0] = '@';
6244	    s = scan_ident(s + 1, PL_tokenbuf + 1,
6245			   sizeof PL_tokenbuf - 1, FALSE);
6246            if (PL_expect == XOPERATOR) {
6247                d = s;
6248                if (PL_bufptr > s) {
6249                    d = PL_bufptr-1;
6250                    PL_bufptr = PL_oldbufptr;
6251                }
6252		no_op("Array length", d);
6253            }
6254	    if (!PL_tokenbuf[1])
6255		PREREF(DOLSHARP);
6256	    PL_expect = XOPERATOR;
6257	    force_ident_maybe_lex('#');
6258	    TOKEN(DOLSHARP);
6259	}
6260
6261	PL_tokenbuf[0] = '$';
6262	s = scan_ident(s, PL_tokenbuf + 1,
6263		       sizeof PL_tokenbuf - 1, FALSE);
6264	if (PL_expect == XOPERATOR) {
6265	    d = s;
6266	    if (PL_bufptr > s) {
6267		d = PL_bufptr-1;
6268		PL_bufptr = PL_oldbufptr;
6269	    }
6270	    no_op("Scalar", d);
6271	}
6272	if (!PL_tokenbuf[1]) {
6273	    if (s == PL_bufend)
6274		yyerror("Final $ should be \\$ or $name");
6275	    PREREF('$');
6276	}
6277
6278	d = s;
6279	{
6280	    const char tmp = *s;
6281	    if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6282		s = skipspace(s);
6283
6284	    if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6285		&& intuit_more(s)) {
6286		if (*s == '[') {
6287		    PL_tokenbuf[0] = '@';
6288		    if (ckWARN(WARN_SYNTAX)) {
6289			char *t = s+1;
6290
6291			while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6292			    t += UTF ? UTF8SKIP(t) : 1;
6293			if (*t++ == ',') {
6294			    PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6295			    while (t < PL_bufend && *t != ']')
6296				t++;
6297			    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6298					"Multidimensional syntax %"UTF8f" not supported",
6299                                        UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6300			}
6301		    }
6302		}
6303		else if (*s == '{') {
6304		    char *t;
6305		    PL_tokenbuf[0] = '%';
6306		    if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6307			&& (t = strchr(s, '}')) && (t = strchr(t, '=')))
6308			{
6309			    char tmpbuf[sizeof PL_tokenbuf];
6310			    do {
6311				t++;
6312			    } while (isSPACE(*t));
6313			    if (isIDFIRST_lazy_if(t,UTF)) {
6314				STRLEN len;
6315				t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6316					      &len);
6317				while (isSPACE(*t))
6318				    t++;
6319				if (*t == ';'
6320                                       && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6321				    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6322					"You need to quote \"%"UTF8f"\"",
6323					 UTF8fARG(UTF, len, tmpbuf));
6324			    }
6325			}
6326		}
6327	    }
6328
6329	    PL_expect = XOPERATOR;
6330	    if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6331		const bool islop = (PL_last_lop == PL_oldoldbufptr);
6332		if (!islop || PL_last_lop_op == OP_GREPSTART)
6333		    PL_expect = XOPERATOR;
6334		else if (strchr("$@\"'`q", *s))
6335		    PL_expect = XTERM;		/* e.g. print $fh "foo" */
6336		else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6337		    PL_expect = XTERM;		/* e.g. print $fh &sub */
6338		else if (isIDFIRST_lazy_if(s,UTF)) {
6339		    char tmpbuf[sizeof PL_tokenbuf];
6340		    int t2;
6341		    scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6342		    if ((t2 = keyword(tmpbuf, len, 0))) {
6343			/* binary operators exclude handle interpretations */
6344			switch (t2) {
6345			case -KEY_x:
6346			case -KEY_eq:
6347			case -KEY_ne:
6348			case -KEY_gt:
6349			case -KEY_lt:
6350			case -KEY_ge:
6351			case -KEY_le:
6352			case -KEY_cmp:
6353			    break;
6354			default:
6355			    PL_expect = XTERM;	/* e.g. print $fh length() */
6356			    break;
6357			}
6358		    }
6359		    else {
6360			PL_expect = XTERM;	/* e.g. print $fh subr() */
6361		    }
6362		}
6363		else if (isDIGIT(*s))
6364		    PL_expect = XTERM;		/* e.g. print $fh 3 */
6365		else if (*s == '.' && isDIGIT(s[1]))
6366		    PL_expect = XTERM;		/* e.g. print $fh .3 */
6367		else if ((*s == '?' || *s == '-' || *s == '+')
6368			 && !isSPACE(s[1]) && s[1] != '=')
6369		    PL_expect = XTERM;		/* e.g. print $fh -1 */
6370		else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6371			 && s[1] != '/')
6372		    PL_expect = XTERM;		/* e.g. print $fh /.../
6373						   XXX except DORDOR operator
6374						*/
6375		else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6376			 && s[2] != '=')
6377		    PL_expect = XTERM;		/* print $fh <<"EOF" */
6378	    }
6379	}
6380	force_ident_maybe_lex('$');
6381	TOKEN('$');
6382
6383    case '@':
6384        if (PL_expect == XPOSTDEREF)
6385            POSTDEREF('@');
6386	PL_tokenbuf[0] = '@';
6387	s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6388	if (PL_expect == XOPERATOR) {
6389            d = s;
6390            if (PL_bufptr > s) {
6391                d = PL_bufptr-1;
6392                PL_bufptr = PL_oldbufptr;
6393            }
6394	    no_op("Array", d);
6395        }
6396	pl_yylval.ival = 0;
6397	if (!PL_tokenbuf[1]) {
6398	    PREREF('@');
6399	}
6400	if (PL_lex_state == LEX_NORMAL)
6401	    s = skipspace(s);
6402	if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6403	    if (*s == '{')
6404		PL_tokenbuf[0] = '%';
6405
6406	    /* Warn about @ where they meant $. */
6407	    if (*s == '[' || *s == '{') {
6408		if (ckWARN(WARN_SYNTAX)) {
6409		    S_check_scalar_slice(aTHX_ s);
6410		}
6411	    }
6412	}
6413	PL_expect = XOPERATOR;
6414	force_ident_maybe_lex('@');
6415	TERM('@');
6416
6417     case '/':			/* may be division, defined-or, or pattern */
6418	if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6419	    if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6420		    (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6421		TOKEN(0);
6422	    s += 2;
6423	    AOPERATOR(DORDOR);
6424	}
6425	else if (PL_expect == XOPERATOR) {
6426	    s++;
6427	    if (*s == '=' && !PL_lex_allbrackets
6428                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6429            {
6430		s--;
6431		TOKEN(0);
6432	    }
6433	    Mop(OP_DIVIDE);
6434        }
6435	else {
6436	    /* Disable warning on "study /blah/" */
6437	    if (PL_oldoldbufptr == PL_last_uni
6438	     && (*PL_last_uni != 's' || s - PL_last_uni < 5
6439	         || memNE(PL_last_uni, "study", 5)
6440	         || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6441	     ))
6442	        check_uni();
6443	    s = scan_pat(s,OP_MATCH);
6444	    TERM(sublex_start());
6445	}
6446
6447     case '?':			/* conditional */
6448	s++;
6449	if (!PL_lex_allbrackets
6450            && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6451        {
6452	    s--;
6453	    TOKEN(0);
6454	}
6455	PL_lex_allbrackets++;
6456	OPERATOR('?');
6457
6458    case '.':
6459	if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6460#ifdef PERL_STRICT_CR
6461	    && s[1] == '\n'
6462#else
6463	    && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6464#endif
6465	    && (s == PL_linestart || s[-1] == '\n') )
6466	{
6467	    PL_expect = XSTATE;
6468	    formbrack = 2; /* dot seen where arguments expected */
6469	    goto rightbracket;
6470	}
6471	if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6472	    s += 3;
6473	    OPERATOR(YADAYADA);
6474	}
6475	if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6476	    char tmp = *s++;
6477	    if (*s == tmp) {
6478		if (!PL_lex_allbrackets
6479                    && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6480                {
6481		    s--;
6482		    TOKEN(0);
6483		}
6484		s++;
6485		if (*s == tmp) {
6486		    s++;
6487		    pl_yylval.ival = OPf_SPECIAL;
6488		}
6489		else
6490		    pl_yylval.ival = 0;
6491		OPERATOR(DOTDOT);
6492	    }
6493	    if (*s == '=' && !PL_lex_allbrackets
6494                && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6495            {
6496		s--;
6497		TOKEN(0);
6498	    }
6499	    Aop(OP_CONCAT);
6500	}
6501	/* FALLTHROUGH */
6502    case '0': case '1': case '2': case '3': case '4':
6503    case '5': case '6': case '7': case '8': case '9':
6504	s = scan_num(s, &pl_yylval);
6505	DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6506	if (PL_expect == XOPERATOR)
6507	    no_op("Number",s);
6508	TERM(THING);
6509
6510    case '\'':
6511	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6512	if (!s)
6513	    missingterm(NULL);
6514	COPLINE_SET_FROM_MULTI_END;
6515	DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6516	if (PL_expect == XOPERATOR) {
6517	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6518		return deprecate_commaless_var_list();
6519	    }
6520	    else
6521		no_op("String",s);
6522	}
6523	pl_yylval.ival = OP_CONST;
6524	TERM(sublex_start());
6525
6526    case '"':
6527	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6528	DEBUG_T( {
6529	    if (s)
6530		printbuf("### Saw string before %s\n", s);
6531	    else
6532		PerlIO_printf(Perl_debug_log,
6533			     "### Saw unterminated string\n");
6534	} );
6535	if (PL_expect == XOPERATOR) {
6536	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6537		return deprecate_commaless_var_list();
6538	    }
6539	    else
6540		no_op("String",s);
6541	}
6542	if (!s)
6543	    missingterm(NULL);
6544	pl_yylval.ival = OP_CONST;
6545	/* FIXME. I think that this can be const if char *d is replaced by
6546	   more localised variables.  */
6547	for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6548	    if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6549		pl_yylval.ival = OP_STRINGIFY;
6550		break;
6551	    }
6552	}
6553	if (pl_yylval.ival == OP_CONST)
6554	    COPLINE_SET_FROM_MULTI_END;
6555	TERM(sublex_start());
6556
6557    case '`':
6558	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6559	DEBUG_T( {
6560            if (s)
6561                printbuf("### Saw backtick string before %s\n", s);
6562            else
6563		PerlIO_printf(Perl_debug_log,
6564			     "### Saw unterminated backtick string\n");
6565        } );
6566	if (PL_expect == XOPERATOR)
6567	    no_op("Backticks",s);
6568	if (!s)
6569	    missingterm(NULL);
6570	pl_yylval.ival = OP_BACKTICK;
6571	TERM(sublex_start());
6572
6573    case '\\':
6574	s++;
6575	if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6576	 && isDIGIT(*s))
6577	    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6578			   *s, *s);
6579	if (PL_expect == XOPERATOR)
6580	    no_op("Backslash",s);
6581	OPERATOR(REFGEN);
6582
6583    case 'v':
6584	if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6585	    char *start = s + 2;
6586	    while (isDIGIT(*start) || *start == '_')
6587		start++;
6588	    if (*start == '.' && isDIGIT(start[1])) {
6589		s = scan_num(s, &pl_yylval);
6590		TERM(THING);
6591	    }
6592	    else if ((*start == ':' && start[1] == ':')
6593		  || (PL_expect == XSTATE && *start == ':'))
6594		goto keylookup;
6595	    else if (PL_expect == XSTATE) {
6596		d = start;
6597		while (d < PL_bufend && isSPACE(*d)) d++;
6598		if (*d == ':') goto keylookup;
6599	    }
6600	    /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6601	    if (!isALPHA(*start) && (PL_expect == XTERM
6602			|| PL_expect == XREF || PL_expect == XSTATE
6603			|| PL_expect == XTERMORDORDOR)) {
6604		GV *const gv = gv_fetchpvn_flags(s, start - s,
6605                                                    UTF ? SVf_UTF8 : 0, SVt_PVCV);
6606		if (!gv) {
6607		    s = scan_num(s, &pl_yylval);
6608		    TERM(THING);
6609		}
6610	    }
6611	}
6612	goto keylookup;
6613    case 'x':
6614	if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6615	    s++;
6616	    Mop(OP_REPEAT);
6617	}
6618	goto keylookup;
6619
6620    case '_':
6621    case 'a': case 'A':
6622    case 'b': case 'B':
6623    case 'c': case 'C':
6624    case 'd': case 'D':
6625    case 'e': case 'E':
6626    case 'f': case 'F':
6627    case 'g': case 'G':
6628    case 'h': case 'H':
6629    case 'i': case 'I':
6630    case 'j': case 'J':
6631    case 'k': case 'K':
6632    case 'l': case 'L':
6633    case 'm': case 'M':
6634    case 'n': case 'N':
6635    case 'o': case 'O':
6636    case 'p': case 'P':
6637    case 'q': case 'Q':
6638    case 'r': case 'R':
6639    case 's': case 'S':
6640    case 't': case 'T':
6641    case 'u': case 'U':
6642	      case 'V':
6643    case 'w': case 'W':
6644	      case 'X':
6645    case 'y': case 'Y':
6646    case 'z': case 'Z':
6647
6648      keylookup: {
6649	bool anydelim;
6650	bool lex;
6651	I32 tmp;
6652	SV *sv;
6653	CV *cv;
6654	PADOFFSET off;
6655	OP *rv2cv_op;
6656
6657	lex = FALSE;
6658	orig_keyword = 0;
6659	off = 0;
6660	sv = NULL;
6661	cv = NULL;
6662	gv = NULL;
6663	gvp = NULL;
6664	rv2cv_op = NULL;
6665
6666	PL_bufptr = s;
6667	s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6668
6669	/* Some keywords can be followed by any delimiter, including ':' */
6670	anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6671
6672	/* x::* is just a word, unless x is "CORE" */
6673	if (!anydelim && *s == ':' && s[1] == ':') {
6674	    if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6675	    goto just_a_word;
6676	}
6677
6678	d = s;
6679	while (d < PL_bufend && isSPACE(*d))
6680		d++;	/* no comments skipped here, or s### is misparsed */
6681
6682	/* Is this a word before a => operator? */
6683	if (*d == '=' && d[1] == '>') {
6684	  fat_arrow:
6685	    CLINE;
6686	    pl_yylval.opval
6687		= (OP*)newSVOP(OP_CONST, 0,
6688			       S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6689	    pl_yylval.opval->op_private = OPpCONST_BARE;
6690	    TERM(WORD);
6691	}
6692
6693	/* Check for plugged-in keyword */
6694	{
6695	    OP *o;
6696	    int result;
6697	    char *saved_bufptr = PL_bufptr;
6698	    PL_bufptr = s;
6699	    result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6700	    s = PL_bufptr;
6701	    if (result == KEYWORD_PLUGIN_DECLINE) {
6702		/* not a plugged-in keyword */
6703		PL_bufptr = saved_bufptr;
6704	    } else if (result == KEYWORD_PLUGIN_STMT) {
6705		pl_yylval.opval = o;
6706		CLINE;
6707		if (!PL_nexttoke) PL_expect = XSTATE;
6708		return REPORT(PLUGSTMT);
6709	    } else if (result == KEYWORD_PLUGIN_EXPR) {
6710		pl_yylval.opval = o;
6711		CLINE;
6712		if (!PL_nexttoke) PL_expect = XOPERATOR;
6713		return REPORT(PLUGEXPR);
6714	    } else {
6715		Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6716					PL_tokenbuf);
6717	    }
6718	}
6719
6720	/* Check for built-in keyword */
6721	tmp = keyword(PL_tokenbuf, len, 0);
6722
6723	/* Is this a label? */
6724	if (!anydelim && PL_expect == XSTATE
6725	      && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6726	    s = d + 1;
6727	    pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6728	    pl_yylval.pval[len] = '\0';
6729	    pl_yylval.pval[len+1] = UTF ? 1 : 0;
6730	    CLINE;
6731	    TOKEN(LABEL);
6732	}
6733
6734	/* Check for lexical sub */
6735	if (PL_expect != XOPERATOR) {
6736	    char tmpbuf[sizeof PL_tokenbuf + 1];
6737	    *tmpbuf = '&';
6738	    Copy(PL_tokenbuf, tmpbuf+1, len, char);
6739	    off = pad_findmy_pvn(tmpbuf, len+1, 0);
6740	    if (off != NOT_IN_PAD) {
6741		assert(off); /* we assume this is boolean-true below */
6742		if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6743		    HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6744		    HEK * const stashname = HvNAME_HEK(stash);
6745		    sv = newSVhek(stashname);
6746                    sv_catpvs(sv, "::");
6747                    sv_catpvn_flags(sv, PL_tokenbuf, len,
6748				    (UTF ? SV_CATUTF8 : SV_CATBYTES));
6749		    gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6750				    SVt_PVCV);
6751		    off = 0;
6752		    if (!gv) {
6753			sv_free(sv);
6754			sv = NULL;
6755			goto just_a_word;
6756		    }
6757		}
6758		else {
6759		    rv2cv_op = newOP(OP_PADANY, 0);
6760		    rv2cv_op->op_targ = off;
6761		    cv = find_lexical_cv(off);
6762		}
6763		lex = TRUE;
6764		goto just_a_word;
6765	    }
6766	    off = 0;
6767	}
6768
6769	if (tmp < 0) {			/* second-class keyword? */
6770	    GV *ogv = NULL;	/* override (winner) */
6771	    GV *hgv = NULL;	/* hidden (loser) */
6772	    if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6773		CV *cv;
6774		if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6775					    (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6776					    SVt_PVCV))
6777                    && (cv = GvCVu(gv)))
6778		{
6779		    if (GvIMPORTED_CV(gv))
6780			ogv = gv;
6781		    else if (! CvMETHOD(cv))
6782			hgv = gv;
6783		}
6784		if (!ogv
6785                    && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6786                                                              len, FALSE))
6787                    && (gv = *gvp)
6788                    && (isGV_with_GP(gv)
6789			? GvCVu(gv) && GvIMPORTED_CV(gv)
6790			:   SvPCS_IMPORTED(gv)
6791			&& (gv_init(gv, PL_globalstash, PL_tokenbuf,
6792                                                                 len, 0), 1)))
6793		{
6794		    ogv = gv;
6795		}
6796	    }
6797	    if (ogv) {
6798		orig_keyword = tmp;
6799		tmp = 0;		/* overridden by import or by GLOBAL */
6800	    }
6801	    else if (gv && !gvp
6802		     && -tmp==KEY_lock	/* XXX generalizable kludge */
6803		     && GvCVu(gv))
6804	    {
6805		tmp = 0;		/* any sub overrides "weak" keyword */
6806	    }
6807	    else {			/* no override */
6808		tmp = -tmp;
6809		if (tmp == KEY_dump) {
6810		    Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6811				   "dump() better written as CORE::dump()");
6812		}
6813		gv = NULL;
6814		gvp = 0;
6815		if (hgv && tmp != KEY_x)	/* never ambiguous */
6816		    Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6817				   "Ambiguous call resolved as CORE::%s(), "
6818				   "qualify as such or use &",
6819				   GvENAME(hgv));
6820	    }
6821	}
6822
6823	if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6824	 && (!anydelim || *s != '#')) {
6825	    /* no override, and not s### either; skipspace is safe here
6826	     * check for => on following line */
6827	    bool arrow;
6828	    STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6829	    STRLEN   soff = s         - SvPVX(PL_linestr);
6830	    s = skipspace_flags(s, LEX_NO_INCLINE);
6831	    arrow = *s == '=' && s[1] == '>';
6832	    PL_bufptr = SvPVX(PL_linestr) + bufoff;
6833	    s         = SvPVX(PL_linestr) +   soff;
6834	    if (arrow)
6835		goto fat_arrow;
6836	}
6837
6838      reserved_word:
6839	switch (tmp) {
6840
6841	default:			/* not a keyword */
6842	    /* Trade off - by using this evil construction we can pull the
6843	       variable gv into the block labelled keylookup. If not, then
6844	       we have to give it function scope so that the goto from the
6845	       earlier ':' case doesn't bypass the initialisation.  */
6846	    if (0) {
6847	    just_a_word_zero_gv:
6848		sv = NULL;
6849		cv = NULL;
6850		gv = NULL;
6851		gvp = NULL;
6852		rv2cv_op = NULL;
6853		orig_keyword = 0;
6854		lex = 0;
6855		off = 0;
6856	    }
6857	  just_a_word: {
6858		int pkgname = 0;
6859		const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6860		bool safebw;
6861
6862
6863		/* Get the rest if it looks like a package qualifier */
6864
6865		if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6866		    STRLEN morelen;
6867		    s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6868				  TRUE, &morelen);
6869		    if (!morelen)
6870			Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6871				UTF8fARG(UTF, len, PL_tokenbuf),
6872				*s == '\'' ? "'" : "::");
6873		    len += morelen;
6874		    pkgname = 1;
6875		}
6876
6877		if (PL_expect == XOPERATOR) {
6878		    if (PL_bufptr == PL_linestart) {
6879			CopLINE_dec(PL_curcop);
6880			Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6881			CopLINE_inc(PL_curcop);
6882		    }
6883		    else
6884			no_op("Bareword",s);
6885		}
6886
6887		/* See if the name is "Foo::",
6888		   in which case Foo is a bareword
6889		   (and a package name). */
6890
6891		if (len > 2
6892                    && PL_tokenbuf[len - 2] == ':'
6893                    && PL_tokenbuf[len - 1] == ':')
6894		{
6895		    if (ckWARN(WARN_BAREWORD)
6896			&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6897			Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6898		  	  "Bareword \"%"UTF8f"\" refers to nonexistent package",
6899			   UTF8fARG(UTF, len, PL_tokenbuf));
6900		    len -= 2;
6901		    PL_tokenbuf[len] = '\0';
6902		    gv = NULL;
6903		    gvp = 0;
6904		    safebw = TRUE;
6905		}
6906		else {
6907		    safebw = FALSE;
6908		}
6909
6910		/* if we saw a global override before, get the right name */
6911
6912		if (!sv)
6913		  sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6914						len);
6915		if (gvp) {
6916		    SV * const tmp_sv = sv;
6917		    sv = newSVpvs("CORE::GLOBAL::");
6918		    sv_catsv(sv, tmp_sv);
6919		    SvREFCNT_dec(tmp_sv);
6920		}
6921
6922
6923		/* Presume this is going to be a bareword of some sort. */
6924		CLINE;
6925		pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6926		pl_yylval.opval->op_private = OPpCONST_BARE;
6927
6928		/* And if "Foo::", then that's what it certainly is. */
6929		if (safebw)
6930		    goto safe_bareword;
6931
6932		if (!off)
6933		{
6934		    OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6935		    const_op->op_private = OPpCONST_BARE;
6936		    rv2cv_op =
6937			newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
6938		    cv = lex
6939			? isGV(gv)
6940			    ? GvCV(gv)
6941			    : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
6942				? (CV *)SvRV(gv)
6943				: ((CV *)gv)
6944			: rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
6945		}
6946
6947		/* Use this var to track whether intuit_method has been
6948		   called.  intuit_method returns 0 or > 255.  */
6949		tmp = 1;
6950
6951		/* See if it's the indirect object for a list operator. */
6952
6953		if (PL_oldoldbufptr
6954                    && PL_oldoldbufptr < PL_bufptr
6955                    && (PL_oldoldbufptr == PL_last_lop
6956		        || PL_oldoldbufptr == PL_last_uni)
6957                    && /* NO SKIPSPACE BEFORE HERE! */
6958		       (PL_expect == XREF
6959                        || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
6960                                                               == OA_FILEREF))
6961		{
6962		    bool immediate_paren = *s == '(';
6963
6964		    /* (Now we can afford to cross potential line boundary.) */
6965		    s = skipspace(s);
6966
6967		    /* Two barewords in a row may indicate method call. */
6968
6969		    if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
6970                        && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
6971                    {
6972			goto method;
6973		    }
6974
6975		    /* If not a declared subroutine, it's an indirect object. */
6976		    /* (But it's an indir obj regardless for sort.) */
6977		    /* Also, if "_" follows a filetest operator, it's a bareword */
6978
6979		    if (
6980			( !immediate_paren && (PL_last_lop_op == OP_SORT
6981                         || (!cv
6982                             && (PL_last_lop_op != OP_MAPSTART
6983                                 && PL_last_lop_op != OP_GREPSTART))))
6984		       || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6985			    && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
6986                                                            == OA_FILESTATOP))
6987		       )
6988		    {
6989			PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6990			goto bareword;
6991		    }
6992		}
6993
6994		PL_expect = XOPERATOR;
6995		s = skipspace(s);
6996
6997		/* Is this a word before a => operator? */
6998		if (*s == '=' && s[1] == '>' && !pkgname) {
6999		    op_free(rv2cv_op);
7000		    CLINE;
7001		    if (gvp || (lex && !off)) {
7002			assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7003			/* This is our own scalar, created a few lines
7004			   above, so this is safe. */
7005			SvREADONLY_off(sv);
7006			sv_setpv(sv, PL_tokenbuf);
7007			if (UTF && !IN_BYTES
7008			 && is_utf8_string((U8*)PL_tokenbuf, len))
7009			      SvUTF8_on(sv);
7010			SvREADONLY_on(sv);
7011		    }
7012		    TERM(WORD);
7013		}
7014
7015		/* If followed by a paren, it's certainly a subroutine. */
7016		if (*s == '(') {
7017		    CLINE;
7018		    if (cv) {
7019			d = s + 1;
7020			while (SPACE_OR_TAB(*d))
7021			    d++;
7022			if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7023			    s = d + 1;
7024			    goto its_constant;
7025			}
7026		    }
7027		    NEXTVAL_NEXTTOKE.opval =
7028			off ? rv2cv_op : pl_yylval.opval;
7029		    if (off)
7030			 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7031		    else op_free(rv2cv_op),	   force_next(WORD);
7032		    pl_yylval.ival = 0;
7033		    TOKEN('&');
7034		}
7035
7036		/* If followed by var or block, call it a method (unless sub) */
7037
7038		if ((*s == '$' || *s == '{') && !cv) {
7039		    op_free(rv2cv_op);
7040		    PL_last_lop = PL_oldbufptr;
7041		    PL_last_lop_op = OP_METHOD;
7042		    if (!PL_lex_allbrackets
7043                        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7044                    {
7045			PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7046                    }
7047		    PL_expect = XBLOCKTERM;
7048		    PL_bufptr = s;
7049		    return REPORT(METHOD);
7050		}
7051
7052		/* If followed by a bareword, see if it looks like indir obj. */
7053
7054		if (tmp == 1 && !orig_keyword
7055			&& (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7056			&& (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
7057		  method:
7058		    if (lex && !off) {
7059			assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7060			SvREADONLY_off(sv);
7061			sv_setpvn(sv, PL_tokenbuf, len);
7062			if (UTF && !IN_BYTES
7063			 && is_utf8_string((U8*)PL_tokenbuf, len))
7064			    SvUTF8_on (sv);
7065			else SvUTF8_off(sv);
7066		    }
7067		    op_free(rv2cv_op);
7068		    if (tmp == METHOD && !PL_lex_allbrackets
7069                        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7070                    {
7071			PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7072                    }
7073		    return REPORT(tmp);
7074		}
7075
7076		/* Not a method, so call it a subroutine (if defined) */
7077
7078		if (cv) {
7079		    /* Check for a constant sub */
7080		    if ((sv = cv_const_sv_or_av(cv))) {
7081		  its_constant:
7082			op_free(rv2cv_op);
7083			SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7084			((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7085			if (SvTYPE(sv) == SVt_PVAV)
7086			    pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7087						      pl_yylval.opval);
7088			else {
7089			    pl_yylval.opval->op_private = 0;
7090			    pl_yylval.opval->op_folded = 1;
7091			    pl_yylval.opval->op_flags |= OPf_SPECIAL;
7092			}
7093			TOKEN(WORD);
7094		    }
7095
7096		    op_free(pl_yylval.opval);
7097		    pl_yylval.opval =
7098			off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7099		    pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7100		    PL_last_lop = PL_oldbufptr;
7101		    PL_last_lop_op = OP_ENTERSUB;
7102		    /* Is there a prototype? */
7103		    if (
7104			SvPOK(cv))
7105		    {
7106			STRLEN protolen = CvPROTOLEN(cv);
7107			const char *proto = CvPROTO(cv);
7108			bool optional;
7109			proto = S_strip_spaces(aTHX_ proto, &protolen);
7110			if (!protolen)
7111			    TERM(FUNC0SUB);
7112			if ((optional = *proto == ';'))
7113			  do
7114			    proto++;
7115			  while (*proto == ';');
7116			if (
7117			    (
7118			        (
7119			            *proto == '$' || *proto == '_'
7120			         || *proto == '*' || *proto == '+'
7121			        )
7122			     && proto[1] == '\0'
7123			    )
7124			 || (
7125			     *proto == '\\' && proto[1] && proto[2] == '\0'
7126			    )
7127			)
7128			    UNIPROTO(UNIOPSUB,optional);
7129			if (*proto == '\\' && proto[1] == '[') {
7130			    const char *p = proto + 2;
7131			    while(*p && *p != ']')
7132				++p;
7133			    if(*p == ']' && !p[1])
7134				UNIPROTO(UNIOPSUB,optional);
7135			}
7136			if (*proto == '&' && *s == '{') {
7137			    if (PL_curstash)
7138				sv_setpvs(PL_subname, "__ANON__");
7139			    else
7140				sv_setpvs(PL_subname, "__ANON__::__ANON__");
7141			    if (!PL_lex_allbrackets
7142                                && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7143                            {
7144				PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7145                            }
7146			    PREBLOCK(LSTOPSUB);
7147			}
7148		    }
7149		    NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7150		    PL_expect = XTERM;
7151		    force_next(off ? PRIVATEREF : WORD);
7152		    if (!PL_lex_allbrackets
7153                        && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7154                    {
7155			PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7156                    }
7157		    TOKEN(NOAMP);
7158		}
7159
7160		/* Call it a bare word */
7161
7162		if (PL_hints & HINT_STRICT_SUBS)
7163		    pl_yylval.opval->op_private |= OPpCONST_STRICT;
7164		else {
7165		bareword:
7166		    /* after "print" and similar functions (corresponding to
7167		     * "F? L" in opcode.pl), whatever wasn't already parsed as
7168		     * a filehandle should be subject to "strict subs".
7169		     * Likewise for the optional indirect-object argument to system
7170		     * or exec, which can't be a bareword */
7171		    if ((PL_last_lop_op == OP_PRINT
7172			    || PL_last_lop_op == OP_PRTF
7173			    || PL_last_lop_op == OP_SAY
7174			    || PL_last_lop_op == OP_SYSTEM
7175			    || PL_last_lop_op == OP_EXEC)
7176			    && (PL_hints & HINT_STRICT_SUBS))
7177			pl_yylval.opval->op_private |= OPpCONST_STRICT;
7178		    if (lastchar != '-') {
7179			if (ckWARN(WARN_RESERVED)) {
7180			    d = PL_tokenbuf;
7181			    while (isLOWER(*d))
7182				d++;
7183			    if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7184                            {
7185                                /* PL_warn_reserved is constant */
7186                                GCC_DIAG_IGNORE(-Wformat-nonliteral);
7187				Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7188				       PL_tokenbuf);
7189                                GCC_DIAG_RESTORE;
7190                            }
7191			}
7192		    }
7193		}
7194		op_free(rv2cv_op);
7195
7196	    safe_bareword:
7197		if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7198		 && saw_infix_sigil) {
7199		    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7200				     "Operator or semicolon missing before %c%"UTF8f,
7201				     lastchar,
7202				     UTF8fARG(UTF, strlen(PL_tokenbuf),
7203					      PL_tokenbuf));
7204		    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7205				     "Ambiguous use of %c resolved as operator %c",
7206				     lastchar, lastchar);
7207		}
7208		TOKEN(WORD);
7209	    }
7210
7211	case KEY___FILE__:
7212	    FUN0OP(
7213		(OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7214	    );
7215
7216	case KEY___LINE__:
7217	    FUN0OP(
7218        	(OP*)newSVOP(OP_CONST, 0,
7219		    Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7220	    );
7221
7222	case KEY___PACKAGE__:
7223	    FUN0OP(
7224		(OP*)newSVOP(OP_CONST, 0,
7225					(PL_curstash
7226					 ? newSVhek(HvNAME_HEK(PL_curstash))
7227					 : &PL_sv_undef))
7228	    );
7229
7230	case KEY___DATA__:
7231	case KEY___END__: {
7232	    GV *gv;
7233	    if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7234		HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7235					? PL_curstash
7236					: PL_defstash;
7237		gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7238		if (!isGV(gv))
7239		    gv_init(gv,stash,"DATA",4,0);
7240		GvMULTI_on(gv);
7241		if (!GvIO(gv))
7242		    GvIOp(gv) = newIO();
7243		IoIFP(GvIOp(gv)) = PL_rsfp;
7244#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7245		{
7246		    const int fd = PerlIO_fileno(PL_rsfp);
7247                    if (fd >= 3) {
7248                        fcntl(fd,F_SETFD, FD_CLOEXEC);
7249                    }
7250		}
7251#endif
7252		/* Mark this internal pseudo-handle as clean */
7253		IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7254		if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7255		    IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7256		else
7257		    IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7258#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7259		/* if the script was opened in binmode, we need to revert
7260		 * it to text mode for compatibility; but only iff it has CRs
7261		 * XXX this is a questionable hack at best. */
7262		if (PL_bufend-PL_bufptr > 2
7263		    && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7264		{
7265		    Off_t loc = 0;
7266		    if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7267			loc = PerlIO_tell(PL_rsfp);
7268			(void)PerlIO_seek(PL_rsfp, 0L, 0);
7269		    }
7270#ifdef NETWARE
7271			if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7272#else
7273		    if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7274#endif	/* NETWARE */
7275			if (loc > 0)
7276			    PerlIO_seek(PL_rsfp, loc, 0);
7277		    }
7278		}
7279#endif
7280#ifdef PERLIO_LAYERS
7281		if (!IN_BYTES) {
7282		    if (UTF)
7283			PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7284		    else if (IN_ENCODING) {
7285			SV *name;
7286			dSP;
7287			ENTER;
7288			SAVETMPS;
7289			PUSHMARK(sp);
7290			XPUSHs(_get_encoding());
7291			PUTBACK;
7292			call_method("name", G_SCALAR);
7293			SPAGAIN;
7294			name = POPs;
7295			PUTBACK;
7296			PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7297					    Perl_form(aTHX_ ":encoding(%"SVf")",
7298						      SVfARG(name)));
7299			FREETMPS;
7300			LEAVE;
7301		    }
7302		}
7303#endif
7304		PL_rsfp = NULL;
7305	    }
7306	    goto fake_eof;
7307	}
7308
7309	case KEY___SUB__:
7310	    FUN0OP(CvCLONE(PL_compcv)
7311			? newOP(OP_RUNCV, 0)
7312			: newPVOP(OP_RUNCV,0,NULL));
7313
7314	case KEY_AUTOLOAD:
7315	case KEY_DESTROY:
7316	case KEY_BEGIN:
7317	case KEY_UNITCHECK:
7318	case KEY_CHECK:
7319	case KEY_INIT:
7320	case KEY_END:
7321	    if (PL_expect == XSTATE) {
7322		s = PL_bufptr;
7323		goto really_sub;
7324	    }
7325	    goto just_a_word;
7326
7327	case_KEY_CORE:
7328	    {
7329		STRLEN olen = len;
7330		d = s;
7331		s += 2;
7332		s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7333		if ((*s == ':' && s[1] == ':')
7334		 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7335		{
7336		    s = d;
7337		    len = olen;
7338		    Copy(PL_bufptr, PL_tokenbuf, olen, char);
7339		    goto just_a_word;
7340		}
7341		if (!tmp)
7342		    Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7343				      UTF8fARG(UTF, len, PL_tokenbuf));
7344		if (tmp < 0)
7345		    tmp = -tmp;
7346		else if (tmp == KEY_require || tmp == KEY_do
7347		      || tmp == KEY_glob)
7348		    /* that's a way to remember we saw "CORE::" */
7349		    orig_keyword = tmp;
7350		goto reserved_word;
7351	    }
7352
7353	case KEY_abs:
7354	    UNI(OP_ABS);
7355
7356	case KEY_alarm:
7357	    UNI(OP_ALARM);
7358
7359	case KEY_accept:
7360	    LOP(OP_ACCEPT,XTERM);
7361
7362	case KEY_and:
7363	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7364		return REPORT(0);
7365	    OPERATOR(ANDOP);
7366
7367	case KEY_atan2:
7368	    LOP(OP_ATAN2,XTERM);
7369
7370	case KEY_bind:
7371	    LOP(OP_BIND,XTERM);
7372
7373	case KEY_binmode:
7374	    LOP(OP_BINMODE,XTERM);
7375
7376	case KEY_bless:
7377	    LOP(OP_BLESS,XTERM);
7378
7379	case KEY_break:
7380	    FUN0(OP_BREAK);
7381
7382	case KEY_chop:
7383	    UNI(OP_CHOP);
7384
7385	case KEY_continue:
7386		    /* We have to disambiguate the two senses of
7387		      "continue". If the next token is a '{' then
7388		      treat it as the start of a continue block;
7389		      otherwise treat it as a control operator.
7390		     */
7391		    s = skipspace(s);
7392		    if (*s == '{')
7393	    PREBLOCK(CONTINUE);
7394		    else
7395			FUN0(OP_CONTINUE);
7396
7397	case KEY_chdir:
7398	    /* may use HOME */
7399	    (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7400	    UNI(OP_CHDIR);
7401
7402	case KEY_close:
7403	    UNI(OP_CLOSE);
7404
7405	case KEY_closedir:
7406	    UNI(OP_CLOSEDIR);
7407
7408	case KEY_cmp:
7409	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7410		return REPORT(0);
7411	    Eop(OP_SCMP);
7412
7413	case KEY_caller:
7414	    UNI(OP_CALLER);
7415
7416	case KEY_crypt:
7417#ifdef FCRYPT
7418	    if (!PL_cryptseen) {
7419		PL_cryptseen = TRUE;
7420		init_des();
7421	    }
7422#endif
7423	    LOP(OP_CRYPT,XTERM);
7424
7425	case KEY_chmod:
7426	    LOP(OP_CHMOD,XTERM);
7427
7428	case KEY_chown:
7429	    LOP(OP_CHOWN,XTERM);
7430
7431	case KEY_connect:
7432	    LOP(OP_CONNECT,XTERM);
7433
7434	case KEY_chr:
7435	    UNI(OP_CHR);
7436
7437	case KEY_cos:
7438	    UNI(OP_COS);
7439
7440	case KEY_chroot:
7441	    UNI(OP_CHROOT);
7442
7443	case KEY_default:
7444	    PREBLOCK(DEFAULT);
7445
7446	case KEY_do:
7447	    s = skipspace(s);
7448	    if (*s == '{')
7449		PRETERMBLOCK(DO);
7450	    if (*s != '\'') {
7451		*PL_tokenbuf = '&';
7452		d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7453			      1, &len);
7454		if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7455		 && !keyword(PL_tokenbuf + 1, len, 0)) {
7456		    d = skipspace(d);
7457		    if (*d == '(') {
7458			force_ident_maybe_lex('&');
7459			s = d;
7460		    }
7461		}
7462	    }
7463	    if (orig_keyword == KEY_do) {
7464		orig_keyword = 0;
7465		pl_yylval.ival = 1;
7466	    }
7467	    else
7468		pl_yylval.ival = 0;
7469	    OPERATOR(DO);
7470
7471	case KEY_die:
7472	    PL_hints |= HINT_BLOCK_SCOPE;
7473	    LOP(OP_DIE,XTERM);
7474
7475	case KEY_defined:
7476	    UNI(OP_DEFINED);
7477
7478	case KEY_delete:
7479	    UNI(OP_DELETE);
7480
7481	case KEY_dbmopen:
7482	    Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7483			      STR_WITH_LEN("NDBM_File::"),
7484			      STR_WITH_LEN("DB_File::"),
7485			      STR_WITH_LEN("GDBM_File::"),
7486			      STR_WITH_LEN("SDBM_File::"),
7487			      STR_WITH_LEN("ODBM_File::"),
7488			      NULL);
7489	    LOP(OP_DBMOPEN,XTERM);
7490
7491	case KEY_dbmclose:
7492	    UNI(OP_DBMCLOSE);
7493
7494	case KEY_dump:
7495	    LOOPX(OP_DUMP);
7496
7497	case KEY_else:
7498	    PREBLOCK(ELSE);
7499
7500	case KEY_elsif:
7501	    pl_yylval.ival = CopLINE(PL_curcop);
7502	    OPERATOR(ELSIF);
7503
7504	case KEY_eq:
7505	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7506		return REPORT(0);
7507	    Eop(OP_SEQ);
7508
7509	case KEY_exists:
7510	    UNI(OP_EXISTS);
7511
7512	case KEY_exit:
7513	    UNI(OP_EXIT);
7514
7515	case KEY_eval:
7516	    s = skipspace(s);
7517	    if (*s == '{') { /* block eval */
7518		PL_expect = XTERMBLOCK;
7519		UNIBRACK(OP_ENTERTRY);
7520	    }
7521	    else { /* string eval */
7522		PL_expect = XTERM;
7523		UNIBRACK(OP_ENTEREVAL);
7524	    }
7525
7526	case KEY_evalbytes:
7527	    PL_expect = XTERM;
7528	    UNIBRACK(-OP_ENTEREVAL);
7529
7530	case KEY_eof:
7531	    UNI(OP_EOF);
7532
7533	case KEY_exp:
7534	    UNI(OP_EXP);
7535
7536	case KEY_each:
7537	    UNI(OP_EACH);
7538
7539	case KEY_exec:
7540	    LOP(OP_EXEC,XREF);
7541
7542	case KEY_endhostent:
7543	    FUN0(OP_EHOSTENT);
7544
7545	case KEY_endnetent:
7546	    FUN0(OP_ENETENT);
7547
7548	case KEY_endservent:
7549	    FUN0(OP_ESERVENT);
7550
7551	case KEY_endprotoent:
7552	    FUN0(OP_EPROTOENT);
7553
7554	case KEY_endpwent:
7555	    FUN0(OP_EPWENT);
7556
7557	case KEY_endgrent:
7558	    FUN0(OP_EGRENT);
7559
7560	case KEY_for:
7561	case KEY_foreach:
7562	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7563		return REPORT(0);
7564	    pl_yylval.ival = CopLINE(PL_curcop);
7565	    s = skipspace(s);
7566	    if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7567		char *p = s;
7568
7569		if ((PL_bufend - p) >= 3
7570                    && strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7571                {
7572		    p += 2;
7573                }
7574		else if ((PL_bufend - p) >= 4
7575                         && strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7576		    p += 3;
7577		p = skipspace(p);
7578                /* skip optional package name, as in "for my abc $x (..)" */
7579		if (isIDFIRST_lazy_if(p,UTF)) {
7580		    p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7581		    p = skipspace(p);
7582		}
7583		if (*p != '$')
7584		    Perl_croak(aTHX_ "Missing $ on loop variable");
7585	    }
7586	    OPERATOR(FOR);
7587
7588	case KEY_formline:
7589	    LOP(OP_FORMLINE,XTERM);
7590
7591	case KEY_fork:
7592	    FUN0(OP_FORK);
7593
7594	case KEY_fc:
7595	    UNI(OP_FC);
7596
7597	case KEY_fcntl:
7598	    LOP(OP_FCNTL,XTERM);
7599
7600	case KEY_fileno:
7601	    UNI(OP_FILENO);
7602
7603	case KEY_flock:
7604	    LOP(OP_FLOCK,XTERM);
7605
7606	case KEY_gt:
7607	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7608		return REPORT(0);
7609	    Rop(OP_SGT);
7610
7611	case KEY_ge:
7612	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7613		return REPORT(0);
7614	    Rop(OP_SGE);
7615
7616	case KEY_grep:
7617	    LOP(OP_GREPSTART, XREF);
7618
7619	case KEY_goto:
7620	    LOOPX(OP_GOTO);
7621
7622	case KEY_gmtime:
7623	    UNI(OP_GMTIME);
7624
7625	case KEY_getc:
7626	    UNIDOR(OP_GETC);
7627
7628	case KEY_getppid:
7629	    FUN0(OP_GETPPID);
7630
7631	case KEY_getpgrp:
7632	    UNI(OP_GETPGRP);
7633
7634	case KEY_getpriority:
7635	    LOP(OP_GETPRIORITY,XTERM);
7636
7637	case KEY_getprotobyname:
7638	    UNI(OP_GPBYNAME);
7639
7640	case KEY_getprotobynumber:
7641	    LOP(OP_GPBYNUMBER,XTERM);
7642
7643	case KEY_getprotoent:
7644	    FUN0(OP_GPROTOENT);
7645
7646	case KEY_getpwent:
7647	    FUN0(OP_GPWENT);
7648
7649	case KEY_getpwnam:
7650	    UNI(OP_GPWNAM);
7651
7652	case KEY_getpwuid:
7653	    UNI(OP_GPWUID);
7654
7655	case KEY_getpeername:
7656	    UNI(OP_GETPEERNAME);
7657
7658	case KEY_gethostbyname:
7659	    UNI(OP_GHBYNAME);
7660
7661	case KEY_gethostbyaddr:
7662	    LOP(OP_GHBYADDR,XTERM);
7663
7664	case KEY_gethostent:
7665	    FUN0(OP_GHOSTENT);
7666
7667	case KEY_getnetbyname:
7668	    UNI(OP_GNBYNAME);
7669
7670	case KEY_getnetbyaddr:
7671	    LOP(OP_GNBYADDR,XTERM);
7672
7673	case KEY_getnetent:
7674	    FUN0(OP_GNETENT);
7675
7676	case KEY_getservbyname:
7677	    LOP(OP_GSBYNAME,XTERM);
7678
7679	case KEY_getservbyport:
7680	    LOP(OP_GSBYPORT,XTERM);
7681
7682	case KEY_getservent:
7683	    FUN0(OP_GSERVENT);
7684
7685	case KEY_getsockname:
7686	    UNI(OP_GETSOCKNAME);
7687
7688	case KEY_getsockopt:
7689	    LOP(OP_GSOCKOPT,XTERM);
7690
7691	case KEY_getgrent:
7692	    FUN0(OP_GGRENT);
7693
7694	case KEY_getgrnam:
7695	    UNI(OP_GGRNAM);
7696
7697	case KEY_getgrgid:
7698	    UNI(OP_GGRGID);
7699
7700	case KEY_getlogin:
7701	    FUN0(OP_GETLOGIN);
7702
7703	case KEY_given:
7704	    pl_yylval.ival = CopLINE(PL_curcop);
7705            Perl_ck_warner_d(aTHX_
7706                packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7707                "given is experimental");
7708	    OPERATOR(GIVEN);
7709
7710	case KEY_glob:
7711	    LOP(
7712	     orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7713	     XTERM
7714	    );
7715
7716	case KEY_hex:
7717	    UNI(OP_HEX);
7718
7719	case KEY_if:
7720	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7721		return REPORT(0);
7722	    pl_yylval.ival = CopLINE(PL_curcop);
7723	    OPERATOR(IF);
7724
7725	case KEY_index:
7726	    LOP(OP_INDEX,XTERM);
7727
7728	case KEY_int:
7729	    UNI(OP_INT);
7730
7731	case KEY_ioctl:
7732	    LOP(OP_IOCTL,XTERM);
7733
7734	case KEY_join:
7735	    LOP(OP_JOIN,XTERM);
7736
7737	case KEY_keys:
7738	    UNI(OP_KEYS);
7739
7740	case KEY_kill:
7741	    LOP(OP_KILL,XTERM);
7742
7743	case KEY_last:
7744	    LOOPX(OP_LAST);
7745
7746	case KEY_lc:
7747	    UNI(OP_LC);
7748
7749	case KEY_lcfirst:
7750	    UNI(OP_LCFIRST);
7751
7752	case KEY_local:
7753	    pl_yylval.ival = 0;
7754	    OPERATOR(LOCAL);
7755
7756	case KEY_length:
7757	    UNI(OP_LENGTH);
7758
7759	case KEY_lt:
7760	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7761		return REPORT(0);
7762	    Rop(OP_SLT);
7763
7764	case KEY_le:
7765	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7766		return REPORT(0);
7767	    Rop(OP_SLE);
7768
7769	case KEY_localtime:
7770	    UNI(OP_LOCALTIME);
7771
7772	case KEY_log:
7773	    UNI(OP_LOG);
7774
7775	case KEY_link:
7776	    LOP(OP_LINK,XTERM);
7777
7778	case KEY_listen:
7779	    LOP(OP_LISTEN,XTERM);
7780
7781	case KEY_lock:
7782	    UNI(OP_LOCK);
7783
7784	case KEY_lstat:
7785	    UNI(OP_LSTAT);
7786
7787	case KEY_m:
7788	    s = scan_pat(s,OP_MATCH);
7789	    TERM(sublex_start());
7790
7791	case KEY_map:
7792	    LOP(OP_MAPSTART, XREF);
7793
7794	case KEY_mkdir:
7795	    LOP(OP_MKDIR,XTERM);
7796
7797	case KEY_msgctl:
7798	    LOP(OP_MSGCTL,XTERM);
7799
7800	case KEY_msgget:
7801	    LOP(OP_MSGGET,XTERM);
7802
7803	case KEY_msgrcv:
7804	    LOP(OP_MSGRCV,XTERM);
7805
7806	case KEY_msgsnd:
7807	    LOP(OP_MSGSND,XTERM);
7808
7809	case KEY_our:
7810	case KEY_my:
7811	case KEY_state:
7812	    if (PL_in_my) {
7813	        yyerror(Perl_form(aTHX_
7814	                          "Can't redeclare \"%s\" in \"%s\"",
7815	                           tmp      == KEY_my    ? "my" :
7816	                           tmp      == KEY_state ? "state" : "our",
7817	                           PL_in_my == KEY_my    ? "my" :
7818	                           PL_in_my == KEY_state ? "state" : "our"));
7819	    }
7820	    PL_in_my = (U16)tmp;
7821	    s = skipspace(s);
7822	    if (isIDFIRST_lazy_if(s,UTF)) {
7823		s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7824		if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7825		{
7826		    if (!FEATURE_LEXSUBS_IS_ENABLED)
7827			Perl_croak(aTHX_
7828				  "Experimental \"%s\" subs not enabled",
7829				   tmp == KEY_my    ? "my"    :
7830				   tmp == KEY_state ? "state" : "our");
7831		    Perl_ck_warner_d(aTHX_
7832			packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7833			"The lexical_subs feature is experimental");
7834		    goto really_sub;
7835		}
7836		PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7837		if (!PL_in_my_stash) {
7838		    char tmpbuf[1024];
7839                    int len;
7840		    PL_bufptr = s;
7841		    len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7842                    PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7843		    yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7844		}
7845	    }
7846	    pl_yylval.ival = 1;
7847	    OPERATOR(MY);
7848
7849	case KEY_next:
7850	    LOOPX(OP_NEXT);
7851
7852	case KEY_ne:
7853	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7854		return REPORT(0);
7855	    Eop(OP_SNE);
7856
7857	case KEY_no:
7858	    s = tokenize_use(0, s);
7859	    TOKEN(USE);
7860
7861	case KEY_not:
7862	    if (*s == '(' || (s = skipspace(s), *s == '('))
7863		FUN1(OP_NOT);
7864	    else {
7865		if (!PL_lex_allbrackets
7866                    && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7867                {
7868		    PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7869                }
7870		OPERATOR(NOTOP);
7871	    }
7872
7873	case KEY_open:
7874	    s = skipspace(s);
7875	    if (isIDFIRST_lazy_if(s,UTF)) {
7876          const char *t;
7877          d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7878              &len);
7879		for (t=d; isSPACE(*t);)
7880		    t++;
7881		if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7882		    /* [perl #16184] */
7883		    && !(t[0] == '=' && t[1] == '>')
7884		    && !(t[0] == ':' && t[1] == ':')
7885		    && !keyword(s, d-s, 0)
7886		) {
7887		    Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7888		       "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7889			UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7890		}
7891	    }
7892	    LOP(OP_OPEN,XTERM);
7893
7894	case KEY_or:
7895	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7896		return REPORT(0);
7897	    pl_yylval.ival = OP_OR;
7898	    OPERATOR(OROP);
7899
7900	case KEY_ord:
7901	    UNI(OP_ORD);
7902
7903	case KEY_oct:
7904	    UNI(OP_OCT);
7905
7906	case KEY_opendir:
7907	    LOP(OP_OPEN_DIR,XTERM);
7908
7909	case KEY_print:
7910	    checkcomma(s,PL_tokenbuf,"filehandle");
7911	    LOP(OP_PRINT,XREF);
7912
7913	case KEY_printf:
7914	    checkcomma(s,PL_tokenbuf,"filehandle");
7915	    LOP(OP_PRTF,XREF);
7916
7917	case KEY_prototype:
7918	    UNI(OP_PROTOTYPE);
7919
7920	case KEY_push:
7921	    LOP(OP_PUSH,XTERM);
7922
7923	case KEY_pop:
7924	    UNIDOR(OP_POP);
7925
7926	case KEY_pos:
7927	    UNIDOR(OP_POS);
7928
7929	case KEY_pack:
7930	    LOP(OP_PACK,XTERM);
7931
7932	case KEY_package:
7933	    s = force_word(s,WORD,FALSE,TRUE);
7934	    s = skipspace(s);
7935	    s = force_strict_version(s);
7936	    PREBLOCK(PACKAGE);
7937
7938	case KEY_pipe:
7939	    LOP(OP_PIPE_OP,XTERM);
7940
7941	case KEY_q:
7942	    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7943	    if (!s)
7944		missingterm(NULL);
7945	    COPLINE_SET_FROM_MULTI_END;
7946	    pl_yylval.ival = OP_CONST;
7947	    TERM(sublex_start());
7948
7949	case KEY_quotemeta:
7950	    UNI(OP_QUOTEMETA);
7951
7952	case KEY_qw: {
7953	    OP *words = NULL;
7954	    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7955	    if (!s)
7956		missingterm(NULL);
7957	    COPLINE_SET_FROM_MULTI_END;
7958	    PL_expect = XOPERATOR;
7959	    if (SvCUR(PL_lex_stuff)) {
7960		int warned_comma = !ckWARN(WARN_QW);
7961		int warned_comment = warned_comma;
7962		d = SvPV_force(PL_lex_stuff, len);
7963		while (len) {
7964		    for (; isSPACE(*d) && len; --len, ++d)
7965			/**/;
7966		    if (len) {
7967			SV *sv;
7968			const char *b = d;
7969			if (!warned_comma || !warned_comment) {
7970			    for (; !isSPACE(*d) && len; --len, ++d) {
7971				if (!warned_comma && *d == ',') {
7972				    Perl_warner(aTHX_ packWARN(WARN_QW),
7973					"Possible attempt to separate words with commas");
7974				    ++warned_comma;
7975				}
7976				else if (!warned_comment && *d == '#') {
7977				    Perl_warner(aTHX_ packWARN(WARN_QW),
7978					"Possible attempt to put comments in qw() list");
7979				    ++warned_comment;
7980				}
7981			    }
7982			}
7983			else {
7984			    for (; !isSPACE(*d) && len; --len, ++d)
7985				/**/;
7986			}
7987			sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7988			words = op_append_elem(OP_LIST, words,
7989					    newSVOP(OP_CONST, 0, tokeq(sv)));
7990		    }
7991		}
7992	    }
7993	    if (!words)
7994		words = newNULLLIST();
7995	    SvREFCNT_dec_NN(PL_lex_stuff);
7996	    PL_lex_stuff = NULL;
7997	    PL_expect = XOPERATOR;
7998	    pl_yylval.opval = sawparens(words);
7999	    TOKEN(QWLIST);
8000	}
8001
8002	case KEY_qq:
8003	    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8004	    if (!s)
8005		missingterm(NULL);
8006	    pl_yylval.ival = OP_STRINGIFY;
8007	    if (SvIVX(PL_lex_stuff) == '\'')
8008		SvIV_set(PL_lex_stuff, 0);	/* qq'$foo' should interpolate */
8009	    TERM(sublex_start());
8010
8011	case KEY_qr:
8012	    s = scan_pat(s,OP_QR);
8013	    TERM(sublex_start());
8014
8015	case KEY_qx:
8016	    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8017	    if (!s)
8018		missingterm(NULL);
8019	    pl_yylval.ival = OP_BACKTICK;
8020	    TERM(sublex_start());
8021
8022	case KEY_return:
8023	    OLDLOP(OP_RETURN);
8024
8025	case KEY_require:
8026	    s = skipspace(s);
8027	    if (isDIGIT(*s)) {
8028		s = force_version(s, FALSE);
8029	    }
8030	    else if (*s != 'v' || !isDIGIT(s[1])
8031		    || (s = force_version(s, TRUE), *s == 'v'))
8032	    {
8033		*PL_tokenbuf = '\0';
8034		s = force_word(s,WORD,TRUE,TRUE);
8035		if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8036		    gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8037                                GV_ADD | (UTF ? SVf_UTF8 : 0));
8038		else if (*s == '<')
8039		    yyerror("<> at require-statement should be quotes");
8040	    }
8041	    if (orig_keyword == KEY_require) {
8042		orig_keyword = 0;
8043		pl_yylval.ival = 1;
8044	    }
8045	    else
8046		pl_yylval.ival = 0;
8047	    PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8048	    PL_bufptr = s;
8049	    PL_last_uni = PL_oldbufptr;
8050	    PL_last_lop_op = OP_REQUIRE;
8051	    s = skipspace(s);
8052	    return REPORT( (int)REQUIRE );
8053
8054	case KEY_reset:
8055	    UNI(OP_RESET);
8056
8057	case KEY_redo:
8058	    LOOPX(OP_REDO);
8059
8060	case KEY_rename:
8061	    LOP(OP_RENAME,XTERM);
8062
8063	case KEY_rand:
8064	    UNI(OP_RAND);
8065
8066	case KEY_rmdir:
8067	    UNI(OP_RMDIR);
8068
8069	case KEY_rindex:
8070	    LOP(OP_RINDEX,XTERM);
8071
8072	case KEY_read:
8073	    LOP(OP_READ,XTERM);
8074
8075	case KEY_readdir:
8076	    UNI(OP_READDIR);
8077
8078	case KEY_readline:
8079	    UNIDOR(OP_READLINE);
8080
8081	case KEY_readpipe:
8082	    UNIDOR(OP_BACKTICK);
8083
8084	case KEY_rewinddir:
8085	    UNI(OP_REWINDDIR);
8086
8087	case KEY_recv:
8088	    LOP(OP_RECV,XTERM);
8089
8090	case KEY_reverse:
8091	    LOP(OP_REVERSE,XTERM);
8092
8093	case KEY_readlink:
8094	    UNIDOR(OP_READLINK);
8095
8096	case KEY_ref:
8097	    UNI(OP_REF);
8098
8099	case KEY_s:
8100	    s = scan_subst(s);
8101	    if (pl_yylval.opval)
8102		TERM(sublex_start());
8103	    else
8104		TOKEN(1);	/* force error */
8105
8106	case KEY_say:
8107	    checkcomma(s,PL_tokenbuf,"filehandle");
8108	    LOP(OP_SAY,XREF);
8109
8110	case KEY_chomp:
8111	    UNI(OP_CHOMP);
8112
8113	case KEY_scalar:
8114	    UNI(OP_SCALAR);
8115
8116	case KEY_select:
8117	    LOP(OP_SELECT,XTERM);
8118
8119	case KEY_seek:
8120	    LOP(OP_SEEK,XTERM);
8121
8122	case KEY_semctl:
8123	    LOP(OP_SEMCTL,XTERM);
8124
8125	case KEY_semget:
8126	    LOP(OP_SEMGET,XTERM);
8127
8128	case KEY_semop:
8129	    LOP(OP_SEMOP,XTERM);
8130
8131	case KEY_send:
8132	    LOP(OP_SEND,XTERM);
8133
8134	case KEY_setpgrp:
8135	    LOP(OP_SETPGRP,XTERM);
8136
8137	case KEY_setpriority:
8138	    LOP(OP_SETPRIORITY,XTERM);
8139
8140	case KEY_sethostent:
8141	    UNI(OP_SHOSTENT);
8142
8143	case KEY_setnetent:
8144	    UNI(OP_SNETENT);
8145
8146	case KEY_setservent:
8147	    UNI(OP_SSERVENT);
8148
8149	case KEY_setprotoent:
8150	    UNI(OP_SPROTOENT);
8151
8152	case KEY_setpwent:
8153	    FUN0(OP_SPWENT);
8154
8155	case KEY_setgrent:
8156	    FUN0(OP_SGRENT);
8157
8158	case KEY_seekdir:
8159	    LOP(OP_SEEKDIR,XTERM);
8160
8161	case KEY_setsockopt:
8162	    LOP(OP_SSOCKOPT,XTERM);
8163
8164	case KEY_shift:
8165	    UNIDOR(OP_SHIFT);
8166
8167	case KEY_shmctl:
8168	    LOP(OP_SHMCTL,XTERM);
8169
8170	case KEY_shmget:
8171	    LOP(OP_SHMGET,XTERM);
8172
8173	case KEY_shmread:
8174	    LOP(OP_SHMREAD,XTERM);
8175
8176	case KEY_shmwrite:
8177	    LOP(OP_SHMWRITE,XTERM);
8178
8179	case KEY_shutdown:
8180	    LOP(OP_SHUTDOWN,XTERM);
8181
8182	case KEY_sin:
8183	    UNI(OP_SIN);
8184
8185	case KEY_sleep:
8186	    UNI(OP_SLEEP);
8187
8188	case KEY_socket:
8189	    LOP(OP_SOCKET,XTERM);
8190
8191	case KEY_socketpair:
8192	    LOP(OP_SOCKPAIR,XTERM);
8193
8194	case KEY_sort:
8195	    checkcomma(s,PL_tokenbuf,"subroutine name");
8196	    s = skipspace(s);
8197	    PL_expect = XTERM;
8198	    s = force_word(s,WORD,TRUE,TRUE);
8199	    LOP(OP_SORT,XREF);
8200
8201	case KEY_split:
8202	    LOP(OP_SPLIT,XTERM);
8203
8204	case KEY_sprintf:
8205	    LOP(OP_SPRINTF,XTERM);
8206
8207	case KEY_splice:
8208	    LOP(OP_SPLICE,XTERM);
8209
8210	case KEY_sqrt:
8211	    UNI(OP_SQRT);
8212
8213	case KEY_srand:
8214	    UNI(OP_SRAND);
8215
8216	case KEY_stat:
8217	    UNI(OP_STAT);
8218
8219	case KEY_study:
8220	    UNI(OP_STUDY);
8221
8222	case KEY_substr:
8223	    LOP(OP_SUBSTR,XTERM);
8224
8225	case KEY_format:
8226	case KEY_sub:
8227	  really_sub:
8228	    {
8229		char * const tmpbuf = PL_tokenbuf + 1;
8230		expectation attrful;
8231		bool have_name, have_proto;
8232		const int key = tmp;
8233                SV *format_name = NULL;
8234
8235		d = s;
8236		s = skipspace(s);
8237
8238		if (isIDFIRST_lazy_if(s,UTF)
8239                    || *s == '\''
8240                    || (*s == ':' && s[1] == ':'))
8241		{
8242
8243		    PL_expect = XBLOCK;
8244		    attrful = XATTRBLOCK;
8245		    d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8246				  &len);
8247                    if (key == KEY_format)
8248			format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8249		    *PL_tokenbuf = '&';
8250		    if (memchr(tmpbuf, ':', len) || key != KEY_sub
8251		     || pad_findmy_pvn(
8252			    PL_tokenbuf, len + 1, 0
8253			) != NOT_IN_PAD)
8254			sv_setpvn(PL_subname, tmpbuf, len);
8255		    else {
8256			sv_setsv(PL_subname,PL_curstname);
8257			sv_catpvs(PL_subname,"::");
8258			sv_catpvn(PL_subname,tmpbuf,len);
8259		    }
8260                    if (SvUTF8(PL_linestr))
8261                        SvUTF8_on(PL_subname);
8262		    have_name = TRUE;
8263
8264
8265		    s = skipspace(d);
8266		}
8267		else {
8268		    if (key == KEY_my || key == KEY_our || key==KEY_state)
8269		    {
8270			*d = '\0';
8271			/* diag_listed_as: Missing name in "%s sub" */
8272			Perl_croak(aTHX_
8273				  "Missing name in \"%s\"", PL_bufptr);
8274		    }
8275		    PL_expect = XTERMBLOCK;
8276		    attrful = XATTRTERM;
8277		    sv_setpvs(PL_subname,"?");
8278		    have_name = FALSE;
8279		}
8280
8281		if (key == KEY_format) {
8282		    if (format_name) {
8283                        NEXTVAL_NEXTTOKE.opval
8284                            = (OP*)newSVOP(OP_CONST,0, format_name);
8285                        NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8286                        force_next(WORD);
8287                    }
8288		    PREBLOCK(FORMAT);
8289		}
8290
8291		/* Look for a prototype */
8292		if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8293		    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8294		    COPLINE_SET_FROM_MULTI_END;
8295		    if (!s)
8296			Perl_croak(aTHX_ "Prototype not terminated");
8297		    (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8298		    have_proto = TRUE;
8299
8300		    s = skipspace(s);
8301		}
8302		else
8303		    have_proto = FALSE;
8304
8305		if (*s == ':' && s[1] != ':')
8306		    PL_expect = attrful;
8307		else if ((*s != '{' && *s != '(') && key != KEY_format) {
8308                    assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8309                           key == KEY_DESTROY || key == KEY_BEGIN ||
8310                           key == KEY_UNITCHECK || key == KEY_CHECK ||
8311                           key == KEY_INIT || key == KEY_END ||
8312                           key == KEY_my || key == KEY_state ||
8313                           key == KEY_our);
8314		    if (!have_name)
8315			Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8316		    else if (*s != ';' && *s != '}')
8317			Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8318		}
8319
8320		if (have_proto) {
8321		    NEXTVAL_NEXTTOKE.opval =
8322			(OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8323		    PL_lex_stuff = NULL;
8324		    force_next(THING);
8325		}
8326		if (!have_name) {
8327		    if (PL_curstash)
8328			sv_setpvs(PL_subname, "__ANON__");
8329		    else
8330			sv_setpvs(PL_subname, "__ANON__::__ANON__");
8331		    TOKEN(ANONSUB);
8332		}
8333		force_ident_maybe_lex('&');
8334		TOKEN(SUB);
8335	    }
8336
8337	case KEY_system:
8338	    LOP(OP_SYSTEM,XREF);
8339
8340	case KEY_symlink:
8341	    LOP(OP_SYMLINK,XTERM);
8342
8343	case KEY_syscall:
8344	    LOP(OP_SYSCALL,XTERM);
8345
8346	case KEY_sysopen:
8347	    LOP(OP_SYSOPEN,XTERM);
8348
8349	case KEY_sysseek:
8350	    LOP(OP_SYSSEEK,XTERM);
8351
8352	case KEY_sysread:
8353	    LOP(OP_SYSREAD,XTERM);
8354
8355	case KEY_syswrite:
8356	    LOP(OP_SYSWRITE,XTERM);
8357
8358	case KEY_tr:
8359	case KEY_y:
8360	    s = scan_trans(s);
8361	    TERM(sublex_start());
8362
8363	case KEY_tell:
8364	    UNI(OP_TELL);
8365
8366	case KEY_telldir:
8367	    UNI(OP_TELLDIR);
8368
8369	case KEY_tie:
8370	    LOP(OP_TIE,XTERM);
8371
8372	case KEY_tied:
8373	    UNI(OP_TIED);
8374
8375	case KEY_time:
8376	    FUN0(OP_TIME);
8377
8378	case KEY_times:
8379	    FUN0(OP_TMS);
8380
8381	case KEY_truncate:
8382	    LOP(OP_TRUNCATE,XTERM);
8383
8384	case KEY_uc:
8385	    UNI(OP_UC);
8386
8387	case KEY_ucfirst:
8388	    UNI(OP_UCFIRST);
8389
8390	case KEY_untie:
8391	    UNI(OP_UNTIE);
8392
8393	case KEY_until:
8394	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8395		return REPORT(0);
8396	    pl_yylval.ival = CopLINE(PL_curcop);
8397	    OPERATOR(UNTIL);
8398
8399	case KEY_unless:
8400	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8401		return REPORT(0);
8402	    pl_yylval.ival = CopLINE(PL_curcop);
8403	    OPERATOR(UNLESS);
8404
8405	case KEY_unlink:
8406	    LOP(OP_UNLINK,XTERM);
8407
8408	case KEY_undef:
8409	    UNIDOR(OP_UNDEF);
8410
8411	case KEY_unpack:
8412	    LOP(OP_UNPACK,XTERM);
8413
8414	case KEY_utime:
8415	    LOP(OP_UTIME,XTERM);
8416
8417	case KEY_umask:
8418	    UNIDOR(OP_UMASK);
8419
8420	case KEY_unshift:
8421	    LOP(OP_UNSHIFT,XTERM);
8422
8423	case KEY_use:
8424	    s = tokenize_use(1, s);
8425	    TOKEN(USE);
8426
8427	case KEY_values:
8428	    UNI(OP_VALUES);
8429
8430	case KEY_vec:
8431	    LOP(OP_VEC,XTERM);
8432
8433	case KEY_when:
8434	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8435		return REPORT(0);
8436	    pl_yylval.ival = CopLINE(PL_curcop);
8437            Perl_ck_warner_d(aTHX_
8438                packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8439                "when is experimental");
8440	    OPERATOR(WHEN);
8441
8442	case KEY_while:
8443	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8444		return REPORT(0);
8445	    pl_yylval.ival = CopLINE(PL_curcop);
8446	    OPERATOR(WHILE);
8447
8448	case KEY_warn:
8449	    PL_hints |= HINT_BLOCK_SCOPE;
8450	    LOP(OP_WARN,XTERM);
8451
8452	case KEY_wait:
8453	    FUN0(OP_WAIT);
8454
8455	case KEY_waitpid:
8456	    LOP(OP_WAITPID,XTERM);
8457
8458	case KEY_wantarray:
8459	    FUN0(OP_WANTARRAY);
8460
8461	case KEY_write:
8462            /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8463             * we use the same number on EBCDIC */
8464	    gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8465	    UNI(OP_ENTERWRITE);
8466
8467	case KEY_x:
8468	    if (PL_expect == XOPERATOR) {
8469		if (*s == '=' && !PL_lex_allbrackets
8470                    && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8471                {
8472		    return REPORT(0);
8473                }
8474		Mop(OP_REPEAT);
8475	    }
8476	    check_uni();
8477	    goto just_a_word;
8478
8479	case KEY_xor:
8480	    if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8481		return REPORT(0);
8482	    pl_yylval.ival = OP_XOR;
8483	    OPERATOR(OROP);
8484	}
8485    }}
8486}
8487
8488/*
8489  S_pending_ident
8490
8491  Looks up an identifier in the pad or in a package
8492
8493  Returns:
8494    PRIVATEREF if this is a lexical name.
8495    WORD       if this belongs to a package.
8496
8497  Structure:
8498      if we're in a my declaration
8499	  croak if they tried to say my($foo::bar)
8500	  build the ops for a my() declaration
8501      if it's an access to a my() variable
8502	  build ops for access to a my() variable
8503      if in a dq string, and they've said @foo and we can't find @foo
8504	  warn
8505      build ops for a bareword
8506*/
8507
8508static int
8509S_pending_ident(pTHX)
8510{
8511    PADOFFSET tmp = 0;
8512    const char pit = (char)pl_yylval.ival;
8513    const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8514    /* All routes through this function want to know if there is a colon.  */
8515    const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8516
8517    DEBUG_T({ PerlIO_printf(Perl_debug_log,
8518          "### Pending identifier '%s'\n", PL_tokenbuf); });
8519
8520    /* if we're in a my(), we can't allow dynamics here.
8521       $foo'bar has already been turned into $foo::bar, so
8522       just check for colons.
8523
8524       if it's a legal name, the OP is a PADANY.
8525    */
8526    if (PL_in_my) {
8527        if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
8528            if (has_colon)
8529                yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8530                                  "variable %s in \"our\"",
8531                                  PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8532            tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8533        }
8534        else {
8535            if (has_colon) {
8536                /* "my" variable %s can't be in a package */
8537                /* PL_no_myglob is constant */
8538                GCC_DIAG_IGNORE(-Wformat-nonliteral);
8539                yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8540                            PL_in_my == KEY_my ? "my" : "state",
8541                            *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8542                            PL_tokenbuf),
8543                            UTF ? SVf_UTF8 : 0);
8544                GCC_DIAG_RESTORE;
8545            }
8546
8547            pl_yylval.opval = newOP(OP_PADANY, 0);
8548            pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8549                                                        UTF ? SVf_UTF8 : 0);
8550	    return PRIVATEREF;
8551        }
8552    }
8553
8554    /*
8555       build the ops for accesses to a my() variable.
8556    */
8557
8558    if (!has_colon) {
8559	if (!PL_in_my)
8560	    tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8561                                 0);
8562        if (tmp != NOT_IN_PAD) {
8563            /* might be an "our" variable" */
8564            if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8565                /* build ops for a bareword */
8566		HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8567		HEK * const stashname = HvNAME_HEK(stash);
8568		SV *  const sym = newSVhek(stashname);
8569                sv_catpvs(sym, "::");
8570                sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8571                pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8572                pl_yylval.opval->op_private = OPpCONST_ENTERED;
8573                if (pit != '&')
8574                  gv_fetchsv(sym,
8575                    GV_ADDMULTI,
8576                    ((PL_tokenbuf[0] == '$') ? SVt_PV
8577                     : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8578                     : SVt_PVHV));
8579                return WORD;
8580            }
8581
8582            pl_yylval.opval = newOP(OP_PADANY, 0);
8583            pl_yylval.opval->op_targ = tmp;
8584            return PRIVATEREF;
8585        }
8586    }
8587
8588    /*
8589       Whine if they've said @foo in a doublequoted string,
8590       and @foo isn't a variable we can find in the symbol
8591       table.
8592    */
8593    if (ckWARN(WARN_AMBIGUOUS)
8594        && pit == '@'
8595        && PL_lex_state != LEX_NORMAL
8596        && !PL_lex_brackets)
8597    {
8598        GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8599                                        ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8600        if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8601		/* DO NOT warn for @- and @+ */
8602		&& !( PL_tokenbuf[2] == '\0'
8603                      && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8604	   )
8605        {
8606            /* Downgraded from fatal to warning 20000522 mjd */
8607            Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8608			"Possible unintended interpolation of %"UTF8f
8609			" in string",
8610			UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8611        }
8612    }
8613
8614    /* build ops for a bareword */
8615    pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8616				   newSVpvn_flags(PL_tokenbuf + 1,
8617						      tokenbuf_len - 1,
8618                                                      UTF ? SVf_UTF8 : 0 ));
8619    pl_yylval.opval->op_private = OPpCONST_ENTERED;
8620    if (pit != '&')
8621	gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8622		     (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8623                     | ( UTF ? SVf_UTF8 : 0 ),
8624		     ((PL_tokenbuf[0] == '$') ? SVt_PV
8625		      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8626		      : SVt_PVHV));
8627    return WORD;
8628}
8629
8630STATIC void
8631S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8632{
8633    PERL_ARGS_ASSERT_CHECKCOMMA;
8634
8635    if (*s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
8636	if (ckWARN(WARN_SYNTAX)) {
8637	    int level = 1;
8638	    const char *w;
8639	    for (w = s+2; *w && level; w++) {
8640		if (*w == '(')
8641		    ++level;
8642		else if (*w == ')')
8643		    --level;
8644	    }
8645	    while (isSPACE(*w))
8646		++w;
8647	    /* the list of chars below is for end of statements or
8648	     * block / parens, boolean operators (&&, ||, //) and branch
8649	     * constructs (or, and, if, until, unless, while, err, for).
8650	     * Not a very solid hack... */
8651	    if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8652		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8653			    "%s (...) interpreted as function",name);
8654	}
8655    }
8656    while (s < PL_bufend && isSPACE(*s))
8657	s++;
8658    if (*s == '(')
8659	s++;
8660    while (s < PL_bufend && isSPACE(*s))
8661	s++;
8662    if (isIDFIRST_lazy_if(s,UTF)) {
8663	const char * const w = s;
8664        s += UTF ? UTF8SKIP(s) : 1;
8665	while (isWORDCHAR_lazy_if(s,UTF))
8666	    s += UTF ? UTF8SKIP(s) : 1;
8667	while (s < PL_bufend && isSPACE(*s))
8668	    s++;
8669	if (*s == ',') {
8670	    GV* gv;
8671	    PADOFFSET off;
8672	    if (keyword(w, s - w, 0))
8673		return;
8674
8675	    gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8676	    if (gv && GvCVu(gv))
8677		return;
8678	    if (s - w <= 254) {
8679		char tmpbuf[256];
8680		Copy(w, tmpbuf+1, s - w, char);
8681		*tmpbuf = '&';
8682		off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8683		if (off != NOT_IN_PAD) return;
8684	    }
8685	    Perl_croak(aTHX_ "No comma allowed after %s", what);
8686	}
8687    }
8688}
8689
8690/* S_new_constant(): do any overload::constant lookup.
8691
8692   Either returns sv, or mortalizes/frees sv and returns a new SV*.
8693   Best used as sv=new_constant(..., sv, ...).
8694   If s, pv are NULL, calls subroutine with one argument,
8695   and <type> is used with error messages only.
8696   <type> is assumed to be well formed UTF-8 */
8697
8698STATIC SV *
8699S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8700	       SV *sv, SV *pv, const char *type, STRLEN typelen)
8701{
8702    dSP;
8703    HV * table = GvHV(PL_hintgv);		 /* ^H */
8704    SV *res;
8705    SV *errsv = NULL;
8706    SV **cvp;
8707    SV *cv, *typesv;
8708    const char *why1 = "", *why2 = "", *why3 = "";
8709
8710    PERL_ARGS_ASSERT_NEW_CONSTANT;
8711    /* We assume that this is true: */
8712    if (*key == 'c') { assert (strEQ(key, "charnames")); }
8713    assert(type || s);
8714
8715    /* charnames doesn't work well if there have been errors found */
8716    if (PL_error_count > 0 && *key == 'c')
8717    {
8718	SvREFCNT_dec_NN(sv);
8719	return &PL_sv_undef;
8720    }
8721
8722    sv_2mortal(sv);			/* Parent created it permanently */
8723    if (!table
8724	|| ! (PL_hints & HINT_LOCALIZE_HH)
8725	|| ! (cvp = hv_fetch(table, key, keylen, FALSE))
8726	|| ! SvOK(*cvp))
8727    {
8728	char *msg;
8729
8730	/* Here haven't found what we're looking for.  If it is charnames,
8731	 * perhaps it needs to be loaded.  Try doing that before giving up */
8732	if (*key == 'c') {
8733	    Perl_load_module(aTHX_
8734		            0,
8735			    newSVpvs("_charnames"),
8736			     /* version parameter; no need to specify it, as if
8737			      * we get too early a version, will fail anyway,
8738			      * not being able to find '_charnames' */
8739			    NULL,
8740			    newSVpvs(":full"),
8741			    newSVpvs(":short"),
8742			    NULL);
8743            assert(sp == PL_stack_sp);
8744	    table = GvHV(PL_hintgv);
8745	    if (table
8746		&& (PL_hints & HINT_LOCALIZE_HH)
8747		&& (cvp = hv_fetch(table, key, keylen, FALSE))
8748		&& SvOK(*cvp))
8749	    {
8750		goto now_ok;
8751	    }
8752	}
8753	if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8754	    msg = Perl_form(aTHX_
8755			       "Constant(%.*s) unknown",
8756				(int)(type ? typelen : len),
8757				(type ? type: s));
8758	}
8759	else {
8760            why1 = "$^H{";
8761            why2 = key;
8762            why3 = "} is not defined";
8763        report:
8764            if (*key == 'c') {
8765                msg = Perl_form(aTHX_
8766                            /* The +3 is for '\N{'; -4 for that, plus '}' */
8767                            "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8768                      );
8769            }
8770            else {
8771                msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8772                                    (int)(type ? typelen : len),
8773                                    (type ? type: s), why1, why2, why3);
8774            }
8775        }
8776	yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8777  	return SvREFCNT_inc_simple_NN(sv);
8778    }
8779  now_ok:
8780    cv = *cvp;
8781    if (!pv && s)
8782  	pv = newSVpvn_flags(s, len, SVs_TEMP);
8783    if (type && pv)
8784  	typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8785    else
8786  	typesv = &PL_sv_undef;
8787
8788    PUSHSTACKi(PERLSI_OVERLOAD);
8789    ENTER ;
8790    SAVETMPS;
8791
8792    PUSHMARK(SP) ;
8793    EXTEND(sp, 3);
8794    if (pv)
8795 	PUSHs(pv);
8796    PUSHs(sv);
8797    if (pv)
8798 	PUSHs(typesv);
8799    PUTBACK;
8800    call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8801
8802    SPAGAIN ;
8803
8804    /* Check the eval first */
8805    if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8806	STRLEN errlen;
8807	const char * errstr;
8808	sv_catpvs(errsv, "Propagated");
8809	errstr = SvPV_const(errsv, errlen);
8810	yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8811	(void)POPs;
8812	res = SvREFCNT_inc_simple_NN(sv);
8813    }
8814    else {
8815 	res = POPs;
8816	SvREFCNT_inc_simple_void_NN(res);
8817    }
8818
8819    PUTBACK ;
8820    FREETMPS ;
8821    LEAVE ;
8822    POPSTACK;
8823
8824    if (!SvOK(res)) {
8825 	why1 = "Call to &{$^H{";
8826 	why2 = key;
8827 	why3 = "}} did not return a defined value";
8828 	sv = res;
8829	(void)sv_2mortal(sv);
8830 	goto report;
8831    }
8832
8833    return res;
8834}
8835
8836PERL_STATIC_INLINE void
8837S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8838    PERL_ARGS_ASSERT_PARSE_IDENT;
8839
8840    for (;;) {
8841        if (*d >= e)
8842            Perl_croak(aTHX_ "%s", ident_too_long);
8843        if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8844             /* The UTF-8 case must come first, otherwise things
8845             * like c\N{COMBINING TILDE} would start failing, as the
8846             * isWORDCHAR_A case below would gobble the 'c' up.
8847             */
8848
8849            char *t = *s + UTF8SKIP(*s);
8850            while (isIDCONT_utf8((U8*)t))
8851                t += UTF8SKIP(t);
8852            if (*d + (t - *s) > e)
8853                Perl_croak(aTHX_ "%s", ident_too_long);
8854            Copy(*s, *d, t - *s, char);
8855            *d += t - *s;
8856            *s = t;
8857        }
8858        else if ( isWORDCHAR_A(**s) ) {
8859            do {
8860                *(*d)++ = *(*s)++;
8861            } while (isWORDCHAR_A(**s) && *d < e);
8862        }
8863        else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8864            *(*d)++ = ':';
8865            *(*d)++ = ':';
8866            (*s)++;
8867        }
8868        else if (allow_package && **s == ':' && (*s)[1] == ':'
8869           /* Disallow things like Foo::$bar. For the curious, this is
8870            * the code path that triggers the "Bad name after" warning
8871            * when looking for barewords.
8872            */
8873           && (*s)[2] != '$') {
8874            *(*d)++ = *(*s)++;
8875            *(*d)++ = *(*s)++;
8876        }
8877        else
8878            break;
8879    }
8880    return;
8881}
8882
8883/* Returns a NUL terminated string, with the length of the string written to
8884   *slp
8885   */
8886STATIC char *
8887S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8888{
8889    char *d = dest;
8890    char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8891    bool is_utf8 = cBOOL(UTF);
8892
8893    PERL_ARGS_ASSERT_SCAN_WORD;
8894
8895    parse_ident(&s, &d, e, allow_package, is_utf8);
8896    *d = '\0';
8897    *slp = d - dest;
8898    return s;
8899}
8900
8901/* Is the byte 'd' a legal single character identifier name?  'u' is true
8902 * iff Unicode semantics are to be used.  The legal ones are any of:
8903 *  a) all ASCII characters except:
8904 *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
8905 *          2) '{'
8906 *     The final case currently doesn't get this far in the program, so we
8907 *     don't test for it.  If that were to change, it would be ok to allow it.
8908 *  c) When not under Unicode rules, any upper Latin1 character
8909 *  d) Otherwise, when unicode rules are used, all XIDS characters.
8910 *
8911 *      Because all ASCII characters have the same representation whether
8912 *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
8913 *      '{' without knowing if is UTF-8 or not.
8914 * EBCDIC already uses the rules that ASCII platforms will use after the
8915 * deprecation cycle; see comment below about the deprecation. */
8916#ifdef EBCDIC
8917#   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
8918    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
8919                         ? isIDFIRST_utf8((U8*) (s))                          \
8920                         : (isGRAPH_L1(*s)                                    \
8921                            && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
8922#else
8923#   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
8924    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
8925                         ? isIDFIRST_utf8((U8*) (s))                          \
8926                         : ! isASCII_utf8((U8*) (s))))
8927#endif
8928
8929STATIC char *
8930S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8931{
8932    I32 herelines = PL_parser->herelines;
8933    SSize_t bracket = -1;
8934    char funny = *s++;
8935    char *d = dest;
8936    char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8937    bool is_utf8 = cBOOL(UTF);
8938    I32 orig_copline = 0, tmp_copline = 0;
8939
8940    PERL_ARGS_ASSERT_SCAN_IDENT;
8941
8942    if (isSPACE(*s) || !*s)
8943	s = skipspace(s);
8944    if (isDIGIT(*s)) {
8945	while (isDIGIT(*s)) {
8946	    if (d >= e)
8947		Perl_croak(aTHX_ "%s", ident_too_long);
8948	    *d++ = *s++;
8949	}
8950    }
8951    else {  /* See if it is a "normal" identifier */
8952        parse_ident(&s, &d, e, 1, is_utf8);
8953    }
8954    *d = '\0';
8955    d = dest;
8956    if (*d) {
8957        /* Either a digit variable, or parse_ident() found an identifier
8958           (anything valid as a bareword), so job done and return.  */
8959	if (PL_lex_state != LEX_NORMAL)
8960	    PL_lex_state = LEX_INTERPENDMAYBE;
8961	return s;
8962    }
8963
8964    /* Here, it is not a run-of-the-mill identifier name */
8965
8966    if (*s == '$' && s[1]
8967        && (isIDFIRST_lazy_if(s+1,is_utf8)
8968            || isDIGIT_A((U8)s[1])
8969            || s[1] == '$'
8970            || s[1] == '{'
8971            || strnEQ(s+1,"::",2)) )
8972    {
8973        /* Dereferencing a value in a scalar variable.
8974           The alternatives are different syntaxes for a scalar variable.
8975           Using ' as a leading package separator isn't allowed. :: is.   */
8976	return s;
8977    }
8978    /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
8979    if (*s == '{') {
8980	bracket = s - SvPVX(PL_linestr);
8981	s++;
8982	orig_copline = CopLINE(PL_curcop);
8983        if (s < PL_bufend && isSPACE(*s)) {
8984            s = skipspace(s);
8985        }
8986    }
8987    if ((s <= PL_bufend - (is_utf8)
8988                          ? UTF8SKIP(s)
8989                          : 1)
8990        && VALID_LEN_ONE_IDENT(s, is_utf8))
8991    {
8992        /* Deprecate all non-graphic characters.  Include SHY as a non-graphic,
8993         * because often it has no graphic representation.  (We can't get to
8994         * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
8995         * test for it.) */
8996        if ((is_utf8)
8997            ? ! isGRAPH_utf8( (U8*) s)
8998            : (! isGRAPH_L1( (U8) *s)
8999               || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
9000        {
9001            deprecate("literal non-graphic characters in variable names");
9002        }
9003
9004        if (is_utf8) {
9005            const STRLEN skip = UTF8SKIP(s);
9006            STRLEN i;
9007            d[skip] = '\0';
9008            for ( i = 0; i < skip; i++ )
9009                d[i] = *s++;
9010        }
9011        else {
9012            *d = *s++;
9013            d[1] = '\0';
9014        }
9015    }
9016    /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9017    if (*d == '^' && *s && isCONTROLVAR(*s)) {
9018	*d = toCTRL(*s);
9019	s++;
9020    }
9021    /* Warn about ambiguous code after unary operators if {...} notation isn't
9022       used.  There's no difference in ambiguity; it's merely a heuristic
9023       about when not to warn.  */
9024    else if (ck_uni && bracket == -1)
9025	check_uni();
9026    if (bracket != -1) {
9027        bool skip;
9028        char *s2;
9029        /* If we were processing {...} notation then...  */
9030	if (isIDFIRST_lazy_if(d,is_utf8)) {
9031            /* if it starts as a valid identifier, assume that it is one.
9032               (the later check for } being at the expected point will trap
9033               cases where this doesn't pan out.)  */
9034            d += is_utf8 ? UTF8SKIP(d) : 1;
9035            parse_ident(&s, &d, e, 1, is_utf8);
9036	    *d = '\0';
9037            tmp_copline = CopLINE(PL_curcop);
9038            if (s < PL_bufend && isSPACE(*s)) {
9039                s = skipspace(s);
9040            }
9041	    if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9042                /* ${foo[0]} and ${foo{bar}} notation.  */
9043		if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9044		    const char * const brack =
9045			(const char *)
9046			((*s == '[') ? "[...]" : "{...}");
9047                    orig_copline = CopLINE(PL_curcop);
9048                    CopLINE_set(PL_curcop, tmp_copline);
9049   /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9050		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9051			"Ambiguous use of %c{%s%s} resolved to %c%s%s",
9052			funny, dest, brack, funny, dest, brack);
9053                    CopLINE_set(PL_curcop, orig_copline);
9054		}
9055		bracket++;
9056		PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9057		PL_lex_allbrackets++;
9058		return s;
9059	    }
9060	}
9061	/* Handle extended ${^Foo} variables
9062	 * 1999-02-27 mjd-perl-patch@plover.com */
9063	else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9064		 && isWORDCHAR(*s))
9065	{
9066	    d++;
9067	    while (isWORDCHAR(*s) && d < e) {
9068		*d++ = *s++;
9069	    }
9070	    if (d >= e)
9071		Perl_croak(aTHX_ "%s", ident_too_long);
9072	    *d = '\0';
9073	}
9074
9075        if ( !tmp_copline )
9076            tmp_copline = CopLINE(PL_curcop);
9077        if ((skip = s < PL_bufend && isSPACE(*s)))
9078            /* Avoid incrementing line numbers or resetting PL_linestart,
9079               in case we have to back up.  */
9080            s2 = skipspace_flags(s, LEX_NO_INCLINE);
9081        else
9082            s2 = s;
9083
9084        /* Expect to find a closing } after consuming any trailing whitespace.
9085         */
9086        if (*s2 == '}') {
9087            /* Now increment line numbers if applicable.  */
9088            if (skip)
9089                s = skipspace(s);
9090	    s++;
9091	    if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9092		PL_lex_state = LEX_INTERPEND;
9093		PL_expect = XREF;
9094	    }
9095	    if (PL_lex_state == LEX_NORMAL) {
9096		if (ckWARN(WARN_AMBIGUOUS)
9097                    && (keyword(dest, d - dest, 0)
9098		        || get_cvn_flags(dest, d - dest, is_utf8
9099                           ? SVf_UTF8
9100                           : 0)))
9101		{
9102                    SV *tmp = newSVpvn_flags( dest, d - dest,
9103                                        SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9104		    if (funny == '#')
9105			funny = '@';
9106                    orig_copline = CopLINE(PL_curcop);
9107                    CopLINE_set(PL_curcop, tmp_copline);
9108		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9109			"Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9110			funny, SVfARG(tmp), funny, SVfARG(tmp));
9111                    CopLINE_set(PL_curcop, orig_copline);
9112		}
9113	    }
9114	}
9115	else {
9116            /* Didn't find the closing } at the point we expected, so restore
9117               state such that the next thing to process is the opening { and */
9118	    s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9119            CopLINE_set(PL_curcop, orig_copline);
9120            PL_parser->herelines = herelines;
9121	    *dest = '\0';
9122	}
9123    }
9124    else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9125	PL_lex_state = LEX_INTERPEND;
9126    return s;
9127}
9128
9129static bool
9130S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9131
9132    /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9133     * found in the parse starting at 's', based on the subset that are valid
9134     * in this context input to this routine in 'valid_flags'. Advances s.
9135     * Returns TRUE if the input should be treated as a valid flag, so the next
9136     * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9137     * upon first call on the current regex.  This routine will set it to any
9138     * charset modifier found.  The caller shouldn't change it.  This way,
9139     * another charset modifier encountered in the parse can be detected as an
9140     * error, as we have decided to allow only one */
9141
9142    const char c = **s;
9143    STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9144
9145    if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9146        if (isWORDCHAR_lazy_if(*s, UTF)) {
9147            yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9148                       UTF ? SVf_UTF8 : 0);
9149            (*s) += charlen;
9150            /* Pretend that it worked, so will continue processing before
9151             * dieing */
9152            return TRUE;
9153        }
9154        return FALSE;
9155    }
9156
9157    switch (c) {
9158
9159        CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9160        case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9161        case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9162        case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9163        case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9164        case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9165	case LOCALE_PAT_MOD:
9166	    if (*charset) {
9167		goto multiple_charsets;
9168	    }
9169	    set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9170	    *charset = c;
9171	    break;
9172	case UNICODE_PAT_MOD:
9173	    if (*charset) {
9174		goto multiple_charsets;
9175	    }
9176	    set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9177	    *charset = c;
9178	    break;
9179	case ASCII_RESTRICT_PAT_MOD:
9180	    if (! *charset) {
9181		set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9182	    }
9183	    else {
9184
9185		/* Error if previous modifier wasn't an 'a', but if it was, see
9186		 * if, and accept, a second occurrence (only) */
9187		if (*charset != 'a'
9188		    || get_regex_charset(*pmfl)
9189			!= REGEX_ASCII_RESTRICTED_CHARSET)
9190		{
9191			goto multiple_charsets;
9192		}
9193		set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9194	    }
9195	    *charset = c;
9196	    break;
9197	case DEPENDS_PAT_MOD:
9198	    if (*charset) {
9199		goto multiple_charsets;
9200	    }
9201	    set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9202	    *charset = c;
9203	    break;
9204    }
9205
9206    (*s)++;
9207    return TRUE;
9208
9209    multiple_charsets:
9210	if (*charset != c) {
9211	    yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9212	}
9213	else if (c == 'a') {
9214  /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9215	    yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9216	}
9217	else {
9218	    yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9219	}
9220
9221	/* Pretend that it worked, so will continue processing before dieing */
9222	(*s)++;
9223	return TRUE;
9224}
9225
9226STATIC char *
9227S_scan_pat(pTHX_ char *start, I32 type)
9228{
9229    PMOP *pm;
9230    char *s;
9231    const char * const valid_flags =
9232	(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9233    char charset = '\0';    /* character set modifier */
9234    unsigned int x_mod_count = 0;
9235
9236    PERL_ARGS_ASSERT_SCAN_PAT;
9237
9238    s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9239    if (!s)
9240	Perl_croak(aTHX_ "Search pattern not terminated");
9241
9242    pm = (PMOP*)newPMOP(type, 0);
9243    if (PL_multi_open == '?') {
9244	/* This is the only point in the code that sets PMf_ONCE:  */
9245	pm->op_pmflags |= PMf_ONCE;
9246
9247	/* Hence it's safe to do this bit of PMOP book-keeping here, which
9248	   allows us to restrict the list needed by reset to just the ??
9249	   matches.  */
9250	assert(type != OP_TRANS);
9251	if (PL_curstash) {
9252	    MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9253	    U32 elements;
9254	    if (!mg) {
9255		mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9256				 0);
9257	    }
9258	    elements = mg->mg_len / sizeof(PMOP**);
9259	    Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9260	    ((PMOP**)mg->mg_ptr) [elements++] = pm;
9261	    mg->mg_len = elements * sizeof(PMOP**);
9262	    PmopSTASH_set(pm,PL_curstash);
9263	}
9264    }
9265
9266    /* if qr/...(?{..}).../, then need to parse the pattern within a new
9267     * anon CV. False positives like qr/[(?{]/ are harmless */
9268
9269    if (type == OP_QR) {
9270	STRLEN len;
9271	char *e, *p = SvPV(PL_lex_stuff, len);
9272	e = p + len;
9273	for (; p < e; p++) {
9274	    if (p[0] == '(' && p[1] == '?'
9275		&& (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9276	    {
9277		pm->op_pmflags |= PMf_HAS_CV;
9278		break;
9279	    }
9280	}
9281	pm->op_pmflags |= PMf_IS_QR;
9282    }
9283
9284    while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9285                                &s, &charset, &x_mod_count))
9286    {};
9287    /* issue a warning if /c is specified,but /g is not */
9288    if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9289    {
9290        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9291		       "Use of /c modifier is meaningless without /g" );
9292    }
9293
9294    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9295
9296    PL_lex_op = (OP*)pm;
9297    pl_yylval.ival = OP_MATCH;
9298    return s;
9299}
9300
9301STATIC char *
9302S_scan_subst(pTHX_ char *start)
9303{
9304    char *s;
9305    PMOP *pm;
9306    I32 first_start;
9307    line_t first_line;
9308    I32 es = 0;
9309    char charset = '\0';    /* character set modifier */
9310    unsigned int x_mod_count = 0;
9311    char *t;
9312
9313    PERL_ARGS_ASSERT_SCAN_SUBST;
9314
9315    pl_yylval.ival = OP_NULL;
9316
9317    s = scan_str(start, TRUE, FALSE, FALSE, &t);
9318
9319    if (!s)
9320	Perl_croak(aTHX_ "Substitution pattern not terminated");
9321
9322    s = t;
9323
9324    first_start = PL_multi_start;
9325    first_line = CopLINE(PL_curcop);
9326    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9327    if (!s) {
9328	SvREFCNT_dec_NN(PL_lex_stuff);
9329	PL_lex_stuff = NULL;
9330	Perl_croak(aTHX_ "Substitution replacement not terminated");
9331    }
9332    PL_multi_start = first_start;	/* so whole substitution is taken together */
9333
9334    pm = (PMOP*)newPMOP(OP_SUBST, 0);
9335
9336
9337    while (*s) {
9338	if (*s == EXEC_PAT_MOD) {
9339	    s++;
9340	    es++;
9341	}
9342	else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9343                                  &s, &charset, &x_mod_count))
9344	{
9345	    break;
9346	}
9347    }
9348
9349    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9350
9351    if ((pm->op_pmflags & PMf_CONTINUE)) {
9352        Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9353    }
9354
9355    if (es) {
9356	SV * const repl = newSVpvs("");
9357
9358	PL_multi_end = 0;
9359	pm->op_pmflags |= PMf_EVAL;
9360	while (es-- > 0) {
9361	    if (es)
9362		sv_catpvs(repl, "eval ");
9363	    else
9364		sv_catpvs(repl, "do ");
9365	}
9366	sv_catpvs(repl, "{");
9367	sv_catsv(repl, PL_sublex_info.repl);
9368	sv_catpvs(repl, "}");
9369	SvEVALED_on(repl);
9370	SvREFCNT_dec(PL_sublex_info.repl);
9371	PL_sublex_info.repl = repl;
9372    }
9373    if (CopLINE(PL_curcop) != first_line) {
9374	sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9375	((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9376	    CopLINE(PL_curcop) - first_line;
9377	CopLINE_set(PL_curcop, first_line);
9378    }
9379
9380    PL_lex_op = (OP*)pm;
9381    pl_yylval.ival = OP_SUBST;
9382    return s;
9383}
9384
9385STATIC char *
9386S_scan_trans(pTHX_ char *start)
9387{
9388    char* s;
9389    OP *o;
9390    U8 squash;
9391    U8 del;
9392    U8 complement;
9393    bool nondestruct = 0;
9394    char *t;
9395
9396    PERL_ARGS_ASSERT_SCAN_TRANS;
9397
9398    pl_yylval.ival = OP_NULL;
9399
9400    s = scan_str(start,FALSE,FALSE,FALSE,&t);
9401    if (!s)
9402	Perl_croak(aTHX_ "Transliteration pattern not terminated");
9403
9404    s = t;
9405
9406    s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9407    if (!s) {
9408	SvREFCNT_dec_NN(PL_lex_stuff);
9409	PL_lex_stuff = NULL;
9410	Perl_croak(aTHX_ "Transliteration replacement not terminated");
9411    }
9412
9413    complement = del = squash = 0;
9414    while (1) {
9415	switch (*s) {
9416	case 'c':
9417	    complement = OPpTRANS_COMPLEMENT;
9418	    break;
9419	case 'd':
9420	    del = OPpTRANS_DELETE;
9421	    break;
9422	case 's':
9423	    squash = OPpTRANS_SQUASH;
9424	    break;
9425	case 'r':
9426	    nondestruct = 1;
9427	    break;
9428	default:
9429	    goto no_more;
9430	}
9431	s++;
9432    }
9433  no_more:
9434
9435    o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9436    o->op_private &= ~OPpTRANS_ALL;
9437    o->op_private |= del|squash|complement|
9438      (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9439      (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9440
9441    PL_lex_op = o;
9442    pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9443
9444
9445    return s;
9446}
9447
9448/* scan_heredoc
9449   Takes a pointer to the first < in <<FOO.
9450   Returns a pointer to the byte following <<FOO.
9451
9452   This function scans a heredoc, which involves different methods
9453   depending on whether we are in a string eval, quoted construct, etc.
9454   This is because PL_linestr could containing a single line of input, or
9455   a whole string being evalled, or the contents of the current quote-
9456   like operator.
9457
9458   The two basic methods are:
9459    - Steal lines from the input stream
9460    - Scan the heredoc in PL_linestr and remove it therefrom
9461
9462   In a file scope or filtered eval, the first method is used; in a
9463   string eval, the second.
9464
9465   In a quote-like operator, we have to choose between the two,
9466   depending on where we can find a newline.  We peek into outer lex-
9467   ing scopes until we find one with a newline in it.  If we reach the
9468   outermost lexing scope and it is a file, we use the stream method.
9469   Otherwise it is treated as an eval.
9470*/
9471
9472STATIC char *
9473S_scan_heredoc(pTHX_ char *s)
9474{
9475    I32 op_type = OP_SCALAR;
9476    I32 len;
9477    SV *tmpstr;
9478    char term;
9479    char *d;
9480    char *e;
9481    char *peek;
9482    const bool infile = PL_rsfp || PL_parser->filtered;
9483    const line_t origline = CopLINE(PL_curcop);
9484    LEXSHARED *shared = PL_parser->lex_shared;
9485
9486    PERL_ARGS_ASSERT_SCAN_HEREDOC;
9487
9488    s += 2;
9489    d = PL_tokenbuf + 1;
9490    e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9491    *PL_tokenbuf = '\n';
9492    peek = s;
9493    while (SPACE_OR_TAB(*peek))
9494	peek++;
9495    if (*peek == '`' || *peek == '\'' || *peek =='"') {
9496	s = peek;
9497	term = *s++;
9498	s = delimcpy(d, e, s, PL_bufend, term, &len);
9499	if (s == PL_bufend)
9500	    Perl_croak(aTHX_ "Unterminated delimiter for here document");
9501	d += len;
9502	s++;
9503    }
9504    else {
9505	if (*s == '\\')
9506            /* <<\FOO is equivalent to <<'FOO' */
9507	    s++, term = '\'';
9508	else
9509	    term = '"';
9510	if (!isWORDCHAR_lazy_if(s,UTF))
9511	    deprecate("bare << to mean <<\"\"");
9512	peek = s;
9513	while (isWORDCHAR_lazy_if(peek,UTF)) {
9514	    peek += UTF ? UTF8SKIP(peek) : 1;
9515	}
9516	len = (peek - s >= e - d) ? (e - d) : (peek - s);
9517	Copy(s, d, len, char);
9518	s += len;
9519	d += len;
9520    }
9521    if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9522	Perl_croak(aTHX_ "Delimiter for here document is too long");
9523    *d++ = '\n';
9524    *d = '\0';
9525    len = d - PL_tokenbuf;
9526
9527#ifndef PERL_STRICT_CR
9528    d = strchr(s, '\r');
9529    if (d) {
9530	char * const olds = s;
9531	s = d;
9532	while (s < PL_bufend) {
9533	    if (*s == '\r') {
9534		*d++ = '\n';
9535		if (*++s == '\n')
9536		    s++;
9537	    }
9538	    else if (*s == '\n' && s[1] == '\r') {	/* \015\013 on a mac? */
9539		*d++ = *s++;
9540		s++;
9541	    }
9542	    else
9543		*d++ = *s++;
9544	}
9545	*d = '\0';
9546	PL_bufend = d;
9547	SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9548	s = olds;
9549    }
9550#endif
9551
9552    tmpstr = newSV_type(SVt_PVIV);
9553    SvGROW(tmpstr, 80);
9554    if (term == '\'') {
9555	op_type = OP_CONST;
9556	SvIV_set(tmpstr, -1);
9557    }
9558    else if (term == '`') {
9559	op_type = OP_BACKTICK;
9560	SvIV_set(tmpstr, '\\');
9561    }
9562
9563    PL_multi_start = origline + 1 + PL_parser->herelines;
9564    PL_multi_open = PL_multi_close = '<';
9565    /* inside a string eval or quote-like operator */
9566    if (!infile || PL_lex_inwhat) {
9567	SV *linestr;
9568	char *bufend;
9569	char * const olds = s;
9570	PERL_CONTEXT * const cx = CX_CUR();
9571	/* These two fields are not set until an inner lexing scope is
9572	   entered.  But we need them set here. */
9573	shared->ls_bufptr  = s;
9574	shared->ls_linestr = PL_linestr;
9575	if (PL_lex_inwhat)
9576	  /* Look for a newline.  If the current buffer does not have one,
9577	     peek into the line buffer of the parent lexing scope, going
9578 	     up as many levels as necessary to find one with a newline
9579	     after bufptr.
9580	   */
9581	  while (!(s = (char *)memchr(
9582		    (void *)shared->ls_bufptr, '\n',
9583		    SvEND(shared->ls_linestr)-shared->ls_bufptr
9584		))) {
9585	    shared = shared->ls_prev;
9586	    /* shared is only null if we have gone beyond the outermost
9587	       lexing scope.  In a file, we will have broken out of the
9588	       loop in the previous iteration.  In an eval, the string buf-
9589	       fer ends with "\n;", so the while condition above will have
9590	       evaluated to false.  So shared can never be null.  Or so you
9591	       might think.  Odd syntax errors like s;@{<<; can gobble up
9592	       the implicit semicolon at the end of a flie, causing the
9593	       file handle to be closed even when we are not in a string
9594	       eval.  So shared may be null in that case.  */
9595	    if (UNLIKELY(!shared))
9596		goto interminable;
9597	    /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9598	       most lexing scope.  In a file, shared->ls_linestr at that
9599	       level is just one line, so there is no body to steal. */
9600	    if (infile && !shared->ls_prev) {
9601		s = olds;
9602		goto streaming;
9603	    }
9604	  }
9605	else {	/* eval or we've already hit EOF */
9606	    s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9607	    if (!s)
9608                goto interminable;
9609	}
9610	linestr = shared->ls_linestr;
9611	bufend = SvEND(linestr);
9612	d = s;
9613	while (s < bufend - len + 1
9614               && memNE(s,PL_tokenbuf,len) )
9615        {
9616	    if (*s++ == '\n')
9617		++PL_parser->herelines;
9618	}
9619	if (s >= bufend - len + 1) {
9620	    goto interminable;
9621	}
9622	sv_setpvn(tmpstr,d+1,s-d);
9623	s += len - 1;
9624	/* the preceding stmt passes a newline */
9625	PL_parser->herelines++;
9626
9627	/* s now points to the newline after the heredoc terminator.
9628	   d points to the newline before the body of the heredoc.
9629	 */
9630
9631	/* We are going to modify linestr in place here, so set
9632	   aside copies of the string if necessary for re-evals or
9633	   (caller $n)[6]. */
9634	/* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9635	   check shared->re_eval_str. */
9636	if (shared->re_eval_start || shared->re_eval_str) {
9637	    /* Set aside the rest of the regexp */
9638	    if (!shared->re_eval_str)
9639		shared->re_eval_str =
9640		       newSVpvn(shared->re_eval_start,
9641				bufend - shared->re_eval_start);
9642	    shared->re_eval_start -= s-d;
9643	}
9644	if (cxstack_ix >= 0
9645            && CxTYPE(cx) == CXt_EVAL
9646            && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9647            && cx->blk_eval.cur_text == linestr)
9648        {
9649	    cx->blk_eval.cur_text = newSVsv(linestr);
9650	    SvSCREAM_on(cx->blk_eval.cur_text);
9651	}
9652	/* Copy everything from s onwards back to d. */
9653	Move(s,d,bufend-s + 1,char);
9654	SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9655	/* Setting PL_bufend only applies when we have not dug deeper
9656	   into other scopes, because sublex_done sets PL_bufend to
9657	   SvEND(PL_linestr). */
9658	if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9659	s = olds;
9660    }
9661    else
9662    {
9663      SV *linestr_save;
9664      char *oldbufptr_save;
9665     streaming:
9666      sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9667      term = PL_tokenbuf[1];
9668      len--;
9669      linestr_save = PL_linestr; /* must restore this afterwards */
9670      d = s;			 /* and this */
9671      oldbufptr_save = PL_oldbufptr;
9672      PL_linestr = newSVpvs("");
9673      PL_bufend = SvPVX(PL_linestr);
9674      while (1) {
9675	PL_bufptr = PL_bufend;
9676	CopLINE_set(PL_curcop,
9677		    origline + 1 + PL_parser->herelines);
9678	if (!lex_next_chunk(LEX_NO_TERM)
9679	 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9680	    /* Simply freeing linestr_save might seem simpler here, as it
9681	       does not matter what PL_linestr points to, since we are
9682	       about to croak; but in a quote-like op, linestr_save
9683	       will have been prospectively freed already, via
9684	       SAVEFREESV(PL_linestr) in sublex_push, so it���s easier to
9685	       restore PL_linestr. */
9686	    SvREFCNT_dec_NN(PL_linestr);
9687	    PL_linestr = linestr_save;
9688            PL_oldbufptr = oldbufptr_save;
9689	    goto interminable;
9690	}
9691	CopLINE_set(PL_curcop, origline);
9692	if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9693            s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9694            /* ^That should be enough to avoid this needing to grow:  */
9695	    sv_catpvs(PL_linestr, "\n\0");
9696            assert(s == SvPVX(PL_linestr));
9697            PL_bufend = SvEND(PL_linestr);
9698	}
9699	s = PL_bufptr;
9700	PL_parser->herelines++;
9701	PL_last_lop = PL_last_uni = NULL;
9702#ifndef PERL_STRICT_CR
9703	if (PL_bufend - PL_linestart >= 2) {
9704	    if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
9705                || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9706	    {
9707		PL_bufend[-2] = '\n';
9708		PL_bufend--;
9709		SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9710	    }
9711	    else if (PL_bufend[-1] == '\r')
9712		PL_bufend[-1] = '\n';
9713	}
9714	else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9715	    PL_bufend[-1] = '\n';
9716#endif
9717	if (*s == term && PL_bufend-s >= len
9718	 && memEQ(s,PL_tokenbuf + 1,len)) {
9719	    SvREFCNT_dec(PL_linestr);
9720	    PL_linestr = linestr_save;
9721	    PL_linestart = SvPVX(linestr_save);
9722	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9723            PL_oldbufptr = oldbufptr_save;
9724	    s = d;
9725	    break;
9726	}
9727	else {
9728	    sv_catsv(tmpstr,PL_linestr);
9729	}
9730      }
9731    }
9732    PL_multi_end = origline + PL_parser->herelines;
9733    if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9734	SvPV_shrink_to_cur(tmpstr);
9735    }
9736    if (!IN_BYTES) {
9737	if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9738	    SvUTF8_on(tmpstr);
9739	else if (IN_ENCODING)
9740	    sv_recode_to_utf8(tmpstr, _get_encoding());
9741    }
9742    PL_lex_stuff = tmpstr;
9743    pl_yylval.ival = op_type;
9744    return s;
9745
9746  interminable:
9747    SvREFCNT_dec(tmpstr);
9748    CopLINE_set(PL_curcop, origline);
9749    missingterm(PL_tokenbuf + 1);
9750}
9751
9752/* scan_inputsymbol
9753   takes: current position in input buffer
9754   returns: new position in input buffer
9755   side-effects: pl_yylval and lex_op are set.
9756
9757   This code handles:
9758
9759   <>		read from ARGV
9760   <<>>		read from ARGV without magic open
9761   <FH> 	read from filehandle
9762   <pkg::FH>	read from package qualified filehandle
9763   <pkg'FH>	read from package qualified filehandle
9764   <$fh>	read from filehandle in $fh
9765   <*.h>	filename glob
9766
9767*/
9768
9769STATIC char *
9770S_scan_inputsymbol(pTHX_ char *start)
9771{
9772    char *s = start;		/* current position in buffer */
9773    char *end;
9774    I32 len;
9775    bool nomagicopen = FALSE;
9776    char *d = PL_tokenbuf;					/* start of temp holding space */
9777    const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;	/* end of temp holding space */
9778
9779    PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9780
9781    end = strchr(s, '\n');
9782    if (!end)
9783	end = PL_bufend;
9784    if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9785        nomagicopen = TRUE;
9786        *d = '\0';
9787        len = 0;
9788        s += 3;
9789    }
9790    else
9791        s = delimcpy(d, e, s + 1, end, '>', &len);	/* extract until > */
9792
9793    /* die if we didn't have space for the contents of the <>,
9794       or if it didn't end, or if we see a newline
9795    */
9796
9797    if (len >= (I32)sizeof PL_tokenbuf)
9798	Perl_croak(aTHX_ "Excessively long <> operator");
9799    if (s >= end)
9800	Perl_croak(aTHX_ "Unterminated <> operator");
9801
9802    s++;
9803
9804    /* check for <$fh>
9805       Remember, only scalar variables are interpreted as filehandles by
9806       this code.  Anything more complex (e.g., <$fh{$num}>) will be
9807       treated as a glob() call.
9808       This code makes use of the fact that except for the $ at the front,
9809       a scalar variable and a filehandle look the same.
9810    */
9811    if (*d == '$' && d[1]) d++;
9812
9813    /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9814    while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9815	d += UTF ? UTF8SKIP(d) : 1;
9816
9817    /* If we've tried to read what we allow filehandles to look like, and
9818       there's still text left, then it must be a glob() and not a getline.
9819       Use scan_str to pull out the stuff between the <> and treat it
9820       as nothing more than a string.
9821    */
9822
9823    if (d - PL_tokenbuf != len) {
9824	pl_yylval.ival = OP_GLOB;
9825	s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9826	if (!s)
9827	   Perl_croak(aTHX_ "Glob not terminated");
9828	return s;
9829    }
9830    else {
9831	bool readline_overriden = FALSE;
9832	GV *gv_readline;
9833    	/* we're in a filehandle read situation */
9834	d = PL_tokenbuf;
9835
9836	/* turn <> into <ARGV> */
9837	if (!len)
9838	    Copy("ARGV",d,5,char);
9839
9840	/* Check whether readline() is overriden */
9841	if ((gv_readline = gv_override("readline",8)))
9842	    readline_overriden = TRUE;
9843
9844	/* if <$fh>, create the ops to turn the variable into a
9845	   filehandle
9846	*/
9847	if (*d == '$') {
9848	    /* try to find it in the pad for this block, otherwise find
9849	       add symbol table ops
9850	    */
9851	    const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
9852	    if (tmp != NOT_IN_PAD) {
9853		if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9854		    HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9855		    HEK * const stashname = HvNAME_HEK(stash);
9856		    SV * const sym = sv_2mortal(newSVhek(stashname));
9857		    sv_catpvs(sym, "::");
9858		    sv_catpv(sym, d+1);
9859		    d = SvPVX(sym);
9860		    goto intro_sym;
9861		}
9862		else {
9863		    OP * const o = newOP(OP_PADSV, 0);
9864		    o->op_targ = tmp;
9865		    PL_lex_op = readline_overriden
9866			? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9867				op_append_elem(OP_LIST, o,
9868				    newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9869			: (OP*)newUNOP(OP_READLINE, 0, o);
9870		}
9871	    }
9872	    else {
9873		GV *gv;
9874		++d;
9875              intro_sym:
9876		gv = gv_fetchpv(d,
9877				GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
9878				SVt_PV);
9879		PL_lex_op = readline_overriden
9880		    ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9881			    op_append_elem(OP_LIST,
9882				newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9883				newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9884		    : (OP*)newUNOP(OP_READLINE, 0,
9885			    newUNOP(OP_RV2SV, 0,
9886				newGVOP(OP_GV, 0, gv)));
9887	    }
9888	    /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9889	    pl_yylval.ival = OP_NULL;
9890	}
9891
9892	/* If it's none of the above, it must be a literal filehandle
9893	   (<Foo::BAR> or <FOO>) so build a simple readline OP */
9894	else {
9895	    GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9896	    PL_lex_op = readline_overriden
9897		? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9898			op_append_elem(OP_LIST,
9899			    newGVOP(OP_GV, 0, gv),
9900			    newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9901		: (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
9902	    pl_yylval.ival = OP_NULL;
9903	}
9904    }
9905
9906    return s;
9907}
9908
9909
9910/* scan_str
9911   takes:
9912	start			position in buffer
9913        keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
9914                                only if they are of the open/close form
9915	keep_delims		preserve the delimiters around the string
9916	re_reparse		compiling a run-time /(?{})/:
9917				   collapse // to /,  and skip encoding src
9918	delimp			if non-null, this is set to the position of
9919				the closing delimiter, or just after it if
9920				the closing and opening delimiters differ
9921				(i.e., the opening delimiter of a substitu-
9922				tion replacement)
9923   returns: position to continue reading from buffer
9924   side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9925   	updates the read buffer.
9926
9927   This subroutine pulls a string out of the input.  It is called for:
9928   	q		single quotes		q(literal text)
9929	'		single quotes		'literal text'
9930	qq		double quotes		qq(interpolate $here please)
9931	"		double quotes		"interpolate $here please"
9932	qx		backticks		qx(/bin/ls -l)
9933	`		backticks		`/bin/ls -l`
9934	qw		quote words		@EXPORT_OK = qw( func() $spam )
9935	m//		regexp match		m/this/
9936	s///		regexp substitute	s/this/that/
9937	tr///		string transliterate	tr/this/that/
9938	y///		string transliterate	y/this/that/
9939	($*@)		sub prototypes		sub foo ($)
9940	(stuff)		sub attr parameters	sub foo : attr(stuff)
9941	<>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
9942
9943   In most of these cases (all but <>, patterns and transliterate)
9944   yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9945   calls scan_str().  s/// makes yylex() call scan_subst() which calls
9946   scan_str().  tr/// and y/// make yylex() call scan_trans() which
9947   calls scan_str().
9948
9949   It skips whitespace before the string starts, and treats the first
9950   character as the delimiter.  If the delimiter is one of ([{< then
9951   the corresponding "close" character )]}> is used as the closing
9952   delimiter.  It allows quoting of delimiters, and if the string has
9953   balanced delimiters ([{<>}]) it allows nesting.
9954
9955   On success, the SV with the resulting string is put into lex_stuff or,
9956   if that is already non-NULL, into lex_repl. The second case occurs only
9957   when parsing the RHS of the special constructs s/// and tr/// (y///).
9958   For convenience, the terminating delimiter character is stuffed into
9959   SvIVX of the SV.
9960*/
9961
9962STATIC char *
9963S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9964		 char **delimp
9965    )
9966{
9967    SV *sv;			/* scalar value: string */
9968    const char *tmps;		/* temp string, used for delimiter matching */
9969    char *s = start;		/* current position in the buffer */
9970    char term;			/* terminating character */
9971    char *to;			/* current position in the sv's data */
9972    I32 brackets = 1;		/* bracket nesting level */
9973    bool has_utf8 = FALSE;	/* is there any utf8 content? */
9974    I32 termcode;		/* terminating char. code */
9975    U8 termstr[UTF8_MAXBYTES];	/* terminating string */
9976    STRLEN termlen;		/* length of terminating string */
9977    int last_off = 0;		/* last position for nesting bracket */
9978    line_t herelines;
9979
9980    PERL_ARGS_ASSERT_SCAN_STR;
9981
9982    /* skip space before the delimiter */
9983    if (isSPACE(*s)) {
9984	s = skipspace(s);
9985    }
9986
9987    /* mark where we are, in case we need to report errors */
9988    CLINE;
9989
9990    /* after skipping whitespace, the next character is the terminator */
9991    term = *s;
9992    if (!UTF) {
9993	termcode = termstr[0] = term;
9994	termlen = 1;
9995    }
9996    else {
9997	termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9998	Copy(s, termstr, termlen, U8);
9999	if (!UTF8_IS_INVARIANT(term))
10000	    has_utf8 = TRUE;
10001    }
10002
10003    /* mark where we are */
10004    PL_multi_start = CopLINE(PL_curcop);
10005    PL_multi_open = term;
10006    herelines = PL_parser->herelines;
10007
10008    /* find corresponding closing delimiter */
10009    if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10010	termcode = termstr[0] = term = tmps[5];
10011
10012    PL_multi_close = term;
10013
10014    if (PL_multi_open == PL_multi_close) {
10015        keep_bracketed_quoted = FALSE;
10016    }
10017
10018    /* create a new SV to hold the contents.  79 is the SV's initial length.
10019       What a random number. */
10020    sv = newSV_type(SVt_PVIV);
10021    SvGROW(sv, 80);
10022    SvIV_set(sv, termcode);
10023    (void)SvPOK_only(sv);		/* validate pointer */
10024
10025    /* move past delimiter and try to read a complete string */
10026    if (keep_delims)
10027	sv_catpvn(sv, s, termlen);
10028    s += termlen;
10029    for (;;) {
10030	if (IN_ENCODING && !UTF && !re_reparse) {
10031	    bool cont = TRUE;
10032
10033	    while (cont) {
10034		int offset = s - SvPVX_const(PL_linestr);
10035		const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
10036					   &offset, (char*)termstr, termlen);
10037		const char *ns;
10038		char *svlast;
10039
10040		if (SvIsCOW(PL_linestr)) {
10041		    STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10042		    STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10043		    STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10044		    char *buf = SvPVX(PL_linestr);
10045		    bufend_pos = PL_parser->bufend - buf;
10046		    bufptr_pos = PL_parser->bufptr - buf;
10047		    oldbufptr_pos = PL_parser->oldbufptr - buf;
10048		    oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10049		    linestart_pos = PL_parser->linestart - buf;
10050		    last_uni_pos = PL_parser->last_uni
10051			? PL_parser->last_uni - buf
10052			: 0;
10053		    last_lop_pos = PL_parser->last_lop
10054			? PL_parser->last_lop - buf
10055			: 0;
10056		    re_eval_start_pos =
10057			PL_parser->lex_shared->re_eval_start ?
10058                            PL_parser->lex_shared->re_eval_start - buf : 0;
10059		    s_pos = s - buf;
10060
10061		    sv_force_normal(PL_linestr);
10062
10063		    buf = SvPVX(PL_linestr);
10064		    PL_parser->bufend = buf + bufend_pos;
10065		    PL_parser->bufptr = buf + bufptr_pos;
10066		    PL_parser->oldbufptr = buf + oldbufptr_pos;
10067		    PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10068		    PL_parser->linestart = buf + linestart_pos;
10069		    if (PL_parser->last_uni)
10070			PL_parser->last_uni = buf + last_uni_pos;
10071		    if (PL_parser->last_lop)
10072			PL_parser->last_lop = buf + last_lop_pos;
10073		    if (PL_parser->lex_shared->re_eval_start)
10074		        PL_parser->lex_shared->re_eval_start  =
10075			    buf + re_eval_start_pos;
10076		    s = buf + s_pos;
10077		}
10078		ns = SvPVX_const(PL_linestr) + offset;
10079		svlast = SvEND(sv) - 1;
10080
10081		for (; s < ns; s++) {
10082		    if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10083			COPLINE_INC_WITH_HERELINES;
10084		}
10085		if (!found)
10086		    goto read_more_line;
10087		else {
10088		    /* handle quoted delimiters */
10089		    if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10090			const char *t;
10091			for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10092			    t--;
10093			if ((svlast-1 - t) % 2) {
10094			    if (!keep_bracketed_quoted) {
10095				*(svlast-1) = term;
10096				*svlast = '\0';
10097				SvCUR_set(sv, SvCUR(sv) - 1);
10098			    }
10099			    continue;
10100			}
10101		    }
10102		    if (PL_multi_open == PL_multi_close) {
10103			cont = FALSE;
10104		    }
10105		    else {
10106			const char *t;
10107			char *w;
10108			for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10109			    /* At here, all closes are "was quoted" one,
10110			       so we don't check PL_multi_close. */
10111			    if (*t == '\\') {
10112				if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
10113				    t++;
10114				else
10115				    *w++ = *t++;
10116			    }
10117			    else if (*t == PL_multi_open)
10118				brackets++;
10119
10120			    *w = *t;
10121			}
10122			if (w < t) {
10123			    *w++ = term;
10124			    *w = '\0';
10125			    SvCUR_set(sv, w - SvPVX_const(sv));
10126			}
10127			last_off = w - SvPVX(sv);
10128			if (--brackets <= 0)
10129			    cont = FALSE;
10130		    }
10131		}
10132	    }
10133	    if (!keep_delims) {
10134		SvCUR_set(sv, SvCUR(sv) - 1);
10135		*SvEND(sv) = '\0';
10136	    }
10137	    break;
10138	}
10139
10140    	/* extend sv if need be */
10141	SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10142	/* set 'to' to the next character in the sv's string */
10143	to = SvPVX(sv)+SvCUR(sv);
10144
10145	/* if open delimiter is the close delimiter read unbridle */
10146	if (PL_multi_open == PL_multi_close) {
10147	    for (; s < PL_bufend; s++,to++) {
10148	    	/* embedded newlines increment the current line number */
10149		if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10150		    COPLINE_INC_WITH_HERELINES;
10151		/* handle quoted delimiters */
10152		if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10153		    if (!keep_bracketed_quoted
10154		        && (s[1] == term
10155			    || (re_reparse && s[1] == '\\'))
10156		    )
10157			s++;
10158		    else /* any other quotes are simply copied straight through */
10159			*to++ = *s++;
10160		}
10161		/* terminate when run out of buffer (the for() condition), or
10162		   have found the terminator */
10163		else if (*s == term) {
10164		    if (termlen == 1)
10165			break;
10166		    if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10167			break;
10168		}
10169		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10170		    has_utf8 = TRUE;
10171		*to = *s;
10172	    }
10173	}
10174
10175	/* if the terminator isn't the same as the start character (e.g.,
10176	   matched brackets), we have to allow more in the quoting, and
10177	   be prepared for nested brackets.
10178	*/
10179	else {
10180	    /* read until we run out of string, or we find the terminator */
10181	    for (; s < PL_bufend; s++,to++) {
10182	    	/* embedded newlines increment the line count */
10183		if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10184		    COPLINE_INC_WITH_HERELINES;
10185		/* backslashes can escape the open or closing characters */
10186		if (*s == '\\' && s+1 < PL_bufend) {
10187		    if (!keep_bracketed_quoted
10188                       && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10189                    {
10190			s++;
10191                    }
10192		    else
10193			*to++ = *s++;
10194                }
10195		/* allow nested opens and closes */
10196		else if (*s == PL_multi_close && --brackets <= 0)
10197		    break;
10198		else if (*s == PL_multi_open)
10199		    brackets++;
10200		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10201		    has_utf8 = TRUE;
10202		*to = *s;
10203	    }
10204	}
10205	/* terminate the copied string and update the sv's end-of-string */
10206	*to = '\0';
10207	SvCUR_set(sv, to - SvPVX_const(sv));
10208
10209	/*
10210	 * this next chunk reads more into the buffer if we're not done yet
10211	 */
10212
10213  	if (s < PL_bufend)
10214	    break;		/* handle case where we are done yet :-) */
10215
10216#ifndef PERL_STRICT_CR
10217	if (to - SvPVX_const(sv) >= 2) {
10218	    if (   (to[-2] == '\r' && to[-1] == '\n')
10219                || (to[-2] == '\n' && to[-1] == '\r'))
10220	    {
10221		to[-2] = '\n';
10222		to--;
10223		SvCUR_set(sv, to - SvPVX_const(sv));
10224	    }
10225	    else if (to[-1] == '\r')
10226		to[-1] = '\n';
10227	}
10228	else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10229	    to[-1] = '\n';
10230#endif
10231
10232     read_more_line:
10233	/* if we're out of file, or a read fails, bail and reset the current
10234	   line marker so we can report where the unterminated string began
10235	*/
10236	COPLINE_INC_WITH_HERELINES;
10237	PL_bufptr = PL_bufend;
10238	if (!lex_next_chunk(0)) {
10239	    sv_free(sv);
10240	    CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10241	    return NULL;
10242	}
10243	s = PL_bufptr;
10244    }
10245
10246    /* at this point, we have successfully read the delimited string */
10247
10248    if (!IN_ENCODING || UTF || re_reparse) {
10249
10250	if (keep_delims)
10251	    sv_catpvn(sv, s, termlen);
10252	s += termlen;
10253    }
10254    if (has_utf8 || (IN_ENCODING && !re_reparse))
10255	SvUTF8_on(sv);
10256
10257    PL_multi_end = CopLINE(PL_curcop);
10258    CopLINE_set(PL_curcop, PL_multi_start);
10259    PL_parser->herelines = herelines;
10260
10261    /* if we allocated too much space, give some back */
10262    if (SvCUR(sv) + 5 < SvLEN(sv)) {
10263	SvLEN_set(sv, SvCUR(sv) + 1);
10264	SvPV_renew(sv, SvLEN(sv));
10265    }
10266
10267    /* decide whether this is the first or second quoted string we've read
10268       for this op
10269    */
10270
10271    if (PL_lex_stuff)
10272	PL_sublex_info.repl = sv;
10273    else
10274	PL_lex_stuff = sv;
10275    if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10276    return s;
10277}
10278
10279/*
10280  scan_num
10281  takes: pointer to position in buffer
10282  returns: pointer to new position in buffer
10283  side-effects: builds ops for the constant in pl_yylval.op
10284
10285  Read a number in any of the formats that Perl accepts:
10286
10287  \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)	12 12.34 12.
10288  \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)			.34
10289  0b[01](_?[01])*                                       binary integers
10290  0[0-7](_?[0-7])*                                      octal integers
10291  0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
10292  0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
10293
10294  Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10295  thing it reads.
10296
10297  If it reads a number without a decimal point or an exponent, it will
10298  try converting the number to an integer and see if it can do so
10299  without loss of precision.
10300*/
10301
10302char *
10303Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10304{
10305    const char *s = start;	/* current position in buffer */
10306    char *d;			/* destination in temp buffer */
10307    char *e;			/* end of temp buffer */
10308    NV nv;				/* number read, as a double */
10309    SV *sv = NULL;			/* place to put the converted number */
10310    bool floatit;			/* boolean: int or float? */
10311    const char *lastub = NULL;		/* position of last underbar */
10312    static const char* const number_too_long = "Number too long";
10313    /* Hexadecimal floating point.
10314     *
10315     * In many places (where we have quads and NV is IEEE 754 double)
10316     * we can fit the mantissa bits of a NV into an unsigned quad.
10317     * (Note that UVs might not be quads even when we have quads.)
10318     * This will not work everywhere, though (either no quads, or
10319     * using long doubles), in which case we have to resort to NV,
10320     * which will probably mean horrible loss of precision due to
10321     * multiple fp operations. */
10322    bool hexfp = FALSE;
10323    int total_bits = 0;
10324    int significant_bits = 0;
10325#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10326#  define HEXFP_UQUAD
10327    Uquad_t hexfp_uquad = 0;
10328    int hexfp_frac_bits = 0;
10329#else
10330#  define HEXFP_NV
10331    NV hexfp_nv = 0.0;
10332#endif
10333    NV hexfp_mult = 1.0;
10334    UV high_non_zero = 0; /* highest digit */
10335    int non_zero_integer_digits = 0;
10336
10337    PERL_ARGS_ASSERT_SCAN_NUM;
10338
10339    /* We use the first character to decide what type of number this is */
10340
10341    switch (*s) {
10342    default:
10343	Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10344
10345    /* if it starts with a 0, it could be an octal number, a decimal in
10346       0.13 disguise, or a hexadecimal number, or a binary number. */
10347    case '0':
10348	{
10349	  /* variables:
10350	     u		holds the "number so far"
10351	     shift	the power of 2 of the base
10352			(hex == 4, octal == 3, binary == 1)
10353	     overflowed	was the number more than we can hold?
10354
10355	     Shift is used when we add a digit.  It also serves as an "are
10356	     we in octal/hex/binary?" indicator to disallow hex characters
10357	     when in octal mode.
10358	   */
10359	    NV n = 0.0;
10360	    UV u = 0;
10361	    I32 shift;
10362	    bool overflowed = FALSE;
10363	    bool just_zero  = TRUE;	/* just plain 0 or binary number? */
10364	    static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10365	    static const char* const bases[5] =
10366	      { "", "binary", "", "octal", "hexadecimal" };
10367	    static const char* const Bases[5] =
10368	      { "", "Binary", "", "Octal", "Hexadecimal" };
10369	    static const char* const maxima[5] =
10370	      { "",
10371		"0b11111111111111111111111111111111",
10372		"",
10373		"037777777777",
10374		"0xffffffff" };
10375	    const char *base, *Base, *max;
10376
10377	    /* check for hex */
10378	    if (isALPHA_FOLD_EQ(s[1], 'x')) {
10379		shift = 4;
10380		s += 2;
10381		just_zero = FALSE;
10382	    } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10383		shift = 1;
10384		s += 2;
10385		just_zero = FALSE;
10386	    }
10387	    /* check for a decimal in disguise */
10388	    else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10389		goto decimal;
10390	    /* so it must be octal */
10391	    else {
10392		shift = 3;
10393		s++;
10394	    }
10395
10396	    if (*s == '_') {
10397		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10398			       "Misplaced _ in number");
10399	       lastub = s++;
10400	    }
10401
10402	    base = bases[shift];
10403	    Base = Bases[shift];
10404	    max  = maxima[shift];
10405
10406	    /* read the rest of the number */
10407	    for (;;) {
10408		/* x is used in the overflow test,
10409		   b is the digit we're adding on. */
10410		UV x, b;
10411
10412		switch (*s) {
10413
10414		/* if we don't mention it, we're done */
10415		default:
10416		    goto out;
10417
10418		/* _ are ignored -- but warned about if consecutive */
10419		case '_':
10420		    if (lastub && s == lastub + 1)
10421		        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10422				       "Misplaced _ in number");
10423		    lastub = s++;
10424		    break;
10425
10426		/* 8 and 9 are not octal */
10427		case '8': case '9':
10428		    if (shift == 3)
10429			yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10430		    /* FALLTHROUGH */
10431
10432	        /* octal digits */
10433		case '2': case '3': case '4':
10434		case '5': case '6': case '7':
10435		    if (shift == 1)
10436			yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10437		    /* FALLTHROUGH */
10438
10439		case '0': case '1':
10440		    b = *s++ & 15;		/* ASCII digit -> value of digit */
10441		    goto digit;
10442
10443	        /* hex digits */
10444		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10445		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10446		    /* make sure they said 0x */
10447		    if (shift != 4)
10448			goto out;
10449		    b = (*s++ & 7) + 9;
10450
10451		    /* Prepare to put the digit we have onto the end
10452		       of the number so far.  We check for overflows.
10453		    */
10454
10455		  digit:
10456		    just_zero = FALSE;
10457		    if (!overflowed) {
10458			x = u << shift;	/* make room for the digit */
10459
10460                        total_bits += shift;
10461
10462			if ((x >> shift) != u
10463			    && !(PL_hints & HINT_NEW_BINARY)) {
10464			    overflowed = TRUE;
10465			    n = (NV) u;
10466			    Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10467					     "Integer overflow in %s number",
10468					     base);
10469			} else
10470			    u = x | b;		/* add the digit to the end */
10471		    }
10472		    if (overflowed) {
10473			n *= nvshift[shift];
10474			/* If an NV has not enough bits in its
10475			 * mantissa to represent an UV this summing of
10476			 * small low-order numbers is a waste of time
10477			 * (because the NV cannot preserve the
10478			 * low-order bits anyway): we could just
10479			 * remember when did we overflow and in the
10480			 * end just multiply n by the right
10481			 * amount. */
10482			n += (NV) b;
10483		    }
10484
10485                    if (high_non_zero == 0 && b > 0)
10486                        high_non_zero = b;
10487
10488                    if (high_non_zero)
10489                        non_zero_integer_digits++;
10490
10491                    /* this could be hexfp, but peek ahead
10492                     * to avoid matching ".." */
10493                    if (UNLIKELY(HEXFP_PEEK(s))) {
10494                        goto out;
10495                    }
10496
10497		    break;
10498		}
10499	    }
10500
10501	  /* if we get here, we had success: make a scalar value from
10502	     the number.
10503	  */
10504	  out:
10505
10506	    /* final misplaced underbar check */
10507	    if (s[-1] == '_') {
10508		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10509	    }
10510
10511            if (UNLIKELY(HEXFP_PEEK(s))) {
10512                /* Do sloppy (on the underbars) but quick detection
10513                 * (and value construction) for hexfp, the decimal
10514                 * detection will shortly be more thorough with the
10515                 * underbar checks. */
10516                const char* h = s;
10517                significant_bits = non_zero_integer_digits * shift;
10518#ifdef HEXFP_UQUAD
10519                hexfp_uquad = u;
10520#else /* HEXFP_NV */
10521                hexfp_nv = u;
10522#endif
10523                /* Ignore the leading zero bits of
10524                 * the high (first) non-zero digit. */
10525                if (high_non_zero) {
10526                    if (high_non_zero < 0x8)
10527                        significant_bits--;
10528                    if (high_non_zero < 0x4)
10529                        significant_bits--;
10530                    if (high_non_zero < 0x2)
10531                        significant_bits--;
10532                }
10533
10534                if (*h == '.') {
10535#ifdef HEXFP_NV
10536                    NV nv_mult = 1.0;
10537#endif
10538                    bool accumulate = TRUE;
10539                    for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
10540                        if (isXDIGIT(*h)) {
10541                            U8 b = XDIGIT_VALUE(*h);
10542                            significant_bits += shift;
10543#ifdef HEXFP_UQUAD
10544                            if (accumulate) {
10545                                if (significant_bits < NV_MANT_DIG) {
10546                                    /* We are in the long "run" of xdigits,
10547                                     * accumulate the full four bits. */
10548                                    hexfp_uquad <<= shift;
10549                                    hexfp_uquad |= b;
10550                                    hexfp_frac_bits += shift;
10551                                } else {
10552                                    /* We are at a hexdigit either at,
10553                                     * or straddling, the edge of mantissa.
10554                                     * We will try grabbing as many as
10555                                     * possible bits. */
10556                                    int tail =
10557                                      significant_bits - NV_MANT_DIG;
10558                                    if (tail <= 0)
10559                                       tail += shift;
10560                                    hexfp_uquad <<= tail;
10561                                    hexfp_uquad |= b >> (shift - tail);
10562                                    hexfp_frac_bits += tail;
10563
10564                                    /* Ignore the trailing zero bits
10565                                     * of the last non-zero xdigit.
10566                                     *
10567                                     * The assumption here is that if
10568                                     * one has input of e.g. the xdigit
10569                                     * eight (0x8), there is only one
10570                                     * bit being input, not the full
10571                                     * four bits.  Conversely, if one
10572                                     * specifies a zero xdigit, the
10573                                     * assumption is that one really
10574                                     * wants all those bits to be zero. */
10575                                    if (b) {
10576                                        if ((b & 0x1) == 0x0) {
10577                                            significant_bits--;
10578                                            if ((b & 0x2) == 0x0) {
10579                                                significant_bits--;
10580                                                if ((b & 0x4) == 0x0) {
10581                                                    significant_bits--;
10582                                                }
10583                                            }
10584                                        }
10585                                    }
10586
10587                                    accumulate = FALSE;
10588                                }
10589                            } else {
10590                                /* Keep skipping the xdigits, and
10591                                 * accumulating the significant bits,
10592                                 * but do not shift the uquad
10593                                 * (which would catastrophically drop
10594                                 * high-order bits) or accumulate the
10595                                 * xdigits anymore. */
10596                            }
10597#else /* HEXFP_NV */
10598                            if (accumulate) {
10599                                nv_mult /= 16.0;
10600                                if (nv_mult > 0.0)
10601                                    hexfp_nv += b * nv_mult;
10602                                else
10603                                    accumulate = FALSE;
10604                            }
10605#endif
10606                        }
10607                        if (significant_bits >= NV_MANT_DIG)
10608                            accumulate = FALSE;
10609                    }
10610                }
10611
10612                if ((total_bits > 0 || significant_bits > 0) &&
10613                    isALPHA_FOLD_EQ(*h, 'p')) {
10614                    bool negexp = FALSE;
10615                    h++;
10616                    if (*h == '+')
10617                        h++;
10618                    else if (*h == '-') {
10619                        negexp = TRUE;
10620                        h++;
10621                    }
10622                    if (isDIGIT(*h)) {
10623                        I32 hexfp_exp = 0;
10624                        while (isDIGIT(*h) || *h == '_') {
10625                            if (isDIGIT(*h)) {
10626                                hexfp_exp *= 10;
10627                                hexfp_exp += *h - '0';
10628#ifdef NV_MIN_EXP
10629                                if (negexp
10630                                    && -hexfp_exp < NV_MIN_EXP - 1) {
10631                                    /* NOTE: this means that the exponent
10632                                     * underflow warning happens for
10633                                     * the IEEE 754 subnormals (denormals),
10634                                     * because DBL_MIN_EXP etc are the lowest
10635                                     * possible binary (or, rather, DBL_RADIX-base)
10636                                     * exponent for normals, not subnormals.
10637                                     *
10638                                     * This may or may not be a good thing. */
10639                                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10640                                                   "Hexadecimal float: exponent underflow");
10641                                    break;
10642                                }
10643#endif
10644#ifdef NV_MAX_EXP
10645                                if (!negexp
10646                                    && hexfp_exp > NV_MAX_EXP - 1) {
10647                                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10648                                                   "Hexadecimal float: exponent overflow");
10649                                    break;
10650                                }
10651#endif
10652                            }
10653                            h++;
10654                        }
10655                        if (negexp)
10656                            hexfp_exp = -hexfp_exp;
10657#ifdef HEXFP_UQUAD
10658                        hexfp_exp -= hexfp_frac_bits;
10659#endif
10660                        hexfp_mult = Perl_pow(2.0, hexfp_exp);
10661                        hexfp = TRUE;
10662                        goto decimal;
10663                    }
10664                }
10665            }
10666
10667	    if (overflowed) {
10668		if (n > 4294967295.0)
10669		    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10670				   "%s number > %s non-portable",
10671				   Base, max);
10672		sv = newSVnv(n);
10673	    }
10674	    else {
10675#if UVSIZE > 4
10676		if (u > 0xffffffff)
10677		    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10678				   "%s number > %s non-portable",
10679				   Base, max);
10680#endif
10681		sv = newSVuv(u);
10682	    }
10683	    if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10684		sv = new_constant(start, s - start, "integer",
10685				  sv, NULL, NULL, 0);
10686	    else if (PL_hints & HINT_NEW_BINARY)
10687		sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10688	}
10689	break;
10690
10691    /*
10692      handle decimal numbers.
10693      we're also sent here when we read a 0 as the first digit
10694    */
10695    case '1': case '2': case '3': case '4': case '5':
10696    case '6': case '7': case '8': case '9': case '.':
10697      decimal:
10698	d = PL_tokenbuf;
10699	e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10700        floatit = FALSE;
10701        if (hexfp) {
10702            floatit = TRUE;
10703            *d++ = '0';
10704            *d++ = 'x';
10705            s = start + 2;
10706        }
10707
10708	/* read next group of digits and _ and copy into d */
10709	while (isDIGIT(*s)
10710               || *s == '_'
10711               || UNLIKELY(hexfp && isXDIGIT(*s)))
10712        {
10713	    /* skip underscores, checking for misplaced ones
10714	       if -w is on
10715	    */
10716	    if (*s == '_') {
10717		if (lastub && s == lastub + 1)
10718		    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10719				   "Misplaced _ in number");
10720		lastub = s++;
10721	    }
10722	    else {
10723	        /* check for end of fixed-length buffer */
10724		if (d >= e)
10725		    Perl_croak(aTHX_ "%s", number_too_long);
10726		/* if we're ok, copy the character */
10727		*d++ = *s++;
10728	    }
10729	}
10730
10731	/* final misplaced underbar check */
10732	if (lastub && s == lastub + 1) {
10733	    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10734	}
10735
10736	/* read a decimal portion if there is one.  avoid
10737	   3..5 being interpreted as the number 3. followed
10738	   by .5
10739	*/
10740	if (*s == '.' && s[1] != '.') {
10741	    floatit = TRUE;
10742	    *d++ = *s++;
10743
10744	    if (*s == '_') {
10745		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10746			       "Misplaced _ in number");
10747		lastub = s;
10748	    }
10749
10750	    /* copy, ignoring underbars, until we run out of digits.
10751	    */
10752	    for (; isDIGIT(*s)
10753                   || *s == '_'
10754                   || UNLIKELY(hexfp && isXDIGIT(*s));
10755                 s++)
10756            {
10757	        /* fixed length buffer check */
10758		if (d >= e)
10759		    Perl_croak(aTHX_ "%s", number_too_long);
10760		if (*s == '_') {
10761		   if (lastub && s == lastub + 1)
10762		       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10763				      "Misplaced _ in number");
10764		   lastub = s;
10765		}
10766		else
10767		    *d++ = *s;
10768	    }
10769	    /* fractional part ending in underbar? */
10770	    if (s[-1] == '_') {
10771		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10772			       "Misplaced _ in number");
10773	    }
10774	    if (*s == '.' && isDIGIT(s[1])) {
10775		/* oops, it's really a v-string, but without the "v" */
10776		s = start;
10777		goto vstring;
10778	    }
10779	}
10780
10781	/* read exponent part, if present */
10782	if ((isALPHA_FOLD_EQ(*s, 'e')
10783              || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10784            && strchr("+-0123456789_", s[1]))
10785        {
10786            floatit = TRUE;
10787
10788	    /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10789               ditto for p (hexfloats) */
10790            if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10791		/* At least some Mach atof()s don't grok 'E' */
10792                *d++ = 'e';
10793            }
10794            else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10795                *d++ = 'p';
10796            }
10797
10798	    s++;
10799
10800
10801	    /* stray preinitial _ */
10802	    if (*s == '_') {
10803		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10804			       "Misplaced _ in number");
10805	        lastub = s++;
10806	    }
10807
10808	    /* allow positive or negative exponent */
10809	    if (*s == '+' || *s == '-')
10810		*d++ = *s++;
10811
10812	    /* stray initial _ */
10813	    if (*s == '_') {
10814		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10815			       "Misplaced _ in number");
10816	        lastub = s++;
10817	    }
10818
10819	    /* read digits of exponent */
10820	    while (isDIGIT(*s) || *s == '_') {
10821	        if (isDIGIT(*s)) {
10822		    if (d >= e)
10823		        Perl_croak(aTHX_ "%s", number_too_long);
10824		    *d++ = *s++;
10825		}
10826		else {
10827		   if (((lastub && s == lastub + 1)
10828                        || (!isDIGIT(s[1]) && s[1] != '_')))
10829		       Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10830				      "Misplaced _ in number");
10831		   lastub = s++;
10832		}
10833	    }
10834	}
10835
10836
10837	/*
10838           We try to do an integer conversion first if no characters
10839           indicating "float" have been found.
10840	 */
10841
10842	if (!floatit) {
10843    	    UV uv;
10844	    const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10845
10846            if (flags == IS_NUMBER_IN_UV) {
10847              if (uv <= IV_MAX)
10848		sv = newSViv(uv); /* Prefer IVs over UVs. */
10849              else
10850	    	sv = newSVuv(uv);
10851            } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10852              if (uv <= (UV) IV_MIN)
10853                sv = newSViv(-(IV)uv);
10854              else
10855	    	floatit = TRUE;
10856            } else
10857              floatit = TRUE;
10858        }
10859	if (floatit) {
10860            STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
10861	    /* terminate the string */
10862	    *d = '\0';
10863            if (UNLIKELY(hexfp)) {
10864#  ifdef NV_MANT_DIG
10865                if (significant_bits > NV_MANT_DIG)
10866                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10867                                   "Hexadecimal float: mantissa overflow");
10868#  endif
10869#ifdef HEXFP_UQUAD
10870                nv = hexfp_uquad * hexfp_mult;
10871#else /* HEXFP_NV */
10872                nv = hexfp_nv * hexfp_mult;
10873#endif
10874            } else {
10875                nv = Atof(PL_tokenbuf);
10876            }
10877            RESTORE_LC_NUMERIC_UNDERLYING();
10878            sv = newSVnv(nv);
10879	}
10880
10881	if ( floatit
10882	     ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10883	    const char *const key = floatit ? "float" : "integer";
10884	    const STRLEN keylen = floatit ? 5 : 7;
10885	    sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10886				key, keylen, sv, NULL, NULL, 0);
10887	}
10888	break;
10889
10890    /* if it starts with a v, it could be a v-string */
10891    case 'v':
10892    vstring:
10893		sv = newSV(5); /* preallocate storage space */
10894		ENTER_with_name("scan_vstring");
10895		SAVEFREESV(sv);
10896		s = scan_vstring(s, PL_bufend, sv);
10897		SvREFCNT_inc_simple_void_NN(sv);
10898		LEAVE_with_name("scan_vstring");
10899	break;
10900    }
10901
10902    /* make the op for the constant and return */
10903
10904    if (sv)
10905	lvalp->opval = newSVOP(OP_CONST, 0, sv);
10906    else
10907	lvalp->opval = NULL;
10908
10909    return (char *)s;
10910}
10911
10912STATIC char *
10913S_scan_formline(pTHX_ char *s)
10914{
10915    char *eol;
10916    char *t;
10917    SV * const stuff = newSVpvs("");
10918    bool needargs = FALSE;
10919    bool eofmt = FALSE;
10920
10921    PERL_ARGS_ASSERT_SCAN_FORMLINE;
10922
10923    while (!needargs) {
10924	if (*s == '.') {
10925	    t = s+1;
10926#ifdef PERL_STRICT_CR
10927	    while (SPACE_OR_TAB(*t))
10928		t++;
10929#else
10930	    while (SPACE_OR_TAB(*t) || *t == '\r')
10931		t++;
10932#endif
10933	    if (*t == '\n' || t == PL_bufend) {
10934	        eofmt = TRUE;
10935		break;
10936            }
10937	}
10938	eol = (char *) memchr(s,'\n',PL_bufend-s);
10939	if (!eol++)
10940		eol = PL_bufend;
10941	if (*s != '#') {
10942	    for (t = s; t < eol; t++) {
10943		if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10944		    needargs = FALSE;
10945		    goto enough;	/* ~~ must be first line in formline */
10946		}
10947		if (*t == '@' || *t == '^')
10948		    needargs = TRUE;
10949	    }
10950	    if (eol > s) {
10951	        sv_catpvn(stuff, s, eol-s);
10952#ifndef PERL_STRICT_CR
10953		if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10954		    char *end = SvPVX(stuff) + SvCUR(stuff);
10955		    end[-2] = '\n';
10956		    end[-1] = '\0';
10957		    SvCUR_set(stuff, SvCUR(stuff) - 1);
10958		}
10959#endif
10960	    }
10961	    else
10962	      break;
10963	}
10964	s = (char*)eol;
10965	if ((PL_rsfp || PL_parser->filtered)
10966	 && PL_parser->form_lex_state == LEX_NORMAL) {
10967	    bool got_some;
10968	    PL_bufptr = PL_bufend;
10969	    COPLINE_INC_WITH_HERELINES;
10970	    got_some = lex_next_chunk(0);
10971	    CopLINE_dec(PL_curcop);
10972	    s = PL_bufptr;
10973	    if (!got_some)
10974		break;
10975	}
10976	incline(s);
10977    }
10978  enough:
10979    if (!SvCUR(stuff) || needargs)
10980	PL_lex_state = PL_parser->form_lex_state;
10981    if (SvCUR(stuff)) {
10982	PL_expect = XSTATE;
10983	if (needargs) {
10984	    const char *s2 = s;
10985	    while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10986		|| *s2 == '\v')
10987		s2++;
10988	    if (*s2 == '{') {
10989		PL_expect = XTERMBLOCK;
10990		NEXTVAL_NEXTTOKE.ival = 0;
10991		force_next(DO);
10992	    }
10993	    NEXTVAL_NEXTTOKE.ival = 0;
10994	    force_next(FORMLBRACK);
10995	}
10996	if (!IN_BYTES) {
10997	    if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10998		SvUTF8_on(stuff);
10999	    else if (IN_ENCODING)
11000		sv_recode_to_utf8(stuff, _get_encoding());
11001	}
11002	NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11003	force_next(THING);
11004    }
11005    else {
11006	SvREFCNT_dec(stuff);
11007	if (eofmt)
11008	    PL_lex_formbrack = 0;
11009    }
11010    return s;
11011}
11012
11013I32
11014Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11015{
11016    const I32 oldsavestack_ix = PL_savestack_ix;
11017    CV* const outsidecv = PL_compcv;
11018
11019    SAVEI32(PL_subline);
11020    save_item(PL_subname);
11021    SAVESPTR(PL_compcv);
11022
11023    PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11024    CvFLAGS(PL_compcv) |= flags;
11025
11026    PL_subline = CopLINE(PL_curcop);
11027    CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11028    CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11029    CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11030    if (outsidecv && CvPADLIST(outsidecv))
11031	CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11032
11033    return oldsavestack_ix;
11034}
11035
11036static int
11037S_yywarn(pTHX_ const char *const s, U32 flags)
11038{
11039    PERL_ARGS_ASSERT_YYWARN;
11040
11041    PL_in_eval |= EVAL_WARNONLY;
11042    yyerror_pv(s, flags);
11043    return 0;
11044}
11045
11046int
11047Perl_yyerror(pTHX_ const char *const s)
11048{
11049    PERL_ARGS_ASSERT_YYERROR;
11050    return yyerror_pvn(s, strlen(s), 0);
11051}
11052
11053int
11054Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11055{
11056    PERL_ARGS_ASSERT_YYERROR_PV;
11057    return yyerror_pvn(s, strlen(s), flags);
11058}
11059
11060int
11061Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11062{
11063    const char *context = NULL;
11064    int contlen = -1;
11065    SV *msg;
11066    SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11067    int yychar  = PL_parser->yychar;
11068
11069    PERL_ARGS_ASSERT_YYERROR_PVN;
11070
11071    if (!yychar || (yychar == ';' && !PL_rsfp))
11072	sv_catpvs(where_sv, "at EOF");
11073    else if (   PL_oldoldbufptr
11074             && PL_bufptr > PL_oldoldbufptr
11075             && PL_bufptr - PL_oldoldbufptr < 200
11076             && PL_oldoldbufptr != PL_oldbufptr
11077             && PL_oldbufptr != PL_bufptr)
11078    {
11079	/*
11080		Only for NetWare:
11081		The code below is removed for NetWare because it abends/crashes on NetWare
11082		when the script has error such as not having the closing quotes like:
11083		    if ($var eq "value)
11084		Checking of white spaces is anyway done in NetWare code.
11085	*/
11086#ifndef NETWARE
11087	while (isSPACE(*PL_oldoldbufptr))
11088	    PL_oldoldbufptr++;
11089#endif
11090	context = PL_oldoldbufptr;
11091	contlen = PL_bufptr - PL_oldoldbufptr;
11092    }
11093    else if (  PL_oldbufptr
11094            && PL_bufptr > PL_oldbufptr
11095            && PL_bufptr - PL_oldbufptr < 200
11096            && PL_oldbufptr != PL_bufptr) {
11097	/*
11098		Only for NetWare:
11099		The code below is removed for NetWare because it abends/crashes on NetWare
11100		when the script has error such as not having the closing quotes like:
11101		    if ($var eq "value)
11102		Checking of white spaces is anyway done in NetWare code.
11103	*/
11104#ifndef NETWARE
11105	while (isSPACE(*PL_oldbufptr))
11106	    PL_oldbufptr++;
11107#endif
11108	context = PL_oldbufptr;
11109	contlen = PL_bufptr - PL_oldbufptr;
11110    }
11111    else if (yychar > 255)
11112	sv_catpvs(where_sv, "next token ???");
11113    else if (yychar == YYEMPTY) {
11114	if (    PL_lex_state == LEX_NORMAL
11115            || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11116	    sv_catpvs(where_sv, "at end of line");
11117	else if (PL_lex_inpat)
11118	    sv_catpvs(where_sv, "within pattern");
11119	else
11120	    sv_catpvs(where_sv, "within string");
11121    }
11122    else {
11123	sv_catpvs(where_sv, "next char ");
11124	if (yychar < 32)
11125	    Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11126	else if (isPRINT_LC(yychar)) {
11127	    const char string = yychar;
11128	    sv_catpvn(where_sv, &string, 1);
11129	}
11130	else
11131	    Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11132    }
11133    msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11134    Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11135        OutCopFILE(PL_curcop),
11136        (IV)(PL_parser->preambling == NOLINE
11137               ? CopLINE(PL_curcop)
11138               : PL_parser->preambling));
11139    if (context)
11140	Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11141			     UTF8fARG(UTF, contlen, context));
11142    else
11143	Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11144    if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11145        Perl_sv_catpvf(aTHX_ msg,
11146        "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11147                (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11148        PL_multi_end = 0;
11149    }
11150    if (PL_in_eval & EVAL_WARNONLY) {
11151	PL_in_eval &= ~EVAL_WARNONLY;
11152	Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11153    }
11154    else
11155	qerror(msg);
11156    if (PL_error_count >= 10) {
11157	SV * errsv;
11158	if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11159	    Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11160		       SVfARG(errsv), OutCopFILE(PL_curcop));
11161	else
11162	    Perl_croak(aTHX_ "%s has too many errors.\n",
11163            OutCopFILE(PL_curcop));
11164    }
11165    PL_in_my = 0;
11166    PL_in_my_stash = NULL;
11167    return 0;
11168}
11169
11170STATIC char*
11171S_swallow_bom(pTHX_ U8 *s)
11172{
11173    const STRLEN slen = SvCUR(PL_linestr);
11174
11175    PERL_ARGS_ASSERT_SWALLOW_BOM;
11176
11177    switch (s[0]) {
11178    case 0xFF:
11179	if (s[1] == 0xFE) {
11180	    /* UTF-16 little-endian? (or UTF-32LE?) */
11181	    if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11182		/* diag_listed_as: Unsupported script encoding %s */
11183		Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11184#ifndef PERL_NO_UTF16_FILTER
11185	    if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11186	    s += 2;
11187	    if (PL_bufend > (char*)s) {
11188		s = add_utf16_textfilter(s, TRUE);
11189	    }
11190#else
11191	    /* diag_listed_as: Unsupported script encoding %s */
11192	    Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11193#endif
11194	}
11195	break;
11196    case 0xFE:
11197	if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11198#ifndef PERL_NO_UTF16_FILTER
11199	    if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11200	    s += 2;
11201	    if (PL_bufend > (char *)s) {
11202		s = add_utf16_textfilter(s, FALSE);
11203	    }
11204#else
11205	    /* diag_listed_as: Unsupported script encoding %s */
11206	    Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11207#endif
11208	}
11209	break;
11210    case BOM_UTF8_FIRST_BYTE: {
11211        const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11212        if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11213            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11214            s += len + 1;                      /* UTF-8 */
11215        }
11216        break;
11217    }
11218    case 0:
11219	if (slen > 3) {
11220	     if (s[1] == 0) {
11221		  if (s[2] == 0xFE && s[3] == 0xFF) {
11222		       /* UTF-32 big-endian */
11223		       /* diag_listed_as: Unsupported script encoding %s */
11224		       Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11225		  }
11226	     }
11227	     else if (s[2] == 0 && s[3] != 0) {
11228		  /* Leading bytes
11229		   * 00 xx 00 xx
11230		   * are a good indicator of UTF-16BE. */
11231#ifndef PERL_NO_UTF16_FILTER
11232		  if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11233		  s = add_utf16_textfilter(s, FALSE);
11234#else
11235		  /* diag_listed_as: Unsupported script encoding %s */
11236		  Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11237#endif
11238	     }
11239	}
11240        break;
11241
11242    default:
11243	 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11244		  /* Leading bytes
11245		   * xx 00 xx 00
11246		   * are a good indicator of UTF-16LE. */
11247#ifndef PERL_NO_UTF16_FILTER
11248	      if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11249	      s = add_utf16_textfilter(s, TRUE);
11250#else
11251	      /* diag_listed_as: Unsupported script encoding %s */
11252	      Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11253#endif
11254	 }
11255    }
11256    return (char*)s;
11257}
11258
11259
11260#ifndef PERL_NO_UTF16_FILTER
11261static I32
11262S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11263{
11264    SV *const filter = FILTER_DATA(idx);
11265    /* We re-use this each time round, throwing the contents away before we
11266       return.  */
11267    SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11268    SV *const utf8_buffer = filter;
11269    IV status = IoPAGE(filter);
11270    const bool reverse = cBOOL(IoLINES(filter));
11271    I32 retval;
11272
11273    PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11274
11275    /* As we're automatically added, at the lowest level, and hence only called
11276       from this file, we can be sure that we're not called in block mode. Hence
11277       don't bother writing code to deal with block mode.  */
11278    if (maxlen) {
11279	Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11280    }
11281    if (status < 0) {
11282	Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11283    }
11284    DEBUG_P(PerlIO_printf(Perl_debug_log,
11285			  "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11286			  FPTR2DPTR(void *, S_utf16_textfilter),
11287			  reverse ? 'l' : 'b', idx, maxlen, status,
11288			  (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11289
11290    while (1) {
11291	STRLEN chars;
11292	STRLEN have;
11293	I32 newlen;
11294	U8 *end;
11295	/* First, look in our buffer of existing UTF-8 data:  */
11296	char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11297
11298	if (nl) {
11299	    ++nl;
11300	} else if (status == 0) {
11301	    /* EOF */
11302	    IoPAGE(filter) = 0;
11303	    nl = SvEND(utf8_buffer);
11304	}
11305	if (nl) {
11306	    STRLEN got = nl - SvPVX(utf8_buffer);
11307	    /* Did we have anything to append?  */
11308	    retval = got != 0;
11309	    sv_catpvn(sv, SvPVX(utf8_buffer), got);
11310	    /* Everything else in this code works just fine if SVp_POK isn't
11311	       set.  This, however, needs it, and we need it to work, else
11312	       we loop infinitely because the buffer is never consumed.  */
11313	    sv_chop(utf8_buffer, nl);
11314	    break;
11315	}
11316
11317	/* OK, not a complete line there, so need to read some more UTF-16.
11318	   Read an extra octect if the buffer currently has an odd number. */
11319	while (1) {
11320	    if (status <= 0)
11321		break;
11322	    if (SvCUR(utf16_buffer) >= 2) {
11323		/* Location of the high octet of the last complete code point.
11324		   Gosh, UTF-16 is a pain. All the benefits of variable length,
11325		   *coupled* with all the benefits of partial reads and
11326		   endianness.  */
11327		const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11328		    + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11329
11330		if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11331		    break;
11332		}
11333
11334		/* We have the first half of a surrogate. Read more.  */
11335		DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11336	    }
11337
11338	    status = FILTER_READ(idx + 1, utf16_buffer,
11339				 160 + (SvCUR(utf16_buffer) & 1));
11340	    DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11341	    DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11342	    if (status < 0) {
11343		/* Error */
11344		IoPAGE(filter) = status;
11345		return status;
11346	    }
11347	}
11348
11349	chars = SvCUR(utf16_buffer) >> 1;
11350	have = SvCUR(utf8_buffer);
11351	SvGROW(utf8_buffer, have + chars * 3 + 1);
11352
11353	if (reverse) {
11354	    end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11355					 (U8*)SvPVX_const(utf8_buffer) + have,
11356					 chars * 2, &newlen);
11357	} else {
11358	    end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11359				(U8*)SvPVX_const(utf8_buffer) + have,
11360				chars * 2, &newlen);
11361	}
11362	SvCUR_set(utf8_buffer, have + newlen);
11363	*end = '\0';
11364
11365	/* No need to keep this SV "well-formed" with a '\0' after the end, as
11366	   it's private to us, and utf16_to_utf8{,reversed} take a
11367	   (pointer,length) pair, rather than a NUL-terminated string.  */
11368	if(SvCUR(utf16_buffer) & 1) {
11369	    *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11370	    SvCUR_set(utf16_buffer, 1);
11371	} else {
11372	    SvCUR_set(utf16_buffer, 0);
11373	}
11374    }
11375    DEBUG_P(PerlIO_printf(Perl_debug_log,
11376			  "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11377			  status,
11378			  (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11379    DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11380    return retval;
11381}
11382
11383static U8 *
11384S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11385{
11386    SV *filter = filter_add(S_utf16_textfilter, NULL);
11387
11388    PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11389
11390    IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11391    sv_setpvs(filter, "");
11392    IoLINES(filter) = reversed;
11393    IoPAGE(filter) = 1; /* Not EOF */
11394
11395    /* Sadly, we have to return a valid pointer, come what may, so we have to
11396       ignore any error return from this.  */
11397    SvCUR_set(PL_linestr, 0);
11398    if (FILTER_READ(0, PL_linestr, 0)) {
11399	SvUTF8_on(PL_linestr);
11400    } else {
11401	SvUTF8_on(PL_linestr);
11402    }
11403    PL_bufend = SvEND(PL_linestr);
11404    return (U8*)SvPVX(PL_linestr);
11405}
11406#endif
11407
11408/*
11409Returns a pointer to the next character after the parsed
11410vstring, as well as updating the passed in sv.
11411
11412Function must be called like
11413
11414	sv = sv_2mortal(newSV(5));
11415	s = scan_vstring(s,e,sv);
11416
11417where s and e are the start and end of the string.
11418The sv should already be large enough to store the vstring
11419passed in, for performance reasons.
11420
11421This function may croak if fatal warnings are enabled in the
11422calling scope, hence the sv_2mortal in the example (to prevent
11423a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11424sv_2mortal.
11425
11426*/
11427
11428char *
11429Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11430{
11431    const char *pos = s;
11432    const char *start = s;
11433
11434    PERL_ARGS_ASSERT_SCAN_VSTRING;
11435
11436    if (*pos == 'v') pos++;  /* get past 'v' */
11437    while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11438	pos++;
11439    if ( *pos != '.') {
11440	/* this may not be a v-string if followed by => */
11441	const char *next = pos;
11442	while (next < e && isSPACE(*next))
11443	    ++next;
11444	if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11445	    /* return string not v-string */
11446	    sv_setpvn(sv,(char *)s,pos-s);
11447	    return (char *)pos;
11448	}
11449    }
11450
11451    if (!isALPHA(*pos)) {
11452	U8 tmpbuf[UTF8_MAXBYTES+1];
11453
11454	if (*s == 'v')
11455	    s++;  /* get past 'v' */
11456
11457	sv_setpvs(sv, "");
11458
11459	for (;;) {
11460	    /* this is atoi() that tolerates underscores */
11461	    U8 *tmpend;
11462	    UV rev = 0;
11463	    const char *end = pos;
11464	    UV mult = 1;
11465	    while (--end >= s) {
11466		if (*end != '_') {
11467		    const UV orev = rev;
11468		    rev += (*end - '0') * mult;
11469		    mult *= 10;
11470		    if (orev > rev)
11471			/* diag_listed_as: Integer overflow in %s number */
11472			Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11473					 "Integer overflow in decimal number");
11474		}
11475	    }
11476
11477	    /* Append native character for the rev point */
11478	    tmpend = uvchr_to_utf8(tmpbuf, rev);
11479	    sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11480	    if (!UVCHR_IS_INVARIANT(rev))
11481		 SvUTF8_on(sv);
11482	    if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11483		 s = ++pos;
11484	    else {
11485		 s = pos;
11486		 break;
11487	    }
11488	    while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11489		 pos++;
11490	}
11491	SvPOK_on(sv);
11492	sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11493	SvRMAGICAL_on(sv);
11494    }
11495    return (char *)s;
11496}
11497
11498int
11499Perl_keyword_plugin_standard(pTHX_
11500	char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11501{
11502    PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11503    PERL_UNUSED_CONTEXT;
11504    PERL_UNUSED_ARG(keyword_ptr);
11505    PERL_UNUSED_ARG(keyword_len);
11506    PERL_UNUSED_ARG(op_ptr);
11507    return KEYWORD_PLUGIN_DECLINE;
11508}
11509
11510#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11511static void
11512S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11513{
11514    SAVEI32(PL_lex_brackets);
11515    if (PL_lex_brackets > 100)
11516	Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11517    PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11518    SAVEI32(PL_lex_allbrackets);
11519    PL_lex_allbrackets = 0;
11520    SAVEI8(PL_lex_fakeeof);
11521    PL_lex_fakeeof = (U8)fakeeof;
11522    if(yyparse(gramtype) && !PL_parser->error_count)
11523	qerror(Perl_mess(aTHX_ "Parse error"));
11524}
11525
11526#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11527static OP *
11528S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11529{
11530    OP *o;
11531    ENTER;
11532    SAVEVPTR(PL_eval_root);
11533    PL_eval_root = NULL;
11534    parse_recdescent(gramtype, fakeeof);
11535    o = PL_eval_root;
11536    LEAVE;
11537    return o;
11538}
11539
11540#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11541static OP *
11542S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11543{
11544    OP *exprop;
11545    if (flags & ~PARSE_OPTIONAL)
11546	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11547    exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11548    if (!exprop && !(flags & PARSE_OPTIONAL)) {
11549	if (!PL_parser->error_count)
11550	    qerror(Perl_mess(aTHX_ "Parse error"));
11551	exprop = newOP(OP_NULL, 0);
11552    }
11553    return exprop;
11554}
11555
11556/*
11557=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11558
11559Parse a Perl arithmetic expression.  This may contain operators of precedence
11560down to the bit shift operators.  The expression must be followed (and thus
11561terminated) either by a comparison or lower-precedence operator or by
11562something that would normally terminate an expression such as semicolon.
11563If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11564otherwise it is mandatory.  It is up to the caller to ensure that the
11565dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11566the source of the code to be parsed and the lexical context for the
11567expression.
11568
11569The op tree representing the expression is returned.  If an optional
11570expression is absent, a null pointer is returned, otherwise the pointer
11571will be non-null.
11572
11573If an error occurs in parsing or compilation, in most cases a valid op
11574tree is returned anyway.  The error is reflected in the parser state,
11575normally resulting in a single exception at the top level of parsing
11576which covers all the compilation errors that occurred.  Some compilation
11577errors, however, will throw an exception immediately.
11578
11579=cut
11580*/
11581
11582OP *
11583Perl_parse_arithexpr(pTHX_ U32 flags)
11584{
11585    return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11586}
11587
11588/*
11589=for apidoc Amx|OP *|parse_termexpr|U32 flags
11590
11591Parse a Perl term expression.  This may contain operators of precedence
11592down to the assignment operators.  The expression must be followed (and thus
11593terminated) either by a comma or lower-precedence operator or by
11594something that would normally terminate an expression such as semicolon.
11595If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11596otherwise it is mandatory.  It is up to the caller to ensure that the
11597dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11598the source of the code to be parsed and the lexical context for the
11599expression.
11600
11601The op tree representing the expression is returned.  If an optional
11602expression is absent, a null pointer is returned, otherwise the pointer
11603will be non-null.
11604
11605If an error occurs in parsing or compilation, in most cases a valid op
11606tree is returned anyway.  The error is reflected in the parser state,
11607normally resulting in a single exception at the top level of parsing
11608which covers all the compilation errors that occurred.  Some compilation
11609errors, however, will throw an exception immediately.
11610
11611=cut
11612*/
11613
11614OP *
11615Perl_parse_termexpr(pTHX_ U32 flags)
11616{
11617    return parse_expr(LEX_FAKEEOF_COMMA, flags);
11618}
11619
11620/*
11621=for apidoc Amx|OP *|parse_listexpr|U32 flags
11622
11623Parse a Perl list expression.  This may contain operators of precedence
11624down to the comma operator.  The expression must be followed (and thus
11625terminated) either by a low-precedence logic operator such as C<or> or by
11626something that would normally terminate an expression such as semicolon.
11627If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11628otherwise it is mandatory.  It is up to the caller to ensure that the
11629dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11630the source of the code to be parsed and the lexical context for the
11631expression.
11632
11633The op tree representing the expression is returned.  If an optional
11634expression is absent, a null pointer is returned, otherwise the pointer
11635will be non-null.
11636
11637If an error occurs in parsing or compilation, in most cases a valid op
11638tree is returned anyway.  The error is reflected in the parser state,
11639normally resulting in a single exception at the top level of parsing
11640which covers all the compilation errors that occurred.  Some compilation
11641errors, however, will throw an exception immediately.
11642
11643=cut
11644*/
11645
11646OP *
11647Perl_parse_listexpr(pTHX_ U32 flags)
11648{
11649    return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11650}
11651
11652/*
11653=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11654
11655Parse a single complete Perl expression.  This allows the full
11656expression grammar, including the lowest-precedence operators such
11657as C<or>.  The expression must be followed (and thus terminated) by a
11658token that an expression would normally be terminated by: end-of-file,
11659closing bracketing punctuation, semicolon, or one of the keywords that
11660signals a postfix expression-statement modifier.  If C<flags> has the
11661C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
11662mandatory.  It is up to the caller to ensure that the dynamic parser
11663state (L</PL_parser> et al) is correctly set to reflect the source of
11664the code to be parsed and the lexical context for the expression.
11665
11666The op tree representing the expression is returned.  If an optional
11667expression is absent, a null pointer is returned, otherwise the pointer
11668will be non-null.
11669
11670If an error occurs in parsing or compilation, in most cases a valid op
11671tree is returned anyway.  The error is reflected in the parser state,
11672normally resulting in a single exception at the top level of parsing
11673which covers all the compilation errors that occurred.  Some compilation
11674errors, however, will throw an exception immediately.
11675
11676=cut
11677*/
11678
11679OP *
11680Perl_parse_fullexpr(pTHX_ U32 flags)
11681{
11682    return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11683}
11684
11685/*
11686=for apidoc Amx|OP *|parse_block|U32 flags
11687
11688Parse a single complete Perl code block.  This consists of an opening
11689brace, a sequence of statements, and a closing brace.  The block
11690constitutes a lexical scope, so C<my> variables and various compile-time
11691effects can be contained within it.  It is up to the caller to ensure
11692that the dynamic parser state (L</PL_parser> et al) is correctly set to
11693reflect the source of the code to be parsed and the lexical context for
11694the statement.
11695
11696The op tree representing the code block is returned.  This is always a
11697real op, never a null pointer.  It will normally be a C<lineseq> list,
11698including C<nextstate> or equivalent ops.  No ops to construct any kind
11699of runtime scope are included by virtue of it being a block.
11700
11701If an error occurs in parsing or compilation, in most cases a valid op
11702tree (most likely null) is returned anyway.  The error is reflected in
11703the parser state, normally resulting in a single exception at the top
11704level of parsing which covers all the compilation errors that occurred.
11705Some compilation errors, however, will throw an exception immediately.
11706
11707The C<flags> parameter is reserved for future use, and must always
11708be zero.
11709
11710=cut
11711*/
11712
11713OP *
11714Perl_parse_block(pTHX_ U32 flags)
11715{
11716    if (flags)
11717	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11718    return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11719}
11720
11721/*
11722=for apidoc Amx|OP *|parse_barestmt|U32 flags
11723
11724Parse a single unadorned Perl statement.  This may be a normal imperative
11725statement or a declaration that has compile-time effect.  It does not
11726include any label or other affixture.  It is up to the caller to ensure
11727that the dynamic parser state (L</PL_parser> et al) is correctly set to
11728reflect the source of the code to be parsed and the lexical context for
11729the statement.
11730
11731The op tree representing the statement is returned.  This may be a
11732null pointer if the statement is null, for example if it was actually
11733a subroutine definition (which has compile-time side effects).  If not
11734null, it will be ops directly implementing the statement, suitable to
11735pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11736equivalent op (except for those embedded in a scope contained entirely
11737within the statement).
11738
11739If an error occurs in parsing or compilation, in most cases a valid op
11740tree (most likely null) is returned anyway.  The error is reflected in
11741the parser state, normally resulting in a single exception at the top
11742level of parsing which covers all the compilation errors that occurred.
11743Some compilation errors, however, will throw an exception immediately.
11744
11745The C<flags> parameter is reserved for future use, and must always
11746be zero.
11747
11748=cut
11749*/
11750
11751OP *
11752Perl_parse_barestmt(pTHX_ U32 flags)
11753{
11754    if (flags)
11755	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11756    return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11757}
11758
11759/*
11760=for apidoc Amx|SV *|parse_label|U32 flags
11761
11762Parse a single label, possibly optional, of the type that may prefix a
11763Perl statement.  It is up to the caller to ensure that the dynamic parser
11764state (L</PL_parser> et al) is correctly set to reflect the source of
11765the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
11766label is optional, otherwise it is mandatory.
11767
11768The name of the label is returned in the form of a fresh scalar.  If an
11769optional label is absent, a null pointer is returned.
11770
11771If an error occurs in parsing, which can only occur if the label is
11772mandatory, a valid label is returned anyway.  The error is reflected in
11773the parser state, normally resulting in a single exception at the top
11774level of parsing which covers all the compilation errors that occurred.
11775
11776=cut
11777*/
11778
11779SV *
11780Perl_parse_label(pTHX_ U32 flags)
11781{
11782    if (flags & ~PARSE_OPTIONAL)
11783	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11784    if (PL_lex_state == LEX_KNOWNEXT) {
11785	PL_parser->yychar = yylex();
11786	if (PL_parser->yychar == LABEL) {
11787	    char * const lpv = pl_yylval.pval;
11788	    STRLEN llen = strlen(lpv);
11789	    PL_parser->yychar = YYEMPTY;
11790	    return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11791	} else {
11792	    yyunlex();
11793	    goto no_label;
11794	}
11795    } else {
11796	char *s, *t;
11797	STRLEN wlen, bufptr_pos;
11798	lex_read_space(0);
11799	t = s = PL_bufptr;
11800        if (!isIDFIRST_lazy_if(s, UTF))
11801	    goto no_label;
11802	t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11803	if (word_takes_any_delimeter(s, wlen))
11804	    goto no_label;
11805	bufptr_pos = s - SvPVX(PL_linestr);
11806	PL_bufptr = t;
11807	lex_read_space(LEX_KEEP_PREVIOUS);
11808	t = PL_bufptr;
11809	s = SvPVX(PL_linestr) + bufptr_pos;
11810	if (t[0] == ':' && t[1] != ':') {
11811	    PL_oldoldbufptr = PL_oldbufptr;
11812	    PL_oldbufptr = s;
11813	    PL_bufptr = t+1;
11814	    return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11815	} else {
11816	    PL_bufptr = s;
11817	    no_label:
11818	    if (flags & PARSE_OPTIONAL) {
11819		return NULL;
11820	    } else {
11821		qerror(Perl_mess(aTHX_ "Parse error"));
11822		return newSVpvs("x");
11823	    }
11824	}
11825    }
11826}
11827
11828/*
11829=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11830
11831Parse a single complete Perl statement.  This may be a normal imperative
11832statement or a declaration that has compile-time effect, and may include
11833optional labels.  It is up to the caller to ensure that the dynamic
11834parser state (L</PL_parser> et al) is correctly set to reflect the source
11835of the code to be parsed and the lexical context for the statement.
11836
11837The op tree representing the statement is returned.  This may be a
11838null pointer if the statement is null, for example if it was actually
11839a subroutine definition (which has compile-time side effects).  If not
11840null, it will be the result of a L</newSTATEOP> call, normally including
11841a C<nextstate> or equivalent op.
11842
11843If an error occurs in parsing or compilation, in most cases a valid op
11844tree (most likely null) is returned anyway.  The error is reflected in
11845the parser state, normally resulting in a single exception at the top
11846level of parsing which covers all the compilation errors that occurred.
11847Some compilation errors, however, will throw an exception immediately.
11848
11849The C<flags> parameter is reserved for future use, and must always
11850be zero.
11851
11852=cut
11853*/
11854
11855OP *
11856Perl_parse_fullstmt(pTHX_ U32 flags)
11857{
11858    if (flags)
11859	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11860    return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11861}
11862
11863/*
11864=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11865
11866Parse a sequence of zero or more Perl statements.  These may be normal
11867imperative statements, including optional labels, or declarations
11868that have compile-time effect, or any mixture thereof.  The statement
11869sequence ends when a closing brace or end-of-file is encountered in a
11870place where a new statement could have validly started.  It is up to
11871the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11872is correctly set to reflect the source of the code to be parsed and the
11873lexical context for the statements.
11874
11875The op tree representing the statement sequence is returned.  This may
11876be a null pointer if the statements were all null, for example if there
11877were no statements or if there were only subroutine definitions (which
11878have compile-time side effects).  If not null, it will be a C<lineseq>
11879list, normally including C<nextstate> or equivalent ops.
11880
11881If an error occurs in parsing or compilation, in most cases a valid op
11882tree is returned anyway.  The error is reflected in the parser state,
11883normally resulting in a single exception at the top level of parsing
11884which covers all the compilation errors that occurred.  Some compilation
11885errors, however, will throw an exception immediately.
11886
11887The C<flags> parameter is reserved for future use, and must always
11888be zero.
11889
11890=cut
11891*/
11892
11893OP *
11894Perl_parse_stmtseq(pTHX_ U32 flags)
11895{
11896    OP *stmtseqop;
11897    I32 c;
11898    if (flags)
11899	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11900    stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11901    c = lex_peek_unichar(0);
11902    if (c != -1 && c != /*{*/'}')
11903	qerror(Perl_mess(aTHX_ "Parse error"));
11904    return stmtseqop;
11905}
11906
11907#define lex_token_boundary() S_lex_token_boundary(aTHX)
11908static void
11909S_lex_token_boundary(pTHX)
11910{
11911    PL_oldoldbufptr = PL_oldbufptr;
11912    PL_oldbufptr = PL_bufptr;
11913}
11914
11915#define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11916static OP *
11917S_parse_opt_lexvar(pTHX)
11918{
11919    I32 sigil, c;
11920    char *s, *d;
11921    OP *var;
11922    lex_token_boundary();
11923    sigil = lex_read_unichar(0);
11924    if (lex_peek_unichar(0) == '#') {
11925	qerror(Perl_mess(aTHX_ "Parse error"));
11926	return NULL;
11927    }
11928    lex_read_space(0);
11929    c = lex_peek_unichar(0);
11930    if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11931	return NULL;
11932    s = PL_bufptr;
11933    d = PL_tokenbuf + 1;
11934    PL_tokenbuf[0] = (char)sigil;
11935    parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11936    PL_bufptr = s;
11937    if (d == PL_tokenbuf+1)
11938	return NULL;
11939    var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11940		OPf_MOD | (OPpLVAL_INTRO<<8));
11941    var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11942    return var;
11943}
11944
11945OP *
11946Perl_parse_subsignature(pTHX)
11947{
11948    I32 c;
11949    int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11950    OP *initops = NULL;
11951    lex_read_space(0);
11952    c = lex_peek_unichar(0);
11953    while (c != /*(*/')') {
11954	switch (c) {
11955	    case '$': {
11956		OP *var, *expr;
11957		if (prev_type == 2)
11958		    qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11959		var = parse_opt_lexvar();
11960		expr = var ?
11961		    newBINOP(OP_AELEM, 0,
11962			ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11963			    OP_RV2AV),
11964			newSVOP(OP_CONST, 0, newSViv(pos))) :
11965		    NULL;
11966		lex_read_space(0);
11967		c = lex_peek_unichar(0);
11968		if (c == '=') {
11969		    lex_token_boundary();
11970		    lex_read_unichar(0);
11971		    lex_read_space(0);
11972		    c = lex_peek_unichar(0);
11973		    if (c == ',' || c == /*(*/')') {
11974			if (var)
11975			    qerror(Perl_mess(aTHX_ "Optional parameter "
11976				    "lacks default expression"));
11977		    } else {
11978			OP *defexpr = parse_termexpr(0);
11979			if (defexpr->op_type == OP_UNDEF
11980                            && !(defexpr->op_flags & OPf_KIDS))
11981                        {
11982			    op_free(defexpr);
11983			} else {
11984			    OP *ifop =
11985				newBINOP(OP_GE, 0,
11986				    scalar(newUNOP(OP_RV2AV, 0,
11987					    newGVOP(OP_GV, 0, PL_defgv))),
11988				    newSVOP(OP_CONST, 0, newSViv(pos+1)));
11989			    expr = var ?
11990				newCONDOP(0, ifop, expr, defexpr) :
11991				newLOGOP(OP_OR, 0, ifop, defexpr);
11992			}
11993		    }
11994		    prev_type = 1;
11995		} else {
11996		    if (prev_type == 1)
11997			qerror(Perl_mess(aTHX_ "Mandatory parameter "
11998				"follows optional parameter"));
11999		    prev_type = 0;
12000		    min_arity = pos + 1;
12001		}
12002		if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
12003		if (expr)
12004		    initops = op_append_list(OP_LINESEQ, initops,
12005				newSTATEOP(0, NULL, expr));
12006		max_arity = ++pos;
12007	    } break;
12008	    case '@':
12009	    case '%': {
12010		OP *var;
12011		if (prev_type == 2)
12012		    qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12013		var = parse_opt_lexvar();
12014		if (c == '%') {
12015		    OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
12016			    newBINOP(OP_BIT_AND, 0,
12017				scalar(newUNOP(OP_RV2AV, 0,
12018				    newGVOP(OP_GV, 0, PL_defgv))),
12019				newSVOP(OP_CONST, 0, newSViv(1))),
12020		            op_convert_list(OP_DIE, 0,
12021		                op_convert_list(OP_SPRINTF, 0,
12022		                    op_append_list(OP_LIST,
12023		                        newSVOP(OP_CONST, 0,
12024		                            newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
12025		                        newSLICEOP(0,
12026		                            op_append_list(OP_LIST,
12027		                                newSVOP(OP_CONST, 0, newSViv(1)),
12028		                                newSVOP(OP_CONST, 0, newSViv(2))),
12029		                            newOP(OP_CALLER, 0))))));
12030		    if (pos != min_arity)
12031			chkop = newLOGOP(OP_AND, 0,
12032				    newBINOP(OP_GT, 0,
12033					scalar(newUNOP(OP_RV2AV, 0,
12034					    newGVOP(OP_GV, 0, PL_defgv))),
12035					newSVOP(OP_CONST, 0, newSViv(pos))),
12036				    chkop);
12037		    initops = op_append_list(OP_LINESEQ,
12038				newSTATEOP(0, NULL, chkop),
12039				initops);
12040		}
12041		if (var) {
12042		    OP *slice = pos ?
12043			op_prepend_elem(OP_ASLICE,
12044			    newOP(OP_PUSHMARK, 0),
12045			    newLISTOP(OP_ASLICE, 0,
12046				list(newRANGE(0,
12047				    newSVOP(OP_CONST, 0, newSViv(pos)),
12048				    newUNOP(OP_AV2ARYLEN, 0,
12049					ref(newUNOP(OP_RV2AV, 0,
12050						newGVOP(OP_GV, 0, PL_defgv)),
12051					    OP_AV2ARYLEN)))),
12052				ref(newUNOP(OP_RV2AV, 0,
12053					newGVOP(OP_GV, 0, PL_defgv)),
12054				    OP_ASLICE))) :
12055			newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
12056		    initops = op_append_list(OP_LINESEQ, initops,
12057			newSTATEOP(0, NULL,
12058			    newASSIGNOP(OPf_STACKED, var, 0, slice)));
12059		}
12060		prev_type = 2;
12061		max_arity = -1;
12062	    } break;
12063	    default:
12064		parse_error:
12065		qerror(Perl_mess(aTHX_ "Parse error"));
12066		return NULL;
12067	}
12068	lex_read_space(0);
12069	c = lex_peek_unichar(0);
12070	switch (c) {
12071	    case /*(*/')': break;
12072	    case ',':
12073		do {
12074		    lex_token_boundary();
12075		    lex_read_unichar(0);
12076		    lex_read_space(0);
12077		    c = lex_peek_unichar(0);
12078		} while (c == ',');
12079		break;
12080	    default:
12081		goto parse_error;
12082	}
12083    }
12084    if (min_arity != 0) {
12085	initops = op_append_list(OP_LINESEQ,
12086	    newSTATEOP(0, NULL,
12087		newLOGOP(OP_OR, 0,
12088		    newBINOP(OP_GE, 0,
12089			scalar(newUNOP(OP_RV2AV, 0,
12090			    newGVOP(OP_GV, 0, PL_defgv))),
12091			newSVOP(OP_CONST, 0, newSViv(min_arity))),
12092		    op_convert_list(OP_DIE, 0,
12093		        op_convert_list(OP_SPRINTF, 0,
12094		            op_append_list(OP_LIST,
12095		                newSVOP(OP_CONST, 0,
12096		                    newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
12097		                newSLICEOP(0,
12098		                    op_append_list(OP_LIST,
12099		                        newSVOP(OP_CONST, 0, newSViv(1)),
12100		                        newSVOP(OP_CONST, 0, newSViv(2))),
12101		                    newOP(OP_CALLER, 0))))))),
12102	    initops);
12103    }
12104    if (max_arity != -1) {
12105	initops = op_append_list(OP_LINESEQ,
12106	    newSTATEOP(0, NULL,
12107		newLOGOP(OP_OR, 0,
12108		    newBINOP(OP_LE, 0,
12109			scalar(newUNOP(OP_RV2AV, 0,
12110			    newGVOP(OP_GV, 0, PL_defgv))),
12111			newSVOP(OP_CONST, 0, newSViv(max_arity))),
12112		    op_convert_list(OP_DIE, 0,
12113		        op_convert_list(OP_SPRINTF, 0,
12114		            op_append_list(OP_LIST,
12115		                newSVOP(OP_CONST, 0,
12116		                    newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
12117		                newSLICEOP(0,
12118		                    op_append_list(OP_LIST,
12119		                        newSVOP(OP_CONST, 0, newSViv(1)),
12120		                        newSVOP(OP_CONST, 0, newSViv(2))),
12121		                    newOP(OP_CALLER, 0))))))),
12122	    initops);
12123    }
12124    return initops;
12125}
12126
12127/*
12128 * ex: set ts=8 sts=4 sw=4 et:
12129 */
12130