toke.c revision 1.22
1/* toke.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11/* 12 * 'It all comes from here, the stench and the peril.' --Frodo 13 * 14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] 15 */ 16 17/* 18 * This file is the lexer for Perl. It's closely linked to the 19 * parser, perly.y. 20 * 21 * The main routine is yylex(), which returns the next token. 22 */ 23 24/* 25=head1 Lexer interface 26This is the lower layer of the Perl parser, managing characters and tokens. 27 28=for apidoc AmU|yy_parser *|PL_parser 29 30Pointer to a structure encapsulating the state of the parsing operation 31currently in progress. The pointer can be locally changed to perform 32a nested parse without interfering with the state of an outer parse. 33Individual members of C<PL_parser> have their own documentation. 34 35=cut 36*/ 37 38#include "EXTERN.h" 39#define PERL_IN_TOKE_C 40#include "perl.h" 41#include "dquote_inline.h" 42 43#define new_constant(a,b,c,d,e,f,g) \ 44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g) 45 46#define pl_yylval (PL_parser->yylval) 47 48/* XXX temporary backwards compatibility */ 49#define PL_lex_brackets (PL_parser->lex_brackets) 50#define PL_lex_allbrackets (PL_parser->lex_allbrackets) 51#define PL_lex_fakeeof (PL_parser->lex_fakeeof) 52#define PL_lex_brackstack (PL_parser->lex_brackstack) 53#define PL_lex_casemods (PL_parser->lex_casemods) 54#define PL_lex_casestack (PL_parser->lex_casestack) 55#define PL_lex_defer (PL_parser->lex_defer) 56#define PL_lex_dojoin (PL_parser->lex_dojoin) 57#define PL_lex_formbrack (PL_parser->lex_formbrack) 58#define PL_lex_inpat (PL_parser->lex_inpat) 59#define PL_lex_inwhat (PL_parser->lex_inwhat) 60#define PL_lex_op (PL_parser->lex_op) 61#define PL_lex_repl (PL_parser->lex_repl) 62#define PL_lex_starts (PL_parser->lex_starts) 63#define PL_lex_stuff (PL_parser->lex_stuff) 64#define PL_multi_start (PL_parser->multi_start) 65#define PL_multi_open (PL_parser->multi_open) 66#define PL_multi_close (PL_parser->multi_close) 67#define PL_preambled (PL_parser->preambled) 68#define PL_sublex_info (PL_parser->sublex_info) 69#define PL_linestr (PL_parser->linestr) 70#define PL_expect (PL_parser->expect) 71#define PL_copline (PL_parser->copline) 72#define PL_bufptr (PL_parser->bufptr) 73#define PL_oldbufptr (PL_parser->oldbufptr) 74#define PL_oldoldbufptr (PL_parser->oldoldbufptr) 75#define PL_linestart (PL_parser->linestart) 76#define PL_bufend (PL_parser->bufend) 77#define PL_last_uni (PL_parser->last_uni) 78#define PL_last_lop (PL_parser->last_lop) 79#define PL_last_lop_op (PL_parser->last_lop_op) 80#define PL_lex_state (PL_parser->lex_state) 81#define PL_rsfp (PL_parser->rsfp) 82#define PL_rsfp_filters (PL_parser->rsfp_filters) 83#define PL_in_my (PL_parser->in_my) 84#define PL_in_my_stash (PL_parser->in_my_stash) 85#define PL_tokenbuf (PL_parser->tokenbuf) 86#define PL_multi_end (PL_parser->multi_end) 87#define PL_error_count (PL_parser->error_count) 88 89# define PL_nexttoke (PL_parser->nexttoke) 90# define PL_nexttype (PL_parser->nexttype) 91# define PL_nextval (PL_parser->nextval) 92 93static const char* const ident_too_long = "Identifier too long"; 94 95# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] 96 97#define XENUMMASK 0x3f 98#define XFAKEEOF 0x40 99#define XFAKEBRACK 0x80 100 101#ifdef USE_UTF8_SCRIPTS 102# define UTF cBOOL(!IN_BYTES) 103#else 104# define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) 105#endif 106 107/* The maximum number of characters preceding the unrecognized one to display */ 108#define UNRECOGNIZED_PRECEDE_COUNT 10 109 110/* In variables named $^X, these are the legal values for X. 111 * 1999-02-27 mjd-perl-patch@plover.com */ 112#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) 113 114#define SPACE_OR_TAB(c) isBLANK_A(c) 115 116#define HEXFP_PEEK(s) \ 117 (((s[0] == '.') && \ 118 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \ 119 isALPHA_FOLD_EQ(s[0], 'p')) 120 121/* LEX_* are values for PL_lex_state, the state of the lexer. 122 * They are arranged oddly so that the guard on the switch statement 123 * can get by with a single comparison (if the compiler is smart enough). 124 * 125 * These values refer to the various states within a sublex parse, 126 * i.e. within a double quotish string 127 */ 128 129/* #define LEX_NOTPARSING 11 is done in perl.h. */ 130 131#define LEX_NORMAL 10 /* normal code (ie not within "...") */ 132#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */ 133#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */ 134#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ 135#define LEX_INTERPSTART 6 /* expecting the start of a $var */ 136 137 /* at end of code, eg "$x" followed by: */ 138#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ 139#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ 140 141#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of 142 string or after \E, $foo, etc */ 143#define LEX_INTERPCONST 2 /* NOT USED */ 144#define LEX_FORMLINE 1 /* expecting a format line */ 145#define LEX_KNOWNEXT 0 /* next token known; just return it */ 146 147 148#ifdef DEBUGGING 149static const char* const lex_state_names[] = { 150 "KNOWNEXT", 151 "FORMLINE", 152 "INTERPCONST", 153 "INTERPCONCAT", 154 "INTERPENDMAYBE", 155 "INTERPEND", 156 "INTERPSTART", 157 "INTERPPUSH", 158 "INTERPCASEMOD", 159 "INTERPNORMAL", 160 "NORMAL" 161}; 162#endif 163 164#include "keywords.h" 165 166/* CLINE is a macro that ensures PL_copline has a sane value */ 167 168#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 169 170/* 171 * Convenience functions to return different tokens and prime the 172 * lexer for the next token. They all take an argument. 173 * 174 * TOKEN : generic token (used for '(', DOLSHARP, etc) 175 * OPERATOR : generic operator 176 * AOPERATOR : assignment operator 177 * PREBLOCK : beginning the block after an if, while, foreach, ... 178 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 179 * PREREF : *EXPR where EXPR is not a simple identifier 180 * TERM : expression term 181 * POSTDEREF : postfix dereference (->$* ->@[...] etc.) 182 * LOOPX : loop exiting command (goto, last, dump, etc) 183 * FTST : file test operator 184 * FUN0 : zero-argument function 185 * FUN0OP : zero-argument function, with its op created in this file 186 * FUN1 : not used, except for not, which isn't a UNIOP 187 * BOop : bitwise or or xor 188 * BAop : bitwise and 189 * BCop : bitwise complement 190 * SHop : shift operator 191 * PWop : power operator 192 * PMop : pattern-matching operator 193 * Aop : addition-level operator 194 * AopNOASSIGN : addition-level operator that is never part of .= 195 * Mop : multiplication-level operator 196 * Eop : equality-testing operator 197 * Rop : relational operator <= != gt 198 * 199 * Also see LOP and lop() below. 200 */ 201 202#ifdef DEBUGGING /* Serve -DT. */ 203# define REPORT(retval) tokereport((I32)retval, &pl_yylval) 204#else 205# define REPORT(retval) (retval) 206#endif 207 208#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) 209#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) 210#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval)) 211#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) 212#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) 213#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) 214#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) 215#define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) 216#define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \ 217 pl_yylval.ival=f, \ 218 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ 219 REPORT((int)LOOPEX)) 220#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) 221#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) 222#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) 223#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) 224#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) 225#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP)) 226#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \ 227 REPORT('~') 228#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP)) 229#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP)) 230#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) 231#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) 232#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) 233#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP)) 234#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) 235#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) 236 237/* This bit of chicanery makes a unary function followed by 238 * a parenthesis into a function with one argument, highest precedence. 239 * The UNIDOR macro is for unary functions that can be followed by the // 240 * operator (such as C<shift // 0>). 241 */ 242#define UNI3(f,x,have_x) { \ 243 pl_yylval.ival = f; \ 244 if (have_x) PL_expect = x; \ 245 PL_bufptr = s; \ 246 PL_last_uni = PL_oldbufptr; \ 247 PL_last_lop_op = (f) < 0 ? -(f) : (f); \ 248 if (*s == '(') \ 249 return REPORT( (int)FUNC1 ); \ 250 s = skipspace(s); \ 251 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ 252 } 253#define UNI(f) UNI3(f,XTERM,1) 254#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) 255#define UNIPROTO(f,optional) { \ 256 if (optional) PL_last_uni = PL_oldbufptr; \ 257 OPERATOR(f); \ 258 } 259 260#define UNIBRACK(f) UNI3(f,0,0) 261 262/* grandfather return to old style */ 263#define OLDLOP(f) \ 264 do { \ 265 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ 266 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ 267 pl_yylval.ival = (f); \ 268 PL_expect = XTERM; \ 269 PL_bufptr = s; \ 270 return (int)LSTOP; \ 271 } while(0) 272 273#define COPLINE_INC_WITH_HERELINES \ 274 STMT_START { \ 275 CopLINE_inc(PL_curcop); \ 276 if (PL_parser->herelines) \ 277 CopLINE(PL_curcop) += PL_parser->herelines, \ 278 PL_parser->herelines = 0; \ 279 } STMT_END 280/* Called after scan_str to update CopLINE(PL_curcop), but only when there 281 * is no sublex_push to follow. */ 282#define COPLINE_SET_FROM_MULTI_END \ 283 STMT_START { \ 284 CopLINE_set(PL_curcop, PL_multi_end); \ 285 if (PL_multi_end != PL_multi_start) \ 286 PL_parser->herelines = 0; \ 287 } STMT_END 288 289 290#ifdef DEBUGGING 291 292/* how to interpret the pl_yylval associated with the token */ 293enum token_type { 294 TOKENTYPE_NONE, 295 TOKENTYPE_IVAL, 296 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ 297 TOKENTYPE_PVAL, 298 TOKENTYPE_OPVAL 299}; 300 301static struct debug_tokens { 302 const int token; 303 enum token_type type; 304 const char *name; 305} const debug_tokens[] = 306{ 307 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" }, 308 { ANDAND, TOKENTYPE_NONE, "ANDAND" }, 309 { ANDOP, TOKENTYPE_NONE, "ANDOP" }, 310 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, 311 { ARROW, TOKENTYPE_NONE, "ARROW" }, 312 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, 313 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, 314 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" }, 315 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" }, 316 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" }, 317 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" }, 318 { DO, TOKENTYPE_NONE, "DO" }, 319 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" }, 320 { DORDOR, TOKENTYPE_NONE, "DORDOR" }, 321 { DOROP, TOKENTYPE_OPNUM, "DOROP" }, 322 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" }, 323 { ELSE, TOKENTYPE_NONE, "ELSE" }, 324 { ELSIF, TOKENTYPE_IVAL, "ELSIF" }, 325 { EQOP, TOKENTYPE_OPNUM, "EQOP" }, 326 { FOR, TOKENTYPE_IVAL, "FOR" }, 327 { FORMAT, TOKENTYPE_NONE, "FORMAT" }, 328 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" }, 329 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" }, 330 { FUNC, TOKENTYPE_OPNUM, "FUNC" }, 331 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" }, 332 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" }, 333 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" }, 334 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" }, 335 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" }, 336 { GIVEN, TOKENTYPE_IVAL, "GIVEN" }, 337 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" }, 338 { IF, TOKENTYPE_IVAL, "IF" }, 339 { LABEL, TOKENTYPE_PVAL, "LABEL" }, 340 { LOCAL, TOKENTYPE_IVAL, "LOCAL" }, 341 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" }, 342 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" }, 343 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" }, 344 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" }, 345 { METHOD, TOKENTYPE_OPVAL, "METHOD" }, 346 { MULOP, TOKENTYPE_OPNUM, "MULOP" }, 347 { MY, TOKENTYPE_IVAL, "MY" }, 348 { NOAMP, TOKENTYPE_NONE, "NOAMP" }, 349 { NOTOP, TOKENTYPE_NONE, "NOTOP" }, 350 { OROP, TOKENTYPE_IVAL, "OROP" }, 351 { OROR, TOKENTYPE_NONE, "OROR" }, 352 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, 353 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, 354 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, 355 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, 356 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" }, 357 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" }, 358 { POSTINC, TOKENTYPE_NONE, "POSTINC" }, 359 { POWOP, TOKENTYPE_OPNUM, "POWOP" }, 360 { PREDEC, TOKENTYPE_NONE, "PREDEC" }, 361 { PREINC, TOKENTYPE_NONE, "PREINC" }, 362 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, 363 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" }, 364 { REFGEN, TOKENTYPE_NONE, "REFGEN" }, 365 { RELOP, TOKENTYPE_OPNUM, "RELOP" }, 366 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, 367 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, 368 { SUB, TOKENTYPE_NONE, "SUB" }, 369 { THING, TOKENTYPE_OPVAL, "THING" }, 370 { UMINUS, TOKENTYPE_NONE, "UMINUS" }, 371 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" }, 372 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" }, 373 { UNLESS, TOKENTYPE_IVAL, "UNLESS" }, 374 { UNTIL, TOKENTYPE_IVAL, "UNTIL" }, 375 { USE, TOKENTYPE_IVAL, "USE" }, 376 { WHEN, TOKENTYPE_IVAL, "WHEN" }, 377 { WHILE, TOKENTYPE_IVAL, "WHILE" }, 378 { WORD, TOKENTYPE_OPVAL, "WORD" }, 379 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" }, 380 { 0, TOKENTYPE_NONE, NULL } 381}; 382 383/* dump the returned token in rv, plus any optional arg in pl_yylval */ 384 385STATIC int 386S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) 387{ 388 PERL_ARGS_ASSERT_TOKEREPORT; 389 390 if (DEBUG_T_TEST) { 391 const char *name = NULL; 392 enum token_type type = TOKENTYPE_NONE; 393 const struct debug_tokens *p; 394 SV* const report = newSVpvs("<== "); 395 396 for (p = debug_tokens; p->token; p++) { 397 if (p->token == (int)rv) { 398 name = p->name; 399 type = p->type; 400 break; 401 } 402 } 403 if (name) 404 Perl_sv_catpv(aTHX_ report, name); 405 else if (isGRAPH(rv)) 406 { 407 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); 408 if ((char)rv == 'p') 409 sv_catpvs(report, " (pending identifier)"); 410 } 411 else if (!rv) 412 sv_catpvs(report, "EOF"); 413 else 414 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv); 415 switch (type) { 416 case TOKENTYPE_NONE: 417 break; 418 case TOKENTYPE_IVAL: 419 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival); 420 break; 421 case TOKENTYPE_OPNUM: 422 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", 423 PL_op_name[lvalp->ival]); 424 break; 425 case TOKENTYPE_PVAL: 426 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval); 427 break; 428 case TOKENTYPE_OPVAL: 429 if (lvalp->opval) { 430 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", 431 PL_op_name[lvalp->opval->op_type]); 432 if (lvalp->opval->op_type == OP_CONST) { 433 Perl_sv_catpvf(aTHX_ report, " %s", 434 SvPEEK(cSVOPx_sv(lvalp->opval))); 435 } 436 437 } 438 else 439 sv_catpvs(report, "(opval=null)"); 440 break; 441 } 442 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); 443 }; 444 return (int)rv; 445} 446 447 448/* print the buffer with suitable escapes */ 449 450STATIC void 451S_printbuf(pTHX_ const char *const fmt, const char *const s) 452{ 453 SV* const tmp = newSVpvs(""); 454 455 PERL_ARGS_ASSERT_PRINTBUF; 456 457 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */ 458 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); 459 GCC_DIAG_RESTORE; 460 SvREFCNT_dec(tmp); 461} 462 463#endif 464 465static int 466S_deprecate_commaless_var_list(pTHX) { 467 PL_expect = XTERM; 468 deprecate("comma-less variable list"); 469 return REPORT(','); /* grandfather non-comma-format format */ 470} 471 472/* 473 * S_ao 474 * 475 * This subroutine looks for an '=' next to the operator that has just been 476 * parsed and turns it into an ASSIGNOP if it finds one. 477 */ 478 479STATIC int 480S_ao(pTHX_ int toketype) 481{ 482 if (*PL_bufptr == '=') { 483 PL_bufptr++; 484 if (toketype == ANDAND) 485 pl_yylval.ival = OP_ANDASSIGN; 486 else if (toketype == OROR) 487 pl_yylval.ival = OP_ORASSIGN; 488 else if (toketype == DORDOR) 489 pl_yylval.ival = OP_DORASSIGN; 490 toketype = ASSIGNOP; 491 } 492 return REPORT(toketype); 493} 494 495/* 496 * S_no_op 497 * When Perl expects an operator and finds something else, no_op 498 * prints the warning. It always prints "<something> found where 499 * operator expected. It prints "Missing semicolon on previous line?" 500 * if the surprise occurs at the start of the line. "do you need to 501 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 502 * where the compiler doesn't know if foo is a method call or a function. 503 * It prints "Missing operator before end of line" if there's nothing 504 * after the missing operator, or "... before <...>" if there is something 505 * after the missing operator. 506 * 507 * PL_bufptr is expected to point to the start of the thing that was found, 508 * and s after the next token or partial token. 509 */ 510 511STATIC void 512S_no_op(pTHX_ const char *const what, char *s) 513{ 514 char * const oldbp = PL_bufptr; 515 const bool is_first = (PL_oldbufptr == PL_linestart); 516 517 PERL_ARGS_ASSERT_NO_OP; 518 519 if (!s) 520 s = oldbp; 521 else 522 PL_bufptr = s; 523 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0); 524 if (ckWARN_d(WARN_SYNTAX)) { 525 if (is_first) 526 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 527 "\t(Missing semicolon on previous line?)\n"); 528 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) { 529 const char *t; 530 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':'); 531 t += UTF ? UTF8SKIP(t) : 1) 532 NOOP; 533 if (t < PL_bufptr && isSPACE(*t)) 534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 535 "\t(Do you need to predeclare %"UTF8f"?)\n", 536 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); 537 } 538 else { 539 assert(s >= oldbp); 540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 541 "\t(Missing operator before %"UTF8f"?)\n", 542 UTF8fARG(UTF, s - oldbp, oldbp)); 543 } 544 } 545 PL_bufptr = oldbp; 546} 547 548/* 549 * S_missingterm 550 * Complain about missing quote/regexp/heredoc terminator. 551 * If it's called with NULL then it cauterizes the line buffer. 552 * If we're in a delimited string and the delimiter is a control 553 * character, it's reformatted into a two-char sequence like ^C. 554 * This is fatal. 555 */ 556 557STATIC void 558S_missingterm(pTHX_ char *s) 559{ 560 char tmpbuf[3]; 561 char q; 562 if (s) { 563 char * const nl = strrchr(s,'\n'); 564 if (nl) 565 *nl = '\0'; 566 } 567 else if ((U8) PL_multi_close < 32) { 568 *tmpbuf = '^'; 569 tmpbuf[1] = (char)toCTRL(PL_multi_close); 570 tmpbuf[2] = '\0'; 571 s = tmpbuf; 572 } 573 else { 574 *tmpbuf = (char)PL_multi_close; 575 tmpbuf[1] = '\0'; 576 s = tmpbuf; 577 } 578 q = strchr(s,'"') ? '\'' : '"'; 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q); 580} 581 582#include "feature.h" 583 584/* 585 * Check whether the named feature is enabled. 586 */ 587bool 588Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) 589{ 590 char he_name[8 + MAX_FEATURE_LEN] = "feature_"; 591 592 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED; 593 594 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM); 595 596 if (namelen > MAX_FEATURE_LEN) 597 return FALSE; 598 memcpy(&he_name[8], name, namelen); 599 600 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0, 601 REFCOUNTED_HE_EXISTS)); 602} 603 604/* 605 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 606 * utf16-to-utf8-reversed. 607 */ 608 609#ifdef PERL_CR_FILTER 610static void 611strip_return(SV *sv) 612{ 613 const char *s = SvPVX_const(sv); 614 const char * const e = s + SvCUR(sv); 615 616 PERL_ARGS_ASSERT_STRIP_RETURN; 617 618 /* outer loop optimized to do nothing if there are no CR-LFs */ 619 while (s < e) { 620 if (*s++ == '\r' && *s == '\n') { 621 /* hit a CR-LF, need to copy the rest */ 622 char *d = s - 1; 623 *d++ = *s++; 624 while (s < e) { 625 if (*s == '\r' && s[1] == '\n') 626 s++; 627 *d++ = *s++; 628 } 629 SvCUR(sv) -= s - d; 630 return; 631 } 632 } 633} 634 635STATIC I32 636S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 637{ 638 const I32 count = FILTER_READ(idx+1, sv, maxlen); 639 if (count > 0 && !maxlen) 640 strip_return(sv); 641 return count; 642} 643#endif 644 645/* 646=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags 647 648Creates and initialises a new lexer/parser state object, supplying 649a context in which to lex and parse from a new source of Perl code. 650A pointer to the new state object is placed in L</PL_parser>. An entry 651is made on the save stack so that upon unwinding the new state object 652will be destroyed and the former value of L</PL_parser> will be restored. 653Nothing else need be done to clean up the parsing context. 654 655The code to be parsed comes from C<line> and C<rsfp>. C<line>, if 656non-null, provides a string (in SV form) containing code to be parsed. 657A copy of the string is made, so subsequent modification of C<line> 658does not affect parsing. C<rsfp>, if non-null, provides an input stream 659from which code will be read to be parsed. If both are non-null, the 660code in C<line> comes first and must consist of complete lines of input, 661and C<rsfp> supplies the remainder of the source. 662 663The C<flags> parameter is reserved for future use. Currently it is only 664used by perl internally, so extensions should always pass zero. 665 666=cut 667*/ 668 669/* LEX_START_SAME_FILTER indicates that this is not a new file, so it 670 can share filters with the current parser. 671 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the 672 caller, hence isn't owned by the parser, so shouldn't be closed on parser 673 destruction. This is used to handle the case of defaulting to reading the 674 script from the standard input because no filename was given on the command 675 line (without getting confused by situation where STDIN has been closed, so 676 the script handle is opened on fd 0) */ 677 678void 679Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) 680{ 681 const char *s = NULL; 682 yy_parser *parser, *oparser; 683 if (flags && flags & ~LEX_START_FLAGS) 684 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); 685 686 /* create and initialise a parser */ 687 688 Newxz(parser, 1, yy_parser); 689 parser->old_parser = oparser = PL_parser; 690 PL_parser = parser; 691 692 parser->stack = NULL; 693 parser->ps = NULL; 694 parser->stack_size = 0; 695 696 /* on scope exit, free this parser and restore any outer one */ 697 SAVEPARSER(parser); 698 parser->saved_curcop = PL_curcop; 699 700 /* initialise lexer state */ 701 702 parser->nexttoke = 0; 703 parser->error_count = oparser ? oparser->error_count : 0; 704 parser->copline = parser->preambling = NOLINE; 705 parser->lex_state = LEX_NORMAL; 706 parser->expect = XSTATE; 707 parser->rsfp = rsfp; 708 parser->rsfp_filters = 709 !(flags & LEX_START_SAME_FILTER) || !oparser 710 ? NULL 711 : MUTABLE_AV(SvREFCNT_inc( 712 oparser->rsfp_filters 713 ? oparser->rsfp_filters 714 : (oparser->rsfp_filters = newAV()) 715 )); 716 717 Newx(parser->lex_brackstack, 120, char); 718 Newx(parser->lex_casestack, 12, char); 719 *parser->lex_casestack = '\0'; 720 Newxz(parser->lex_shared, 1, LEXSHARED); 721 722 if (line) { 723 STRLEN len; 724 s = SvPV_const(line, len); 725 parser->linestr = flags & LEX_START_COPIED 726 ? SvREFCNT_inc_simple_NN(line) 727 : newSVpvn_flags(s, len, SvUTF8(line)); 728 if (!rsfp) 729 sv_catpvs(parser->linestr, "\n;"); 730 } else { 731 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); 732 } 733 parser->oldoldbufptr = 734 parser->oldbufptr = 735 parser->bufptr = 736 parser->linestart = SvPVX(parser->linestr); 737 parser->bufend = parser->bufptr + SvCUR(parser->linestr); 738 parser->last_lop = parser->last_uni = NULL; 739 740 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 741 |LEX_DONT_CLOSE_RSFP)); 742 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 743 |LEX_DONT_CLOSE_RSFP)); 744 745 parser->in_pod = parser->filtered = 0; 746} 747 748 749/* delete a parser object */ 750 751void 752Perl_parser_free(pTHX_ const yy_parser *parser) 753{ 754 PERL_ARGS_ASSERT_PARSER_FREE; 755 756 PL_curcop = parser->saved_curcop; 757 SvREFCNT_dec(parser->linestr); 758 759 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 760 PerlIO_clearerr(parser->rsfp); 761 else if (parser->rsfp && (!parser->old_parser 762 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) 763 PerlIO_close(parser->rsfp); 764 SvREFCNT_dec(parser->rsfp_filters); 765 SvREFCNT_dec(parser->lex_stuff); 766 SvREFCNT_dec(parser->sublex_info.repl); 767 768 Safefree(parser->lex_brackstack); 769 Safefree(parser->lex_casestack); 770 Safefree(parser->lex_shared); 771 PL_parser = parser->old_parser; 772 Safefree(parser); 773} 774 775void 776Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab) 777{ 778 I32 nexttoke = parser->nexttoke; 779 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS; 780 while (nexttoke--) { 781 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) 782 && parser->nextval[nexttoke].opval 783 && parser->nextval[nexttoke].opval->op_slabbed 784 && OpSLAB(parser->nextval[nexttoke].opval) == slab) { 785 op_free(parser->nextval[nexttoke].opval); 786 parser->nextval[nexttoke].opval = NULL; 787 } 788 } 789} 790 791 792/* 793=for apidoc AmxU|SV *|PL_parser-E<gt>linestr 794 795Buffer scalar containing the chunk currently under consideration of the 796text currently being lexed. This is always a plain string scalar (for 797which C<SvPOK> is true). It is not intended to be used as a scalar by 798normal scalar means; instead refer to the buffer directly by the pointer 799variables described below. 800 801The lexer maintains various C<char*> pointers to things in the 802C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever 803reallocated, all of these pointers must be updated. Don't attempt to 804do this manually, but rather use L</lex_grow_linestr> if you need to 805reallocate the buffer. 806 807The content of the text chunk in the buffer is commonly exactly one 808complete line of input, up to and including a newline terminator, 809but there are situations where it is otherwise. The octets of the 810buffer may be intended to be interpreted as either UTF-8 or Latin-1. 811The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8> 812flag on this scalar, which may disagree with it. 813 814For direct examination of the buffer, the variable 815L</PL_parser-E<gt>bufend> points to the end of the buffer. The current 816lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use 817of these pointers is usually preferable to examination of the scalar 818through normal scalar means. 819 820=for apidoc AmxU|char *|PL_parser-E<gt>bufend 821 822Direct pointer to the end of the chunk of text currently being lexed, the 823end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr) 824+ SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is 825always located at the end of the buffer, and does not count as part of 826the buffer's contents. 827 828=for apidoc AmxU|char *|PL_parser-E<gt>bufptr 829 830Points to the current position of lexing inside the lexer buffer. 831Characters around this point may be freely examined, within 832the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and 833L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be 834interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>. 835 836Lexing code (whether in the Perl core or not) moves this pointer past 837the characters that it consumes. It is also expected to perform some 838bookkeeping whenever a newline character is consumed. This movement 839can be more conveniently performed by the function L</lex_read_to>, 840which handles newlines appropriately. 841 842Interpretation of the buffer's octets can be abstracted out by 843using the slightly higher-level functions L</lex_peek_unichar> and 844L</lex_read_unichar>. 845 846=for apidoc AmxU|char *|PL_parser-E<gt>linestart 847 848Points to the start of the current line inside the lexer buffer. 849This is useful for indicating at which column an error occurred, and 850not much else. This must be updated by any lexing code that consumes 851a newline; the function L</lex_read_to> handles this detail. 852 853=cut 854*/ 855 856/* 857=for apidoc Amx|bool|lex_bufutf8 858 859Indicates whether the octets in the lexer buffer 860(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding 861of Unicode characters. If not, they should be interpreted as Latin-1 862characters. This is analogous to the C<SvUTF8> flag for scalars. 863 864In UTF-8 mode, it is not guaranteed that the lexer buffer actually 865contains valid UTF-8. Lexing code must be robust in the face of invalid 866encoding. 867 868The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar 869is significant, but not the whole story regarding the input character 870encoding. Normally, when a file is being read, the scalar contains octets 871and its C<SvUTF8> flag is off, but the octets should be interpreted as 872UTF-8 if the C<use utf8> pragma is in effect. During a string eval, 873however, the scalar may have the C<SvUTF8> flag on, and in this case its 874octets should be interpreted as UTF-8 unless the C<use bytes> pragma 875is in effect. This logic may change in the future; use this function 876instead of implementing the logic yourself. 877 878=cut 879*/ 880 881bool 882Perl_lex_bufutf8(pTHX) 883{ 884 return UTF; 885} 886 887/* 888=for apidoc Amx|char *|lex_grow_linestr|STRLEN len 889 890Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate 891at least C<len> octets (including terminating C<NUL>). Returns a 892pointer to the reallocated buffer. This is necessary before making 893any direct modification of the buffer that would increase its length. 894L</lex_stuff_pvn> provides a more convenient way to insert text into 895the buffer. 896 897Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>; 898this function updates all of the lexer's variables that point directly 899into the buffer. 900 901=cut 902*/ 903 904char * 905Perl_lex_grow_linestr(pTHX_ STRLEN len) 906{ 907 SV *linestr; 908 char *buf; 909 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 910 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos; 911 linestr = PL_parser->linestr; 912 buf = SvPVX(linestr); 913 if (len <= SvLEN(linestr)) 914 return buf; 915 bufend_pos = PL_parser->bufend - buf; 916 bufptr_pos = PL_parser->bufptr - buf; 917 oldbufptr_pos = PL_parser->oldbufptr - buf; 918 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 919 linestart_pos = PL_parser->linestart - buf; 920 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 921 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 922 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ? 923 PL_parser->lex_shared->re_eval_start - buf : 0; 924 925 buf = sv_grow(linestr, len); 926 927 PL_parser->bufend = buf + bufend_pos; 928 PL_parser->bufptr = buf + bufptr_pos; 929 PL_parser->oldbufptr = buf + oldbufptr_pos; 930 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 931 PL_parser->linestart = buf + linestart_pos; 932 if (PL_parser->last_uni) 933 PL_parser->last_uni = buf + last_uni_pos; 934 if (PL_parser->last_lop) 935 PL_parser->last_lop = buf + last_lop_pos; 936 if (PL_parser->lex_shared->re_eval_start) 937 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; 938 return buf; 939} 940 941/* 942=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags 943 944Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 945immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 946reallocating the buffer if necessary. This means that lexing code that 947runs later will see the characters as if they had appeared in the input. 948It is not recommended to do this as part of normal parsing, and most 949uses of this facility run the risk of the inserted characters being 950interpreted in an unintended manner. 951 952The string to be inserted is represented by C<len> octets starting 953at C<pv>. These octets are interpreted as either UTF-8 or Latin-1, 954according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>. 955The characters are recoded for the lexer buffer, according to how the 956buffer is currently being interpreted (L</lex_bufutf8>). If a string 957to be inserted is available as a Perl scalar, the L</lex_stuff_sv> 958function is more convenient. 959 960=cut 961*/ 962 963void 964Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) 965{ 966 dVAR; 967 char *bufptr; 968 PERL_ARGS_ASSERT_LEX_STUFF_PVN; 969 if (flags & ~(LEX_STUFF_UTF8)) 970 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); 971 if (UTF) { 972 if (flags & LEX_STUFF_UTF8) { 973 goto plain_copy; 974 } else { 975 STRLEN highhalf = 0; /* Count of variants */ 976 const char *p, *e = pv+len; 977 for (p = pv; p != e; p++) { 978 if (! UTF8_IS_INVARIANT(*p)) { 979 highhalf++; 980 } 981 } 982 if (!highhalf) 983 goto plain_copy; 984 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); 985 bufptr = PL_parser->bufptr; 986 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); 987 SvCUR_set(PL_parser->linestr, 988 SvCUR(PL_parser->linestr) + len+highhalf); 989 PL_parser->bufend += len+highhalf; 990 for (p = pv; p != e; p++) { 991 U8 c = (U8)*p; 992 if (! UTF8_IS_INVARIANT(c)) { 993 *bufptr++ = UTF8_TWO_BYTE_HI(c); 994 *bufptr++ = UTF8_TWO_BYTE_LO(c); 995 } else { 996 *bufptr++ = (char)c; 997 } 998 } 999 } 1000 } else { 1001 if (flags & LEX_STUFF_UTF8) { 1002 STRLEN highhalf = 0; 1003 const char *p, *e = pv+len; 1004 for (p = pv; p != e; p++) { 1005 U8 c = (U8)*p; 1006 if (UTF8_IS_ABOVE_LATIN1(c)) { 1007 Perl_croak(aTHX_ "Lexing code attempted to stuff " 1008 "non-Latin-1 character into Latin-1 input"); 1009 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { 1010 p++; 1011 highhalf++; 1012 } else if (! UTF8_IS_INVARIANT(c)) { 1013 /* malformed UTF-8 */ 1014 ENTER; 1015 SAVESPTR(PL_warnhook); 1016 PL_warnhook = PERL_WARNHOOK_FATAL; 1017 utf8n_to_uvchr((U8*)p, e-p, NULL, 0); 1018 LEAVE; 1019 } 1020 } 1021 if (!highhalf) 1022 goto plain_copy; 1023 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); 1024 bufptr = PL_parser->bufptr; 1025 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); 1026 SvCUR_set(PL_parser->linestr, 1027 SvCUR(PL_parser->linestr) + len-highhalf); 1028 PL_parser->bufend += len-highhalf; 1029 p = pv; 1030 while (p < e) { 1031 if (UTF8_IS_INVARIANT(*p)) { 1032 *bufptr++ = *p; 1033 p++; 1034 } 1035 else { 1036 assert(p < e -1 ); 1037 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)); 1038 p += 2; 1039 } 1040 } 1041 } else { 1042 plain_copy: 1043 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); 1044 bufptr = PL_parser->bufptr; 1045 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); 1046 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); 1047 PL_parser->bufend += len; 1048 Copy(pv, bufptr, len, char); 1049 } 1050 } 1051} 1052 1053/* 1054=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags 1055 1056Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1057immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1058reallocating the buffer if necessary. This means that lexing code that 1059runs later will see the characters as if they had appeared in the input. 1060It is not recommended to do this as part of normal parsing, and most 1061uses of this facility run the risk of the inserted characters being 1062interpreted in an unintended manner. 1063 1064The string to be inserted is represented by octets starting at C<pv> 1065and continuing to the first nul. These octets are interpreted as either 1066UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set 1067in C<flags>. The characters are recoded for the lexer buffer, according 1068to how the buffer is currently being interpreted (L</lex_bufutf8>). 1069If it is not convenient to nul-terminate a string to be inserted, the 1070L</lex_stuff_pvn> function is more appropriate. 1071 1072=cut 1073*/ 1074 1075void 1076Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags) 1077{ 1078 PERL_ARGS_ASSERT_LEX_STUFF_PV; 1079 lex_stuff_pvn(pv, strlen(pv), flags); 1080} 1081 1082/* 1083=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags 1084 1085Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1086immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1087reallocating the buffer if necessary. This means that lexing code that 1088runs later will see the characters as if they had appeared in the input. 1089It is not recommended to do this as part of normal parsing, and most 1090uses of this facility run the risk of the inserted characters being 1091interpreted in an unintended manner. 1092 1093The string to be inserted is the string value of C<sv>. The characters 1094are recoded for the lexer buffer, according to how the buffer is currently 1095being interpreted (L</lex_bufutf8>). If a string to be inserted is 1096not already a Perl scalar, the L</lex_stuff_pvn> function avoids the 1097need to construct a scalar. 1098 1099=cut 1100*/ 1101 1102void 1103Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) 1104{ 1105 char *pv; 1106 STRLEN len; 1107 PERL_ARGS_ASSERT_LEX_STUFF_SV; 1108 if (flags) 1109 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); 1110 pv = SvPV(sv, len); 1111 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); 1112} 1113 1114/* 1115=for apidoc Amx|void|lex_unstuff|char *ptr 1116 1117Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to 1118C<ptr>. Text following C<ptr> will be moved, and the buffer shortened. 1119This hides the discarded text from any lexing code that runs later, 1120as if the text had never appeared. 1121 1122This is not the normal way to consume lexed text. For that, use 1123L</lex_read_to>. 1124 1125=cut 1126*/ 1127 1128void 1129Perl_lex_unstuff(pTHX_ char *ptr) 1130{ 1131 char *buf, *bufend; 1132 STRLEN unstuff_len; 1133 PERL_ARGS_ASSERT_LEX_UNSTUFF; 1134 buf = PL_parser->bufptr; 1135 if (ptr < buf) 1136 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1137 if (ptr == buf) 1138 return; 1139 bufend = PL_parser->bufend; 1140 if (ptr > bufend) 1141 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1142 unstuff_len = ptr - buf; 1143 Move(ptr, buf, bufend+1-ptr, char); 1144 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); 1145 PL_parser->bufend = bufend - unstuff_len; 1146} 1147 1148/* 1149=for apidoc Amx|void|lex_read_to|char *ptr 1150 1151Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up 1152to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>, 1153performing the correct bookkeeping whenever a newline character is passed. 1154This is the normal way to consume lexed text. 1155 1156Interpretation of the buffer's octets can be abstracted out by 1157using the slightly higher-level functions L</lex_peek_unichar> and 1158L</lex_read_unichar>. 1159 1160=cut 1161*/ 1162 1163void 1164Perl_lex_read_to(pTHX_ char *ptr) 1165{ 1166 char *s; 1167 PERL_ARGS_ASSERT_LEX_READ_TO; 1168 s = PL_parser->bufptr; 1169 if (ptr < s || ptr > PL_parser->bufend) 1170 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); 1171 for (; s != ptr; s++) 1172 if (*s == '\n') { 1173 COPLINE_INC_WITH_HERELINES; 1174 PL_parser->linestart = s+1; 1175 } 1176 PL_parser->bufptr = ptr; 1177} 1178 1179/* 1180=for apidoc Amx|void|lex_discard_to|char *ptr 1181 1182Discards the first part of the L</PL_parser-E<gt>linestr> buffer, 1183up to C<ptr>. The remaining content of the buffer will be moved, and 1184all pointers into the buffer updated appropriately. C<ptr> must not 1185be later in the buffer than the position of L</PL_parser-E<gt>bufptr>: 1186it is not permitted to discard text that has yet to be lexed. 1187 1188Normally it is not necessarily to do this directly, because it suffices to 1189use the implicit discarding behaviour of L</lex_next_chunk> and things 1190based on it. However, if a token stretches across multiple lines, 1191and the lexing code has kept multiple lines of text in the buffer for 1192that purpose, then after completion of the token it would be wise to 1193explicitly discard the now-unneeded earlier lines, to avoid future 1194multi-line tokens growing the buffer without bound. 1195 1196=cut 1197*/ 1198 1199void 1200Perl_lex_discard_to(pTHX_ char *ptr) 1201{ 1202 char *buf; 1203 STRLEN discard_len; 1204 PERL_ARGS_ASSERT_LEX_DISCARD_TO; 1205 buf = SvPVX(PL_parser->linestr); 1206 if (ptr < buf) 1207 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1208 if (ptr == buf) 1209 return; 1210 if (ptr > PL_parser->bufptr) 1211 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1212 discard_len = ptr - buf; 1213 if (PL_parser->oldbufptr < ptr) 1214 PL_parser->oldbufptr = ptr; 1215 if (PL_parser->oldoldbufptr < ptr) 1216 PL_parser->oldoldbufptr = ptr; 1217 if (PL_parser->last_uni && PL_parser->last_uni < ptr) 1218 PL_parser->last_uni = NULL; 1219 if (PL_parser->last_lop && PL_parser->last_lop < ptr) 1220 PL_parser->last_lop = NULL; 1221 Move(ptr, buf, PL_parser->bufend+1-ptr, char); 1222 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); 1223 PL_parser->bufend -= discard_len; 1224 PL_parser->bufptr -= discard_len; 1225 PL_parser->oldbufptr -= discard_len; 1226 PL_parser->oldoldbufptr -= discard_len; 1227 if (PL_parser->last_uni) 1228 PL_parser->last_uni -= discard_len; 1229 if (PL_parser->last_lop) 1230 PL_parser->last_lop -= discard_len; 1231} 1232 1233/* 1234=for apidoc Amx|bool|lex_next_chunk|U32 flags 1235 1236Reads in the next chunk of text to be lexed, appending it to 1237L</PL_parser-E<gt>linestr>. This should be called when lexing code has 1238looked to the end of the current chunk and wants to know more. It is 1239usual, but not necessary, for lexing to have consumed the entirety of 1240the current chunk at this time. 1241 1242If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current 1243chunk (i.e., the current chunk has been entirely consumed), normally the 1244current chunk will be discarded at the same time that the new chunk is 1245read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk 1246will not be discarded. If the current chunk has not been entirely 1247consumed, then it will not be discarded regardless of the flag. 1248 1249Returns true if some new text was added to the buffer, or false if the 1250buffer has reached the end of the input text. 1251 1252=cut 1253*/ 1254 1255#define LEX_FAKE_EOF 0x80000000 1256#define LEX_NO_TERM 0x40000000 /* here-doc */ 1257 1258bool 1259Perl_lex_next_chunk(pTHX_ U32 flags) 1260{ 1261 SV *linestr; 1262 char *buf; 1263 STRLEN old_bufend_pos, new_bufend_pos; 1264 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1265 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 1266 bool got_some_for_debugger = 0; 1267 bool got_some; 1268 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) 1269 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); 1270 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat) 1271 return FALSE; 1272 linestr = PL_parser->linestr; 1273 buf = SvPVX(linestr); 1274 if (!(flags & LEX_KEEP_PREVIOUS) 1275 && PL_parser->bufptr == PL_parser->bufend) 1276 { 1277 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; 1278 linestart_pos = 0; 1279 if (PL_parser->last_uni != PL_parser->bufend) 1280 PL_parser->last_uni = NULL; 1281 if (PL_parser->last_lop != PL_parser->bufend) 1282 PL_parser->last_lop = NULL; 1283 last_uni_pos = last_lop_pos = 0; 1284 *buf = 0; 1285 SvCUR(linestr) = 0; 1286 } else { 1287 old_bufend_pos = PL_parser->bufend - buf; 1288 bufptr_pos = PL_parser->bufptr - buf; 1289 oldbufptr_pos = PL_parser->oldbufptr - buf; 1290 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1291 linestart_pos = PL_parser->linestart - buf; 1292 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1293 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1294 } 1295 if (flags & LEX_FAKE_EOF) { 1296 goto eof; 1297 } else if (!PL_parser->rsfp && !PL_parser->filtered) { 1298 got_some = 0; 1299 } else if (filter_gets(linestr, old_bufend_pos)) { 1300 got_some = 1; 1301 got_some_for_debugger = 1; 1302 } else if (flags & LEX_NO_TERM) { 1303 got_some = 0; 1304 } else { 1305 if (!SvPOK(linestr)) /* can get undefined by filter_gets */ 1306 sv_setpvs(linestr, ""); 1307 eof: 1308 /* End of real input. Close filehandle (unless it was STDIN), 1309 * then add implicit termination. 1310 */ 1311 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 1312 PerlIO_clearerr(PL_parser->rsfp); 1313 else if (PL_parser->rsfp) 1314 (void)PerlIO_close(PL_parser->rsfp); 1315 PL_parser->rsfp = NULL; 1316 PL_parser->in_pod = PL_parser->filtered = 0; 1317 if (!PL_in_eval && PL_minus_p) { 1318 sv_catpvs(linestr, 1319 /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); 1320 PL_minus_n = PL_minus_p = 0; 1321 } else if (!PL_in_eval && PL_minus_n) { 1322 sv_catpvs(linestr, /*{*/";}"); 1323 PL_minus_n = 0; 1324 } else 1325 sv_catpvs(linestr, ";"); 1326 got_some = 1; 1327 } 1328 buf = SvPVX(linestr); 1329 new_bufend_pos = SvCUR(linestr); 1330 PL_parser->bufend = buf + new_bufend_pos; 1331 PL_parser->bufptr = buf + bufptr_pos; 1332 PL_parser->oldbufptr = buf + oldbufptr_pos; 1333 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1334 PL_parser->linestart = buf + linestart_pos; 1335 if (PL_parser->last_uni) 1336 PL_parser->last_uni = buf + last_uni_pos; 1337 if (PL_parser->last_lop) 1338 PL_parser->last_lop = buf + last_lop_pos; 1339 if (PL_parser->preambling != NOLINE) { 1340 CopLINE_set(PL_curcop, PL_parser->preambling + 1); 1341 PL_parser->preambling = NOLINE; 1342 } 1343 if ( got_some_for_debugger 1344 && PERLDB_LINE_OR_SAVESRC 1345 && PL_curstash != PL_debstash) 1346 { 1347 /* debugger active and we're not compiling the debugger code, 1348 * so store the line into the debugger's array of lines 1349 */ 1350 update_debugger_info(NULL, buf+old_bufend_pos, 1351 new_bufend_pos-old_bufend_pos); 1352 } 1353 return got_some; 1354} 1355 1356/* 1357=for apidoc Amx|I32|lex_peek_unichar|U32 flags 1358 1359Looks ahead one (Unicode) character in the text currently being lexed. 1360Returns the codepoint (unsigned integer value) of the next character, 1361or -1 if lexing has reached the end of the input text. To consume the 1362peeked character, use L</lex_read_unichar>. 1363 1364If the next character is in (or extends into) the next chunk of input 1365text, the next chunk will be read in. Normally the current chunk will be 1366discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1367bit set, then the current chunk will not be discarded. 1368 1369If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1370is encountered, an exception is generated. 1371 1372=cut 1373*/ 1374 1375I32 1376Perl_lex_peek_unichar(pTHX_ U32 flags) 1377{ 1378 dVAR; 1379 char *s, *bufend; 1380 if (flags & ~(LEX_KEEP_PREVIOUS)) 1381 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); 1382 s = PL_parser->bufptr; 1383 bufend = PL_parser->bufend; 1384 if (UTF) { 1385 U8 head; 1386 I32 unichar; 1387 STRLEN len, retlen; 1388 if (s == bufend) { 1389 if (!lex_next_chunk(flags)) 1390 return -1; 1391 s = PL_parser->bufptr; 1392 bufend = PL_parser->bufend; 1393 } 1394 head = (U8)*s; 1395 if (UTF8_IS_INVARIANT(head)) 1396 return head; 1397 if (UTF8_IS_START(head)) { 1398 len = UTF8SKIP(&head); 1399 while ((STRLEN)(bufend-s) < len) { 1400 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) 1401 break; 1402 s = PL_parser->bufptr; 1403 bufend = PL_parser->bufend; 1404 } 1405 } 1406 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); 1407 if (retlen == (STRLEN)-1) { 1408 /* malformed UTF-8 */ 1409 ENTER; 1410 SAVESPTR(PL_warnhook); 1411 PL_warnhook = PERL_WARNHOOK_FATAL; 1412 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0); 1413 LEAVE; 1414 } 1415 return unichar; 1416 } else { 1417 if (s == bufend) { 1418 if (!lex_next_chunk(flags)) 1419 return -1; 1420 s = PL_parser->bufptr; 1421 } 1422 return (U8)*s; 1423 } 1424} 1425 1426/* 1427=for apidoc Amx|I32|lex_read_unichar|U32 flags 1428 1429Reads the next (Unicode) character in the text currently being lexed. 1430Returns the codepoint (unsigned integer value) of the character read, 1431and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1 1432if lexing has reached the end of the input text. To non-destructively 1433examine the next character, use L</lex_peek_unichar> instead. 1434 1435If the next character is in (or extends into) the next chunk of input 1436text, the next chunk will be read in. Normally the current chunk will be 1437discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1438bit set, then the current chunk will not be discarded. 1439 1440If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1441is encountered, an exception is generated. 1442 1443=cut 1444*/ 1445 1446I32 1447Perl_lex_read_unichar(pTHX_ U32 flags) 1448{ 1449 I32 c; 1450 if (flags & ~(LEX_KEEP_PREVIOUS)) 1451 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); 1452 c = lex_peek_unichar(flags); 1453 if (c != -1) { 1454 if (c == '\n') 1455 COPLINE_INC_WITH_HERELINES; 1456 if (UTF) 1457 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); 1458 else 1459 ++(PL_parser->bufptr); 1460 } 1461 return c; 1462} 1463 1464/* 1465=for apidoc Amx|void|lex_read_space|U32 flags 1466 1467Reads optional spaces, in Perl style, in the text currently being 1468lexed. The spaces may include ordinary whitespace characters and 1469Perl-style comments. C<#line> directives are processed if encountered. 1470L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points 1471at a non-space character (or the end of the input text). 1472 1473If spaces extend into the next chunk of input text, the next chunk will 1474be read in. Normally the current chunk will be discarded at the same 1475time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current 1476chunk will not be discarded. 1477 1478=cut 1479*/ 1480 1481#define LEX_NO_INCLINE 0x40000000 1482#define LEX_NO_NEXT_CHUNK 0x80000000 1483 1484void 1485Perl_lex_read_space(pTHX_ U32 flags) 1486{ 1487 char *s, *bufend; 1488 const bool can_incline = !(flags & LEX_NO_INCLINE); 1489 bool need_incline = 0; 1490 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) 1491 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); 1492 s = PL_parser->bufptr; 1493 bufend = PL_parser->bufend; 1494 while (1) { 1495 char c = *s; 1496 if (c == '#') { 1497 do { 1498 c = *++s; 1499 } while (!(c == '\n' || (c == 0 && s == bufend))); 1500 } else if (c == '\n') { 1501 s++; 1502 if (can_incline) { 1503 PL_parser->linestart = s; 1504 if (s == bufend) 1505 need_incline = 1; 1506 else 1507 incline(s); 1508 } 1509 } else if (isSPACE(c)) { 1510 s++; 1511 } else if (c == 0 && s == bufend) { 1512 bool got_more; 1513 line_t l; 1514 if (flags & LEX_NO_NEXT_CHUNK) 1515 break; 1516 PL_parser->bufptr = s; 1517 l = CopLINE(PL_curcop); 1518 CopLINE(PL_curcop) += PL_parser->herelines + 1; 1519 got_more = lex_next_chunk(flags); 1520 CopLINE_set(PL_curcop, l); 1521 s = PL_parser->bufptr; 1522 bufend = PL_parser->bufend; 1523 if (!got_more) 1524 break; 1525 if (can_incline && need_incline && PL_parser->rsfp) { 1526 incline(s); 1527 need_incline = 0; 1528 } 1529 } else if (!c) { 1530 s++; 1531 } else { 1532 break; 1533 } 1534 } 1535 PL_parser->bufptr = s; 1536} 1537 1538/* 1539 1540=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn 1541 1542This function performs syntax checking on a prototype, C<proto>. 1543If C<warn> is true, any illegal characters or mismatched brackets 1544will trigger illegalproto warnings, declaring that they were 1545detected in the prototype for C<name>. 1546 1547The return value is C<true> if this is a valid prototype, and 1548C<false> if it is not, regardless of whether C<warn> was C<true> or 1549C<false>. 1550 1551Note that C<NULL> is a valid C<proto> and will always return C<true>. 1552 1553=cut 1554 1555 */ 1556 1557bool 1558Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) 1559{ 1560 STRLEN len, origlen; 1561 char *p = proto ? SvPV(proto, len) : NULL; 1562 bool bad_proto = FALSE; 1563 bool in_brackets = FALSE; 1564 bool after_slash = FALSE; 1565 char greedy_proto = ' '; 1566 bool proto_after_greedy_proto = FALSE; 1567 bool must_be_last = FALSE; 1568 bool underscore = FALSE; 1569 bool bad_proto_after_underscore = FALSE; 1570 1571 PERL_ARGS_ASSERT_VALIDATE_PROTO; 1572 1573 if (!proto) 1574 return TRUE; 1575 1576 origlen = len; 1577 for (; len--; p++) { 1578 if (!isSPACE(*p)) { 1579 if (must_be_last) 1580 proto_after_greedy_proto = TRUE; 1581 if (underscore) { 1582 if (!strchr(";@%", *p)) 1583 bad_proto_after_underscore = TRUE; 1584 underscore = FALSE; 1585 } 1586 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { 1587 bad_proto = TRUE; 1588 } 1589 else { 1590 if (*p == '[') 1591 in_brackets = TRUE; 1592 else if (*p == ']') 1593 in_brackets = FALSE; 1594 else if ((*p == '@' || *p == '%') 1595 && !after_slash 1596 && !in_brackets ) 1597 { 1598 must_be_last = TRUE; 1599 greedy_proto = *p; 1600 } 1601 else if (*p == '_') 1602 underscore = TRUE; 1603 } 1604 if (*p == '\\') 1605 after_slash = TRUE; 1606 else 1607 after_slash = FALSE; 1608 } 1609 } 1610 1611 if (warn) { 1612 SV *tmpsv = newSVpvs_flags("", SVs_TEMP); 1613 p -= origlen; 1614 p = SvUTF8(proto) 1615 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), 1616 origlen, UNI_DISPLAY_ISPRINT) 1617 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); 1618 1619 if (proto_after_greedy_proto) 1620 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1621 "Prototype after '%c' for %"SVf" : %s", 1622 greedy_proto, SVfARG(name), p); 1623 if (in_brackets) 1624 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1625 "Missing ']' in prototype for %"SVf" : %s", 1626 SVfARG(name), p); 1627 if (bad_proto) 1628 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1629 "Illegal character in prototype for %"SVf" : %s", 1630 SVfARG(name), p); 1631 if (bad_proto_after_underscore) 1632 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1633 "Illegal character after '_' in prototype for %"SVf" : %s", 1634 SVfARG(name), p); 1635 } 1636 1637 return (! (proto_after_greedy_proto || bad_proto) ); 1638} 1639 1640/* 1641 * S_incline 1642 * This subroutine has nothing to do with tilting, whether at windmills 1643 * or pinball tables. Its name is short for "increment line". It 1644 * increments the current line number in CopLINE(PL_curcop) and checks 1645 * to see whether the line starts with a comment of the form 1646 * # line 500 "foo.pm" 1647 * If so, it sets the current line number and file to the values in the comment. 1648 */ 1649 1650STATIC void 1651S_incline(pTHX_ const char *s) 1652{ 1653 const char *t; 1654 const char *n; 1655 const char *e; 1656 line_t line_num; 1657 UV uv; 1658 1659 PERL_ARGS_ASSERT_INCLINE; 1660 1661 COPLINE_INC_WITH_HERELINES; 1662 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL 1663 && s+1 == PL_bufend && *s == ';') { 1664 /* fake newline in string eval */ 1665 CopLINE_dec(PL_curcop); 1666 return; 1667 } 1668 if (*s++ != '#') 1669 return; 1670 while (SPACE_OR_TAB(*s)) 1671 s++; 1672 if (strnEQ(s, "line", 4)) 1673 s += 4; 1674 else 1675 return; 1676 if (SPACE_OR_TAB(*s)) 1677 s++; 1678 else 1679 return; 1680 while (SPACE_OR_TAB(*s)) 1681 s++; 1682 if (!isDIGIT(*s)) 1683 return; 1684 1685 n = s; 1686 while (isDIGIT(*s)) 1687 s++; 1688 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') 1689 return; 1690 while (SPACE_OR_TAB(*s)) 1691 s++; 1692 if (*s == '"' && (t = strchr(s+1, '"'))) { 1693 s++; 1694 e = t + 1; 1695 } 1696 else { 1697 t = s; 1698 while (*t && !isSPACE(*t)) 1699 t++; 1700 e = t; 1701 } 1702 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 1703 e++; 1704 if (*e != '\n' && *e != '\0') 1705 return; /* false alarm */ 1706 1707 if (!grok_atoUV(n, &uv, &e)) 1708 return; 1709 line_num = ((line_t)uv) - 1; 1710 1711 if (t - s > 0) { 1712 const STRLEN len = t - s; 1713 1714 if (!PL_rsfp && !PL_parser->filtered) { 1715 /* must copy *{"::_<(eval N)[oldfilename:L]"} 1716 * to *{"::_<newfilename"} */ 1717 /* However, the long form of evals is only turned on by the 1718 debugger - usually they're "(eval %lu)" */ 1719 GV * const cfgv = CopFILEGV(PL_curcop); 1720 if (cfgv) { 1721 char smallbuf[128]; 1722 STRLEN tmplen2 = len; 1723 char *tmpbuf2; 1724 GV *gv2; 1725 1726 if (tmplen2 + 2 <= sizeof smallbuf) 1727 tmpbuf2 = smallbuf; 1728 else 1729 Newx(tmpbuf2, tmplen2 + 2, char); 1730 1731 tmpbuf2[0] = '_'; 1732 tmpbuf2[1] = '<'; 1733 1734 memcpy(tmpbuf2 + 2, s, tmplen2); 1735 tmplen2 += 2; 1736 1737 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE); 1738 if (!isGV(gv2)) { 1739 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); 1740 /* adjust ${"::_<newfilename"} to store the new file name */ 1741 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2); 1742 /* The line number may differ. If that is the case, 1743 alias the saved lines that are in the array. 1744 Otherwise alias the whole array. */ 1745 if (CopLINE(PL_curcop) == line_num) { 1746 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv))); 1747 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv))); 1748 } 1749 else if (GvAV(cfgv)) { 1750 AV * const av = GvAV(cfgv); 1751 const I32 start = CopLINE(PL_curcop)+1; 1752 I32 items = AvFILLp(av) - start; 1753 if (items > 0) { 1754 AV * const av2 = GvAVn(gv2); 1755 SV **svp = AvARRAY(av) + start; 1756 I32 l = (I32)line_num+1; 1757 while (items--) 1758 av_store(av2, l++, SvREFCNT_inc(*svp++)); 1759 } 1760 } 1761 } 1762 1763 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); 1764 } 1765 } 1766 CopFILE_free(PL_curcop); 1767 CopFILE_setn(PL_curcop, s, len); 1768 } 1769 CopLINE_set(PL_curcop, line_num); 1770} 1771 1772#define skipspace(s) skipspace_flags(s, 0) 1773 1774 1775STATIC void 1776S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) 1777{ 1778 AV *av = CopFILEAVx(PL_curcop); 1779 if (av) { 1780 SV * sv; 1781 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG); 1782 else { 1783 sv = *av_fetch(av, 0, 1); 1784 SvUPGRADE(sv, SVt_PVMG); 1785 } 1786 if (!SvPOK(sv)) sv_setpvs(sv,""); 1787 if (orig_sv) 1788 sv_catsv(sv, orig_sv); 1789 else 1790 sv_catpvn(sv, buf, len); 1791 if (!SvIOK(sv)) { 1792 (void)SvIOK_on(sv); 1793 SvIV_set(sv, 0); 1794 } 1795 if (PL_parser->preambling == NOLINE) 1796 av_store(av, CopLINE(PL_curcop), sv); 1797 } 1798} 1799 1800/* 1801 * S_skipspace 1802 * Called to gobble the appropriate amount and type of whitespace. 1803 * Skips comments as well. 1804 */ 1805 1806STATIC char * 1807S_skipspace_flags(pTHX_ char *s, U32 flags) 1808{ 1809 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; 1810 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 1811 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s)) 1812 s++; 1813 } else { 1814 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); 1815 PL_bufptr = s; 1816 lex_read_space(flags | LEX_KEEP_PREVIOUS | 1817 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ? 1818 LEX_NO_NEXT_CHUNK : 0)); 1819 s = PL_bufptr; 1820 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; 1821 if (PL_linestart > PL_bufptr) 1822 PL_bufptr = PL_linestart; 1823 return s; 1824 } 1825 return s; 1826} 1827 1828/* 1829 * S_check_uni 1830 * Check the unary operators to ensure there's no ambiguity in how they're 1831 * used. An ambiguous piece of code would be: 1832 * rand + 5 1833 * This doesn't mean rand() + 5. Because rand() is a unary operator, 1834 * the +5 is its argument. 1835 */ 1836 1837STATIC void 1838S_check_uni(pTHX) 1839{ 1840 const char *s; 1841 const char *t; 1842 1843 if (PL_oldoldbufptr != PL_last_uni) 1844 return; 1845 while (isSPACE(*PL_last_uni)) 1846 PL_last_uni++; 1847 s = PL_last_uni; 1848 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-') 1849 s += UTF ? UTF8SKIP(s) : 1; 1850 if ((t = strchr(s, '(')) && t < PL_bufptr) 1851 return; 1852 1853 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 1854 "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous", 1855 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni)); 1856} 1857 1858/* 1859 * LOP : macro to build a list operator. Its behaviour has been replaced 1860 * with a subroutine, S_lop() for which LOP is just another name. 1861 */ 1862 1863#define LOP(f,x) return lop(f,x,s) 1864 1865/* 1866 * S_lop 1867 * Build a list operator (or something that might be one). The rules: 1868 * - if we have a next token, then it's a list operator (no parens) for 1869 * which the next token has already been parsed; e.g., 1870 * sort foo @args 1871 * sort foo (@args) 1872 * - if the next thing is an opening paren, then it's a function 1873 * - else it's a list operator 1874 */ 1875 1876STATIC I32 1877S_lop(pTHX_ I32 f, int x, char *s) 1878{ 1879 PERL_ARGS_ASSERT_LOP; 1880 1881 pl_yylval.ival = f; 1882 CLINE; 1883 PL_bufptr = s; 1884 PL_last_lop = PL_oldbufptr; 1885 PL_last_lop_op = (OPCODE)f; 1886 if (PL_nexttoke) 1887 goto lstop; 1888 PL_expect = x; 1889 if (*s == '(') 1890 return REPORT(FUNC); 1891 s = skipspace(s); 1892 if (*s == '(') 1893 return REPORT(FUNC); 1894 else { 1895 lstop: 1896 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 1897 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 1898 return REPORT(LSTOP); 1899 } 1900} 1901 1902/* 1903 * S_force_next 1904 * When the lexer realizes it knows the next token (for instance, 1905 * it is reordering tokens for the parser) then it can call S_force_next 1906 * to know what token to return the next time the lexer is called. Caller 1907 * will need to set PL_nextval[] and possibly PL_expect to ensure 1908 * the lexer handles the token correctly. 1909 */ 1910 1911STATIC void 1912S_force_next(pTHX_ I32 type) 1913{ 1914#ifdef DEBUGGING 1915 if (DEBUG_T_TEST) { 1916 PerlIO_printf(Perl_debug_log, "### forced token:\n"); 1917 tokereport(type, &NEXTVAL_NEXTTOKE); 1918 } 1919#endif 1920 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype)); 1921 PL_nexttype[PL_nexttoke] = type; 1922 PL_nexttoke++; 1923 if (PL_lex_state != LEX_KNOWNEXT) { 1924 PL_lex_defer = PL_lex_state; 1925 PL_lex_state = LEX_KNOWNEXT; 1926 } 1927} 1928 1929/* 1930 * S_postderef 1931 * 1932 * This subroutine handles postfix deref syntax after the arrow has already 1933 * been emitted. @* $* etc. are emitted as two separate token right here. 1934 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits 1935 * only the first, leaving yylex to find the next. 1936 */ 1937 1938static int 1939S_postderef(pTHX_ int const funny, char const next) 1940{ 1941 assert(funny == DOLSHARP || strchr("$@%&*", funny)); 1942 if (next == '*') { 1943 PL_expect = XOPERATOR; 1944 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 1945 assert('@' == funny || '$' == funny || DOLSHARP == funny); 1946 PL_lex_state = LEX_INTERPEND; 1947 force_next(POSTJOIN); 1948 } 1949 force_next(next); 1950 PL_bufptr+=2; 1951 } 1952 else { 1953 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL 1954 && !PL_lex_brackets) 1955 PL_lex_dojoin = 2; 1956 PL_expect = XOPERATOR; 1957 PL_bufptr++; 1958 } 1959 return funny; 1960} 1961 1962void 1963Perl_yyunlex(pTHX) 1964{ 1965 int yyc = PL_parser->yychar; 1966 if (yyc != YYEMPTY) { 1967 if (yyc) { 1968 NEXTVAL_NEXTTOKE = PL_parser->yylval; 1969 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) { 1970 PL_lex_allbrackets--; 1971 PL_lex_brackets--; 1972 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); 1973 } else if (yyc == '('/*)*/) { 1974 PL_lex_allbrackets--; 1975 yyc |= (2<<24); 1976 } 1977 force_next(yyc); 1978 } 1979 PL_parser->yychar = YYEMPTY; 1980 } 1981} 1982 1983STATIC SV * 1984S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) 1985{ 1986 SV * const sv = newSVpvn_utf8(start, len, 1987 !IN_BYTES 1988 && UTF 1989 && !is_invariant_string((const U8*)start, len) 1990 && is_utf8_string((const U8*)start, len)); 1991 return sv; 1992} 1993 1994/* 1995 * S_force_word 1996 * When the lexer knows the next thing is a word (for instance, it has 1997 * just seen -> and it knows that the next char is a word char, then 1998 * it calls S_force_word to stick the next word into the PL_nexttoke/val 1999 * lookahead. 2000 * 2001 * Arguments: 2002 * char *start : buffer position (must be within PL_linestr) 2003 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD) 2004 * int check_keyword : if true, Perl checks to make sure the word isn't 2005 * a keyword (do this if the word is a label, e.g. goto FOO) 2006 * int allow_pack : if true, : characters will also be allowed (require, 2007 * use, etc. do this) 2008 */ 2009 2010STATIC char * 2011S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) 2012{ 2013 char *s; 2014 STRLEN len; 2015 2016 PERL_ARGS_ASSERT_FORCE_WORD; 2017 2018 start = skipspace(start); 2019 s = start; 2020 if (isIDFIRST_lazy_if(s,UTF) 2021 || (allow_pack && *s == ':') ) 2022 { 2023 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); 2024 if (check_keyword) { 2025 char *s2 = PL_tokenbuf; 2026 STRLEN len2 = len; 2027 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6)) 2028 s2 += 6, len2 -= 6; 2029 if (keyword(s2, len2, 0)) 2030 return start; 2031 } 2032 if (token == METHOD) { 2033 s = skipspace(s); 2034 if (*s == '(') 2035 PL_expect = XTERM; 2036 else { 2037 PL_expect = XOPERATOR; 2038 } 2039 } 2040 NEXTVAL_NEXTTOKE.opval 2041 = (OP*)newSVOP(OP_CONST,0, 2042 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 2043 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 2044 force_next(token); 2045 } 2046 return s; 2047} 2048 2049/* 2050 * S_force_ident 2051 * Called when the lexer wants $foo *foo &foo etc, but the program 2052 * text only contains the "foo" portion. The first argument is a pointer 2053 * to the "foo", and the second argument is the type symbol to prefix. 2054 * Forces the next token to be a "WORD". 2055 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 2056 */ 2057 2058STATIC void 2059S_force_ident(pTHX_ const char *s, int kind) 2060{ 2061 PERL_ARGS_ASSERT_FORCE_IDENT; 2062 2063 if (s[0]) { 2064 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ 2065 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len, 2066 UTF ? SVf_UTF8 : 0)); 2067 NEXTVAL_NEXTTOKE.opval = o; 2068 force_next(WORD); 2069 if (kind) { 2070 o->op_private = OPpCONST_ENTERED; 2071 /* XXX see note in pp_entereval() for why we forgo typo 2072 warnings if the symbol must be introduced in an eval. 2073 GSAR 96-10-12 */ 2074 gv_fetchpvn_flags(s, len, 2075 (PL_in_eval ? GV_ADDMULTI 2076 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), 2077 kind == '$' ? SVt_PV : 2078 kind == '@' ? SVt_PVAV : 2079 kind == '%' ? SVt_PVHV : 2080 SVt_PVGV 2081 ); 2082 } 2083 } 2084} 2085 2086static void 2087S_force_ident_maybe_lex(pTHX_ char pit) 2088{ 2089 NEXTVAL_NEXTTOKE.ival = pit; 2090 force_next('p'); 2091} 2092 2093NV 2094Perl_str_to_version(pTHX_ SV *sv) 2095{ 2096 NV retval = 0.0; 2097 NV nshift = 1.0; 2098 STRLEN len; 2099 const char *start = SvPV_const(sv,len); 2100 const char * const end = start + len; 2101 const bool utf = SvUTF8(sv) ? TRUE : FALSE; 2102 2103 PERL_ARGS_ASSERT_STR_TO_VERSION; 2104 2105 while (start < end) { 2106 STRLEN skip; 2107 UV n; 2108 if (utf) 2109 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 2110 else { 2111 n = *(U8*)start; 2112 skip = 1; 2113 } 2114 retval += ((NV)n)/nshift; 2115 start += skip; 2116 nshift *= 1000; 2117 } 2118 return retval; 2119} 2120 2121/* 2122 * S_force_version 2123 * Forces the next token to be a version number. 2124 * If the next token appears to be an invalid version number, (e.g. "v2b"), 2125 * and if "guessing" is TRUE, then no new token is created (and the caller 2126 * must use an alternative parsing method). 2127 */ 2128 2129STATIC char * 2130S_force_version(pTHX_ char *s, int guessing) 2131{ 2132 OP *version = NULL; 2133 char *d; 2134 2135 PERL_ARGS_ASSERT_FORCE_VERSION; 2136 2137 s = skipspace(s); 2138 2139 d = s; 2140 if (*d == 'v') 2141 d++; 2142 if (isDIGIT(*d)) { 2143 while (isDIGIT(*d) || *d == '_' || *d == '.') 2144 d++; 2145 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { 2146 SV *ver; 2147 s = scan_num(s, &pl_yylval); 2148 version = pl_yylval.opval; 2149 ver = cSVOPx(version)->op_sv; 2150 if (SvPOK(ver) && !SvNIOK(ver)) { 2151 SvUPGRADE(ver, SVt_PVNV); 2152 SvNV_set(ver, str_to_version(ver)); 2153 SvNOK_on(ver); /* hint that it is a version */ 2154 } 2155 } 2156 else if (guessing) { 2157 return s; 2158 } 2159 } 2160 2161 /* NOTE: The parser sees the package name and the VERSION swapped */ 2162 NEXTVAL_NEXTTOKE.opval = version; 2163 force_next(WORD); 2164 2165 return s; 2166} 2167 2168/* 2169 * S_force_strict_version 2170 * Forces the next token to be a version number using strict syntax rules. 2171 */ 2172 2173STATIC char * 2174S_force_strict_version(pTHX_ char *s) 2175{ 2176 OP *version = NULL; 2177 const char *errstr = NULL; 2178 2179 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; 2180 2181 while (isSPACE(*s)) /* leading whitespace */ 2182 s++; 2183 2184 if (is_STRICT_VERSION(s,&errstr)) { 2185 SV *ver = newSV(0); 2186 s = (char *)scan_version(s, ver, 0); 2187 version = newSVOP(OP_CONST, 0, ver); 2188 } 2189 else if ((*s != ';' && *s != '{' && *s != '}' ) 2190 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' ))) 2191 { 2192 PL_bufptr = s; 2193 if (errstr) 2194 yyerror(errstr); /* version required */ 2195 return s; 2196 } 2197 2198 /* NOTE: The parser sees the package name and the VERSION swapped */ 2199 NEXTVAL_NEXTTOKE.opval = version; 2200 force_next(WORD); 2201 2202 return s; 2203} 2204 2205/* 2206 * S_tokeq 2207 * Tokenize a quoted string passed in as an SV. It finds the next 2208 * chunk, up to end of string or a backslash. It may make a new 2209 * SV containing that chunk (if HINT_NEW_STRING is on). It also 2210 * turns \\ into \. 2211 */ 2212 2213STATIC SV * 2214S_tokeq(pTHX_ SV *sv) 2215{ 2216 char *s; 2217 char *send; 2218 char *d; 2219 SV *pv = sv; 2220 2221 PERL_ARGS_ASSERT_TOKEQ; 2222 2223 assert (SvPOK(sv)); 2224 assert (SvLEN(sv)); 2225 assert (!SvIsCOW(sv)); 2226 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */ 2227 goto finish; 2228 s = SvPVX(sv); 2229 send = SvEND(sv); 2230 /* This is relying on the SV being "well formed" with a trailing '\0' */ 2231 while (s < send && !(*s == '\\' && s[1] == '\\')) 2232 s++; 2233 if (s == send) 2234 goto finish; 2235 d = s; 2236 if ( PL_hints & HINT_NEW_STRING ) { 2237 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv), 2238 SVs_TEMP | SvUTF8(sv)); 2239 } 2240 while (s < send) { 2241 if (*s == '\\') { 2242 if (s + 1 < send && (s[1] == '\\')) 2243 s++; /* all that, just for this */ 2244 } 2245 *d++ = *s++; 2246 } 2247 *d = '\0'; 2248 SvCUR_set(sv, d - SvPVX_const(sv)); 2249 finish: 2250 if ( PL_hints & HINT_NEW_STRING ) 2251 return new_constant(NULL, 0, "q", sv, pv, "q", 1); 2252 return sv; 2253} 2254 2255/* 2256 * Now come three functions related to double-quote context, 2257 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 2258 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 2259 * interact with PL_lex_state, and create fake ( ... ) argument lists 2260 * to handle functions and concatenation. 2261 * For example, 2262 * "foo\lbar" 2263 * is tokenised as 2264 * stringify ( const[foo] concat lcfirst ( const[bar] ) ) 2265 */ 2266 2267/* 2268 * S_sublex_start 2269 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST). 2270 * 2271 * Pattern matching will set PL_lex_op to the pattern-matching op to 2272 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise). 2273 * 2274 * OP_CONST and OP_READLINE are easy--just make the new op and return. 2275 * 2276 * Everything else becomes a FUNC. 2277 * 2278 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we 2279 * had an OP_CONST or OP_READLINE). This just sets us up for a 2280 * call to S_sublex_push(). 2281 */ 2282 2283STATIC I32 2284S_sublex_start(pTHX) 2285{ 2286 const I32 op_type = pl_yylval.ival; 2287 2288 if (op_type == OP_NULL) { 2289 pl_yylval.opval = PL_lex_op; 2290 PL_lex_op = NULL; 2291 return THING; 2292 } 2293 if (op_type == OP_CONST) { 2294 SV *sv = PL_lex_stuff; 2295 PL_lex_stuff = NULL; 2296 sv = tokeq(sv); 2297 2298 if (SvTYPE(sv) == SVt_PVIV) { 2299 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 2300 STRLEN len; 2301 const char * const p = SvPV_const(sv, len); 2302 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); 2303 SvREFCNT_dec(sv); 2304 sv = nsv; 2305 } 2306 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv); 2307 return THING; 2308 } 2309 2310 PL_sublex_info.super_state = PL_lex_state; 2311 PL_sublex_info.sub_inwhat = (U16)op_type; 2312 PL_sublex_info.sub_op = PL_lex_op; 2313 PL_lex_state = LEX_INTERPPUSH; 2314 2315 PL_expect = XTERM; 2316 if (PL_lex_op) { 2317 pl_yylval.opval = PL_lex_op; 2318 PL_lex_op = NULL; 2319 return PMFUNC; 2320 } 2321 else 2322 return FUNC; 2323} 2324 2325/* 2326 * S_sublex_push 2327 * Create a new scope to save the lexing state. The scope will be 2328 * ended in S_sublex_done. Returns a '(', starting the function arguments 2329 * to the uc, lc, etc. found before. 2330 * Sets PL_lex_state to LEX_INTERPCONCAT. 2331 */ 2332 2333STATIC I32 2334S_sublex_push(pTHX) 2335{ 2336 LEXSHARED *shared; 2337 const bool is_heredoc = PL_multi_close == '<'; 2338 ENTER; 2339 2340 PL_lex_state = PL_sublex_info.super_state; 2341 SAVEI8(PL_lex_dojoin); 2342 SAVEI32(PL_lex_brackets); 2343 SAVEI32(PL_lex_allbrackets); 2344 SAVEI32(PL_lex_formbrack); 2345 SAVEI8(PL_lex_fakeeof); 2346 SAVEI32(PL_lex_casemods); 2347 SAVEI32(PL_lex_starts); 2348 SAVEI8(PL_lex_state); 2349 SAVEI8(PL_lex_defer); 2350 SAVESPTR(PL_lex_repl); 2351 SAVEVPTR(PL_lex_inpat); 2352 SAVEI16(PL_lex_inwhat); 2353 if (is_heredoc) 2354 { 2355 SAVECOPLINE(PL_curcop); 2356 SAVEI32(PL_multi_end); 2357 SAVEI32(PL_parser->herelines); 2358 PL_parser->herelines = 0; 2359 } 2360 SAVEI8(PL_multi_close); 2361 SAVEPPTR(PL_bufptr); 2362 SAVEPPTR(PL_bufend); 2363 SAVEPPTR(PL_oldbufptr); 2364 SAVEPPTR(PL_oldoldbufptr); 2365 SAVEPPTR(PL_last_lop); 2366 SAVEPPTR(PL_last_uni); 2367 SAVEPPTR(PL_linestart); 2368 SAVESPTR(PL_linestr); 2369 SAVEGENERICPV(PL_lex_brackstack); 2370 SAVEGENERICPV(PL_lex_casestack); 2371 SAVEGENERICPV(PL_parser->lex_shared); 2372 SAVEBOOL(PL_parser->lex_re_reparsing); 2373 SAVEI32(PL_copline); 2374 2375 /* The here-doc parser needs to be able to peek into outer lexing 2376 scopes to find the body of the here-doc. So we put PL_linestr and 2377 PL_bufptr into lex_shared, to ���share��� those values. 2378 */ 2379 PL_parser->lex_shared->ls_linestr = PL_linestr; 2380 PL_parser->lex_shared->ls_bufptr = PL_bufptr; 2381 2382 PL_linestr = PL_lex_stuff; 2383 PL_lex_repl = PL_sublex_info.repl; 2384 PL_lex_stuff = NULL; 2385 PL_sublex_info.repl = NULL; 2386 2387 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets 2388 set for an inner quote-like operator and then an error causes scope- 2389 popping. We must not have a PL_lex_stuff value left dangling, as 2390 that breaks assumptions elsewhere. See bug #123617. */ 2391 SAVEGENERICSV(PL_lex_stuff); 2392 SAVEGENERICSV(PL_sublex_info.repl); 2393 2394 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 2395 = SvPVX(PL_linestr); 2396 PL_bufend += SvCUR(PL_linestr); 2397 PL_last_lop = PL_last_uni = NULL; 2398 SAVEFREESV(PL_linestr); 2399 if (PL_lex_repl) SAVEFREESV(PL_lex_repl); 2400 2401 PL_lex_dojoin = FALSE; 2402 PL_lex_brackets = PL_lex_formbrack = 0; 2403 PL_lex_allbrackets = 0; 2404 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2405 Newx(PL_lex_brackstack, 120, char); 2406 Newx(PL_lex_casestack, 12, char); 2407 PL_lex_casemods = 0; 2408 *PL_lex_casestack = '\0'; 2409 PL_lex_starts = 0; 2410 PL_lex_state = LEX_INTERPCONCAT; 2411 if (is_heredoc) 2412 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 2413 PL_copline = NOLINE; 2414 2415 Newxz(shared, 1, LEXSHARED); 2416 shared->ls_prev = PL_parser->lex_shared; 2417 PL_parser->lex_shared = shared; 2418 2419 PL_lex_inwhat = PL_sublex_info.sub_inwhat; 2420 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; 2421 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 2422 PL_lex_inpat = PL_sublex_info.sub_op; 2423 else 2424 PL_lex_inpat = NULL; 2425 2426 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); 2427 PL_in_eval &= ~EVAL_RE_REPARSING; 2428 2429 return '('; 2430} 2431 2432/* 2433 * S_sublex_done 2434 * Restores lexer state after a S_sublex_push. 2435 */ 2436 2437STATIC I32 2438S_sublex_done(pTHX) 2439{ 2440 if (!PL_lex_starts++) { 2441 SV * const sv = newSVpvs(""); 2442 if (SvUTF8(PL_linestr)) 2443 SvUTF8_on(sv); 2444 PL_expect = XOPERATOR; 2445 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 2446 return THING; 2447 } 2448 2449 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 2450 PL_lex_state = LEX_INTERPCASEMOD; 2451 return yylex(); 2452 } 2453 2454 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 2455 assert(PL_lex_inwhat != OP_TRANSR); 2456 if (PL_lex_repl) { 2457 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS); 2458 PL_linestr = PL_lex_repl; 2459 PL_lex_inpat = 0; 2460 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 2461 PL_bufend += SvCUR(PL_linestr); 2462 PL_last_lop = PL_last_uni = NULL; 2463 PL_lex_dojoin = FALSE; 2464 PL_lex_brackets = 0; 2465 PL_lex_allbrackets = 0; 2466 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2467 PL_lex_casemods = 0; 2468 *PL_lex_casestack = '\0'; 2469 PL_lex_starts = 0; 2470 if (SvEVALED(PL_lex_repl)) { 2471 PL_lex_state = LEX_INTERPNORMAL; 2472 PL_lex_starts++; 2473 /* we don't clear PL_lex_repl here, so that we can check later 2474 whether this is an evalled subst; that means we rely on the 2475 logic to ensure sublex_done() is called again only via the 2476 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 2477 } 2478 else { 2479 PL_lex_state = LEX_INTERPCONCAT; 2480 PL_lex_repl = NULL; 2481 } 2482 if (SvTYPE(PL_linestr) >= SVt_PVNV) { 2483 CopLINE(PL_curcop) += 2484 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow 2485 + PL_parser->herelines; 2486 PL_parser->herelines = 0; 2487 } 2488 return '/'; 2489 } 2490 else { 2491 const line_t l = CopLINE(PL_curcop); 2492 LEAVE; 2493 if (PL_multi_close == '<') 2494 PL_parser->herelines += l - PL_multi_end; 2495 PL_bufend = SvPVX(PL_linestr); 2496 PL_bufend += SvCUR(PL_linestr); 2497 PL_expect = XOPERATOR; 2498 return ')'; 2499 } 2500} 2501 2502PERL_STATIC_INLINE SV* 2503S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) 2504{ 2505 /* <s> points to first character of interior of \N{}, <e> to one beyond the 2506 * interior, hence to the "}". Finds what the name resolves to, returning 2507 * an SV* containing it; NULL if no valid one found */ 2508 2509 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0); 2510 2511 HV * table; 2512 SV **cvp; 2513 SV *cv; 2514 SV *rv; 2515 HV *stash; 2516 const U8* first_bad_char_loc; 2517 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ 2518 2519 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; 2520 2521 if (!SvCUR(res)) { 2522 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), 2523 "Unknown charname '' is deprecated"); 2524 return res; 2525 } 2526 2527 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, 2528 e - backslash_ptr, 2529 &first_bad_char_loc)) 2530 { 2531 /* If warnings are on, this will print a more detailed analysis of what 2532 * is wrong than the error message below */ 2533 utf8n_to_uvchr(first_bad_char_loc, 2534 e - ((char *) first_bad_char_loc), 2535 NULL, 0); 2536 2537 /* We deliberately don't try to print the malformed character, which 2538 * might not print very well; it also may be just the first of many 2539 * malformations, so don't print what comes after it */ 2540 yyerror_pv(Perl_form(aTHX_ 2541 "Malformed UTF-8 character immediately after '%.*s'", 2542 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr), 2543 SVf_UTF8); 2544 return NULL; 2545 } 2546 2547 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, 2548 /* include the <}> */ 2549 e - backslash_ptr + 1); 2550 if (! SvPOK(res)) { 2551 SvREFCNT_dec_NN(res); 2552 return NULL; 2553 } 2554 2555 /* See if the charnames handler is the Perl core's, and if so, we can skip 2556 * the validation needed for a user-supplied one, as Perl's does its own 2557 * validation. */ 2558 table = GvHV(PL_hintgv); /* ^H */ 2559 cvp = hv_fetchs(table, "charnames", FALSE); 2560 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv), 2561 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) 2562 { 2563 const char * const name = HvNAME(stash); 2564 if (HvNAMELEN(stash) == sizeof("_charnames")-1 2565 && strEQ(name, "_charnames")) { 2566 return res; 2567 } 2568 } 2569 2570 /* Here, it isn't Perl's charname handler. We can't rely on a 2571 * user-supplied handler to validate the input name. For non-ut8 input, 2572 * look to see that the first character is legal. Then loop through the 2573 * rest checking that each is a continuation */ 2574 2575 /* This code makes the reasonable assumption that the only Latin1-range 2576 * characters that begin a character name alias are alphabetic, otherwise 2577 * would have to create a isCHARNAME_BEGIN macro */ 2578 2579 if (! UTF) { 2580 if (! isALPHAU(*s)) { 2581 goto bad_charname; 2582 } 2583 s++; 2584 while (s < e) { 2585 if (! isCHARNAME_CONT(*s)) { 2586 goto bad_charname; 2587 } 2588 if (*s == ' ' && *(s-1) == ' ') { 2589 goto multi_spaces; 2590 } 2591 if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) { 2592 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2593 "NO-BREAK SPACE in a charnames " 2594 "alias definition is deprecated"); 2595 } 2596 s++; 2597 } 2598 } 2599 else { 2600 /* Similarly for utf8. For invariants can check directly; for other 2601 * Latin1, can calculate their code point and check; otherwise use a 2602 * swash */ 2603 if (UTF8_IS_INVARIANT(*s)) { 2604 if (! isALPHAU(*s)) { 2605 goto bad_charname; 2606 } 2607 s++; 2608 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2609 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) { 2610 goto bad_charname; 2611 } 2612 s += 2; 2613 } 2614 else { 2615 if (! PL_utf8_charname_begin) { 2616 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; 2617 PL_utf8_charname_begin = _core_swash_init("utf8", 2618 "_Perl_Charname_Begin", 2619 &PL_sv_undef, 2620 1, 0, NULL, &flags); 2621 } 2622 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) { 2623 goto bad_charname; 2624 } 2625 s += UTF8SKIP(s); 2626 } 2627 2628 while (s < e) { 2629 if (UTF8_IS_INVARIANT(*s)) { 2630 if (! isCHARNAME_CONT(*s)) { 2631 goto bad_charname; 2632 } 2633 if (*s == ' ' && *(s-1) == ' ') { 2634 goto multi_spaces; 2635 } 2636 s++; 2637 } 2638 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2639 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) 2640 { 2641 goto bad_charname; 2642 } 2643 if (*s == *NBSP_UTF8 2644 && *(s+1) == *(NBSP_UTF8+1) 2645 && ckWARN_d(WARN_DEPRECATED)) 2646 { 2647 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 2648 "NO-BREAK SPACE in a charnames " 2649 "alias definition is deprecated"); 2650 } 2651 s += 2; 2652 } 2653 else { 2654 if (! PL_utf8_charname_continue) { 2655 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; 2656 PL_utf8_charname_continue = _core_swash_init("utf8", 2657 "_Perl_Charname_Continue", 2658 &PL_sv_undef, 2659 1, 0, NULL, &flags); 2660 } 2661 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) { 2662 goto bad_charname; 2663 } 2664 s += UTF8SKIP(s); 2665 } 2666 } 2667 } 2668 if (*(s-1) == ' ') { 2669 yyerror_pv( 2670 Perl_form(aTHX_ 2671 "charnames alias definitions may not contain trailing " 2672 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s", 2673 (int)(s - backslash_ptr + 1), backslash_ptr, 2674 (int)(e - s + 1), s + 1 2675 ), 2676 UTF ? SVf_UTF8 : 0); 2677 return NULL; 2678 } 2679 2680 if (SvUTF8(res)) { /* Don't accept malformed input */ 2681 const U8* first_bad_char_loc; 2682 STRLEN len; 2683 const char* const str = SvPV_const(res, len); 2684 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) { 2685 /* If warnings are on, this will print a more detailed analysis of 2686 * what is wrong than the error message below */ 2687 utf8n_to_uvchr(first_bad_char_loc, 2688 (char *) first_bad_char_loc - str, 2689 NULL, 0); 2690 2691 /* We deliberately don't try to print the malformed character, 2692 * which might not print very well; it also may be just the first 2693 * of many malformations, so don't print what comes after it */ 2694 yyerror_pv( 2695 Perl_form(aTHX_ 2696 "Malformed UTF-8 returned by %.*s immediately after '%.*s'", 2697 (int) (e - backslash_ptr + 1), backslash_ptr, 2698 (int) ((char *) first_bad_char_loc - str), str 2699 ), 2700 SVf_UTF8); 2701 return NULL; 2702 } 2703 } 2704 2705 return res; 2706 2707 bad_charname: { 2708 2709 /* The final %.*s makes sure that should the trailing NUL be missing 2710 * that this print won't run off the end of the string */ 2711 yyerror_pv( 2712 Perl_form(aTHX_ 2713 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", 2714 (int)(s - backslash_ptr + 1), backslash_ptr, 2715 (int)(e - s + 1), s + 1 2716 ), 2717 UTF ? SVf_UTF8 : 0); 2718 return NULL; 2719 } 2720 2721 multi_spaces: 2722 yyerror_pv( 2723 Perl_form(aTHX_ 2724 "charnames alias definitions may not contain a sequence of " 2725 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s", 2726 (int)(s - backslash_ptr + 1), backslash_ptr, 2727 (int)(e - s + 1), s + 1 2728 ), 2729 UTF ? SVf_UTF8 : 0); 2730 return NULL; 2731} 2732 2733/* 2734 scan_const 2735 2736 Extracts the next constant part of a pattern, double-quoted string, 2737 or transliteration. This is terrifying code. 2738 2739 For example, in parsing the double-quoted string "ab\x63$d", it would 2740 stop at the '$' and return an OP_CONST containing 'abc'. 2741 2742 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's 2743 processing a pattern (PL_lex_inpat is true), a transliteration 2744 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string. 2745 2746 Returns a pointer to the character scanned up to. If this is 2747 advanced from the start pointer supplied (i.e. if anything was 2748 successfully parsed), will leave an OP_CONST for the substring scanned 2749 in pl_yylval. Caller must intuit reason for not parsing further 2750 by looking at the next characters herself. 2751 2752 In patterns: 2753 expand: 2754 \N{FOO} => \N{U+hex_for_character_FOO} 2755 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) 2756 2757 pass through: 2758 all other \-char, including \N and \N{ apart from \N{ABC} 2759 2760 stops on: 2761 @ and $ where it appears to be a var, but not for $ as tail anchor 2762 \l \L \u \U \Q \E 2763 (?{ or (??{ 2764 2765 In transliterations: 2766 characters are VERY literal, except for - not at the start or end 2767 of the string, which indicates a range. If the range is in bytes, 2768 scan_const expands the range to the full set of intermediate 2769 characters. If the range is in utf8, the hyphen is replaced with 2770 a certain range mark which will be handled by pmtrans() in op.c. 2771 2772 In double-quoted strings: 2773 backslashes: 2774 double-quoted style: \r and \n 2775 constants: \x31, etc. 2776 deprecated backrefs: \1 (in substitution replacements) 2777 case and quoting: \U \Q \E 2778 stops on @ and $ 2779 2780 scan_const does *not* construct ops to handle interpolated strings. 2781 It stops processing as soon as it finds an embedded $ or @ variable 2782 and leaves it to the caller to work out what's going on. 2783 2784 embedded arrays (whether in pattern or not) could be: 2785 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-. 2786 2787 $ in double-quoted strings must be the symbol of an embedded scalar. 2788 2789 $ in pattern could be $foo or could be tail anchor. Assumption: 2790 it's a tail anchor if $ is the last thing in the string, or if it's 2791 followed by one of "()| \r\n\t" 2792 2793 \1 (backreferences) are turned into $1 in substitutions 2794 2795 The structure of the code is 2796 while (there's a character to process) { 2797 handle transliteration ranges 2798 skip regexp comments /(?#comment)/ and codes /(?{code})/ 2799 skip #-initiated comments in //x patterns 2800 check for embedded arrays 2801 check for embedded scalars 2802 if (backslash) { 2803 deprecate \1 in substitution replacements 2804 handle string-changing backslashes \l \U \Q \E, etc. 2805 switch (what was escaped) { 2806 handle \- in a transliteration (becomes a literal -) 2807 if a pattern and not \N{, go treat as regular character 2808 handle \132 (octal characters) 2809 handle \x15 and \x{1234} (hex characters) 2810 handle \N{name} (named characters, also \N{3,5} in a pattern) 2811 handle \cV (control characters) 2812 handle printf-style backslashes (\f, \r, \n, etc) 2813 } (end switch) 2814 continue 2815 } (end if backslash) 2816 handle regular character 2817 } (end while character to read) 2818 2819*/ 2820 2821STATIC char * 2822S_scan_const(pTHX_ char *start) 2823{ 2824 char *send = PL_bufend; /* end of the constant */ 2825 SV *sv = newSV(send - start); /* sv for the constant. See note below 2826 on sizing. */ 2827 char *s = start; /* start of the constant */ 2828 char *d = SvPVX(sv); /* destination for copies */ 2829 bool dorange = FALSE; /* are we in a translit range? */ 2830 bool didrange = FALSE; /* did we just finish a range? */ 2831 bool in_charclass = FALSE; /* within /[...]/ */ 2832 bool has_utf8 = FALSE; /* Output constant is UTF8 */ 2833 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be 2834 UTF8? But, this can show as true 2835 when the source isn't utf8, as for 2836 example when it is entirely composed 2837 of hex constants */ 2838 SV *res; /* result from charnames */ 2839 STRLEN offset_to_max; /* The offset in the output to where the range 2840 high-end character is temporarily placed */ 2841 2842 /* Note on sizing: The scanned constant is placed into sv, which is 2843 * initialized by newSV() assuming one byte of output for every byte of 2844 * input. This routine expects newSV() to allocate an extra byte for a 2845 * trailing NUL, which this routine will append if it gets to the end of 2846 * the input. There may be more bytes of input than output (eg., \N{LATIN 2847 * CAPITAL LETTER A}), or more output than input if the constant ends up 2848 * recoded to utf8, but each time a construct is found that might increase 2849 * the needed size, SvGROW() is called. Its size parameter each time is 2850 * based on the best guess estimate at the time, namely the length used so 2851 * far, plus the length the current construct will occupy, plus room for 2852 * the trailing NUL, plus one byte for every input byte still unscanned */ 2853 2854 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses 2855 before set */ 2856#ifdef EBCDIC 2857 int backslash_N = 0; /* ? was the character from \N{} */ 2858 int non_portable_endpoint = 0; /* ? In a range is an endpoint 2859 platform-specific like \x65 */ 2860#endif 2861 2862 PERL_ARGS_ASSERT_SCAN_CONST; 2863 2864 assert(PL_lex_inwhat != OP_TRANSR); 2865 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 2866 /* If we are doing a trans and we know we want UTF8 set expectation */ 2867 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF); 2868 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 2869 } 2870 2871 /* Protect sv from errors and fatal warnings. */ 2872 ENTER_with_name("scan_const"); 2873 SAVEFREESV(sv); 2874 2875 while (s < send 2876 || dorange /* Handle tr/// range at right edge of input */ 2877 ) { 2878 2879 /* get transliterations out of the way (they're most literal) */ 2880 if (PL_lex_inwhat == OP_TRANS) { 2881 2882 /* But there isn't any special handling necessary unless there is a 2883 * range, so for most cases we just drop down and handle the value 2884 * as any other. There are two exceptions. 2885 * 2886 * 1. A minus sign indicates that we are actually going to have 2887 * a range. In this case, skip the '-', set a flag, then drop 2888 * down to handle what should be the end range value. 2889 * 2. After we've handled that value, the next time through, that 2890 * flag is set and we fix up the range. 2891 * 2892 * Ranges entirely within Latin1 are expanded out entirely, in 2893 * order to avoid the significant overhead of making a swash. 2894 * Ranges that extend above Latin1 have to have a swash, so there 2895 * is no advantage to abbreviate them here, so they are stored here 2896 * as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies a 2897 * hyphen without any possible ambiguity. On EBCDIC machines, if 2898 * the range is expressed as Unicode, the Latin1 portion is 2899 * expanded out even if the entire range extends above Latin1. 2900 * This is because each code point in it has to be processed here 2901 * individually to get its native translation */ 2902 2903 if (! dorange) { 2904 2905 /* Here, we don't think we're in a range. If we've processed 2906 * at least one character, then see if this next one is a '-', 2907 * indicating the previous one was the start of a range. But 2908 * don't bother if we're too close to the end for the minus to 2909 * mean that. */ 2910 if (*s != '-' || s >= send - 1 || s == start) { 2911 2912 /* A regular character. Process like any other, but first 2913 * clear any flags */ 2914 didrange = FALSE; 2915 dorange = FALSE; 2916#ifdef EBCDIC 2917 non_portable_endpoint = 0; 2918 backslash_N = 0; 2919#endif 2920 /* Drops down to generic code to process current byte */ 2921 } 2922 else { 2923 if (didrange) { /* Something like y/A-C-Z// */ 2924 Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); 2925 } 2926 2927 dorange = TRUE; 2928 2929 s++; /* Skip past the minus */ 2930 2931 /* d now points to where the end-range character will be 2932 * placed. Save it so won't have to go finding it later, 2933 * and drop down to get that character. (Actually we 2934 * instead save the offset, to handle the case where a 2935 * realloc in the meantime could change the actual 2936 * pointer). We'll finish processing the range the next 2937 * time through the loop */ 2938 offset_to_max = d - SvPVX_const(sv); 2939 } 2940 } /* End of not a range */ 2941 else { 2942 /* Here we have parsed a range. Now must handle it. At this 2943 * point: 2944 * 'sv' is a SV* that contains the output string we are 2945 * constructing. The final two characters in that string 2946 * are the range start and range end, in order. 2947 * 'd' points to just beyond the range end in the 'sv' string, 2948 * where we would next place something 2949 * 'offset_to_max' is the offset in 'sv' at which the character 2950 * before 'd' begins. 2951 */ 2952 const char * max_ptr = SvPVX_const(sv) + offset_to_max; 2953 const char * min_ptr; 2954 IV range_min; 2955 IV range_max; /* last character in range */ 2956 STRLEN save_offset; 2957 STRLEN grow; 2958#ifndef EBCDIC /* Not meaningful except in EBCDIC, so initialize to false */ 2959 const bool convert_unicode = FALSE; 2960 const IV real_range_max = 0; 2961#else 2962 bool convert_unicode; 2963 IV real_range_max = 0; 2964#endif 2965 2966 /* Get the range-ends code point values. */ 2967 if (has_utf8) { 2968 /* We know the utf8 is valid, because we just constructed 2969 * it ourselves in previous loop iterations */ 2970 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1); 2971 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL); 2972 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL); 2973 } 2974 else { 2975 min_ptr = max_ptr - 1; 2976 range_min = * (U8*) min_ptr; 2977 range_max = * (U8*) max_ptr; 2978 } 2979 2980#ifdef EBCDIC 2981 /* On EBCDIC platforms, we may have to deal with portable 2982 * ranges. These happen if at least one range endpoint is a 2983 * Unicode value (\N{...}), or if the range is a subset of 2984 * [A-Z] or [a-z], and both ends are literal characters, 2985 * like 'A', and not like \x{C1} */ 2986 if ((convert_unicode 2987 = cBOOL(backslash_N) /* \N{} forces Unicode, hence 2988 portable range */ 2989 || ( ! non_portable_endpoint 2990 && (( isLOWER_A(range_min) && isLOWER_A(range_max)) 2991 || (isUPPER_A(range_min) && isUPPER_A(range_max)))) 2992 )) { 2993 2994 /* Special handling is needed for these portable ranges. 2995 * They are defined to all be in Unicode terms, which 2996 * include all Unicode code points between the end points. 2997 * Convert to Unicode to get the Unicode range. Later we 2998 * will convert each code point in the range back to 2999 * native. */ 3000 range_min = NATIVE_TO_UNI(range_min); 3001 range_max = NATIVE_TO_UNI(range_max); 3002 } 3003#endif 3004 3005 if (range_min > range_max) { 3006 if (convert_unicode) { 3007 /* Need to convert back to native for meaningful 3008 * messages for this platform */ 3009 range_min = UNI_TO_NATIVE(range_min); 3010 range_max = UNI_TO_NATIVE(range_max); 3011 } 3012 3013 /* Use the characters themselves for the error message if 3014 * ASCII printables; otherwise some visible representation 3015 * of them */ 3016 if (isPRINT_A(range_min) && isPRINT_A(range_max)) { 3017 Perl_croak(aTHX_ 3018 "Invalid range \"%c-%c\" in transliteration operator", 3019 (char)range_min, (char)range_max); 3020 } 3021 else if (convert_unicode) { 3022 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3023 Perl_croak(aTHX_ 3024 "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\"" 3025 " in transliteration operator", 3026 range_min, range_max); 3027 } 3028 else { 3029 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3030 Perl_croak(aTHX_ 3031 "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\"" 3032 " in transliteration operator", 3033 range_min, range_max); 3034 } 3035 } 3036 3037 if (has_utf8) { 3038 3039 /* We try to avoid creating a swash. If the upper end of 3040 * this range is below 256, this range won't force a swash; 3041 * otherwise it does force a swash, and as long as we have 3042 * to have one, we might as well not expand things out. 3043 * But if it's EBCDIC, we may have to look at each 3044 * character below 256 if we have to convert to/from 3045 * Unicode values */ 3046 if (range_max > 255 3047#ifdef EBCDIC 3048 && (range_min > 255 || ! convert_unicode) 3049#endif 3050 ) { 3051 /* Move the high character one byte to the right; then 3052 * insert between it and the range begin, an illegal 3053 * byte which serves to indicate this is a range (using 3054 * a '-' could be ambiguous). */ 3055 char *e = d++; 3056 while (e-- > max_ptr) { 3057 *(e + 1) = *e; 3058 } 3059 *(e + 1) = (char) ILLEGAL_UTF8_BYTE; 3060 goto range_done; 3061 } 3062 3063 /* Here, we're going to expand out the range. For EBCDIC 3064 * the range can extend above 255 (not so in ASCII), so 3065 * for EBCDIC, split it into the parts above and below 3066 * 255/256 */ 3067#ifdef EBCDIC 3068 if (range_max > 255) { 3069 real_range_max = range_max; 3070 range_max = 255; 3071 } 3072#endif 3073 } 3074 3075 /* Here we need to expand out the string to contain each 3076 * character in the range. Grow the output to handle this */ 3077 3078 save_offset = min_ptr - SvPVX_const(sv); 3079 3080 /* The base growth is the number of code points in the range */ 3081 grow = range_max - range_min + 1; 3082 if (has_utf8) { 3083 3084 /* But if the output is UTF-8, some of those characters may 3085 * need two bytes (since the maximum range value here is 3086 * 255, the max bytes per character is two). On ASCII 3087 * platforms, it's not much trouble to get an accurate 3088 * count of what's needed. But on EBCDIC, the ones that 3089 * need 2 bytes are scattered around, so just use a worst 3090 * case value instead of calculating for that platform. */ 3091#ifdef EBCDIC 3092 grow *= 2; 3093#else 3094 /* Only those above 127 require 2 bytes. This may be 3095 * everything in the range, or not */ 3096 if (range_min > 127) { 3097 grow *= 2; 3098 } 3099 else if (range_max > 127) { 3100 grow += range_max - 127; 3101 } 3102#endif 3103 } 3104 3105 /* Subtract 3 for the bytes that were already accounted for 3106 * (min, max, and the hyphen) */ 3107 SvGROW(sv, SvLEN(sv) + grow - 3); 3108 d = SvPVX(sv) + save_offset; /* refresh d after realloc */ 3109 3110 /* Here, we expand out the range. On ASCII platforms, the 3111 * compiler should optimize out the 'convert_unicode==TRUE' 3112 * portion of this */ 3113 if (convert_unicode) { 3114 IV i; 3115 3116 /* Recall that the min and max are now in Unicode terms, so 3117 * we have to convert each character to its native 3118 * equivalent */ 3119 if (has_utf8) { 3120 for (i = range_min; i <= range_max; i++) { 3121 append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i), 3122 (U8 **) &d); 3123 } 3124 } 3125 else { 3126 for (i = range_min; i <= range_max; i++) { 3127 *d++ = (char)LATIN1_TO_NATIVE((U8) i); 3128 } 3129 } 3130 } 3131 else { 3132 IV i; 3133 3134 /* Here, no conversions are necessary, which means that the 3135 * first character in the range is already in 'd' and 3136 * valid, so we can skip overwriting it */ 3137 if (has_utf8) { 3138 d += UTF8SKIP(d); 3139 for (i = range_min + 1; i <= range_max; i++) { 3140 append_utf8_from_native_byte((U8) i, (U8 **) &d); 3141 } 3142 } 3143 else { 3144 d++; 3145 for (i = range_min + 1; i <= range_max; i++) { 3146 *d++ = (char)i; 3147 } 3148 } 3149 } 3150 3151 /* (Compilers should optimize this out for non-EBCDIC). If the 3152 * original range extended above 255, add in that portion */ 3153 if (real_range_max) { 3154 *d++ = (char) UTF8_TWO_BYTE_HI(0x100); 3155 *d++ = (char) UTF8_TWO_BYTE_LO(0x100); 3156 if (real_range_max > 0x101) 3157 *d++ = (char) ILLEGAL_UTF8_BYTE; 3158 if (real_range_max > 0x100) 3159 d = (char*)uvchr_to_utf8((U8*)d, real_range_max); 3160 } 3161 3162 range_done: 3163 /* mark the range as done, and continue */ 3164 didrange = TRUE; 3165 dorange = FALSE; 3166#ifdef EBCDIC 3167 non_portable_endpoint = 0; 3168 backslash_N = 0; 3169#endif 3170 continue; 3171 } /* End of is a range */ 3172 } /* End of transliteration. Joins main code after these else's */ 3173 else if (*s == '[' && PL_lex_inpat && !in_charclass) { 3174 char *s1 = s-1; 3175 int esc = 0; 3176 while (s1 >= start && *s1-- == '\\') 3177 esc = !esc; 3178 if (!esc) 3179 in_charclass = TRUE; 3180 } 3181 3182 else if (*s == ']' && PL_lex_inpat && in_charclass) { 3183 char *s1 = s-1; 3184 int esc = 0; 3185 while (s1 >= start && *s1-- == '\\') 3186 esc = !esc; 3187 if (!esc) 3188 in_charclass = FALSE; 3189 } 3190 3191 /* skip for regexp comments /(?#comment)/, except for the last 3192 * char, which will be done separately. 3193 * Stop on (?{..}) and friends */ 3194 3195 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { 3196 if (s[2] == '#') { 3197 while (s+1 < send && *s != ')') 3198 *d++ = *s++; 3199 } 3200 else if (!PL_lex_casemods 3201 && ( s[2] == '{' /* This should match regcomp.c */ 3202 || (s[2] == '?' && s[3] == '{'))) 3203 { 3204 break; 3205 } 3206 } 3207 3208 /* likewise skip #-initiated comments in //x patterns */ 3209 else if (*s == '#' 3210 && PL_lex_inpat 3211 && !in_charclass 3212 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) 3213 { 3214 while (s+1 < send && *s != '\n') 3215 *d++ = *s++; 3216 } 3217 3218 /* no further processing of single-quoted regex */ 3219 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') 3220 goto default_action; 3221 3222 /* check for embedded arrays 3223 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 3224 */ 3225 else if (*s == '@' && s[1]) { 3226 if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1])) 3227 break; 3228 if (strchr(":'{$", s[1])) 3229 break; 3230 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) 3231 break; /* in regexp, neither @+ nor @- are interpolated */ 3232 } 3233 3234 /* check for embedded scalars. only stop if we're sure it's a 3235 variable. 3236 */ 3237 else if (*s == '$') { 3238 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 3239 break; 3240 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) { 3241 if (s[1] == '\\') { 3242 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 3243 "Possible unintended interpolation of $\\ in regex"); 3244 } 3245 break; /* in regexp, $ might be tail anchor */ 3246 } 3247 } 3248 3249 /* End of else if chain - OP_TRANS rejoin rest */ 3250 3251 /* backslashes */ 3252 if (*s == '\\' && s+1 < send) { 3253 char* e; /* Can be used for ending '}', etc. */ 3254 3255 s++; 3256 3257 /* warn on \1 - \9 in substitution replacements, but note that \11 3258 * is an octal; and \19 is \1 followed by '9' */ 3259 if (PL_lex_inwhat == OP_SUBST 3260 && !PL_lex_inpat 3261 && isDIGIT(*s) 3262 && *s != '0' 3263 && !isDIGIT(s[1])) 3264 { 3265 /* diag_listed_as: \%d better written as $%d */ 3266 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 3267 *--s = '$'; 3268 break; 3269 } 3270 3271 /* string-change backslash escapes */ 3272 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) { 3273 --s; 3274 break; 3275 } 3276 /* In a pattern, process \N, but skip any other backslash escapes. 3277 * This is because we don't want to translate an escape sequence 3278 * into a meta symbol and have the regex compiler use the meta 3279 * symbol meaning, e.g. \x{2E} would be confused with a dot. But 3280 * in spite of this, we do have to process \N here while the proper 3281 * charnames handler is in scope. See bugs #56444 and #62056. 3282 * 3283 * There is a complication because \N in a pattern may also stand 3284 * for 'match a non-nl', and not mean a charname, in which case its 3285 * processing should be deferred to the regex compiler. To be a 3286 * charname it must be followed immediately by a '{', and not look 3287 * like \N followed by a curly quantifier, i.e., not something like 3288 * \N{3,}. regcurly returns a boolean indicating if it is a legal 3289 * quantifier */ 3290 else if (PL_lex_inpat 3291 && (*s != 'N' 3292 || s[1] != '{' 3293 || regcurly(s + 1))) 3294 { 3295 *d++ = '\\'; 3296 goto default_action; 3297 } 3298 3299 switch (*s) { 3300 default: 3301 { 3302 if ((isALPHANUMERIC(*s))) 3303 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 3304 "Unrecognized escape \\%c passed through", 3305 *s); 3306 /* default action is to copy the quoted character */ 3307 goto default_action; 3308 } 3309 3310 /* eg. \132 indicates the octal constant 0132 */ 3311 case '0': case '1': case '2': case '3': 3312 case '4': case '5': case '6': case '7': 3313 { 3314 I32 flags = PERL_SCAN_SILENT_ILLDIGIT; 3315 STRLEN len = 3; 3316 uv = grok_oct(s, &len, &flags, NULL); 3317 s += len; 3318 if (len < 3 && s < send && isDIGIT(*s) 3319 && ckWARN(WARN_MISC)) 3320 { 3321 Perl_warner(aTHX_ packWARN(WARN_MISC), 3322 "%s", form_short_octal_warning(s, len)); 3323 } 3324 } 3325 goto NUM_ESCAPE_INSERT; 3326 3327 /* eg. \o{24} indicates the octal constant \024 */ 3328 case 'o': 3329 { 3330 const char* error; 3331 3332 bool valid = grok_bslash_o(&s, &uv, &error, 3333 TRUE, /* Output warning */ 3334 FALSE, /* Not strict */ 3335 TRUE, /* Output warnings for 3336 non-portables */ 3337 UTF); 3338 if (! valid) { 3339 yyerror(error); 3340 continue; 3341 } 3342 goto NUM_ESCAPE_INSERT; 3343 } 3344 3345 /* eg. \x24 indicates the hex constant 0x24 */ 3346 case 'x': 3347 { 3348 const char* error; 3349 3350 bool valid = grok_bslash_x(&s, &uv, &error, 3351 TRUE, /* Output warning */ 3352 FALSE, /* Not strict */ 3353 TRUE, /* Output warnings for 3354 non-portables */ 3355 UTF); 3356 if (! valid) { 3357 yyerror(error); 3358 continue; 3359 } 3360 } 3361 3362 NUM_ESCAPE_INSERT: 3363 /* Insert oct or hex escaped character. */ 3364 3365 /* Here uv is the ordinal of the next character being added */ 3366 if (UVCHR_IS_INVARIANT(uv)) { 3367 *d++ = (char) uv; 3368 } 3369 else { 3370 if (!has_utf8 && uv > 255) { 3371 /* Might need to recode whatever we have accumulated so 3372 * far if it contains any chars variant in utf8 or 3373 * utf-ebcdic. */ 3374 3375 SvCUR_set(sv, d - SvPVX_const(sv)); 3376 SvPOK_on(sv); 3377 *d = '\0'; 3378 /* See Note on sizing above. */ 3379 sv_utf8_upgrade_flags_grow( 3380 sv, 3381 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE 3382 /* Above-latin1 in string 3383 * implies no encoding */ 3384 |SV_UTF8_NO_ENCODING, 3385 UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1); 3386 d = SvPVX(sv) + SvCUR(sv); 3387 has_utf8 = TRUE; 3388 } 3389 3390 if (has_utf8) { 3391 /* Usually, there will already be enough room in 'sv' 3392 * since such escapes are likely longer than any UTF-8 3393 * sequence they can end up as. This isn't the case on 3394 * EBCDIC where \x{40000000} contains 12 bytes, and the 3395 * UTF-8 for it contains 14. And, we have to allow for 3396 * a trailing NUL. It probably can't happen on ASCII 3397 * platforms, but be safe */ 3398 const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv) 3399 + 1; 3400 if (UNLIKELY(needed > SvLEN(sv))) { 3401 SvCUR_set(sv, d - SvPVX_const(sv)); 3402 d = sv_grow(sv, needed) + SvCUR(sv); 3403 } 3404 3405 d = (char*)uvchr_to_utf8((U8*)d, uv); 3406 if (PL_lex_inwhat == OP_TRANS 3407 && PL_sublex_info.sub_op) 3408 { 3409 PL_sublex_info.sub_op->op_private |= 3410 (PL_lex_repl ? OPpTRANS_FROM_UTF 3411 : OPpTRANS_TO_UTF); 3412 } 3413 } 3414 else { 3415 *d++ = (char)uv; 3416 } 3417 } 3418#ifdef EBCDIC 3419 non_portable_endpoint++; 3420#endif 3421 continue; 3422 3423 case 'N': 3424 /* In a non-pattern \N must be like \N{U+0041}, or it can be a 3425 * named character, like \N{LATIN SMALL LETTER A}, or a named 3426 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND 3427 * GRAVE} (except y/// can't handle the latter, croaking). For 3428 * convenience all three forms are referred to as "named 3429 * characters" below. 3430 * 3431 * For patterns, \N also can mean to match a non-newline. Code 3432 * before this 'switch' statement should already have handled 3433 * this situation, and hence this code only has to deal with 3434 * the named character cases. 3435 * 3436 * For non-patterns, the named characters are converted to 3437 * their string equivalents. In patterns, named characters are 3438 * not converted to their ultimate forms for the same reasons 3439 * that other escapes aren't. Instead, they are converted to 3440 * the \N{U+...} form to get the value from the charnames that 3441 * is in effect right now, while preserving the fact that it 3442 * was a named character, so that the regex compiler knows 3443 * this. 3444 * 3445 * The structure of this section of code (besides checking for 3446 * errors and upgrading to utf8) is: 3447 * If the named character is of the form \N{U+...}, pass it 3448 * through if a pattern; otherwise convert the code point 3449 * to utf8 3450 * Otherwise must be some \N{NAME}: convert to 3451 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8 3452 * 3453 * Transliteration is an exception. The conversion to utf8 is 3454 * only done if the code point requires it to be representable. 3455 * 3456 * Here, 's' points to the 'N'; the test below is guaranteed to 3457 * succeed if we are being called on a pattern, as we already 3458 * know from a test above that the next character is a '{'. A 3459 * non-pattern \N must mean 'named character', which requires 3460 * braces */ 3461 s++; 3462 if (*s != '{') { 3463 yyerror("Missing braces on \\N{}"); 3464 continue; 3465 } 3466 s++; 3467 3468 /* If there is no matching '}', it is an error. */ 3469 if (! (e = strchr(s, '}'))) { 3470 if (! PL_lex_inpat) { 3471 yyerror("Missing right brace on \\N{}"); 3472 } else { 3473 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); 3474 } 3475 continue; 3476 } 3477 3478 /* Here it looks like a named character */ 3479 3480 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ 3481 s += 2; /* Skip to next char after the 'U+' */ 3482 if (PL_lex_inpat) { 3483 3484 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */ 3485 /* Check the syntax. */ 3486 const char *orig_s; 3487 orig_s = s - 5; 3488 if (!isXDIGIT(*s)) { 3489 bad_NU: 3490 yyerror( 3491 "Invalid hexadecimal number in \\N{U+...}" 3492 ); 3493 s = e + 1; 3494 continue; 3495 } 3496 while (++s < e) { 3497 if (isXDIGIT(*s)) 3498 continue; 3499 else if ((*s == '.' || *s == '_') 3500 && isXDIGIT(s[1])) 3501 continue; 3502 goto bad_NU; 3503 } 3504 3505 /* Pass everything through unchanged. 3506 * +1 is for the '}' */ 3507 Copy(orig_s, d, e - orig_s + 1, char); 3508 d += e - orig_s + 1; 3509 } 3510 else { /* Not a pattern: convert the hex to string */ 3511 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 3512 | PERL_SCAN_SILENT_ILLDIGIT 3513 | PERL_SCAN_DISALLOW_PREFIX; 3514 STRLEN len = e - s; 3515 uv = grok_hex(s, &len, &flags, NULL); 3516 if (len == 0 || (len != (STRLEN)(e - s))) 3517 goto bad_NU; 3518 3519 /* For non-tr///, if the destination is not in utf8, 3520 * unconditionally recode it to be so. This is 3521 * because \N{} implies Unicode semantics, and scalars 3522 * have to be in utf8 to guarantee those semantics. 3523 * tr/// doesn't care about Unicode rules, so no need 3524 * there to upgrade to UTF-8 for small enough code 3525 * points */ 3526 if (! has_utf8 && ( uv > 0xFF 3527 || PL_lex_inwhat != OP_TRANS)) 3528 { 3529 SvCUR_set(sv, d - SvPVX_const(sv)); 3530 SvPOK_on(sv); 3531 *d = '\0'; 3532 /* See Note on sizing above. */ 3533 sv_utf8_upgrade_flags_grow( 3534 sv, 3535 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3536 UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1); 3537 d = SvPVX(sv) + SvCUR(sv); 3538 has_utf8 = TRUE; 3539 } 3540 3541 /* Add the (Unicode) code point to the output. */ 3542 if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) { 3543 *d++ = (char) LATIN1_TO_NATIVE(uv); 3544 } 3545 else { 3546 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0); 3547 } 3548 } 3549 } 3550 else /* Here is \N{NAME} but not \N{U+...}. */ 3551 if ((res = get_and_check_backslash_N_name(s, e))) 3552 { 3553 STRLEN len; 3554 const char *str = SvPV_const(res, len); 3555 if (PL_lex_inpat) { 3556 3557 if (! len) { /* The name resolved to an empty string */ 3558 Copy("\\N{}", d, 4, char); 3559 d += 4; 3560 } 3561 else { 3562 /* In order to not lose information for the regex 3563 * compiler, pass the result in the specially made 3564 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are 3565 * the code points in hex of each character 3566 * returned by charnames */ 3567 3568 const char *str_end = str + len; 3569 const STRLEN off = d - SvPVX_const(sv); 3570 3571 if (! SvUTF8(res)) { 3572 /* For the non-UTF-8 case, we can determine the 3573 * exact length needed without having to parse 3574 * through the string. Each character takes up 3575 * 2 hex digits plus either a trailing dot or 3576 * the "}" */ 3577 const char initial_text[] = "\\N{U+"; 3578 const STRLEN initial_len = sizeof(initial_text) 3579 - 1; 3580 d = off + SvGROW(sv, off 3581 + 3 * len 3582 3583 /* +1 for trailing NUL */ 3584 + initial_len + 1 3585 3586 + (STRLEN)(send - e)); 3587 Copy(initial_text, d, initial_len, char); 3588 d += initial_len; 3589 while (str < str_end) { 3590 char hex_string[4]; 3591 int len = 3592 my_snprintf(hex_string, 3593 sizeof(hex_string), 3594 "%02X.", 3595 3596 /* The regex compiler is 3597 * expecting Unicode, not 3598 * native */ 3599 NATIVE_TO_LATIN1(*str)); 3600 PERL_MY_SNPRINTF_POST_GUARD(len, 3601 sizeof(hex_string)); 3602 Copy(hex_string, d, 3, char); 3603 d += 3; 3604 str++; 3605 } 3606 d--; /* Below, we will overwrite the final 3607 dot with a right brace */ 3608 } 3609 else { 3610 STRLEN char_length; /* cur char's byte length */ 3611 3612 /* and the number of bytes after this is 3613 * translated into hex digits */ 3614 STRLEN output_length; 3615 3616 /* 2 hex per byte; 2 chars for '\N'; 2 chars 3617 * for max('U+', '.'); and 1 for NUL */ 3618 char hex_string[2 * UTF8_MAXBYTES + 5]; 3619 3620 /* Get the first character of the result. */ 3621 U32 uv = utf8n_to_uvchr((U8 *) str, 3622 len, 3623 &char_length, 3624 UTF8_ALLOW_ANYUV); 3625 /* Convert first code point to Unicode hex, 3626 * including the boiler plate before it. */ 3627 output_length = 3628 my_snprintf(hex_string, sizeof(hex_string), 3629 "\\N{U+%X", 3630 (unsigned int) NATIVE_TO_UNI(uv)); 3631 3632 /* Make sure there is enough space to hold it */ 3633 d = off + SvGROW(sv, off 3634 + output_length 3635 + (STRLEN)(send - e) 3636 + 2); /* '}' + NUL */ 3637 /* And output it */ 3638 Copy(hex_string, d, output_length, char); 3639 d += output_length; 3640 3641 /* For each subsequent character, append dot and 3642 * its Unicode code point in hex */ 3643 while ((str += char_length) < str_end) { 3644 const STRLEN off = d - SvPVX_const(sv); 3645 U32 uv = utf8n_to_uvchr((U8 *) str, 3646 str_end - str, 3647 &char_length, 3648 UTF8_ALLOW_ANYUV); 3649 output_length = 3650 my_snprintf(hex_string, 3651 sizeof(hex_string), 3652 ".%X", 3653 (unsigned int) NATIVE_TO_UNI(uv)); 3654 3655 d = off + SvGROW(sv, off 3656 + output_length 3657 + (STRLEN)(send - e) 3658 + 2); /* '}' + NUL */ 3659 Copy(hex_string, d, output_length, char); 3660 d += output_length; 3661 } 3662 } 3663 3664 *d++ = '}'; /* Done. Add the trailing brace */ 3665 } 3666 } 3667 else { /* Here, not in a pattern. Convert the name to a 3668 * string. */ 3669 3670 if (PL_lex_inwhat == OP_TRANS) { 3671 str = SvPV_const(res, len); 3672 if (len > ((SvUTF8(res)) 3673 ? UTF8SKIP(str) 3674 : 1U)) 3675 { 3676 yyerror(Perl_form(aTHX_ 3677 "%.*s must not be a named sequence" 3678 " in transliteration operator", 3679 /* +1 to include the "}" */ 3680 (int) (e + 1 - start), start)); 3681 goto end_backslash_N; 3682 } 3683 } 3684 else if (! SvUTF8(res)) { 3685 /* Make sure \N{} return is UTF-8. This is because 3686 * \N{} implies Unicode semantics, and scalars have to 3687 * be in utf8 to guarantee those semantics; but not 3688 * needed in tr/// */ 3689 sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING); 3690 str = SvPV_const(res, len); 3691 } 3692 3693 /* Upgrade destination to be utf8 if this new 3694 * component is */ 3695 if (! has_utf8 && SvUTF8(res)) { 3696 SvCUR_set(sv, d - SvPVX_const(sv)); 3697 SvPOK_on(sv); 3698 *d = '\0'; 3699 /* See Note on sizing above. */ 3700 sv_utf8_upgrade_flags_grow(sv, 3701 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3702 len + (STRLEN)(send - s) + 1); 3703 d = SvPVX(sv) + SvCUR(sv); 3704 has_utf8 = TRUE; 3705 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ 3706 3707 /* See Note on sizing above. (NOTE: SvCUR() is not 3708 * set correctly here). */ 3709 const STRLEN off = d - SvPVX_const(sv); 3710 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1); 3711 } 3712 Copy(str, d, len, char); 3713 d += len; 3714 } 3715 3716 SvREFCNT_dec(res); 3717 3718 } /* End \N{NAME} */ 3719 3720 end_backslash_N: 3721#ifdef EBCDIC 3722 backslash_N++; /* \N{} is defined to be Unicode */ 3723#endif 3724 s = e + 1; /* Point to just after the '}' */ 3725 continue; 3726 3727 /* \c is a control character */ 3728 case 'c': 3729 s++; 3730 if (s < send) { 3731 *d++ = grok_bslash_c(*s++, 1); 3732 } 3733 else { 3734 yyerror("Missing control char name in \\c"); 3735 } 3736#ifdef EBCDIC 3737 non_portable_endpoint++; 3738#endif 3739 continue; 3740 3741 /* printf-style backslashes, formfeeds, newlines, etc */ 3742 case 'b': 3743 *d++ = '\b'; 3744 break; 3745 case 'n': 3746 *d++ = '\n'; 3747 break; 3748 case 'r': 3749 *d++ = '\r'; 3750 break; 3751 case 'f': 3752 *d++ = '\f'; 3753 break; 3754 case 't': 3755 *d++ = '\t'; 3756 break; 3757 case 'e': 3758 *d++ = ESC_NATIVE; 3759 break; 3760 case 'a': 3761 *d++ = '\a'; 3762 break; 3763 } /* end switch */ 3764 3765 s++; 3766 continue; 3767 } /* end if (backslash) */ 3768 3769 default_action: 3770 /* If we started with encoded form, or already know we want it, 3771 then encode the next character */ 3772 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) { 3773 STRLEN len = 1; 3774 3775 /* One might think that it is wasted effort in the case of the 3776 * source being utf8 (this_utf8 == TRUE) to take the next character 3777 * in the source, convert it to an unsigned value, and then convert 3778 * it back again. But the source has not been validated here. The 3779 * routine that does the conversion checks for errors like 3780 * malformed utf8 */ 3781 3782 const UV nextuv = (this_utf8) 3783 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) 3784 : (UV) ((U8) *s); 3785 const STRLEN need = UVCHR_SKIP(nextuv); 3786 if (!has_utf8) { 3787 SvCUR_set(sv, d - SvPVX_const(sv)); 3788 SvPOK_on(sv); 3789 *d = '\0'; 3790 /* See Note on sizing above. */ 3791 sv_utf8_upgrade_flags_grow(sv, 3792 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3793 need + (STRLEN)(send - s) + 1); 3794 d = SvPVX(sv) + SvCUR(sv); 3795 has_utf8 = TRUE; 3796 } else if (need > len) { 3797 /* encoded value larger than old, may need extra space (NOTE: 3798 * SvCUR() is not set correctly here). See Note on sizing 3799 * above. */ 3800 const STRLEN off = d - SvPVX_const(sv); 3801 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off; 3802 } 3803 s += len; 3804 3805 d = (char*)uvchr_to_utf8((U8*)d, nextuv); 3806 } 3807 else { 3808 *d++ = *s++; 3809 } 3810 } /* while loop to process each character */ 3811 3812 /* terminate the string and set up the sv */ 3813 *d = '\0'; 3814 SvCUR_set(sv, d - SvPVX_const(sv)); 3815 if (SvCUR(sv) >= SvLEN(sv)) 3816 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf 3817 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv)); 3818 3819 SvPOK_on(sv); 3820 if (IN_ENCODING && !has_utf8) { 3821 sv_recode_to_utf8(sv, _get_encoding()); 3822 if (SvUTF8(sv)) 3823 has_utf8 = TRUE; 3824 } 3825 if (has_utf8) { 3826 SvUTF8_on(sv); 3827 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { 3828 PL_sublex_info.sub_op->op_private |= 3829 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); 3830 } 3831 } 3832 3833 /* shrink the sv if we allocated more than we used */ 3834 if (SvCUR(sv) + 5 < SvLEN(sv)) { 3835 SvPV_shrink_to_cur(sv); 3836 } 3837 3838 /* return the substring (via pl_yylval) only if we parsed anything */ 3839 if (s > start) { 3840 char *s2 = start; 3841 for (; s2 < s; s2++) { 3842 if (*s2 == '\n') 3843 COPLINE_INC_WITH_HERELINES; 3844 } 3845 SvREFCNT_inc_simple_void_NN(sv); 3846 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) 3847 && ! PL_parser->lex_re_reparsing) 3848 { 3849 const char *const key = PL_lex_inpat ? "qr" : "q"; 3850 const STRLEN keylen = PL_lex_inpat ? 2 : 1; 3851 const char *type; 3852 STRLEN typelen; 3853 3854 if (PL_lex_inwhat == OP_TRANS) { 3855 type = "tr"; 3856 typelen = 2; 3857 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { 3858 type = "s"; 3859 typelen = 1; 3860 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { 3861 type = "q"; 3862 typelen = 1; 3863 } else { 3864 type = "qq"; 3865 typelen = 2; 3866 } 3867 3868 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, 3869 type, typelen); 3870 } 3871 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 3872 } 3873 LEAVE_with_name("scan_const"); 3874 return s; 3875} 3876 3877/* S_intuit_more 3878 * Returns TRUE if there's more to the expression (e.g., a subscript), 3879 * FALSE otherwise. 3880 * 3881 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 3882 * 3883 * ->[ and ->{ return TRUE 3884 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled 3885 * { and [ outside a pattern are always subscripts, so return TRUE 3886 * if we're outside a pattern and it's not { or [, then return FALSE 3887 * if we're in a pattern and the first char is a { 3888 * {4,5} (any digits around the comma) returns FALSE 3889 * if we're in a pattern and the first char is a [ 3890 * [] returns FALSE 3891 * [SOMETHING] has a funky algorithm to decide whether it's a 3892 * character class or not. It has to deal with things like 3893 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 3894 * anything else returns TRUE 3895 */ 3896 3897/* This is the one truly awful dwimmer necessary to conflate C and sed. */ 3898 3899STATIC int 3900S_intuit_more(pTHX_ char *s) 3901{ 3902 PERL_ARGS_ASSERT_INTUIT_MORE; 3903 3904 if (PL_lex_brackets) 3905 return TRUE; 3906 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) 3907 return TRUE; 3908 if (*s == '-' && s[1] == '>' 3909 && FEATURE_POSTDEREF_QQ_IS_ENABLED 3910 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*'))) 3911 ||(s[2] == '@' && strchr("*[{",s[3])) )) 3912 return TRUE; 3913 if (*s != '{' && *s != '[') 3914 return FALSE; 3915 if (!PL_lex_inpat) 3916 return TRUE; 3917 3918 /* In a pattern, so maybe we have {n,m}. */ 3919 if (*s == '{') { 3920 if (regcurly(s)) { 3921 return FALSE; 3922 } 3923 return TRUE; 3924 } 3925 3926 /* On the other hand, maybe we have a character class */ 3927 3928 s++; 3929 if (*s == ']' || *s == '^') 3930 return FALSE; 3931 else { 3932 /* this is terrifying, and it works */ 3933 int weight; 3934 char seen[256]; 3935 const char * const send = strchr(s,']'); 3936 unsigned char un_char, last_un_char; 3937 char tmpbuf[sizeof PL_tokenbuf * 4]; 3938 3939 if (!send) /* has to be an expression */ 3940 return TRUE; 3941 weight = 2; /* let's weigh the evidence */ 3942 3943 if (*s == '$') 3944 weight -= 3; 3945 else if (isDIGIT(*s)) { 3946 if (s[1] != ']') { 3947 if (isDIGIT(s[1]) && s[2] == ']') 3948 weight -= 10; 3949 } 3950 else 3951 weight -= 100; 3952 } 3953 Zero(seen,256,char); 3954 un_char = 255; 3955 for (; s < send; s++) { 3956 last_un_char = un_char; 3957 un_char = (unsigned char)*s; 3958 switch (*s) { 3959 case '@': 3960 case '&': 3961 case '$': 3962 weight -= seen[un_char] * 10; 3963 if (isWORDCHAR_lazy_if(s+1,UTF)) { 3964 int len; 3965 char *tmp = PL_bufend; 3966 PL_bufend = (char*)send; 3967 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); 3968 PL_bufend = tmp; 3969 len = (int)strlen(tmpbuf); 3970 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 3971 UTF ? SVf_UTF8 : 0, SVt_PV)) 3972 weight -= 100; 3973 else 3974 weight -= 10; 3975 } 3976 else if (*s == '$' 3977 && s[1] 3978 && strchr("[#!%*<>()-=",s[1])) 3979 { 3980 if (/*{*/ strchr("])} =",s[2])) 3981 weight -= 10; 3982 else 3983 weight -= 1; 3984 } 3985 break; 3986 case '\\': 3987 un_char = 254; 3988 if (s[1]) { 3989 if (strchr("wds]",s[1])) 3990 weight += 100; 3991 else if (seen[(U8)'\''] || seen[(U8)'"']) 3992 weight += 1; 3993 else if (strchr("rnftbxcav",s[1])) 3994 weight += 40; 3995 else if (isDIGIT(s[1])) { 3996 weight += 40; 3997 while (s[1] && isDIGIT(s[1])) 3998 s++; 3999 } 4000 } 4001 else 4002 weight += 100; 4003 break; 4004 case '-': 4005 if (s[1] == '\\') 4006 weight += 50; 4007 if (strchr("aA01! ",last_un_char)) 4008 weight += 30; 4009 if (strchr("zZ79~",s[1])) 4010 weight += 30; 4011 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) 4012 weight -= 5; /* cope with negative subscript */ 4013 break; 4014 default: 4015 if (!isWORDCHAR(last_un_char) 4016 && !(last_un_char == '$' || last_un_char == '@' 4017 || last_un_char == '&') 4018 && isALPHA(*s) && s[1] && isALPHA(s[1])) { 4019 char *d = s; 4020 while (isALPHA(*s)) 4021 s++; 4022 if (keyword(d, s - d, 0)) 4023 weight -= 150; 4024 } 4025 if (un_char == last_un_char + 1) 4026 weight += 5; 4027 weight -= seen[un_char]; 4028 break; 4029 } 4030 seen[un_char]++; 4031 } 4032 if (weight >= 0) /* probably a character class */ 4033 return FALSE; 4034 } 4035 4036 return TRUE; 4037} 4038 4039/* 4040 * S_intuit_method 4041 * 4042 * Does all the checking to disambiguate 4043 * foo bar 4044 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 4045 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args). 4046 * 4047 * First argument is the stuff after the first token, e.g. "bar". 4048 * 4049 * Not a method if foo is a filehandle. 4050 * Not a method if foo is a subroutine prototyped to take a filehandle. 4051 * Not a method if it's really "Foo $bar" 4052 * Method if it's "foo $bar" 4053 * Not a method if it's really "print foo $bar" 4054 * Method if it's really "foo package::" (interpreted as package->foo) 4055 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 4056 * Not a method if bar is a filehandle or package, but is quoted with 4057 * => 4058 */ 4059 4060STATIC int 4061S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) 4062{ 4063 char *s = start + (*start == '$'); 4064 char tmpbuf[sizeof PL_tokenbuf]; 4065 STRLEN len; 4066 GV* indirgv; 4067 /* Mustn't actually add anything to a symbol table. 4068 But also don't want to "initialise" any placeholder 4069 constants that might already be there into full 4070 blown PVGVs with attached PVCV. */ 4071 GV * const gv = 4072 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL; 4073 4074 PERL_ARGS_ASSERT_INTUIT_METHOD; 4075 4076 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) 4077 return 0; 4078 if (cv && SvPOK(cv)) { 4079 const char *proto = CvPROTO(cv); 4080 if (proto) { 4081 while (*proto && (isSPACE(*proto) || *proto == ';')) 4082 proto++; 4083 if (*proto == '*') 4084 return 0; 4085 } 4086 } 4087 4088 if (*start == '$') { 4089 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY 4090 || isUPPER(*PL_tokenbuf)) 4091 return 0; 4092 s = skipspace(s); 4093 PL_bufptr = start; 4094 PL_expect = XREF; 4095 return *s == '(' ? FUNCMETH : METHOD; 4096 } 4097 4098 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 4099 /* start is the beginning of the possible filehandle/object, 4100 * and s is the end of it 4101 * tmpbuf is a copy of it (but with single quotes as double colons) 4102 */ 4103 4104 if (!keyword(tmpbuf, len, 0)) { 4105 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 4106 len -= 2; 4107 tmpbuf[len] = '\0'; 4108 goto bare_package; 4109 } 4110 indirgv = gv_fetchpvn_flags(tmpbuf, len, 4111 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ), 4112 SVt_PVCV); 4113 if (indirgv && SvTYPE(indirgv) != SVt_NULL 4114 && (!isGV(indirgv) || GvCVu(indirgv))) 4115 return 0; 4116 /* filehandle or package name makes it a method */ 4117 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { 4118 s = skipspace(s); 4119 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 4120 return 0; /* no assumptions -- "=>" quotes bareword */ 4121 bare_package: 4122 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, 4123 S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); 4124 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; 4125 PL_expect = XTERM; 4126 force_next(WORD); 4127 PL_bufptr = s; 4128 return *s == '(' ? FUNCMETH : METHOD; 4129 } 4130 } 4131 return 0; 4132} 4133 4134/* Encoded script support. filter_add() effectively inserts a 4135 * 'pre-processing' function into the current source input stream. 4136 * Note that the filter function only applies to the current source file 4137 * (e.g., it will not affect files 'require'd or 'use'd by this one). 4138 * 4139 * The datasv parameter (which may be NULL) can be used to pass 4140 * private data to this instance of the filter. The filter function 4141 * can recover the SV using the FILTER_DATA macro and use it to 4142 * store private buffers and state information. 4143 * 4144 * The supplied datasv parameter is upgraded to a PVIO type 4145 * and the IoDIRP/IoANY field is used to store the function pointer, 4146 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 4147 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 4148 * private use must be set using malloc'd pointers. 4149 */ 4150 4151SV * 4152Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 4153{ 4154 if (!funcp) 4155 return NULL; 4156 4157 if (!PL_parser) 4158 return NULL; 4159 4160 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) 4161 Perl_croak(aTHX_ "Source filters apply only to byte streams"); 4162 4163 if (!PL_rsfp_filters) 4164 PL_rsfp_filters = newAV(); 4165 if (!datasv) 4166 datasv = newSV(0); 4167 SvUPGRADE(datasv, SVt_PVIO); 4168 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ 4169 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 4170 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 4171 FPTR2DPTR(void *, IoANY(datasv)), 4172 SvPV_nolen(datasv))); 4173 av_unshift(PL_rsfp_filters, 1); 4174 av_store(PL_rsfp_filters, 0, datasv) ; 4175 if ( 4176 !PL_parser->filtered 4177 && PL_parser->lex_flags & LEX_EVALBYTES 4178 && PL_bufptr < PL_bufend 4179 ) { 4180 const char *s = PL_bufptr; 4181 while (s < PL_bufend) { 4182 if (*s == '\n') { 4183 SV *linestr = PL_parser->linestr; 4184 char *buf = SvPVX(linestr); 4185 STRLEN const bufptr_pos = PL_parser->bufptr - buf; 4186 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; 4187 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; 4188 STRLEN const linestart_pos = PL_parser->linestart - buf; 4189 STRLEN const last_uni_pos = 4190 PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 4191 STRLEN const last_lop_pos = 4192 PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 4193 av_push(PL_rsfp_filters, linestr); 4194 PL_parser->linestr = 4195 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); 4196 buf = SvPVX(PL_parser->linestr); 4197 PL_parser->bufend = buf + SvCUR(PL_parser->linestr); 4198 PL_parser->bufptr = buf + bufptr_pos; 4199 PL_parser->oldbufptr = buf + oldbufptr_pos; 4200 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 4201 PL_parser->linestart = buf + linestart_pos; 4202 if (PL_parser->last_uni) 4203 PL_parser->last_uni = buf + last_uni_pos; 4204 if (PL_parser->last_lop) 4205 PL_parser->last_lop = buf + last_lop_pos; 4206 SvLEN(linestr) = SvCUR(linestr); 4207 SvCUR(linestr) = s-SvPVX(linestr); 4208 PL_parser->filtered = 1; 4209 break; 4210 } 4211 s++; 4212 } 4213 } 4214 return(datasv); 4215} 4216 4217 4218/* Delete most recently added instance of this filter function. */ 4219void 4220Perl_filter_del(pTHX_ filter_t funcp) 4221{ 4222 SV *datasv; 4223 4224 PERL_ARGS_ASSERT_FILTER_DEL; 4225 4226#ifdef DEBUGGING 4227 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", 4228 FPTR2DPTR(void*, funcp))); 4229#endif 4230 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 4231 return; 4232 /* if filter is on top of stack (usual case) just pop it off */ 4233 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 4234 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { 4235 sv_free(av_pop(PL_rsfp_filters)); 4236 4237 return; 4238 } 4239 /* we need to search for the correct entry and clear it */ 4240 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 4241} 4242 4243 4244/* Invoke the idxth filter function for the current rsfp. */ 4245/* maxlen 0 = read one text line */ 4246I32 4247Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 4248{ 4249 filter_t funcp; 4250 SV *datasv = NULL; 4251 /* This API is bad. It should have been using unsigned int for maxlen. 4252 Not sure if we want to change the API, but if not we should sanity 4253 check the value here. */ 4254 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen; 4255 4256 PERL_ARGS_ASSERT_FILTER_READ; 4257 4258 if (!PL_parser || !PL_rsfp_filters) 4259 return -1; 4260 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ 4261 /* Provide a default input filter to make life easy. */ 4262 /* Note that we append to the line. This is handy. */ 4263 DEBUG_P(PerlIO_printf(Perl_debug_log, 4264 "filter_read %d: from rsfp\n", idx)); 4265 if (correct_length) { 4266 /* Want a block */ 4267 int len ; 4268 const int old_len = SvCUR(buf_sv); 4269 4270 /* ensure buf_sv is large enough */ 4271 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; 4272 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, 4273 correct_length)) <= 0) { 4274 if (PerlIO_error(PL_rsfp)) 4275 return -1; /* error */ 4276 else 4277 return 0 ; /* end of file */ 4278 } 4279 SvCUR_set(buf_sv, old_len + len) ; 4280 SvPVX(buf_sv)[old_len + len] = '\0'; 4281 } else { 4282 /* Want a line */ 4283 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 4284 if (PerlIO_error(PL_rsfp)) 4285 return -1; /* error */ 4286 else 4287 return 0 ; /* end of file */ 4288 } 4289 } 4290 return SvCUR(buf_sv); 4291 } 4292 /* Skip this filter slot if filter has been deleted */ 4293 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { 4294 DEBUG_P(PerlIO_printf(Perl_debug_log, 4295 "filter_read %d: skipped (filter deleted)\n", 4296 idx)); 4297 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ 4298 } 4299 if (SvTYPE(datasv) != SVt_PVIO) { 4300 if (correct_length) { 4301 /* Want a block */ 4302 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); 4303 if (!remainder) return 0; /* eof */ 4304 if (correct_length > remainder) correct_length = remainder; 4305 sv_catpvn(buf_sv, SvEND(datasv), correct_length); 4306 SvCUR_set(datasv, SvCUR(datasv) + correct_length); 4307 } else { 4308 /* Want a line */ 4309 const char *s = SvEND(datasv); 4310 const char *send = SvPVX(datasv) + SvLEN(datasv); 4311 while (s < send) { 4312 if (*s == '\n') { 4313 s++; 4314 break; 4315 } 4316 s++; 4317 } 4318 if (s == send) return 0; /* eof */ 4319 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); 4320 SvCUR_set(datasv, s-SvPVX(datasv)); 4321 } 4322 return SvCUR(buf_sv); 4323 } 4324 /* Get function pointer hidden within datasv */ 4325 funcp = DPTR2FPTR(filter_t, IoANY(datasv)); 4326 DEBUG_P(PerlIO_printf(Perl_debug_log, 4327 "filter_read %d: via function %p (%s)\n", 4328 idx, (void*)datasv, SvPV_nolen_const(datasv))); 4329 /* Call function. The function is expected to */ 4330 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 4331 /* Return: <0:error, =0:eof, >0:not eof */ 4332 return (*funcp)(aTHX_ idx, buf_sv, correct_length); 4333} 4334 4335STATIC char * 4336S_filter_gets(pTHX_ SV *sv, STRLEN append) 4337{ 4338 PERL_ARGS_ASSERT_FILTER_GETS; 4339 4340#ifdef PERL_CR_FILTER 4341 if (!PL_rsfp_filters) { 4342 filter_add(S_cr_textfilter,NULL); 4343 } 4344#endif 4345 if (PL_rsfp_filters) { 4346 if (!append) 4347 SvCUR_set(sv, 0); /* start with empty line */ 4348 if (FILTER_READ(0, sv, 0) > 0) 4349 return ( SvPVX(sv) ) ; 4350 else 4351 return NULL ; 4352 } 4353 else 4354 return (sv_gets(sv, PL_rsfp, append)); 4355} 4356 4357STATIC HV * 4358S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) 4359{ 4360 GV *gv; 4361 4362 PERL_ARGS_ASSERT_FIND_IN_MY_STASH; 4363 4364 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__")) 4365 return PL_curstash; 4366 4367 if (len > 2 4368 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') 4369 && (gv = gv_fetchpvn_flags(pkgname, 4370 len, 4371 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV))) 4372 { 4373 return GvHV(gv); /* Foo:: */ 4374 } 4375 4376 /* use constant CLASS => 'MyClass' */ 4377 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV); 4378 if (gv && GvCV(gv)) { 4379 SV * const sv = cv_const_sv(GvCV(gv)); 4380 if (sv) 4381 return gv_stashsv(sv, 0); 4382 } 4383 4384 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); 4385} 4386 4387 4388STATIC char * 4389S_tokenize_use(pTHX_ int is_use, char *s) { 4390 PERL_ARGS_ASSERT_TOKENIZE_USE; 4391 4392 if (PL_expect != XSTATE) 4393 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", 4394 is_use ? "use" : "no")); 4395 PL_expect = XTERM; 4396 s = skipspace(s); 4397 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 4398 s = force_version(s, TRUE); 4399 if (*s == ';' || *s == '}' 4400 || (s = skipspace(s), (*s == ';' || *s == '}'))) { 4401 NEXTVAL_NEXTTOKE.opval = NULL; 4402 force_next(WORD); 4403 } 4404 else if (*s == 'v') { 4405 s = force_word(s,WORD,FALSE,TRUE); 4406 s = force_version(s, FALSE); 4407 } 4408 } 4409 else { 4410 s = force_word(s,WORD,FALSE,TRUE); 4411 s = force_version(s, FALSE); 4412 } 4413 pl_yylval.ival = is_use; 4414 return s; 4415} 4416#ifdef DEBUGGING 4417 static const char* const exp_name[] = 4418 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 4419 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", 4420 "TERMORDORDOR" 4421 }; 4422#endif 4423 4424#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l) 4425STATIC bool 4426S_word_takes_any_delimeter(char *p, STRLEN len) 4427{ 4428 return (len == 1 && strchr("msyq", p[0])) 4429 || (len == 2 4430 && ((p[0] == 't' && p[1] == 'r') 4431 || (p[0] == 'q' && strchr("qwxr", p[1])))); 4432} 4433 4434static void 4435S_check_scalar_slice(pTHX_ char *s) 4436{ 4437 s++; 4438 while (*s == ' ' || *s == '\t') s++; 4439 if (*s == 'q' && s[1] == 'w' 4440 && !isWORDCHAR_lazy_if(s+2,UTF)) 4441 return; 4442 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s))) 4443 s += UTF ? UTF8SKIP(s) : 1; 4444 if (*s == '}' || *s == ']') 4445 pl_yylval.ival = OPpSLICEWARNING; 4446} 4447 4448/* 4449 yylex 4450 4451 Works out what to call the token just pulled out of the input 4452 stream. The yacc parser takes care of taking the ops we return and 4453 stitching them into a tree. 4454 4455 Returns: 4456 The type of the next token 4457 4458 Structure: 4459 Switch based on the current state: 4460 - if we already built the token before, use it 4461 - if we have a case modifier in a string, deal with that 4462 - handle other cases of interpolation inside a string 4463 - scan the next line if we are inside a format 4464 In the normal state switch on the next character: 4465 - default: 4466 if alphabetic, go to key lookup 4467 unrecoginized character - croak 4468 - 0/4/26: handle end-of-line or EOF 4469 - cases for whitespace 4470 - \n and #: handle comments and line numbers 4471 - various operators, brackets and sigils 4472 - numbers 4473 - quotes 4474 - 'v': vstrings (or go to key lookup) 4475 - 'x' repetition operator (or go to key lookup) 4476 - other ASCII alphanumerics (key lookup begins here): 4477 word before => ? 4478 keyword plugin 4479 scan built-in keyword (but do nothing with it yet) 4480 check for statement label 4481 check for lexical subs 4482 goto just_a_word if there is one 4483 see whether built-in keyword is overridden 4484 switch on keyword number: 4485 - default: just_a_word: 4486 not a built-in keyword; handle bareword lookup 4487 disambiguate between method and sub call 4488 fall back to bareword 4489 - cases for built-in keywords 4490*/ 4491 4492 4493int 4494Perl_yylex(pTHX) 4495{ 4496 dVAR; 4497 char *s = PL_bufptr; 4498 char *d; 4499 STRLEN len; 4500 bool bof = FALSE; 4501 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil); 4502 U8 formbrack = 0; 4503 U32 fake_eof = 0; 4504 4505 /* orig_keyword, gvp, and gv are initialized here because 4506 * jump to the label just_a_word_zero can bypass their 4507 * initialization later. */ 4508 I32 orig_keyword = 0; 4509 GV *gv = NULL; 4510 GV **gvp = NULL; 4511 4512 DEBUG_T( { 4513 SV* tmp = newSVpvs(""); 4514 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n", 4515 (IV)CopLINE(PL_curcop), 4516 lex_state_names[PL_lex_state], 4517 exp_name[PL_expect], 4518 pv_display(tmp, s, strlen(s), 0, 60)); 4519 SvREFCNT_dec(tmp); 4520 } ); 4521 4522 /* when we've already built the next token, just pull it out of the queue */ 4523 if (PL_nexttoke) { 4524 PL_nexttoke--; 4525 pl_yylval = PL_nextval[PL_nexttoke]; 4526 if (!PL_nexttoke) { 4527 PL_lex_state = PL_lex_defer; 4528 PL_lex_defer = LEX_NORMAL; 4529 } 4530 { 4531 I32 next_type; 4532 next_type = PL_nexttype[PL_nexttoke]; 4533 if (next_type & (7<<24)) { 4534 if (next_type & (1<<24)) { 4535 if (PL_lex_brackets > 100) 4536 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 4537 PL_lex_brackstack[PL_lex_brackets++] = 4538 (char) ((next_type >> 16) & 0xff); 4539 } 4540 if (next_type & (2<<24)) 4541 PL_lex_allbrackets++; 4542 if (next_type & (4<<24)) 4543 PL_lex_allbrackets--; 4544 next_type &= 0xffff; 4545 } 4546 return REPORT(next_type == 'p' ? pending_ident() : next_type); 4547 } 4548 } 4549 4550 switch (PL_lex_state) { 4551 case LEX_NORMAL: 4552 case LEX_INTERPNORMAL: 4553 break; 4554 4555 /* interpolated case modifiers like \L \U, including \Q and \E. 4556 when we get here, PL_bufptr is at the \ 4557 */ 4558 case LEX_INTERPCASEMOD: 4559#ifdef DEBUGGING 4560 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 4561 Perl_croak(aTHX_ 4562 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u", 4563 PL_bufptr, PL_bufend, *PL_bufptr); 4564#endif 4565 /* handle \E or end of string */ 4566 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 4567 /* if at a \E */ 4568 if (PL_lex_casemods) { 4569 const char oldmod = PL_lex_casestack[--PL_lex_casemods]; 4570 PL_lex_casestack[PL_lex_casemods] = '\0'; 4571 4572 if (PL_bufptr != PL_bufend 4573 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' 4574 || oldmod == 'F')) { 4575 PL_bufptr += 2; 4576 PL_lex_state = LEX_INTERPCONCAT; 4577 } 4578 PL_lex_allbrackets--; 4579 return REPORT(')'); 4580 } 4581 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) { 4582 /* Got an unpaired \E */ 4583 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 4584 "Useless use of \\E"); 4585 } 4586 if (PL_bufptr != PL_bufend) 4587 PL_bufptr += 2; 4588 PL_lex_state = LEX_INTERPCONCAT; 4589 return yylex(); 4590 } 4591 else { 4592 DEBUG_T({ PerlIO_printf(Perl_debug_log, 4593 "### Saw case modifier\n"); }); 4594 s = PL_bufptr + 1; 4595 if (s[1] == '\\' && s[2] == 'E') { 4596 PL_bufptr = s + 3; 4597 PL_lex_state = LEX_INTERPCONCAT; 4598 return yylex(); 4599 } 4600 else { 4601 I32 tmp; 4602 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) 4603 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 4604 if ((*s == 'L' || *s == 'U' || *s == 'F') 4605 && (strchr(PL_lex_casestack, 'L') 4606 || strchr(PL_lex_casestack, 'U') 4607 || strchr(PL_lex_casestack, 'F'))) 4608 { 4609 PL_lex_casestack[--PL_lex_casemods] = '\0'; 4610 PL_lex_allbrackets--; 4611 return REPORT(')'); 4612 } 4613 if (PL_lex_casemods > 10) 4614 Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 4615 PL_lex_casestack[PL_lex_casemods++] = *s; 4616 PL_lex_casestack[PL_lex_casemods] = '\0'; 4617 PL_lex_state = LEX_INTERPCONCAT; 4618 NEXTVAL_NEXTTOKE.ival = 0; 4619 force_next((2<<24)|'('); 4620 if (*s == 'l') 4621 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; 4622 else if (*s == 'u') 4623 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST; 4624 else if (*s == 'L') 4625 NEXTVAL_NEXTTOKE.ival = OP_LC; 4626 else if (*s == 'U') 4627 NEXTVAL_NEXTTOKE.ival = OP_UC; 4628 else if (*s == 'Q') 4629 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; 4630 else if (*s == 'F') 4631 NEXTVAL_NEXTTOKE.ival = OP_FC; 4632 else 4633 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); 4634 PL_bufptr = s + 1; 4635 } 4636 force_next(FUNC); 4637 if (PL_lex_starts) { 4638 s = PL_bufptr; 4639 PL_lex_starts = 0; 4640 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4641 if (PL_lex_casemods == 1 && PL_lex_inpat) 4642 TOKEN(','); 4643 else 4644 AopNOASSIGN(OP_CONCAT); 4645 } 4646 else 4647 return yylex(); 4648 } 4649 4650 case LEX_INTERPPUSH: 4651 return REPORT(sublex_push()); 4652 4653 case LEX_INTERPSTART: 4654 if (PL_bufptr == PL_bufend) 4655 return REPORT(sublex_done()); 4656 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log, 4657 "### Interpolated variable\n"); }); 4658 PL_expect = XTERM; 4659 /* for /@a/, we leave the joining for the regex engine to do 4660 * (unless we're within \Q etc) */ 4661 PL_lex_dojoin = (*PL_bufptr == '@' 4662 && (!PL_lex_inpat || PL_lex_casemods)); 4663 PL_lex_state = LEX_INTERPNORMAL; 4664 if (PL_lex_dojoin) { 4665 NEXTVAL_NEXTTOKE.ival = 0; 4666 force_next(','); 4667 force_ident("\"", '$'); 4668 NEXTVAL_NEXTTOKE.ival = 0; 4669 force_next('$'); 4670 NEXTVAL_NEXTTOKE.ival = 0; 4671 force_next((2<<24)|'('); 4672 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ 4673 force_next(FUNC); 4674 } 4675 /* Convert (?{...}) and friends to 'do {...}' */ 4676 if (PL_lex_inpat && *PL_bufptr == '(') { 4677 PL_parser->lex_shared->re_eval_start = PL_bufptr; 4678 PL_bufptr += 2; 4679 if (*PL_bufptr != '{') 4680 PL_bufptr++; 4681 PL_expect = XTERMBLOCK; 4682 force_next(DO); 4683 } 4684 4685 if (PL_lex_starts++) { 4686 s = PL_bufptr; 4687 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4688 if (!PL_lex_casemods && PL_lex_inpat) 4689 TOKEN(','); 4690 else 4691 AopNOASSIGN(OP_CONCAT); 4692 } 4693 return yylex(); 4694 4695 case LEX_INTERPENDMAYBE: 4696 if (intuit_more(PL_bufptr)) { 4697 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 4698 break; 4699 } 4700 /* FALLTHROUGH */ 4701 4702 case LEX_INTERPEND: 4703 /* Treat state as LEX_NORMAL if we have no inner lexing scope. 4704 XXX This hack can be removed if we stop setting PL_lex_state to 4705 LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below. */ 4706 if (UNLIKELY(!PL_lex_inwhat)) { 4707 PL_lex_state = LEX_NORMAL; 4708 break; 4709 } 4710 4711 if (PL_lex_dojoin) { 4712 const U8 dojoin_was = PL_lex_dojoin; 4713 PL_lex_dojoin = FALSE; 4714 PL_lex_state = LEX_INTERPCONCAT; 4715 PL_lex_allbrackets--; 4716 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN); 4717 } 4718 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 4719 && SvEVALED(PL_lex_repl)) 4720 { 4721 if (PL_bufptr != PL_bufend) 4722 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 4723 PL_lex_repl = NULL; 4724 } 4725 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets 4726 re_eval_str. If the here-doc body���s length equals the previous 4727 value of re_eval_start, re_eval_start will now be null. So 4728 check re_eval_str as well. */ 4729 if (PL_parser->lex_shared->re_eval_start 4730 || PL_parser->lex_shared->re_eval_str) { 4731 SV *sv; 4732 if (*PL_bufptr != ')') 4733 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); 4734 PL_bufptr++; 4735 /* having compiled a (?{..}) expression, return the original 4736 * text too, as a const */ 4737 if (PL_parser->lex_shared->re_eval_str) { 4738 sv = PL_parser->lex_shared->re_eval_str; 4739 PL_parser->lex_shared->re_eval_str = NULL; 4740 SvCUR_set(sv, 4741 PL_bufptr - PL_parser->lex_shared->re_eval_start); 4742 SvPV_shrink_to_cur(sv); 4743 } 4744 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, 4745 PL_bufptr - PL_parser->lex_shared->re_eval_start); 4746 NEXTVAL_NEXTTOKE.opval = 4747 (OP*)newSVOP(OP_CONST, 0, 4748 sv); 4749 force_next(THING); 4750 PL_parser->lex_shared->re_eval_start = NULL; 4751 PL_expect = XTERM; 4752 return REPORT(','); 4753 } 4754 4755 /* FALLTHROUGH */ 4756 case LEX_INTERPCONCAT: 4757#ifdef DEBUGGING 4758 if (PL_lex_brackets) 4759 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", 4760 (long) PL_lex_brackets); 4761#endif 4762 /* Treat state as LEX_NORMAL when not in an inner lexing scope. 4763 XXX This hack can be removed if we stop setting PL_lex_state to 4764 LEX_KNOWNEXT. */ 4765 if (UNLIKELY(!PL_lex_inwhat)) { 4766 PL_lex_state = LEX_NORMAL; 4767 break; 4768 } 4769 4770 if (PL_bufptr == PL_bufend) 4771 return REPORT(sublex_done()); 4772 4773 /* m'foo' still needs to be parsed for possible (?{...}) */ 4774 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { 4775 SV *sv = newSVsv(PL_linestr); 4776 sv = tokeq(sv); 4777 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 4778 s = PL_bufend; 4779 } 4780 else { 4781 s = scan_const(PL_bufptr); 4782 if (*s == '\\') 4783 PL_lex_state = LEX_INTERPCASEMOD; 4784 else 4785 PL_lex_state = LEX_INTERPSTART; 4786 } 4787 4788 if (s != PL_bufptr) { 4789 NEXTVAL_NEXTTOKE = pl_yylval; 4790 PL_expect = XTERM; 4791 force_next(THING); 4792 if (PL_lex_starts++) { 4793 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 4794 if (!PL_lex_casemods && PL_lex_inpat) 4795 TOKEN(','); 4796 else 4797 AopNOASSIGN(OP_CONCAT); 4798 } 4799 else { 4800 PL_bufptr = s; 4801 return yylex(); 4802 } 4803 } 4804 4805 return yylex(); 4806 case LEX_FORMLINE: 4807 s = scan_formline(PL_bufptr); 4808 if (!PL_lex_formbrack) 4809 { 4810 formbrack = 1; 4811 goto rightbracket; 4812 } 4813 PL_bufptr = s; 4814 return yylex(); 4815 } 4816 4817 /* We really do *not* want PL_linestr ever becoming a COW. */ 4818 assert (!SvIsCOW(PL_linestr)); 4819 s = PL_bufptr; 4820 PL_oldoldbufptr = PL_oldbufptr; 4821 PL_oldbufptr = s; 4822 PL_parser->saw_infix_sigil = 0; 4823 4824 retry: 4825 switch (*s) { 4826 default: 4827 if (UTF) { 4828 if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) { 4829 ENTER; 4830 SAVESPTR(PL_warnhook); 4831 PL_warnhook = PERL_WARNHOOK_FATAL; 4832 utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0); 4833 LEAVE; 4834 } 4835 if (isIDFIRST_utf8((U8*)s)) { 4836 goto keylookup; 4837 } 4838 } 4839 else if (isALNUMC(*s)) { 4840 goto keylookup; 4841 } 4842 { 4843 SV *dsv = newSVpvs_flags("", SVs_TEMP); 4844 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s, 4845 UTF8SKIP(s), 4846 SVs_TEMP | SVf_UTF8), 4847 10, UNI_DISPLAY_ISPRINT) 4848 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); 4849 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); 4850 if (len > UNRECOGNIZED_PRECEDE_COUNT) { 4851 d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; 4852 } else { 4853 d = PL_linestart; 4854 } 4855 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c, 4856 UTF8fARG(UTF, (s - d), d), 4857 (int) len + 1); 4858 } 4859 case 4: 4860 case 26: 4861 goto fake_eof; /* emulate EOF on ^D or ^Z */ 4862 case 0: 4863 if ((!PL_rsfp || PL_lex_inwhat) 4864 && (!PL_parser->filtered || s+1 < PL_bufend)) { 4865 PL_last_uni = 0; 4866 PL_last_lop = 0; 4867 if (PL_lex_brackets 4868 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) 4869 { 4870 yyerror((const char *) 4871 (PL_lex_formbrack 4872 ? "Format not terminated" 4873 : "Missing right curly or square bracket")); 4874 } 4875 DEBUG_T( { PerlIO_printf(Perl_debug_log, 4876 "### Tokener got EOF\n"); 4877 } ); 4878 TOKEN(0); 4879 } 4880 if (s++ < PL_bufend) 4881 goto retry; /* ignore stray nulls */ 4882 PL_last_uni = 0; 4883 PL_last_lop = 0; 4884 if (!PL_in_eval && !PL_preambled) { 4885 PL_preambled = TRUE; 4886 if (PL_perldb) { 4887 /* Generate a string of Perl code to load the debugger. 4888 * If PERL5DB is set, it will return the contents of that, 4889 * otherwise a compile-time require of perl5db.pl. */ 4890 4891 const char * const pdb = PerlEnv_getenv("PERL5DB"); 4892 4893 if (pdb) { 4894 sv_setpv(PL_linestr, pdb); 4895 sv_catpvs(PL_linestr,";"); 4896 } else { 4897 SETERRNO(0,SS_NORMAL); 4898 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); 4899 } 4900 PL_parser->preambling = CopLINE(PL_curcop); 4901 } else 4902 sv_setpvs(PL_linestr,""); 4903 if (PL_preambleav) { 4904 SV **svp = AvARRAY(PL_preambleav); 4905 SV **const end = svp + AvFILLp(PL_preambleav); 4906 while(svp <= end) { 4907 sv_catsv(PL_linestr, *svp); 4908 ++svp; 4909 sv_catpvs(PL_linestr, ";"); 4910 } 4911 sv_free(MUTABLE_SV(PL_preambleav)); 4912 PL_preambleav = NULL; 4913 } 4914 if (PL_minus_E) 4915 sv_catpvs(PL_linestr, 4916 "use feature ':5." STRINGIFY(PERL_VERSION) "';"); 4917 if (PL_minus_n || PL_minus_p) { 4918 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); 4919 if (PL_minus_l) 4920 sv_catpvs(PL_linestr,"chomp;"); 4921 if (PL_minus_a) { 4922 if (PL_minus_F) { 4923 if ((*PL_splitstr == '/' || *PL_splitstr == '\'' 4924 || *PL_splitstr == '"') 4925 && strchr(PL_splitstr + 1, *PL_splitstr)) 4926 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 4927 else { 4928 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL 4929 bytes can be used as quoting characters. :-) */ 4930 const char *splits = PL_splitstr; 4931 sv_catpvs(PL_linestr, "our @F=split(q\0"); 4932 do { 4933 /* Need to \ \s */ 4934 if (*splits == '\\') 4935 sv_catpvn(PL_linestr, splits, 1); 4936 sv_catpvn(PL_linestr, splits, 1); 4937 } while (*splits++); 4938 /* This loop will embed the trailing NUL of 4939 PL_linestr as the last thing it does before 4940 terminating. */ 4941 sv_catpvs(PL_linestr, ");"); 4942 } 4943 } 4944 else 4945 sv_catpvs(PL_linestr,"our @F=split(' ');"); 4946 } 4947 } 4948 sv_catpvs(PL_linestr, "\n"); 4949 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 4950 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4951 PL_last_lop = PL_last_uni = NULL; 4952 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) 4953 update_debugger_info(PL_linestr, NULL, 0); 4954 goto retry; 4955 } 4956 do { 4957 fake_eof = 0; 4958 bof = PL_rsfp ? TRUE : FALSE; 4959 if (0) { 4960 fake_eof: 4961 fake_eof = LEX_FAKE_EOF; 4962 } 4963 PL_bufptr = PL_bufend; 4964 COPLINE_INC_WITH_HERELINES; 4965 if (!lex_next_chunk(fake_eof)) { 4966 CopLINE_dec(PL_curcop); 4967 s = PL_bufptr; 4968 TOKEN(';'); /* not infinite loop because rsfp is NULL now */ 4969 } 4970 CopLINE_dec(PL_curcop); 4971 s = PL_bufptr; 4972 /* If it looks like the start of a BOM or raw UTF-16, 4973 * check if it in fact is. */ 4974 if (bof && PL_rsfp 4975 && (*s == 0 4976 || *(U8*)s == BOM_UTF8_FIRST_BYTE 4977 || *(U8*)s >= 0xFE 4978 || s[1] == 0)) 4979 { 4980 Off_t offset = (IV)PerlIO_tell(PL_rsfp); 4981 bof = (offset == (Off_t)SvCUR(PL_linestr)); 4982#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) 4983 /* offset may include swallowed CR */ 4984 if (!bof) 4985 bof = (offset == (Off_t)SvCUR(PL_linestr)+1); 4986#endif 4987 if (bof) { 4988 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4989 s = swallow_bom((U8*)s); 4990 } 4991 } 4992 if (PL_parser->in_pod) { 4993 /* Incest with pod. */ 4994 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) { 4995 sv_setpvs(PL_linestr, ""); 4996 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 4997 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 4998 PL_last_lop = PL_last_uni = NULL; 4999 PL_parser->in_pod = 0; 5000 } 5001 } 5002 if (PL_rsfp || PL_parser->filtered) 5003 incline(s); 5004 } while (PL_parser->in_pod); 5005 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 5006 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5007 PL_last_lop = PL_last_uni = NULL; 5008 if (CopLINE(PL_curcop) == 1) { 5009 while (s < PL_bufend && isSPACE(*s)) 5010 s++; 5011 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 5012 s++; 5013 d = NULL; 5014 if (!PL_in_eval) { 5015 if (*s == '#' && *(s+1) == '!') 5016 d = s + 2; 5017#ifdef ALTERNATE_SHEBANG 5018 else { 5019 static char const as[] = ALTERNATE_SHEBANG; 5020 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 5021 d = s + (sizeof(as) - 1); 5022 } 5023#endif /* ALTERNATE_SHEBANG */ 5024 } 5025 if (d) { 5026 char *ipath; 5027 char *ipathend; 5028 5029 while (isSPACE(*d)) 5030 d++; 5031 ipath = d; 5032 while (*d && !isSPACE(*d)) 5033 d++; 5034 ipathend = d; 5035 5036#ifdef ARG_ZERO_IS_SCRIPT 5037 if (ipathend > ipath) { 5038 /* 5039 * HP-UX (at least) sets argv[0] to the script name, 5040 * which makes $^X incorrect. And Digital UNIX and Linux, 5041 * at least, set argv[0] to the basename of the Perl 5042 * interpreter. So, having found "#!", we'll set it right. 5043 */ 5044 SV* copfilesv = CopFILESV(PL_curcop); 5045 if (copfilesv) { 5046 SV * const x = 5047 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, 5048 SVt_PV)); /* $^X */ 5049 assert(SvPOK(x) || SvGMAGICAL(x)); 5050 if (sv_eq(x, copfilesv)) { 5051 sv_setpvn(x, ipath, ipathend - ipath); 5052 SvSETMAGIC(x); 5053 } 5054 else { 5055 STRLEN blen; 5056 STRLEN llen; 5057 const char *bstart = SvPV_const(copfilesv, blen); 5058 const char * const lstart = SvPV_const(x, llen); 5059 if (llen < blen) { 5060 bstart += blen - llen; 5061 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 5062 sv_setpvn(x, ipath, ipathend - ipath); 5063 SvSETMAGIC(x); 5064 } 5065 } 5066 } 5067 } 5068 else { 5069 /* Anything to do if no copfilesv? */ 5070 } 5071 TAINT_NOT; /* $^X is always tainted, but that's OK */ 5072 } 5073#endif /* ARG_ZERO_IS_SCRIPT */ 5074 5075 /* 5076 * Look for options. 5077 */ 5078 d = instr(s,"perl -"); 5079 if (!d) { 5080 d = instr(s,"perl"); 5081#if defined(DOSISH) 5082 /* avoid getting into infinite loops when shebang 5083 * line contains "Perl" rather than "perl" */ 5084 if (!d) { 5085 for (d = ipathend-4; d >= ipath; --d) { 5086 if (isALPHA_FOLD_EQ(*d, 'p') 5087 && !ibcmp(d, "perl", 4)) 5088 { 5089 break; 5090 } 5091 } 5092 if (d < ipath) 5093 d = NULL; 5094 } 5095#endif 5096 } 5097#ifdef ALTERNATE_SHEBANG 5098 /* 5099 * If the ALTERNATE_SHEBANG on this system starts with a 5100 * character that can be part of a Perl expression, then if 5101 * we see it but not "perl", we're probably looking at the 5102 * start of Perl code, not a request to hand off to some 5103 * other interpreter. Similarly, if "perl" is there, but 5104 * not in the first 'word' of the line, we assume the line 5105 * contains the start of the Perl program. 5106 */ 5107 if (d && *s != '#') { 5108 const char *c = ipath; 5109 while (*c && !strchr("; \t\r\n\f\v#", *c)) 5110 c++; 5111 if (c < d) 5112 d = NULL; /* "perl" not in first word; ignore */ 5113 else 5114 *s = '#'; /* Don't try to parse shebang line */ 5115 } 5116#endif /* ALTERNATE_SHEBANG */ 5117 if (!d 5118 && *s == '#' 5119 && ipathend > ipath 5120 && !PL_minus_c 5121 && !instr(s,"indir") 5122 && instr(PL_origargv[0],"perl")) 5123 { 5124 dVAR; 5125 char **newargv; 5126 5127 *ipathend = '\0'; 5128 s = ipathend + 1; 5129 while (s < PL_bufend && isSPACE(*s)) 5130 s++; 5131 if (s < PL_bufend) { 5132 Newx(newargv,PL_origargc+3,char*); 5133 newargv[1] = s; 5134 while (s < PL_bufend && !isSPACE(*s)) 5135 s++; 5136 *s = '\0'; 5137 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 5138 } 5139 else 5140 newargv = PL_origargv; 5141 newargv[0] = ipath; 5142 PERL_FPU_PRE_EXEC 5143 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 5144 PERL_FPU_POST_EXEC 5145 Perl_croak(aTHX_ "Can't exec %s", ipath); 5146 } 5147 if (d) { 5148 while (*d && !isSPACE(*d)) 5149 d++; 5150 while (SPACE_OR_TAB(*d)) 5151 d++; 5152 5153 if (*d++ == '-') { 5154 const bool switches_done = PL_doswitches; 5155 const U32 oldpdb = PL_perldb; 5156 const bool oldn = PL_minus_n; 5157 const bool oldp = PL_minus_p; 5158 const char *d1 = d; 5159 5160 do { 5161 bool baduni = FALSE; 5162 if (*d1 == 'C') { 5163 const char *d2 = d1 + 1; 5164 if (parse_unicode_opts((const char **)&d2) 5165 != PL_unicode) 5166 baduni = TRUE; 5167 } 5168 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { 5169 const char * const m = d1; 5170 while (*d1 && !isSPACE(*d1)) 5171 d1++; 5172 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 5173 (int)(d1 - m), m); 5174 } 5175 d1 = moreswitches(d1); 5176 } while (d1); 5177 if (PL_doswitches && !switches_done) { 5178 int argc = PL_origargc; 5179 char **argv = PL_origargv; 5180 do { 5181 argc--,argv++; 5182 } while (argc && argv[0][0] == '-' && argv[0][1]); 5183 init_argv_symbols(argc,argv); 5184 } 5185 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb) 5186 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 5187 /* if we have already added "LINE: while (<>) {", 5188 we must not do it again */ 5189 { 5190 sv_setpvs(PL_linestr, ""); 5191 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 5192 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 5193 PL_last_lop = PL_last_uni = NULL; 5194 PL_preambled = FALSE; 5195 if (PERLDB_LINE_OR_SAVESRC) 5196 (void)gv_fetchfile(PL_origfilename); 5197 goto retry; 5198 } 5199 } 5200 } 5201 } 5202 } 5203 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 5204 PL_lex_state = LEX_FORMLINE; 5205 force_next(FORMRBRACK); 5206 TOKEN(';'); 5207 } 5208 goto retry; 5209 case '\r': 5210#ifdef PERL_STRICT_CR 5211 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 5212 Perl_croak(aTHX_ 5213 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 5214#endif 5215 case ' ': case '\t': case '\f': case '\v': 5216 s++; 5217 goto retry; 5218 case '#': 5219 case '\n': 5220 if (PL_lex_state != LEX_NORMAL 5221 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) 5222 { 5223 const bool in_comment = *s == '#'; 5224 if (*s == '#' && s == PL_linestart && PL_in_eval 5225 && !PL_rsfp && !PL_parser->filtered) { 5226 /* handle eval qq[#line 1 "foo"\n ...] */ 5227 CopLINE_dec(PL_curcop); 5228 incline(s); 5229 } 5230 d = s; 5231 while (d < PL_bufend && *d != '\n') 5232 d++; 5233 if (d < PL_bufend) 5234 d++; 5235 else if (d > PL_bufend) 5236 /* Found by Ilya: feed random input to Perl. */ 5237 Perl_croak(aTHX_ "panic: input overflow, %p > %p", 5238 d, PL_bufend); 5239 s = d; 5240 if (in_comment && d == PL_bufend 5241 && PL_lex_state == LEX_INTERPNORMAL 5242 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 5243 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; 5244 else 5245 incline(s); 5246 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 5247 PL_lex_state = LEX_FORMLINE; 5248 force_next(FORMRBRACK); 5249 TOKEN(';'); 5250 } 5251 } 5252 else { 5253 while (s < PL_bufend && *s != '\n') 5254 s++; 5255 if (s < PL_bufend) 5256 { 5257 s++; 5258 if (s < PL_bufend) 5259 incline(s); 5260 } 5261 else if (s > PL_bufend) 5262 /* Found by Ilya: feed random input to Perl. */ 5263 Perl_croak(aTHX_ "panic: input overflow"); 5264 } 5265 goto retry; 5266 case '-': 5267 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) { 5268 I32 ftst = 0; 5269 char tmp; 5270 5271 s++; 5272 PL_bufptr = s; 5273 tmp = *s++; 5274 5275 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5276 s++; 5277 5278 if (strnEQ(s,"=>",2)) { 5279 s = force_word(PL_bufptr,WORD,FALSE,FALSE); 5280 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); 5281 OPERATOR('-'); /* unary minus */ 5282 } 5283 switch (tmp) { 5284 case 'r': ftst = OP_FTEREAD; break; 5285 case 'w': ftst = OP_FTEWRITE; break; 5286 case 'x': ftst = OP_FTEEXEC; break; 5287 case 'o': ftst = OP_FTEOWNED; break; 5288 case 'R': ftst = OP_FTRREAD; break; 5289 case 'W': ftst = OP_FTRWRITE; break; 5290 case 'X': ftst = OP_FTREXEC; break; 5291 case 'O': ftst = OP_FTROWNED; break; 5292 case 'e': ftst = OP_FTIS; break; 5293 case 'z': ftst = OP_FTZERO; break; 5294 case 's': ftst = OP_FTSIZE; break; 5295 case 'f': ftst = OP_FTFILE; break; 5296 case 'd': ftst = OP_FTDIR; break; 5297 case 'l': ftst = OP_FTLINK; break; 5298 case 'p': ftst = OP_FTPIPE; break; 5299 case 'S': ftst = OP_FTSOCK; break; 5300 case 'u': ftst = OP_FTSUID; break; 5301 case 'g': ftst = OP_FTSGID; break; 5302 case 'k': ftst = OP_FTSVTX; break; 5303 case 'b': ftst = OP_FTBLK; break; 5304 case 'c': ftst = OP_FTCHR; break; 5305 case 't': ftst = OP_FTTTY; break; 5306 case 'T': ftst = OP_FTTEXT; break; 5307 case 'B': ftst = OP_FTBINARY; break; 5308 case 'M': case 'A': case 'C': 5309 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV); 5310 switch (tmp) { 5311 case 'M': ftst = OP_FTMTIME; break; 5312 case 'A': ftst = OP_FTATIME; break; 5313 case 'C': ftst = OP_FTCTIME; break; 5314 default: break; 5315 } 5316 break; 5317 default: 5318 break; 5319 } 5320 if (ftst) { 5321 PL_last_uni = PL_oldbufptr; 5322 PL_last_lop_op = (OPCODE)ftst; 5323 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5324 "### Saw file test %c\n", (int)tmp); 5325 } ); 5326 FTST(ftst); 5327 } 5328 else { 5329 /* Assume it was a minus followed by a one-letter named 5330 * subroutine call (or a -bareword), then. */ 5331 DEBUG_T( { PerlIO_printf(Perl_debug_log, 5332 "### '-%c' looked like a file test but was not\n", 5333 (int) tmp); 5334 } ); 5335 s = --PL_bufptr; 5336 } 5337 } 5338 { 5339 const char tmp = *s++; 5340 if (*s == tmp) { 5341 s++; 5342 if (PL_expect == XOPERATOR) 5343 TERM(POSTDEC); 5344 else 5345 OPERATOR(PREDEC); 5346 } 5347 else if (*s == '>') { 5348 s++; 5349 s = skipspace(s); 5350 if (((*s == '$' || *s == '&') && s[1] == '*') 5351 ||(*s == '$' && s[1] == '#' && s[2] == '*') 5352 ||((*s == '@' || *s == '%') && strchr("*[{", s[1])) 5353 ||(*s == '*' && (s[1] == '*' || s[1] == '{')) 5354 ) 5355 { 5356 PL_expect = XPOSTDEREF; 5357 TOKEN(ARROW); 5358 } 5359 if (isIDFIRST_lazy_if(s,UTF)) { 5360 s = force_word(s,METHOD,FALSE,TRUE); 5361 TOKEN(ARROW); 5362 } 5363 else if (*s == '$') 5364 OPERATOR(ARROW); 5365 else 5366 TERM(ARROW); 5367 } 5368 if (PL_expect == XOPERATOR) { 5369 if (*s == '=' 5370 && !PL_lex_allbrackets 5371 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5372 { 5373 s--; 5374 TOKEN(0); 5375 } 5376 Aop(OP_SUBTRACT); 5377 } 5378 else { 5379 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5380 check_uni(); 5381 OPERATOR('-'); /* unary minus */ 5382 } 5383 } 5384 5385 case '+': 5386 { 5387 const char tmp = *s++; 5388 if (*s == tmp) { 5389 s++; 5390 if (PL_expect == XOPERATOR) 5391 TERM(POSTINC); 5392 else 5393 OPERATOR(PREINC); 5394 } 5395 if (PL_expect == XOPERATOR) { 5396 if (*s == '=' 5397 && !PL_lex_allbrackets 5398 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5399 { 5400 s--; 5401 TOKEN(0); 5402 } 5403 Aop(OP_ADD); 5404 } 5405 else { 5406 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5407 check_uni(); 5408 OPERATOR('+'); 5409 } 5410 } 5411 5412 case '*': 5413 if (PL_expect == XPOSTDEREF) POSTDEREF('*'); 5414 if (PL_expect != XOPERATOR) { 5415 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 5416 PL_expect = XOPERATOR; 5417 force_ident(PL_tokenbuf, '*'); 5418 if (!*PL_tokenbuf) 5419 PREREF('*'); 5420 TERM('*'); 5421 } 5422 s++; 5423 if (*s == '*') { 5424 s++; 5425 if (*s == '=' && !PL_lex_allbrackets 5426 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5427 { 5428 s -= 2; 5429 TOKEN(0); 5430 } 5431 PWop(OP_POW); 5432 } 5433 if (*s == '=' 5434 && !PL_lex_allbrackets 5435 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5436 { 5437 s--; 5438 TOKEN(0); 5439 } 5440 PL_parser->saw_infix_sigil = 1; 5441 Mop(OP_MULTIPLY); 5442 5443 case '%': 5444 { 5445 if (PL_expect == XOPERATOR) { 5446 if (s[1] == '=' 5447 && !PL_lex_allbrackets 5448 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5449 { 5450 TOKEN(0); 5451 } 5452 ++s; 5453 PL_parser->saw_infix_sigil = 1; 5454 Mop(OP_MODULO); 5455 } 5456 else if (PL_expect == XPOSTDEREF) POSTDEREF('%'); 5457 PL_tokenbuf[0] = '%'; 5458 s = scan_ident(s, PL_tokenbuf + 1, 5459 sizeof PL_tokenbuf - 1, FALSE); 5460 pl_yylval.ival = 0; 5461 if (!PL_tokenbuf[1]) { 5462 PREREF('%'); 5463 } 5464 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 5465 if (*s == '[') 5466 PL_tokenbuf[0] = '@'; 5467 } 5468 PL_expect = XOPERATOR; 5469 force_ident_maybe_lex('%'); 5470 TERM('%'); 5471 } 5472 case '^': 5473 d = s; 5474 bof = FEATURE_BITWISE_IS_ENABLED; 5475 if (bof && s[1] == '.') 5476 s++; 5477 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5478 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) 5479 { 5480 s = d; 5481 TOKEN(0); 5482 } 5483 s++; 5484 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR); 5485 case '[': 5486 if (PL_lex_brackets > 100) 5487 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 5488 PL_lex_brackstack[PL_lex_brackets++] = 0; 5489 PL_lex_allbrackets++; 5490 { 5491 const char tmp = *s++; 5492 OPERATOR(tmp); 5493 } 5494 case '~': 5495 if (s[1] == '~' 5496 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) 5497 { 5498 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 5499 TOKEN(0); 5500 s += 2; 5501 Perl_ck_warner_d(aTHX_ 5502 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 5503 "Smartmatch is experimental"); 5504 Eop(OP_SMARTMATCH); 5505 } 5506 s++; 5507 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') { 5508 s++; 5509 BCop(OP_SCOMPLEMENT); 5510 } 5511 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT); 5512 case ',': 5513 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 5514 TOKEN(0); 5515 s++; 5516 OPERATOR(','); 5517 case ':': 5518 if (s[1] == ':') { 5519 len = 0; 5520 goto just_a_word_zero_gv; 5521 } 5522 s++; 5523 { 5524 OP *attrs; 5525 5526 switch (PL_expect) { 5527 case XOPERATOR: 5528 if (!PL_in_my || PL_lex_state != LEX_NORMAL) 5529 break; 5530 PL_bufptr = s; /* update in case we back off */ 5531 if (*s == '=') { 5532 Perl_croak(aTHX_ 5533 "Use of := for an empty attribute list is not allowed"); 5534 } 5535 goto grabattrs; 5536 case XATTRBLOCK: 5537 PL_expect = XBLOCK; 5538 goto grabattrs; 5539 case XATTRTERM: 5540 PL_expect = XTERMBLOCK; 5541 grabattrs: 5542 s = skipspace(s); 5543 attrs = NULL; 5544 while (isIDFIRST_lazy_if(s,UTF)) { 5545 I32 tmp; 5546 SV *sv; 5547 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 5548 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { 5549 if (tmp < 0) tmp = -tmp; 5550 switch (tmp) { 5551 case KEY_or: 5552 case KEY_and: 5553 case KEY_for: 5554 case KEY_foreach: 5555 case KEY_unless: 5556 case KEY_if: 5557 case KEY_while: 5558 case KEY_until: 5559 goto got_attrs; 5560 default: 5561 break; 5562 } 5563 } 5564 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); 5565 if (*d == '(') { 5566 d = scan_str(d,TRUE,TRUE,FALSE,NULL); 5567 COPLINE_SET_FROM_MULTI_END; 5568 if (!d) { 5569 /* MUST advance bufptr here to avoid bogus 5570 "at end of line" context messages from yyerror(). 5571 */ 5572 PL_bufptr = s + len; 5573 yyerror("Unterminated attribute parameter in attribute list"); 5574 if (attrs) 5575 op_free(attrs); 5576 sv_free(sv); 5577 return REPORT(0); /* EOF indicator */ 5578 } 5579 } 5580 if (PL_lex_stuff) { 5581 sv_catsv(sv, PL_lex_stuff); 5582 attrs = op_append_elem(OP_LIST, attrs, 5583 newSVOP(OP_CONST, 0, sv)); 5584 SvREFCNT_dec_NN(PL_lex_stuff); 5585 PL_lex_stuff = NULL; 5586 } 5587 else { 5588 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { 5589 sv_free(sv); 5590 if (PL_in_my == KEY_our) { 5591 deprecate(":unique"); 5592 } 5593 else 5594 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables"); 5595 } 5596 5597 /* NOTE: any CV attrs applied here need to be part of 5598 the CVf_BUILTIN_ATTRS define in cv.h! */ 5599 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) { 5600 sv_free(sv); 5601 CvLVALUE_on(PL_compcv); 5602 } 5603 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { 5604 sv_free(sv); 5605 deprecate(":locked"); 5606 } 5607 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { 5608 sv_free(sv); 5609 CvMETHOD_on(PL_compcv); 5610 } 5611 else if (!PL_in_my && len == 5 5612 && strnEQ(SvPVX(sv), "const", len)) 5613 { 5614 sv_free(sv); 5615 Perl_ck_warner_d(aTHX_ 5616 packWARN(WARN_EXPERIMENTAL__CONST_ATTR), 5617 ":const is experimental" 5618 ); 5619 CvANONCONST_on(PL_compcv); 5620 if (!CvANON(PL_compcv)) 5621 yyerror(":const is not permitted on named " 5622 "subroutines"); 5623 } 5624 /* After we've set the flags, it could be argued that 5625 we don't need to do the attributes.pm-based setting 5626 process, and shouldn't bother appending recognized 5627 flags. To experiment with that, uncomment the 5628 following "else". (Note that's already been 5629 uncommented. That keeps the above-applied built-in 5630 attributes from being intercepted (and possibly 5631 rejected) by a package's attribute routines, but is 5632 justified by the performance win for the common case 5633 of applying only built-in attributes.) */ 5634 else 5635 attrs = op_append_elem(OP_LIST, attrs, 5636 newSVOP(OP_CONST, 0, 5637 sv)); 5638 } 5639 s = skipspace(d); 5640 if (*s == ':' && s[1] != ':') 5641 s = skipspace(s+1); 5642 else if (s == d) 5643 break; /* require real whitespace or :'s */ 5644 /* XXX losing whitespace on sequential attributes here */ 5645 } 5646 { 5647 if (*s != ';' 5648 && *s != '}' 5649 && !(PL_expect == XOPERATOR 5650 ? (*s == '=' || *s == ')') 5651 : (*s == '{' || *s == '('))) 5652 { 5653 const char q = ((*s == '\'') ? '"' : '\''); 5654 /* If here for an expression, and parsed no attrs, back 5655 off. */ 5656 if (PL_expect == XOPERATOR && !attrs) { 5657 s = PL_bufptr; 5658 break; 5659 } 5660 /* MUST advance bufptr here to avoid bogus "at end of line" 5661 context messages from yyerror(). 5662 */ 5663 PL_bufptr = s; 5664 yyerror( (const char *) 5665 (*s 5666 ? Perl_form(aTHX_ "Invalid separator character " 5667 "%c%c%c in attribute list", q, *s, q) 5668 : "Unterminated attribute list" ) ); 5669 if (attrs) 5670 op_free(attrs); 5671 OPERATOR(':'); 5672 } 5673 } 5674 got_attrs: 5675 if (attrs) { 5676 NEXTVAL_NEXTTOKE.opval = attrs; 5677 force_next(THING); 5678 } 5679 TOKEN(COLONATTR); 5680 } 5681 } 5682 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) { 5683 s--; 5684 TOKEN(0); 5685 } 5686 PL_lex_allbrackets--; 5687 OPERATOR(':'); 5688 case '(': 5689 s++; 5690 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 5691 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 5692 else 5693 PL_expect = XTERM; 5694 s = skipspace(s); 5695 PL_lex_allbrackets++; 5696 TOKEN('('); 5697 case ';': 5698 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 5699 TOKEN(0); 5700 CLINE; 5701 s++; 5702 PL_expect = XSTATE; 5703 TOKEN(';'); 5704 case ')': 5705 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) 5706 TOKEN(0); 5707 s++; 5708 PL_lex_allbrackets--; 5709 s = skipspace(s); 5710 if (*s == '{') 5711 PREBLOCK(')'); 5712 TERM(')'); 5713 case ']': 5714 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 5715 TOKEN(0); 5716 s++; 5717 if (PL_lex_brackets <= 0) 5718 /* diag_listed_as: Unmatched right %s bracket */ 5719 yyerror("Unmatched right square bracket"); 5720 else 5721 --PL_lex_brackets; 5722 PL_lex_allbrackets--; 5723 if (PL_lex_state == LEX_INTERPNORMAL) { 5724 if (PL_lex_brackets == 0) { 5725 if (*s == '-' && s[1] == '>') 5726 PL_lex_state = LEX_INTERPENDMAYBE; 5727 else if (*s != '[' && *s != '{') 5728 PL_lex_state = LEX_INTERPEND; 5729 } 5730 } 5731 TERM(']'); 5732 case '{': 5733 s++; 5734 leftbracket: 5735 if (PL_lex_brackets > 100) { 5736 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 5737 } 5738 switch (PL_expect) { 5739 case XTERM: 5740 case XTERMORDORDOR: 5741 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5742 PL_lex_allbrackets++; 5743 OPERATOR(HASHBRACK); 5744 case XOPERATOR: 5745 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5746 s++; 5747 d = s; 5748 PL_tokenbuf[0] = '\0'; 5749 if (d < PL_bufend && *d == '-') { 5750 PL_tokenbuf[0] = '-'; 5751 d++; 5752 while (d < PL_bufend && SPACE_OR_TAB(*d)) 5753 d++; 5754 } 5755 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) { 5756 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 5757 FALSE, &len); 5758 while (d < PL_bufend && SPACE_OR_TAB(*d)) 5759 d++; 5760 if (*d == '}') { 5761 const char minus = (PL_tokenbuf[0] == '-'); 5762 s = force_word(s + minus, WORD, FALSE, TRUE); 5763 if (minus) 5764 force_next('-'); 5765 } 5766 } 5767 /* FALLTHROUGH */ 5768 case XATTRTERM: 5769 case XTERMBLOCK: 5770 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5771 PL_lex_allbrackets++; 5772 PL_expect = XSTATE; 5773 break; 5774 case XATTRBLOCK: 5775 case XBLOCK: 5776 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 5777 PL_lex_allbrackets++; 5778 PL_expect = XSTATE; 5779 break; 5780 case XBLOCKTERM: 5781 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 5782 PL_lex_allbrackets++; 5783 PL_expect = XSTATE; 5784 break; 5785 default: { 5786 const char *t; 5787 if (PL_oldoldbufptr == PL_last_lop) 5788 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 5789 else 5790 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 5791 PL_lex_allbrackets++; 5792 s = skipspace(s); 5793 if (*s == '}') { 5794 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 5795 PL_expect = XTERM; 5796 /* This hack is to get the ${} in the message. */ 5797 PL_bufptr = s+1; 5798 yyerror("syntax error"); 5799 break; 5800 } 5801 OPERATOR(HASHBRACK); 5802 } 5803 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { 5804 /* ${...} or @{...} etc., but not print {...} 5805 * Skip the disambiguation and treat this as a block. 5806 */ 5807 goto block_expectation; 5808 } 5809 /* This hack serves to disambiguate a pair of curlies 5810 * as being a block or an anon hash. Normally, expectation 5811 * determines that, but in cases where we're not in a 5812 * position to expect anything in particular (like inside 5813 * eval"") we have to resolve the ambiguity. This code 5814 * covers the case where the first term in the curlies is a 5815 * quoted string. Most other cases need to be explicitly 5816 * disambiguated by prepending a "+" before the opening 5817 * curly in order to force resolution as an anon hash. 5818 * 5819 * XXX should probably propagate the outer expectation 5820 * into eval"" to rely less on this hack, but that could 5821 * potentially break current behavior of eval"". 5822 * GSAR 97-07-21 5823 */ 5824 t = s; 5825 if (*s == '\'' || *s == '"' || *s == '`') { 5826 /* common case: get past first string, handling escapes */ 5827 for (t++; t < PL_bufend && *t != *s;) 5828 if (*t++ == '\\') 5829 t++; 5830 t++; 5831 } 5832 else if (*s == 'q') { 5833 if (++t < PL_bufend 5834 && (!isWORDCHAR(*t) 5835 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 5836 && !isWORDCHAR(*t)))) 5837 { 5838 /* skip q//-like construct */ 5839 const char *tmps; 5840 char open, close, term; 5841 I32 brackets = 1; 5842 5843 while (t < PL_bufend && isSPACE(*t)) 5844 t++; 5845 /* check for q => */ 5846 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { 5847 OPERATOR(HASHBRACK); 5848 } 5849 term = *t; 5850 open = term; 5851 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 5852 term = tmps[5]; 5853 close = term; 5854 if (open == close) 5855 for (t++; t < PL_bufend; t++) { 5856 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 5857 t++; 5858 else if (*t == open) 5859 break; 5860 } 5861 else { 5862 for (t++; t < PL_bufend; t++) { 5863 if (*t == '\\' && t+1 < PL_bufend) 5864 t++; 5865 else if (*t == close && --brackets <= 0) 5866 break; 5867 else if (*t == open) 5868 brackets++; 5869 } 5870 } 5871 t++; 5872 } 5873 else 5874 /* skip plain q word */ 5875 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) 5876 t += UTF ? UTF8SKIP(t) : 1; 5877 } 5878 else if (isWORDCHAR_lazy_if(t,UTF)) { 5879 t += UTF ? UTF8SKIP(t) : 1; 5880 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) 5881 t += UTF ? UTF8SKIP(t) : 1; 5882 } 5883 while (t < PL_bufend && isSPACE(*t)) 5884 t++; 5885 /* if comma follows first term, call it an anon hash */ 5886 /* XXX it could be a comma expression with loop modifiers */ 5887 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 5888 || (*t == '=' && t[1] == '>'))) 5889 OPERATOR(HASHBRACK); 5890 if (PL_expect == XREF) 5891 { 5892 block_expectation: 5893 /* If there is an opening brace or 'sub:', treat it 5894 as a term to make ${{...}}{k} and &{sub:attr...} 5895 dwim. Otherwise, treat it as a statement, so 5896 map {no strict; ...} works. 5897 */ 5898 s = skipspace(s); 5899 if (*s == '{') { 5900 PL_expect = XTERM; 5901 break; 5902 } 5903 if (strnEQ(s, "sub", 3)) { 5904 d = s + 3; 5905 d = skipspace(d); 5906 if (*d == ':') { 5907 PL_expect = XTERM; 5908 break; 5909 } 5910 } 5911 PL_expect = XSTATE; 5912 } 5913 else { 5914 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 5915 PL_expect = XSTATE; 5916 } 5917 } 5918 break; 5919 } 5920 pl_yylval.ival = CopLINE(PL_curcop); 5921 PL_copline = NOLINE; /* invalidate current command line number */ 5922 TOKEN(formbrack ? '=' : '{'); 5923 case '}': 5924 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 5925 TOKEN(0); 5926 rightbracket: 5927 s++; 5928 if (PL_lex_brackets <= 0) 5929 /* diag_listed_as: Unmatched right %s bracket */ 5930 yyerror("Unmatched right curly bracket"); 5931 else 5932 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 5933 PL_lex_allbrackets--; 5934 if (PL_lex_state == LEX_INTERPNORMAL) { 5935 if (PL_lex_brackets == 0) { 5936 if (PL_expect & XFAKEBRACK) { 5937 PL_expect &= XENUMMASK; 5938 PL_lex_state = LEX_INTERPEND; 5939 PL_bufptr = s; 5940 return yylex(); /* ignore fake brackets */ 5941 } 5942 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 5943 && SvEVALED(PL_lex_repl)) 5944 PL_lex_state = LEX_INTERPEND; 5945 else if (*s == '-' && s[1] == '>') 5946 PL_lex_state = LEX_INTERPENDMAYBE; 5947 else if (*s != '[' && *s != '{') 5948 PL_lex_state = LEX_INTERPEND; 5949 } 5950 } 5951 if (PL_expect & XFAKEBRACK) { 5952 PL_expect &= XENUMMASK; 5953 PL_bufptr = s; 5954 return yylex(); /* ignore fake brackets */ 5955 } 5956 force_next(formbrack ? '.' : '}'); 5957 if (formbrack) LEAVE; 5958 if (formbrack == 2) { /* means . where arguments were expected */ 5959 force_next(';'); 5960 TOKEN(FORMRBRACK); 5961 } 5962 TOKEN(';'); 5963 case '&': 5964 if (PL_expect == XPOSTDEREF) POSTDEREF('&'); 5965 s++; 5966 if (*s++ == '&') { 5967 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5968 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 5969 s -= 2; 5970 TOKEN(0); 5971 } 5972 AOPERATOR(ANDAND); 5973 } 5974 s--; 5975 if (PL_expect == XOPERATOR) { 5976 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON) 5977 && isIDFIRST_lazy_if(s,UTF)) 5978 { 5979 CopLINE_dec(PL_curcop); 5980 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 5981 CopLINE_inc(PL_curcop); 5982 } 5983 d = s; 5984 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 5985 s++; 5986 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5987 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 5988 s = d; 5989 s--; 5990 TOKEN(0); 5991 } 5992 if (d == s) { 5993 PL_parser->saw_infix_sigil = 1; 5994 BAop(bof ? OP_NBIT_AND : OP_BIT_AND); 5995 } 5996 else 5997 BAop(OP_SBIT_AND); 5998 } 5999 6000 PL_tokenbuf[0] = '&'; 6001 s = scan_ident(s - 1, PL_tokenbuf + 1, 6002 sizeof PL_tokenbuf - 1, TRUE); 6003 pl_yylval.ival = (OPpENTERSUB_AMPER<<8); 6004 if (PL_tokenbuf[1]) { 6005 force_ident_maybe_lex('&'); 6006 } 6007 else 6008 PREREF('&'); 6009 TERM('&'); 6010 6011 case '|': 6012 s++; 6013 if (*s++ == '|') { 6014 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6015 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6016 s -= 2; 6017 TOKEN(0); 6018 } 6019 AOPERATOR(OROR); 6020 } 6021 s--; 6022 d = s; 6023 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 6024 s++; 6025 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6026 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6027 s = d - 1; 6028 TOKEN(0); 6029 } 6030 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR); 6031 case '=': 6032 s++; 6033 { 6034 const char tmp = *s++; 6035 if (tmp == '=') { 6036 if (!PL_lex_allbrackets 6037 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6038 { 6039 s -= 2; 6040 TOKEN(0); 6041 } 6042 Eop(OP_EQ); 6043 } 6044 if (tmp == '>') { 6045 if (!PL_lex_allbrackets 6046 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 6047 { 6048 s -= 2; 6049 TOKEN(0); 6050 } 6051 OPERATOR(','); 6052 } 6053 if (tmp == '~') 6054 PMop(OP_MATCH); 6055 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) 6056 && strchr("+-*/%.^&|<",tmp)) 6057 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6058 "Reversed %c= operator",(int)tmp); 6059 s--; 6060 if (PL_expect == XSTATE 6061 && isALPHA(tmp) 6062 && (s == PL_linestart+1 || s[-2] == '\n') ) 6063 { 6064 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered) 6065 || PL_lex_state != LEX_NORMAL) { 6066 d = PL_bufend; 6067 while (s < d) { 6068 if (*s++ == '\n') { 6069 incline(s); 6070 if (strnEQ(s,"=cut",4)) { 6071 s = strchr(s,'\n'); 6072 if (s) 6073 s++; 6074 else 6075 s = d; 6076 incline(s); 6077 goto retry; 6078 } 6079 } 6080 } 6081 goto retry; 6082 } 6083 s = PL_bufend; 6084 PL_parser->in_pod = 1; 6085 goto retry; 6086 } 6087 } 6088 if (PL_expect == XBLOCK) { 6089 const char *t = s; 6090#ifdef PERL_STRICT_CR 6091 while (SPACE_OR_TAB(*t)) 6092#else 6093 while (SPACE_OR_TAB(*t) || *t == '\r') 6094#endif 6095 t++; 6096 if (*t == '\n' || *t == '#') { 6097 formbrack = 1; 6098 ENTER; 6099 SAVEI8(PL_parser->form_lex_state); 6100 SAVEI32(PL_lex_formbrack); 6101 PL_parser->form_lex_state = PL_lex_state; 6102 PL_lex_formbrack = PL_lex_brackets + 1; 6103 goto leftbracket; 6104 } 6105 } 6106 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6107 s--; 6108 TOKEN(0); 6109 } 6110 pl_yylval.ival = 0; 6111 OPERATOR(ASSIGNOP); 6112 case '!': 6113 s++; 6114 { 6115 const char tmp = *s++; 6116 if (tmp == '=') { 6117 /* was this !=~ where !~ was meant? 6118 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ 6119 6120 if (*s == '~' && ckWARN(WARN_SYNTAX)) { 6121 const char *t = s+1; 6122 6123 while (t < PL_bufend && isSPACE(*t)) 6124 ++t; 6125 6126 if (*t == '/' || *t == '?' 6127 || ((*t == 'm' || *t == 's' || *t == 'y') 6128 && !isWORDCHAR(t[1])) 6129 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2]))) 6130 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6131 "!=~ should be !~"); 6132 } 6133 if (!PL_lex_allbrackets 6134 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6135 { 6136 s -= 2; 6137 TOKEN(0); 6138 } 6139 Eop(OP_NE); 6140 } 6141 if (tmp == '~') 6142 PMop(OP_NOT); 6143 } 6144 s--; 6145 OPERATOR('!'); 6146 case '<': 6147 if (PL_expect != XOPERATOR) { 6148 if (s[1] != '<' && !strchr(s,'>')) 6149 check_uni(); 6150 if (s[1] == '<' && s[2] != '>') 6151 s = scan_heredoc(s); 6152 else 6153 s = scan_inputsymbol(s); 6154 PL_expect = XOPERATOR; 6155 TOKEN(sublex_start()); 6156 } 6157 s++; 6158 { 6159 char tmp = *s++; 6160 if (tmp == '<') { 6161 if (*s == '=' && !PL_lex_allbrackets 6162 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6163 { 6164 s -= 2; 6165 TOKEN(0); 6166 } 6167 SHop(OP_LEFT_SHIFT); 6168 } 6169 if (tmp == '=') { 6170 tmp = *s++; 6171 if (tmp == '>') { 6172 if (!PL_lex_allbrackets 6173 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6174 { 6175 s -= 3; 6176 TOKEN(0); 6177 } 6178 Eop(OP_NCMP); 6179 } 6180 s--; 6181 if (!PL_lex_allbrackets 6182 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6183 { 6184 s -= 2; 6185 TOKEN(0); 6186 } 6187 Rop(OP_LE); 6188 } 6189 } 6190 s--; 6191 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6192 s--; 6193 TOKEN(0); 6194 } 6195 Rop(OP_LT); 6196 case '>': 6197 s++; 6198 { 6199 const char tmp = *s++; 6200 if (tmp == '>') { 6201 if (*s == '=' && !PL_lex_allbrackets 6202 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6203 { 6204 s -= 2; 6205 TOKEN(0); 6206 } 6207 SHop(OP_RIGHT_SHIFT); 6208 } 6209 else if (tmp == '=') { 6210 if (!PL_lex_allbrackets 6211 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6212 { 6213 s -= 2; 6214 TOKEN(0); 6215 } 6216 Rop(OP_GE); 6217 } 6218 } 6219 s--; 6220 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6221 s--; 6222 TOKEN(0); 6223 } 6224 Rop(OP_GT); 6225 6226 case '$': 6227 CLINE; 6228 6229 if (PL_expect == XOPERATOR) { 6230 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6231 return deprecate_commaless_var_list(); 6232 } 6233 } 6234 else if (PL_expect == XPOSTDEREF) { 6235 if (s[1] == '#') { 6236 s++; 6237 POSTDEREF(DOLSHARP); 6238 } 6239 POSTDEREF('$'); 6240 } 6241 6242 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) { 6243 PL_tokenbuf[0] = '@'; 6244 s = scan_ident(s + 1, PL_tokenbuf + 1, 6245 sizeof PL_tokenbuf - 1, FALSE); 6246 if (PL_expect == XOPERATOR) { 6247 d = s; 6248 if (PL_bufptr > s) { 6249 d = PL_bufptr-1; 6250 PL_bufptr = PL_oldbufptr; 6251 } 6252 no_op("Array length", d); 6253 } 6254 if (!PL_tokenbuf[1]) 6255 PREREF(DOLSHARP); 6256 PL_expect = XOPERATOR; 6257 force_ident_maybe_lex('#'); 6258 TOKEN(DOLSHARP); 6259 } 6260 6261 PL_tokenbuf[0] = '$'; 6262 s = scan_ident(s, PL_tokenbuf + 1, 6263 sizeof PL_tokenbuf - 1, FALSE); 6264 if (PL_expect == XOPERATOR) { 6265 d = s; 6266 if (PL_bufptr > s) { 6267 d = PL_bufptr-1; 6268 PL_bufptr = PL_oldbufptr; 6269 } 6270 no_op("Scalar", d); 6271 } 6272 if (!PL_tokenbuf[1]) { 6273 if (s == PL_bufend) 6274 yyerror("Final $ should be \\$ or $name"); 6275 PREREF('$'); 6276 } 6277 6278 d = s; 6279 { 6280 const char tmp = *s; 6281 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 6282 s = skipspace(s); 6283 6284 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 6285 && intuit_more(s)) { 6286 if (*s == '[') { 6287 PL_tokenbuf[0] = '@'; 6288 if (ckWARN(WARN_SYNTAX)) { 6289 char *t = s+1; 6290 6291 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$') 6292 t += UTF ? UTF8SKIP(t) : 1; 6293 if (*t++ == ',') { 6294 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */ 6295 while (t < PL_bufend && *t != ']') 6296 t++; 6297 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6298 "Multidimensional syntax %"UTF8f" not supported", 6299 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr)); 6300 } 6301 } 6302 } 6303 else if (*s == '{') { 6304 char *t; 6305 PL_tokenbuf[0] = '%'; 6306 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX) 6307 && (t = strchr(s, '}')) && (t = strchr(t, '='))) 6308 { 6309 char tmpbuf[sizeof PL_tokenbuf]; 6310 do { 6311 t++; 6312 } while (isSPACE(*t)); 6313 if (isIDFIRST_lazy_if(t,UTF)) { 6314 STRLEN len; 6315 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, 6316 &len); 6317 while (isSPACE(*t)) 6318 t++; 6319 if (*t == ';' 6320 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0)) 6321 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6322 "You need to quote \"%"UTF8f"\"", 6323 UTF8fARG(UTF, len, tmpbuf)); 6324 } 6325 } 6326 } 6327 } 6328 6329 PL_expect = XOPERATOR; 6330 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) { 6331 const bool islop = (PL_last_lop == PL_oldoldbufptr); 6332 if (!islop || PL_last_lop_op == OP_GREPSTART) 6333 PL_expect = XOPERATOR; 6334 else if (strchr("$@\"'`q", *s)) 6335 PL_expect = XTERM; /* e.g. print $fh "foo" */ 6336 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF)) 6337 PL_expect = XTERM; /* e.g. print $fh &sub */ 6338 else if (isIDFIRST_lazy_if(s,UTF)) { 6339 char tmpbuf[sizeof PL_tokenbuf]; 6340 int t2; 6341 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); 6342 if ((t2 = keyword(tmpbuf, len, 0))) { 6343 /* binary operators exclude handle interpretations */ 6344 switch (t2) { 6345 case -KEY_x: 6346 case -KEY_eq: 6347 case -KEY_ne: 6348 case -KEY_gt: 6349 case -KEY_lt: 6350 case -KEY_ge: 6351 case -KEY_le: 6352 case -KEY_cmp: 6353 break; 6354 default: 6355 PL_expect = XTERM; /* e.g. print $fh length() */ 6356 break; 6357 } 6358 } 6359 else { 6360 PL_expect = XTERM; /* e.g. print $fh subr() */ 6361 } 6362 } 6363 else if (isDIGIT(*s)) 6364 PL_expect = XTERM; /* e.g. print $fh 3 */ 6365 else if (*s == '.' && isDIGIT(s[1])) 6366 PL_expect = XTERM; /* e.g. print $fh .3 */ 6367 else if ((*s == '?' || *s == '-' || *s == '+') 6368 && !isSPACE(s[1]) && s[1] != '=') 6369 PL_expect = XTERM; /* e.g. print $fh -1 */ 6370 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' 6371 && s[1] != '/') 6372 PL_expect = XTERM; /* e.g. print $fh /.../ 6373 XXX except DORDOR operator 6374 */ 6375 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) 6376 && s[2] != '=') 6377 PL_expect = XTERM; /* print $fh <<"EOF" */ 6378 } 6379 } 6380 force_ident_maybe_lex('$'); 6381 TOKEN('$'); 6382 6383 case '@': 6384 if (PL_expect == XPOSTDEREF) 6385 POSTDEREF('@'); 6386 PL_tokenbuf[0] = '@'; 6387 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 6388 if (PL_expect == XOPERATOR) { 6389 d = s; 6390 if (PL_bufptr > s) { 6391 d = PL_bufptr-1; 6392 PL_bufptr = PL_oldbufptr; 6393 } 6394 no_op("Array", d); 6395 } 6396 pl_yylval.ival = 0; 6397 if (!PL_tokenbuf[1]) { 6398 PREREF('@'); 6399 } 6400 if (PL_lex_state == LEX_NORMAL) 6401 s = skipspace(s); 6402 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { 6403 if (*s == '{') 6404 PL_tokenbuf[0] = '%'; 6405 6406 /* Warn about @ where they meant $. */ 6407 if (*s == '[' || *s == '{') { 6408 if (ckWARN(WARN_SYNTAX)) { 6409 S_check_scalar_slice(aTHX_ s); 6410 } 6411 } 6412 } 6413 PL_expect = XOPERATOR; 6414 force_ident_maybe_lex('@'); 6415 TERM('@'); 6416 6417 case '/': /* may be division, defined-or, or pattern */ 6418 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') { 6419 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6420 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) 6421 TOKEN(0); 6422 s += 2; 6423 AOPERATOR(DORDOR); 6424 } 6425 else if (PL_expect == XOPERATOR) { 6426 s++; 6427 if (*s == '=' && !PL_lex_allbrackets 6428 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6429 { 6430 s--; 6431 TOKEN(0); 6432 } 6433 Mop(OP_DIVIDE); 6434 } 6435 else { 6436 /* Disable warning on "study /blah/" */ 6437 if (PL_oldoldbufptr == PL_last_uni 6438 && (*PL_last_uni != 's' || s - PL_last_uni < 5 6439 || memNE(PL_last_uni, "study", 5) 6440 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF) 6441 )) 6442 check_uni(); 6443 s = scan_pat(s,OP_MATCH); 6444 TERM(sublex_start()); 6445 } 6446 6447 case '?': /* conditional */ 6448 s++; 6449 if (!PL_lex_allbrackets 6450 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) 6451 { 6452 s--; 6453 TOKEN(0); 6454 } 6455 PL_lex_allbrackets++; 6456 OPERATOR('?'); 6457 6458 case '.': 6459 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 6460#ifdef PERL_STRICT_CR 6461 && s[1] == '\n' 6462#else 6463 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 6464#endif 6465 && (s == PL_linestart || s[-1] == '\n') ) 6466 { 6467 PL_expect = XSTATE; 6468 formbrack = 2; /* dot seen where arguments expected */ 6469 goto rightbracket; 6470 } 6471 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { 6472 s += 3; 6473 OPERATOR(YADAYADA); 6474 } 6475 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 6476 char tmp = *s++; 6477 if (*s == tmp) { 6478 if (!PL_lex_allbrackets 6479 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) 6480 { 6481 s--; 6482 TOKEN(0); 6483 } 6484 s++; 6485 if (*s == tmp) { 6486 s++; 6487 pl_yylval.ival = OPf_SPECIAL; 6488 } 6489 else 6490 pl_yylval.ival = 0; 6491 OPERATOR(DOTDOT); 6492 } 6493 if (*s == '=' && !PL_lex_allbrackets 6494 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6495 { 6496 s--; 6497 TOKEN(0); 6498 } 6499 Aop(OP_CONCAT); 6500 } 6501 /* FALLTHROUGH */ 6502 case '0': case '1': case '2': case '3': case '4': 6503 case '5': case '6': case '7': case '8': case '9': 6504 s = scan_num(s, &pl_yylval); 6505 DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); 6506 if (PL_expect == XOPERATOR) 6507 no_op("Number",s); 6508 TERM(THING); 6509 6510 case '\'': 6511 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6512 if (!s) 6513 missingterm(NULL); 6514 COPLINE_SET_FROM_MULTI_END; 6515 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 6516 if (PL_expect == XOPERATOR) { 6517 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6518 return deprecate_commaless_var_list(); 6519 } 6520 else 6521 no_op("String",s); 6522 } 6523 pl_yylval.ival = OP_CONST; 6524 TERM(sublex_start()); 6525 6526 case '"': 6527 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6528 DEBUG_T( { 6529 if (s) 6530 printbuf("### Saw string before %s\n", s); 6531 else 6532 PerlIO_printf(Perl_debug_log, 6533 "### Saw unterminated string\n"); 6534 } ); 6535 if (PL_expect == XOPERATOR) { 6536 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { 6537 return deprecate_commaless_var_list(); 6538 } 6539 else 6540 no_op("String",s); 6541 } 6542 if (!s) 6543 missingterm(NULL); 6544 pl_yylval.ival = OP_CONST; 6545 /* FIXME. I think that this can be const if char *d is replaced by 6546 more localised variables. */ 6547 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 6548 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 6549 pl_yylval.ival = OP_STRINGIFY; 6550 break; 6551 } 6552 } 6553 if (pl_yylval.ival == OP_CONST) 6554 COPLINE_SET_FROM_MULTI_END; 6555 TERM(sublex_start()); 6556 6557 case '`': 6558 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6559 DEBUG_T( { 6560 if (s) 6561 printbuf("### Saw backtick string before %s\n", s); 6562 else 6563 PerlIO_printf(Perl_debug_log, 6564 "### Saw unterminated backtick string\n"); 6565 } ); 6566 if (PL_expect == XOPERATOR) 6567 no_op("Backticks",s); 6568 if (!s) 6569 missingterm(NULL); 6570 pl_yylval.ival = OP_BACKTICK; 6571 TERM(sublex_start()); 6572 6573 case '\\': 6574 s++; 6575 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6576 && isDIGIT(*s)) 6577 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 6578 *s, *s); 6579 if (PL_expect == XOPERATOR) 6580 no_op("Backslash",s); 6581 OPERATOR(REFGEN); 6582 6583 case 'v': 6584 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 6585 char *start = s + 2; 6586 while (isDIGIT(*start) || *start == '_') 6587 start++; 6588 if (*start == '.' && isDIGIT(start[1])) { 6589 s = scan_num(s, &pl_yylval); 6590 TERM(THING); 6591 } 6592 else if ((*start == ':' && start[1] == ':') 6593 || (PL_expect == XSTATE && *start == ':')) 6594 goto keylookup; 6595 else if (PL_expect == XSTATE) { 6596 d = start; 6597 while (d < PL_bufend && isSPACE(*d)) d++; 6598 if (*d == ':') goto keylookup; 6599 } 6600 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 6601 if (!isALPHA(*start) && (PL_expect == XTERM 6602 || PL_expect == XREF || PL_expect == XSTATE 6603 || PL_expect == XTERMORDORDOR)) { 6604 GV *const gv = gv_fetchpvn_flags(s, start - s, 6605 UTF ? SVf_UTF8 : 0, SVt_PVCV); 6606 if (!gv) { 6607 s = scan_num(s, &pl_yylval); 6608 TERM(THING); 6609 } 6610 } 6611 } 6612 goto keylookup; 6613 case 'x': 6614 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 6615 s++; 6616 Mop(OP_REPEAT); 6617 } 6618 goto keylookup; 6619 6620 case '_': 6621 case 'a': case 'A': 6622 case 'b': case 'B': 6623 case 'c': case 'C': 6624 case 'd': case 'D': 6625 case 'e': case 'E': 6626 case 'f': case 'F': 6627 case 'g': case 'G': 6628 case 'h': case 'H': 6629 case 'i': case 'I': 6630 case 'j': case 'J': 6631 case 'k': case 'K': 6632 case 'l': case 'L': 6633 case 'm': case 'M': 6634 case 'n': case 'N': 6635 case 'o': case 'O': 6636 case 'p': case 'P': 6637 case 'q': case 'Q': 6638 case 'r': case 'R': 6639 case 's': case 'S': 6640 case 't': case 'T': 6641 case 'u': case 'U': 6642 case 'V': 6643 case 'w': case 'W': 6644 case 'X': 6645 case 'y': case 'Y': 6646 case 'z': case 'Z': 6647 6648 keylookup: { 6649 bool anydelim; 6650 bool lex; 6651 I32 tmp; 6652 SV *sv; 6653 CV *cv; 6654 PADOFFSET off; 6655 OP *rv2cv_op; 6656 6657 lex = FALSE; 6658 orig_keyword = 0; 6659 off = 0; 6660 sv = NULL; 6661 cv = NULL; 6662 gv = NULL; 6663 gvp = NULL; 6664 rv2cv_op = NULL; 6665 6666 PL_bufptr = s; 6667 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 6668 6669 /* Some keywords can be followed by any delimiter, including ':' */ 6670 anydelim = word_takes_any_delimeter(PL_tokenbuf, len); 6671 6672 /* x::* is just a word, unless x is "CORE" */ 6673 if (!anydelim && *s == ':' && s[1] == ':') { 6674 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE; 6675 goto just_a_word; 6676 } 6677 6678 d = s; 6679 while (d < PL_bufend && isSPACE(*d)) 6680 d++; /* no comments skipped here, or s### is misparsed */ 6681 6682 /* Is this a word before a => operator? */ 6683 if (*d == '=' && d[1] == '>') { 6684 fat_arrow: 6685 CLINE; 6686 pl_yylval.opval 6687 = (OP*)newSVOP(OP_CONST, 0, 6688 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 6689 pl_yylval.opval->op_private = OPpCONST_BARE; 6690 TERM(WORD); 6691 } 6692 6693 /* Check for plugged-in keyword */ 6694 { 6695 OP *o; 6696 int result; 6697 char *saved_bufptr = PL_bufptr; 6698 PL_bufptr = s; 6699 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); 6700 s = PL_bufptr; 6701 if (result == KEYWORD_PLUGIN_DECLINE) { 6702 /* not a plugged-in keyword */ 6703 PL_bufptr = saved_bufptr; 6704 } else if (result == KEYWORD_PLUGIN_STMT) { 6705 pl_yylval.opval = o; 6706 CLINE; 6707 if (!PL_nexttoke) PL_expect = XSTATE; 6708 return REPORT(PLUGSTMT); 6709 } else if (result == KEYWORD_PLUGIN_EXPR) { 6710 pl_yylval.opval = o; 6711 CLINE; 6712 if (!PL_nexttoke) PL_expect = XOPERATOR; 6713 return REPORT(PLUGEXPR); 6714 } else { 6715 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", 6716 PL_tokenbuf); 6717 } 6718 } 6719 6720 /* Check for built-in keyword */ 6721 tmp = keyword(PL_tokenbuf, len, 0); 6722 6723 /* Is this a label? */ 6724 if (!anydelim && PL_expect == XSTATE 6725 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 6726 s = d + 1; 6727 pl_yylval.pval = savepvn(PL_tokenbuf, len+1); 6728 pl_yylval.pval[len] = '\0'; 6729 pl_yylval.pval[len+1] = UTF ? 1 : 0; 6730 CLINE; 6731 TOKEN(LABEL); 6732 } 6733 6734 /* Check for lexical sub */ 6735 if (PL_expect != XOPERATOR) { 6736 char tmpbuf[sizeof PL_tokenbuf + 1]; 6737 *tmpbuf = '&'; 6738 Copy(PL_tokenbuf, tmpbuf+1, len, char); 6739 off = pad_findmy_pvn(tmpbuf, len+1, 0); 6740 if (off != NOT_IN_PAD) { 6741 assert(off); /* we assume this is boolean-true below */ 6742 if (PAD_COMPNAME_FLAGS_isOUR(off)) { 6743 HV * const stash = PAD_COMPNAME_OURSTASH(off); 6744 HEK * const stashname = HvNAME_HEK(stash); 6745 sv = newSVhek(stashname); 6746 sv_catpvs(sv, "::"); 6747 sv_catpvn_flags(sv, PL_tokenbuf, len, 6748 (UTF ? SV_CATUTF8 : SV_CATBYTES)); 6749 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv), 6750 SVt_PVCV); 6751 off = 0; 6752 if (!gv) { 6753 sv_free(sv); 6754 sv = NULL; 6755 goto just_a_word; 6756 } 6757 } 6758 else { 6759 rv2cv_op = newOP(OP_PADANY, 0); 6760 rv2cv_op->op_targ = off; 6761 cv = find_lexical_cv(off); 6762 } 6763 lex = TRUE; 6764 goto just_a_word; 6765 } 6766 off = 0; 6767 } 6768 6769 if (tmp < 0) { /* second-class keyword? */ 6770 GV *ogv = NULL; /* override (winner) */ 6771 GV *hgv = NULL; /* hidden (loser) */ 6772 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 6773 CV *cv; 6774 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 6775 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL, 6776 SVt_PVCV)) 6777 && (cv = GvCVu(gv))) 6778 { 6779 if (GvIMPORTED_CV(gv)) 6780 ogv = gv; 6781 else if (! CvMETHOD(cv)) 6782 hgv = gv; 6783 } 6784 if (!ogv 6785 && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, 6786 len, FALSE)) 6787 && (gv = *gvp) 6788 && (isGV_with_GP(gv) 6789 ? GvCVu(gv) && GvIMPORTED_CV(gv) 6790 : SvPCS_IMPORTED(gv) 6791 && (gv_init(gv, PL_globalstash, PL_tokenbuf, 6792 len, 0), 1))) 6793 { 6794 ogv = gv; 6795 } 6796 } 6797 if (ogv) { 6798 orig_keyword = tmp; 6799 tmp = 0; /* overridden by import or by GLOBAL */ 6800 } 6801 else if (gv && !gvp 6802 && -tmp==KEY_lock /* XXX generalizable kludge */ 6803 && GvCVu(gv)) 6804 { 6805 tmp = 0; /* any sub overrides "weak" keyword */ 6806 } 6807 else { /* no override */ 6808 tmp = -tmp; 6809 if (tmp == KEY_dump) { 6810 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 6811 "dump() better written as CORE::dump()"); 6812 } 6813 gv = NULL; 6814 gvp = 0; 6815 if (hgv && tmp != KEY_x) /* never ambiguous */ 6816 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 6817 "Ambiguous call resolved as CORE::%s(), " 6818 "qualify as such or use &", 6819 GvENAME(hgv)); 6820 } 6821 } 6822 6823 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ 6824 && (!anydelim || *s != '#')) { 6825 /* no override, and not s### either; skipspace is safe here 6826 * check for => on following line */ 6827 bool arrow; 6828 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); 6829 STRLEN soff = s - SvPVX(PL_linestr); 6830 s = skipspace_flags(s, LEX_NO_INCLINE); 6831 arrow = *s == '=' && s[1] == '>'; 6832 PL_bufptr = SvPVX(PL_linestr) + bufoff; 6833 s = SvPVX(PL_linestr) + soff; 6834 if (arrow) 6835 goto fat_arrow; 6836 } 6837 6838 reserved_word: 6839 switch (tmp) { 6840 6841 default: /* not a keyword */ 6842 /* Trade off - by using this evil construction we can pull the 6843 variable gv into the block labelled keylookup. If not, then 6844 we have to give it function scope so that the goto from the 6845 earlier ':' case doesn't bypass the initialisation. */ 6846 if (0) { 6847 just_a_word_zero_gv: 6848 sv = NULL; 6849 cv = NULL; 6850 gv = NULL; 6851 gvp = NULL; 6852 rv2cv_op = NULL; 6853 orig_keyword = 0; 6854 lex = 0; 6855 off = 0; 6856 } 6857 just_a_word: { 6858 int pkgname = 0; 6859 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 6860 bool safebw; 6861 6862 6863 /* Get the rest if it looks like a package qualifier */ 6864 6865 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 6866 STRLEN morelen; 6867 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 6868 TRUE, &morelen); 6869 if (!morelen) 6870 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s", 6871 UTF8fARG(UTF, len, PL_tokenbuf), 6872 *s == '\'' ? "'" : "::"); 6873 len += morelen; 6874 pkgname = 1; 6875 } 6876 6877 if (PL_expect == XOPERATOR) { 6878 if (PL_bufptr == PL_linestart) { 6879 CopLINE_dec(PL_curcop); 6880 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 6881 CopLINE_inc(PL_curcop); 6882 } 6883 else 6884 no_op("Bareword",s); 6885 } 6886 6887 /* See if the name is "Foo::", 6888 in which case Foo is a bareword 6889 (and a package name). */ 6890 6891 if (len > 2 6892 && PL_tokenbuf[len - 2] == ':' 6893 && PL_tokenbuf[len - 1] == ':') 6894 { 6895 if (ckWARN(WARN_BAREWORD) 6896 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) 6897 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 6898 "Bareword \"%"UTF8f"\" refers to nonexistent package", 6899 UTF8fARG(UTF, len, PL_tokenbuf)); 6900 len -= 2; 6901 PL_tokenbuf[len] = '\0'; 6902 gv = NULL; 6903 gvp = 0; 6904 safebw = TRUE; 6905 } 6906 else { 6907 safebw = FALSE; 6908 } 6909 6910 /* if we saw a global override before, get the right name */ 6911 6912 if (!sv) 6913 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, 6914 len); 6915 if (gvp) { 6916 SV * const tmp_sv = sv; 6917 sv = newSVpvs("CORE::GLOBAL::"); 6918 sv_catsv(sv, tmp_sv); 6919 SvREFCNT_dec(tmp_sv); 6920 } 6921 6922 6923 /* Presume this is going to be a bareword of some sort. */ 6924 CLINE; 6925 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); 6926 pl_yylval.opval->op_private = OPpCONST_BARE; 6927 6928 /* And if "Foo::", then that's what it certainly is. */ 6929 if (safebw) 6930 goto safe_bareword; 6931 6932 if (!off) 6933 { 6934 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv)); 6935 const_op->op_private = OPpCONST_BARE; 6936 rv2cv_op = 6937 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); 6938 cv = lex 6939 ? isGV(gv) 6940 ? GvCV(gv) 6941 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV 6942 ? (CV *)SvRV(gv) 6943 : ((CV *)gv) 6944 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB); 6945 } 6946 6947 /* Use this var to track whether intuit_method has been 6948 called. intuit_method returns 0 or > 255. */ 6949 tmp = 1; 6950 6951 /* See if it's the indirect object for a list operator. */ 6952 6953 if (PL_oldoldbufptr 6954 && PL_oldoldbufptr < PL_bufptr 6955 && (PL_oldoldbufptr == PL_last_lop 6956 || PL_oldoldbufptr == PL_last_uni) 6957 && /* NO SKIPSPACE BEFORE HERE! */ 6958 (PL_expect == XREF 6959 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) 6960 == OA_FILEREF)) 6961 { 6962 bool immediate_paren = *s == '('; 6963 6964 /* (Now we can afford to cross potential line boundary.) */ 6965 s = skipspace(s); 6966 6967 /* Two barewords in a row may indicate method call. */ 6968 6969 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') 6970 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) 6971 { 6972 goto method; 6973 } 6974 6975 /* If not a declared subroutine, it's an indirect object. */ 6976 /* (But it's an indir obj regardless for sort.) */ 6977 /* Also, if "_" follows a filetest operator, it's a bareword */ 6978 6979 if ( 6980 ( !immediate_paren && (PL_last_lop_op == OP_SORT 6981 || (!cv 6982 && (PL_last_lop_op != OP_MAPSTART 6983 && PL_last_lop_op != OP_GREPSTART)))) 6984 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' 6985 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) 6986 == OA_FILESTATOP)) 6987 ) 6988 { 6989 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 6990 goto bareword; 6991 } 6992 } 6993 6994 PL_expect = XOPERATOR; 6995 s = skipspace(s); 6996 6997 /* Is this a word before a => operator? */ 6998 if (*s == '=' && s[1] == '>' && !pkgname) { 6999 op_free(rv2cv_op); 7000 CLINE; 7001 if (gvp || (lex && !off)) { 7002 assert (cSVOPx(pl_yylval.opval)->op_sv == sv); 7003 /* This is our own scalar, created a few lines 7004 above, so this is safe. */ 7005 SvREADONLY_off(sv); 7006 sv_setpv(sv, PL_tokenbuf); 7007 if (UTF && !IN_BYTES 7008 && is_utf8_string((U8*)PL_tokenbuf, len)) 7009 SvUTF8_on(sv); 7010 SvREADONLY_on(sv); 7011 } 7012 TERM(WORD); 7013 } 7014 7015 /* If followed by a paren, it's certainly a subroutine. */ 7016 if (*s == '(') { 7017 CLINE; 7018 if (cv) { 7019 d = s + 1; 7020 while (SPACE_OR_TAB(*d)) 7021 d++; 7022 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) { 7023 s = d + 1; 7024 goto its_constant; 7025 } 7026 } 7027 NEXTVAL_NEXTTOKE.opval = 7028 off ? rv2cv_op : pl_yylval.opval; 7029 if (off) 7030 op_free(pl_yylval.opval), force_next(PRIVATEREF); 7031 else op_free(rv2cv_op), force_next(WORD); 7032 pl_yylval.ival = 0; 7033 TOKEN('&'); 7034 } 7035 7036 /* If followed by var or block, call it a method (unless sub) */ 7037 7038 if ((*s == '$' || *s == '{') && !cv) { 7039 op_free(rv2cv_op); 7040 PL_last_lop = PL_oldbufptr; 7041 PL_last_lop_op = OP_METHOD; 7042 if (!PL_lex_allbrackets 7043 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7044 { 7045 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7046 } 7047 PL_expect = XBLOCKTERM; 7048 PL_bufptr = s; 7049 return REPORT(METHOD); 7050 } 7051 7052 /* If followed by a bareword, see if it looks like indir obj. */ 7053 7054 if (tmp == 1 && !orig_keyword 7055 && (isIDFIRST_lazy_if(s,UTF) || *s == '$') 7056 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) { 7057 method: 7058 if (lex && !off) { 7059 assert(cSVOPx(pl_yylval.opval)->op_sv == sv); 7060 SvREADONLY_off(sv); 7061 sv_setpvn(sv, PL_tokenbuf, len); 7062 if (UTF && !IN_BYTES 7063 && is_utf8_string((U8*)PL_tokenbuf, len)) 7064 SvUTF8_on (sv); 7065 else SvUTF8_off(sv); 7066 } 7067 op_free(rv2cv_op); 7068 if (tmp == METHOD && !PL_lex_allbrackets 7069 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7070 { 7071 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7072 } 7073 return REPORT(tmp); 7074 } 7075 7076 /* Not a method, so call it a subroutine (if defined) */ 7077 7078 if (cv) { 7079 /* Check for a constant sub */ 7080 if ((sv = cv_const_sv_or_av(cv))) { 7081 its_constant: 7082 op_free(rv2cv_op); 7083 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); 7084 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); 7085 if (SvTYPE(sv) == SVt_PVAV) 7086 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, 7087 pl_yylval.opval); 7088 else { 7089 pl_yylval.opval->op_private = 0; 7090 pl_yylval.opval->op_folded = 1; 7091 pl_yylval.opval->op_flags |= OPf_SPECIAL; 7092 } 7093 TOKEN(WORD); 7094 } 7095 7096 op_free(pl_yylval.opval); 7097 pl_yylval.opval = 7098 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; 7099 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7100 PL_last_lop = PL_oldbufptr; 7101 PL_last_lop_op = OP_ENTERSUB; 7102 /* Is there a prototype? */ 7103 if ( 7104 SvPOK(cv)) 7105 { 7106 STRLEN protolen = CvPROTOLEN(cv); 7107 const char *proto = CvPROTO(cv); 7108 bool optional; 7109 proto = S_strip_spaces(aTHX_ proto, &protolen); 7110 if (!protolen) 7111 TERM(FUNC0SUB); 7112 if ((optional = *proto == ';')) 7113 do 7114 proto++; 7115 while (*proto == ';'); 7116 if ( 7117 ( 7118 ( 7119 *proto == '$' || *proto == '_' 7120 || *proto == '*' || *proto == '+' 7121 ) 7122 && proto[1] == '\0' 7123 ) 7124 || ( 7125 *proto == '\\' && proto[1] && proto[2] == '\0' 7126 ) 7127 ) 7128 UNIPROTO(UNIOPSUB,optional); 7129 if (*proto == '\\' && proto[1] == '[') { 7130 const char *p = proto + 2; 7131 while(*p && *p != ']') 7132 ++p; 7133 if(*p == ']' && !p[1]) 7134 UNIPROTO(UNIOPSUB,optional); 7135 } 7136 if (*proto == '&' && *s == '{') { 7137 if (PL_curstash) 7138 sv_setpvs(PL_subname, "__ANON__"); 7139 else 7140 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 7141 if (!PL_lex_allbrackets 7142 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7143 { 7144 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7145 } 7146 PREBLOCK(LSTOPSUB); 7147 } 7148 } 7149 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7150 PL_expect = XTERM; 7151 force_next(off ? PRIVATEREF : WORD); 7152 if (!PL_lex_allbrackets 7153 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7154 { 7155 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7156 } 7157 TOKEN(NOAMP); 7158 } 7159 7160 /* Call it a bare word */ 7161 7162 if (PL_hints & HINT_STRICT_SUBS) 7163 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7164 else { 7165 bareword: 7166 /* after "print" and similar functions (corresponding to 7167 * "F? L" in opcode.pl), whatever wasn't already parsed as 7168 * a filehandle should be subject to "strict subs". 7169 * Likewise for the optional indirect-object argument to system 7170 * or exec, which can't be a bareword */ 7171 if ((PL_last_lop_op == OP_PRINT 7172 || PL_last_lop_op == OP_PRTF 7173 || PL_last_lop_op == OP_SAY 7174 || PL_last_lop_op == OP_SYSTEM 7175 || PL_last_lop_op == OP_EXEC) 7176 && (PL_hints & HINT_STRICT_SUBS)) 7177 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7178 if (lastchar != '-') { 7179 if (ckWARN(WARN_RESERVED)) { 7180 d = PL_tokenbuf; 7181 while (isLOWER(*d)) 7182 d++; 7183 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) 7184 { 7185 /* PL_warn_reserved is constant */ 7186 GCC_DIAG_IGNORE(-Wformat-nonliteral); 7187 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 7188 PL_tokenbuf); 7189 GCC_DIAG_RESTORE; 7190 } 7191 } 7192 } 7193 } 7194 op_free(rv2cv_op); 7195 7196 safe_bareword: 7197 if ((lastchar == '*' || lastchar == '%' || lastchar == '&') 7198 && saw_infix_sigil) { 7199 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7200 "Operator or semicolon missing before %c%"UTF8f, 7201 lastchar, 7202 UTF8fARG(UTF, strlen(PL_tokenbuf), 7203 PL_tokenbuf)); 7204 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7205 "Ambiguous use of %c resolved as operator %c", 7206 lastchar, lastchar); 7207 } 7208 TOKEN(WORD); 7209 } 7210 7211 case KEY___FILE__: 7212 FUN0OP( 7213 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) 7214 ); 7215 7216 case KEY___LINE__: 7217 FUN0OP( 7218 (OP*)newSVOP(OP_CONST, 0, 7219 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop))) 7220 ); 7221 7222 case KEY___PACKAGE__: 7223 FUN0OP( 7224 (OP*)newSVOP(OP_CONST, 0, 7225 (PL_curstash 7226 ? newSVhek(HvNAME_HEK(PL_curstash)) 7227 : &PL_sv_undef)) 7228 ); 7229 7230 case KEY___DATA__: 7231 case KEY___END__: { 7232 GV *gv; 7233 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) { 7234 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash 7235 ? PL_curstash 7236 : PL_defstash; 7237 gv = (GV *)*hv_fetchs(stash, "DATA", 1); 7238 if (!isGV(gv)) 7239 gv_init(gv,stash,"DATA",4,0); 7240 GvMULTI_on(gv); 7241 if (!GvIO(gv)) 7242 GvIOp(gv) = newIO(); 7243 IoIFP(GvIOp(gv)) = PL_rsfp; 7244#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC) 7245 { 7246 const int fd = PerlIO_fileno(PL_rsfp); 7247 if (fd >= 3) { 7248 fcntl(fd,F_SETFD, FD_CLOEXEC); 7249 } 7250 } 7251#endif 7252 /* Mark this internal pseudo-handle as clean */ 7253 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 7254 if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 7255 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 7256 else 7257 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 7258#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 7259 /* if the script was opened in binmode, we need to revert 7260 * it to text mode for compatibility; but only iff it has CRs 7261 * XXX this is a questionable hack at best. */ 7262 if (PL_bufend-PL_bufptr > 2 7263 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 7264 { 7265 Off_t loc = 0; 7266 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 7267 loc = PerlIO_tell(PL_rsfp); 7268 (void)PerlIO_seek(PL_rsfp, 0L, 0); 7269 } 7270#ifdef NETWARE 7271 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) { 7272#else 7273 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { 7274#endif /* NETWARE */ 7275 if (loc > 0) 7276 PerlIO_seek(PL_rsfp, loc, 0); 7277 } 7278 } 7279#endif 7280#ifdef PERLIO_LAYERS 7281 if (!IN_BYTES) { 7282 if (UTF) 7283 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 7284 else if (IN_ENCODING) { 7285 SV *name; 7286 dSP; 7287 ENTER; 7288 SAVETMPS; 7289 PUSHMARK(sp); 7290 XPUSHs(_get_encoding()); 7291 PUTBACK; 7292 call_method("name", G_SCALAR); 7293 SPAGAIN; 7294 name = POPs; 7295 PUTBACK; 7296 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, 7297 Perl_form(aTHX_ ":encoding(%"SVf")", 7298 SVfARG(name))); 7299 FREETMPS; 7300 LEAVE; 7301 } 7302 } 7303#endif 7304 PL_rsfp = NULL; 7305 } 7306 goto fake_eof; 7307 } 7308 7309 case KEY___SUB__: 7310 FUN0OP(CvCLONE(PL_compcv) 7311 ? newOP(OP_RUNCV, 0) 7312 : newPVOP(OP_RUNCV,0,NULL)); 7313 7314 case KEY_AUTOLOAD: 7315 case KEY_DESTROY: 7316 case KEY_BEGIN: 7317 case KEY_UNITCHECK: 7318 case KEY_CHECK: 7319 case KEY_INIT: 7320 case KEY_END: 7321 if (PL_expect == XSTATE) { 7322 s = PL_bufptr; 7323 goto really_sub; 7324 } 7325 goto just_a_word; 7326 7327 case_KEY_CORE: 7328 { 7329 STRLEN olen = len; 7330 d = s; 7331 s += 2; 7332 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); 7333 if ((*s == ':' && s[1] == ':') 7334 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) 7335 { 7336 s = d; 7337 len = olen; 7338 Copy(PL_bufptr, PL_tokenbuf, olen, char); 7339 goto just_a_word; 7340 } 7341 if (!tmp) 7342 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword", 7343 UTF8fARG(UTF, len, PL_tokenbuf)); 7344 if (tmp < 0) 7345 tmp = -tmp; 7346 else if (tmp == KEY_require || tmp == KEY_do 7347 || tmp == KEY_glob) 7348 /* that's a way to remember we saw "CORE::" */ 7349 orig_keyword = tmp; 7350 goto reserved_word; 7351 } 7352 7353 case KEY_abs: 7354 UNI(OP_ABS); 7355 7356 case KEY_alarm: 7357 UNI(OP_ALARM); 7358 7359 case KEY_accept: 7360 LOP(OP_ACCEPT,XTERM); 7361 7362 case KEY_and: 7363 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 7364 return REPORT(0); 7365 OPERATOR(ANDOP); 7366 7367 case KEY_atan2: 7368 LOP(OP_ATAN2,XTERM); 7369 7370 case KEY_bind: 7371 LOP(OP_BIND,XTERM); 7372 7373 case KEY_binmode: 7374 LOP(OP_BINMODE,XTERM); 7375 7376 case KEY_bless: 7377 LOP(OP_BLESS,XTERM); 7378 7379 case KEY_break: 7380 FUN0(OP_BREAK); 7381 7382 case KEY_chop: 7383 UNI(OP_CHOP); 7384 7385 case KEY_continue: 7386 /* We have to disambiguate the two senses of 7387 "continue". If the next token is a '{' then 7388 treat it as the start of a continue block; 7389 otherwise treat it as a control operator. 7390 */ 7391 s = skipspace(s); 7392 if (*s == '{') 7393 PREBLOCK(CONTINUE); 7394 else 7395 FUN0(OP_CONTINUE); 7396 7397 case KEY_chdir: 7398 /* may use HOME */ 7399 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); 7400 UNI(OP_CHDIR); 7401 7402 case KEY_close: 7403 UNI(OP_CLOSE); 7404 7405 case KEY_closedir: 7406 UNI(OP_CLOSEDIR); 7407 7408 case KEY_cmp: 7409 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7410 return REPORT(0); 7411 Eop(OP_SCMP); 7412 7413 case KEY_caller: 7414 UNI(OP_CALLER); 7415 7416 case KEY_crypt: 7417#ifdef FCRYPT 7418 if (!PL_cryptseen) { 7419 PL_cryptseen = TRUE; 7420 init_des(); 7421 } 7422#endif 7423 LOP(OP_CRYPT,XTERM); 7424 7425 case KEY_chmod: 7426 LOP(OP_CHMOD,XTERM); 7427 7428 case KEY_chown: 7429 LOP(OP_CHOWN,XTERM); 7430 7431 case KEY_connect: 7432 LOP(OP_CONNECT,XTERM); 7433 7434 case KEY_chr: 7435 UNI(OP_CHR); 7436 7437 case KEY_cos: 7438 UNI(OP_COS); 7439 7440 case KEY_chroot: 7441 UNI(OP_CHROOT); 7442 7443 case KEY_default: 7444 PREBLOCK(DEFAULT); 7445 7446 case KEY_do: 7447 s = skipspace(s); 7448 if (*s == '{') 7449 PRETERMBLOCK(DO); 7450 if (*s != '\'') { 7451 *PL_tokenbuf = '&'; 7452 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 7453 1, &len); 7454 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE")) 7455 && !keyword(PL_tokenbuf + 1, len, 0)) { 7456 d = skipspace(d); 7457 if (*d == '(') { 7458 force_ident_maybe_lex('&'); 7459 s = d; 7460 } 7461 } 7462 } 7463 if (orig_keyword == KEY_do) { 7464 orig_keyword = 0; 7465 pl_yylval.ival = 1; 7466 } 7467 else 7468 pl_yylval.ival = 0; 7469 OPERATOR(DO); 7470 7471 case KEY_die: 7472 PL_hints |= HINT_BLOCK_SCOPE; 7473 LOP(OP_DIE,XTERM); 7474 7475 case KEY_defined: 7476 UNI(OP_DEFINED); 7477 7478 case KEY_delete: 7479 UNI(OP_DELETE); 7480 7481 case KEY_dbmopen: 7482 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), 7483 STR_WITH_LEN("NDBM_File::"), 7484 STR_WITH_LEN("DB_File::"), 7485 STR_WITH_LEN("GDBM_File::"), 7486 STR_WITH_LEN("SDBM_File::"), 7487 STR_WITH_LEN("ODBM_File::"), 7488 NULL); 7489 LOP(OP_DBMOPEN,XTERM); 7490 7491 case KEY_dbmclose: 7492 UNI(OP_DBMCLOSE); 7493 7494 case KEY_dump: 7495 LOOPX(OP_DUMP); 7496 7497 case KEY_else: 7498 PREBLOCK(ELSE); 7499 7500 case KEY_elsif: 7501 pl_yylval.ival = CopLINE(PL_curcop); 7502 OPERATOR(ELSIF); 7503 7504 case KEY_eq: 7505 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7506 return REPORT(0); 7507 Eop(OP_SEQ); 7508 7509 case KEY_exists: 7510 UNI(OP_EXISTS); 7511 7512 case KEY_exit: 7513 UNI(OP_EXIT); 7514 7515 case KEY_eval: 7516 s = skipspace(s); 7517 if (*s == '{') { /* block eval */ 7518 PL_expect = XTERMBLOCK; 7519 UNIBRACK(OP_ENTERTRY); 7520 } 7521 else { /* string eval */ 7522 PL_expect = XTERM; 7523 UNIBRACK(OP_ENTEREVAL); 7524 } 7525 7526 case KEY_evalbytes: 7527 PL_expect = XTERM; 7528 UNIBRACK(-OP_ENTEREVAL); 7529 7530 case KEY_eof: 7531 UNI(OP_EOF); 7532 7533 case KEY_exp: 7534 UNI(OP_EXP); 7535 7536 case KEY_each: 7537 UNI(OP_EACH); 7538 7539 case KEY_exec: 7540 LOP(OP_EXEC,XREF); 7541 7542 case KEY_endhostent: 7543 FUN0(OP_EHOSTENT); 7544 7545 case KEY_endnetent: 7546 FUN0(OP_ENETENT); 7547 7548 case KEY_endservent: 7549 FUN0(OP_ESERVENT); 7550 7551 case KEY_endprotoent: 7552 FUN0(OP_EPROTOENT); 7553 7554 case KEY_endpwent: 7555 FUN0(OP_EPWENT); 7556 7557 case KEY_endgrent: 7558 FUN0(OP_EGRENT); 7559 7560 case KEY_for: 7561 case KEY_foreach: 7562 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 7563 return REPORT(0); 7564 pl_yylval.ival = CopLINE(PL_curcop); 7565 s = skipspace(s); 7566 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) { 7567 char *p = s; 7568 7569 if ((PL_bufend - p) >= 3 7570 && strnEQ(p, "my", 2) && isSPACE(*(p + 2))) 7571 { 7572 p += 2; 7573 } 7574 else if ((PL_bufend - p) >= 4 7575 && strnEQ(p, "our", 3) && isSPACE(*(p + 3))) 7576 p += 3; 7577 p = skipspace(p); 7578 /* skip optional package name, as in "for my abc $x (..)" */ 7579 if (isIDFIRST_lazy_if(p,UTF)) { 7580 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 7581 p = skipspace(p); 7582 } 7583 if (*p != '$') 7584 Perl_croak(aTHX_ "Missing $ on loop variable"); 7585 } 7586 OPERATOR(FOR); 7587 7588 case KEY_formline: 7589 LOP(OP_FORMLINE,XTERM); 7590 7591 case KEY_fork: 7592 FUN0(OP_FORK); 7593 7594 case KEY_fc: 7595 UNI(OP_FC); 7596 7597 case KEY_fcntl: 7598 LOP(OP_FCNTL,XTERM); 7599 7600 case KEY_fileno: 7601 UNI(OP_FILENO); 7602 7603 case KEY_flock: 7604 LOP(OP_FLOCK,XTERM); 7605 7606 case KEY_gt: 7607 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7608 return REPORT(0); 7609 Rop(OP_SGT); 7610 7611 case KEY_ge: 7612 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7613 return REPORT(0); 7614 Rop(OP_SGE); 7615 7616 case KEY_grep: 7617 LOP(OP_GREPSTART, XREF); 7618 7619 case KEY_goto: 7620 LOOPX(OP_GOTO); 7621 7622 case KEY_gmtime: 7623 UNI(OP_GMTIME); 7624 7625 case KEY_getc: 7626 UNIDOR(OP_GETC); 7627 7628 case KEY_getppid: 7629 FUN0(OP_GETPPID); 7630 7631 case KEY_getpgrp: 7632 UNI(OP_GETPGRP); 7633 7634 case KEY_getpriority: 7635 LOP(OP_GETPRIORITY,XTERM); 7636 7637 case KEY_getprotobyname: 7638 UNI(OP_GPBYNAME); 7639 7640 case KEY_getprotobynumber: 7641 LOP(OP_GPBYNUMBER,XTERM); 7642 7643 case KEY_getprotoent: 7644 FUN0(OP_GPROTOENT); 7645 7646 case KEY_getpwent: 7647 FUN0(OP_GPWENT); 7648 7649 case KEY_getpwnam: 7650 UNI(OP_GPWNAM); 7651 7652 case KEY_getpwuid: 7653 UNI(OP_GPWUID); 7654 7655 case KEY_getpeername: 7656 UNI(OP_GETPEERNAME); 7657 7658 case KEY_gethostbyname: 7659 UNI(OP_GHBYNAME); 7660 7661 case KEY_gethostbyaddr: 7662 LOP(OP_GHBYADDR,XTERM); 7663 7664 case KEY_gethostent: 7665 FUN0(OP_GHOSTENT); 7666 7667 case KEY_getnetbyname: 7668 UNI(OP_GNBYNAME); 7669 7670 case KEY_getnetbyaddr: 7671 LOP(OP_GNBYADDR,XTERM); 7672 7673 case KEY_getnetent: 7674 FUN0(OP_GNETENT); 7675 7676 case KEY_getservbyname: 7677 LOP(OP_GSBYNAME,XTERM); 7678 7679 case KEY_getservbyport: 7680 LOP(OP_GSBYPORT,XTERM); 7681 7682 case KEY_getservent: 7683 FUN0(OP_GSERVENT); 7684 7685 case KEY_getsockname: 7686 UNI(OP_GETSOCKNAME); 7687 7688 case KEY_getsockopt: 7689 LOP(OP_GSOCKOPT,XTERM); 7690 7691 case KEY_getgrent: 7692 FUN0(OP_GGRENT); 7693 7694 case KEY_getgrnam: 7695 UNI(OP_GGRNAM); 7696 7697 case KEY_getgrgid: 7698 UNI(OP_GGRGID); 7699 7700 case KEY_getlogin: 7701 FUN0(OP_GETLOGIN); 7702 7703 case KEY_given: 7704 pl_yylval.ival = CopLINE(PL_curcop); 7705 Perl_ck_warner_d(aTHX_ 7706 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 7707 "given is experimental"); 7708 OPERATOR(GIVEN); 7709 7710 case KEY_glob: 7711 LOP( 7712 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, 7713 XTERM 7714 ); 7715 7716 case KEY_hex: 7717 UNI(OP_HEX); 7718 7719 case KEY_if: 7720 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 7721 return REPORT(0); 7722 pl_yylval.ival = CopLINE(PL_curcop); 7723 OPERATOR(IF); 7724 7725 case KEY_index: 7726 LOP(OP_INDEX,XTERM); 7727 7728 case KEY_int: 7729 UNI(OP_INT); 7730 7731 case KEY_ioctl: 7732 LOP(OP_IOCTL,XTERM); 7733 7734 case KEY_join: 7735 LOP(OP_JOIN,XTERM); 7736 7737 case KEY_keys: 7738 UNI(OP_KEYS); 7739 7740 case KEY_kill: 7741 LOP(OP_KILL,XTERM); 7742 7743 case KEY_last: 7744 LOOPX(OP_LAST); 7745 7746 case KEY_lc: 7747 UNI(OP_LC); 7748 7749 case KEY_lcfirst: 7750 UNI(OP_LCFIRST); 7751 7752 case KEY_local: 7753 pl_yylval.ival = 0; 7754 OPERATOR(LOCAL); 7755 7756 case KEY_length: 7757 UNI(OP_LENGTH); 7758 7759 case KEY_lt: 7760 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7761 return REPORT(0); 7762 Rop(OP_SLT); 7763 7764 case KEY_le: 7765 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7766 return REPORT(0); 7767 Rop(OP_SLE); 7768 7769 case KEY_localtime: 7770 UNI(OP_LOCALTIME); 7771 7772 case KEY_log: 7773 UNI(OP_LOG); 7774 7775 case KEY_link: 7776 LOP(OP_LINK,XTERM); 7777 7778 case KEY_listen: 7779 LOP(OP_LISTEN,XTERM); 7780 7781 case KEY_lock: 7782 UNI(OP_LOCK); 7783 7784 case KEY_lstat: 7785 UNI(OP_LSTAT); 7786 7787 case KEY_m: 7788 s = scan_pat(s,OP_MATCH); 7789 TERM(sublex_start()); 7790 7791 case KEY_map: 7792 LOP(OP_MAPSTART, XREF); 7793 7794 case KEY_mkdir: 7795 LOP(OP_MKDIR,XTERM); 7796 7797 case KEY_msgctl: 7798 LOP(OP_MSGCTL,XTERM); 7799 7800 case KEY_msgget: 7801 LOP(OP_MSGGET,XTERM); 7802 7803 case KEY_msgrcv: 7804 LOP(OP_MSGRCV,XTERM); 7805 7806 case KEY_msgsnd: 7807 LOP(OP_MSGSND,XTERM); 7808 7809 case KEY_our: 7810 case KEY_my: 7811 case KEY_state: 7812 if (PL_in_my) { 7813 yyerror(Perl_form(aTHX_ 7814 "Can't redeclare \"%s\" in \"%s\"", 7815 tmp == KEY_my ? "my" : 7816 tmp == KEY_state ? "state" : "our", 7817 PL_in_my == KEY_my ? "my" : 7818 PL_in_my == KEY_state ? "state" : "our")); 7819 } 7820 PL_in_my = (U16)tmp; 7821 s = skipspace(s); 7822 if (isIDFIRST_lazy_if(s,UTF)) { 7823 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); 7824 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3)) 7825 { 7826 if (!FEATURE_LEXSUBS_IS_ENABLED) 7827 Perl_croak(aTHX_ 7828 "Experimental \"%s\" subs not enabled", 7829 tmp == KEY_my ? "my" : 7830 tmp == KEY_state ? "state" : "our"); 7831 Perl_ck_warner_d(aTHX_ 7832 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS), 7833 "The lexical_subs feature is experimental"); 7834 goto really_sub; 7835 } 7836 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 7837 if (!PL_in_my_stash) { 7838 char tmpbuf[1024]; 7839 int len; 7840 PL_bufptr = s; 7841 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); 7842 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf)); 7843 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); 7844 } 7845 } 7846 pl_yylval.ival = 1; 7847 OPERATOR(MY); 7848 7849 case KEY_next: 7850 LOOPX(OP_NEXT); 7851 7852 case KEY_ne: 7853 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7854 return REPORT(0); 7855 Eop(OP_SNE); 7856 7857 case KEY_no: 7858 s = tokenize_use(0, s); 7859 TOKEN(USE); 7860 7861 case KEY_not: 7862 if (*s == '(' || (s = skipspace(s), *s == '(')) 7863 FUN1(OP_NOT); 7864 else { 7865 if (!PL_lex_allbrackets 7866 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7867 { 7868 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7869 } 7870 OPERATOR(NOTOP); 7871 } 7872 7873 case KEY_open: 7874 s = skipspace(s); 7875 if (isIDFIRST_lazy_if(s,UTF)) { 7876 const char *t; 7877 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, 7878 &len); 7879 for (t=d; isSPACE(*t);) 7880 t++; 7881 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) 7882 /* [perl #16184] */ 7883 && !(t[0] == '=' && t[1] == '>') 7884 && !(t[0] == ':' && t[1] == ':') 7885 && !keyword(s, d-s, 0) 7886 ) { 7887 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 7888 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")", 7889 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); 7890 } 7891 } 7892 LOP(OP_OPEN,XTERM); 7893 7894 case KEY_or: 7895 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 7896 return REPORT(0); 7897 pl_yylval.ival = OP_OR; 7898 OPERATOR(OROP); 7899 7900 case KEY_ord: 7901 UNI(OP_ORD); 7902 7903 case KEY_oct: 7904 UNI(OP_OCT); 7905 7906 case KEY_opendir: 7907 LOP(OP_OPEN_DIR,XTERM); 7908 7909 case KEY_print: 7910 checkcomma(s,PL_tokenbuf,"filehandle"); 7911 LOP(OP_PRINT,XREF); 7912 7913 case KEY_printf: 7914 checkcomma(s,PL_tokenbuf,"filehandle"); 7915 LOP(OP_PRTF,XREF); 7916 7917 case KEY_prototype: 7918 UNI(OP_PROTOTYPE); 7919 7920 case KEY_push: 7921 LOP(OP_PUSH,XTERM); 7922 7923 case KEY_pop: 7924 UNIDOR(OP_POP); 7925 7926 case KEY_pos: 7927 UNIDOR(OP_POS); 7928 7929 case KEY_pack: 7930 LOP(OP_PACK,XTERM); 7931 7932 case KEY_package: 7933 s = force_word(s,WORD,FALSE,TRUE); 7934 s = skipspace(s); 7935 s = force_strict_version(s); 7936 PREBLOCK(PACKAGE); 7937 7938 case KEY_pipe: 7939 LOP(OP_PIPE_OP,XTERM); 7940 7941 case KEY_q: 7942 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 7943 if (!s) 7944 missingterm(NULL); 7945 COPLINE_SET_FROM_MULTI_END; 7946 pl_yylval.ival = OP_CONST; 7947 TERM(sublex_start()); 7948 7949 case KEY_quotemeta: 7950 UNI(OP_QUOTEMETA); 7951 7952 case KEY_qw: { 7953 OP *words = NULL; 7954 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 7955 if (!s) 7956 missingterm(NULL); 7957 COPLINE_SET_FROM_MULTI_END; 7958 PL_expect = XOPERATOR; 7959 if (SvCUR(PL_lex_stuff)) { 7960 int warned_comma = !ckWARN(WARN_QW); 7961 int warned_comment = warned_comma; 7962 d = SvPV_force(PL_lex_stuff, len); 7963 while (len) { 7964 for (; isSPACE(*d) && len; --len, ++d) 7965 /**/; 7966 if (len) { 7967 SV *sv; 7968 const char *b = d; 7969 if (!warned_comma || !warned_comment) { 7970 for (; !isSPACE(*d) && len; --len, ++d) { 7971 if (!warned_comma && *d == ',') { 7972 Perl_warner(aTHX_ packWARN(WARN_QW), 7973 "Possible attempt to separate words with commas"); 7974 ++warned_comma; 7975 } 7976 else if (!warned_comment && *d == '#') { 7977 Perl_warner(aTHX_ packWARN(WARN_QW), 7978 "Possible attempt to put comments in qw() list"); 7979 ++warned_comment; 7980 } 7981 } 7982 } 7983 else { 7984 for (; !isSPACE(*d) && len; --len, ++d) 7985 /**/; 7986 } 7987 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); 7988 words = op_append_elem(OP_LIST, words, 7989 newSVOP(OP_CONST, 0, tokeq(sv))); 7990 } 7991 } 7992 } 7993 if (!words) 7994 words = newNULLLIST(); 7995 SvREFCNT_dec_NN(PL_lex_stuff); 7996 PL_lex_stuff = NULL; 7997 PL_expect = XOPERATOR; 7998 pl_yylval.opval = sawparens(words); 7999 TOKEN(QWLIST); 8000 } 8001 8002 case KEY_qq: 8003 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8004 if (!s) 8005 missingterm(NULL); 8006 pl_yylval.ival = OP_STRINGIFY; 8007 if (SvIVX(PL_lex_stuff) == '\'') 8008 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ 8009 TERM(sublex_start()); 8010 8011 case KEY_qr: 8012 s = scan_pat(s,OP_QR); 8013 TERM(sublex_start()); 8014 8015 case KEY_qx: 8016 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8017 if (!s) 8018 missingterm(NULL); 8019 pl_yylval.ival = OP_BACKTICK; 8020 TERM(sublex_start()); 8021 8022 case KEY_return: 8023 OLDLOP(OP_RETURN); 8024 8025 case KEY_require: 8026 s = skipspace(s); 8027 if (isDIGIT(*s)) { 8028 s = force_version(s, FALSE); 8029 } 8030 else if (*s != 'v' || !isDIGIT(s[1]) 8031 || (s = force_version(s, TRUE), *s == 'v')) 8032 { 8033 *PL_tokenbuf = '\0'; 8034 s = force_word(s,WORD,TRUE,TRUE); 8035 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) 8036 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), 8037 GV_ADD | (UTF ? SVf_UTF8 : 0)); 8038 else if (*s == '<') 8039 yyerror("<> at require-statement should be quotes"); 8040 } 8041 if (orig_keyword == KEY_require) { 8042 orig_keyword = 0; 8043 pl_yylval.ival = 1; 8044 } 8045 else 8046 pl_yylval.ival = 0; 8047 PL_expect = PL_nexttoke ? XOPERATOR : XTERM; 8048 PL_bufptr = s; 8049 PL_last_uni = PL_oldbufptr; 8050 PL_last_lop_op = OP_REQUIRE; 8051 s = skipspace(s); 8052 return REPORT( (int)REQUIRE ); 8053 8054 case KEY_reset: 8055 UNI(OP_RESET); 8056 8057 case KEY_redo: 8058 LOOPX(OP_REDO); 8059 8060 case KEY_rename: 8061 LOP(OP_RENAME,XTERM); 8062 8063 case KEY_rand: 8064 UNI(OP_RAND); 8065 8066 case KEY_rmdir: 8067 UNI(OP_RMDIR); 8068 8069 case KEY_rindex: 8070 LOP(OP_RINDEX,XTERM); 8071 8072 case KEY_read: 8073 LOP(OP_READ,XTERM); 8074 8075 case KEY_readdir: 8076 UNI(OP_READDIR); 8077 8078 case KEY_readline: 8079 UNIDOR(OP_READLINE); 8080 8081 case KEY_readpipe: 8082 UNIDOR(OP_BACKTICK); 8083 8084 case KEY_rewinddir: 8085 UNI(OP_REWINDDIR); 8086 8087 case KEY_recv: 8088 LOP(OP_RECV,XTERM); 8089 8090 case KEY_reverse: 8091 LOP(OP_REVERSE,XTERM); 8092 8093 case KEY_readlink: 8094 UNIDOR(OP_READLINK); 8095 8096 case KEY_ref: 8097 UNI(OP_REF); 8098 8099 case KEY_s: 8100 s = scan_subst(s); 8101 if (pl_yylval.opval) 8102 TERM(sublex_start()); 8103 else 8104 TOKEN(1); /* force error */ 8105 8106 case KEY_say: 8107 checkcomma(s,PL_tokenbuf,"filehandle"); 8108 LOP(OP_SAY,XREF); 8109 8110 case KEY_chomp: 8111 UNI(OP_CHOMP); 8112 8113 case KEY_scalar: 8114 UNI(OP_SCALAR); 8115 8116 case KEY_select: 8117 LOP(OP_SELECT,XTERM); 8118 8119 case KEY_seek: 8120 LOP(OP_SEEK,XTERM); 8121 8122 case KEY_semctl: 8123 LOP(OP_SEMCTL,XTERM); 8124 8125 case KEY_semget: 8126 LOP(OP_SEMGET,XTERM); 8127 8128 case KEY_semop: 8129 LOP(OP_SEMOP,XTERM); 8130 8131 case KEY_send: 8132 LOP(OP_SEND,XTERM); 8133 8134 case KEY_setpgrp: 8135 LOP(OP_SETPGRP,XTERM); 8136 8137 case KEY_setpriority: 8138 LOP(OP_SETPRIORITY,XTERM); 8139 8140 case KEY_sethostent: 8141 UNI(OP_SHOSTENT); 8142 8143 case KEY_setnetent: 8144 UNI(OP_SNETENT); 8145 8146 case KEY_setservent: 8147 UNI(OP_SSERVENT); 8148 8149 case KEY_setprotoent: 8150 UNI(OP_SPROTOENT); 8151 8152 case KEY_setpwent: 8153 FUN0(OP_SPWENT); 8154 8155 case KEY_setgrent: 8156 FUN0(OP_SGRENT); 8157 8158 case KEY_seekdir: 8159 LOP(OP_SEEKDIR,XTERM); 8160 8161 case KEY_setsockopt: 8162 LOP(OP_SSOCKOPT,XTERM); 8163 8164 case KEY_shift: 8165 UNIDOR(OP_SHIFT); 8166 8167 case KEY_shmctl: 8168 LOP(OP_SHMCTL,XTERM); 8169 8170 case KEY_shmget: 8171 LOP(OP_SHMGET,XTERM); 8172 8173 case KEY_shmread: 8174 LOP(OP_SHMREAD,XTERM); 8175 8176 case KEY_shmwrite: 8177 LOP(OP_SHMWRITE,XTERM); 8178 8179 case KEY_shutdown: 8180 LOP(OP_SHUTDOWN,XTERM); 8181 8182 case KEY_sin: 8183 UNI(OP_SIN); 8184 8185 case KEY_sleep: 8186 UNI(OP_SLEEP); 8187 8188 case KEY_socket: 8189 LOP(OP_SOCKET,XTERM); 8190 8191 case KEY_socketpair: 8192 LOP(OP_SOCKPAIR,XTERM); 8193 8194 case KEY_sort: 8195 checkcomma(s,PL_tokenbuf,"subroutine name"); 8196 s = skipspace(s); 8197 PL_expect = XTERM; 8198 s = force_word(s,WORD,TRUE,TRUE); 8199 LOP(OP_SORT,XREF); 8200 8201 case KEY_split: 8202 LOP(OP_SPLIT,XTERM); 8203 8204 case KEY_sprintf: 8205 LOP(OP_SPRINTF,XTERM); 8206 8207 case KEY_splice: 8208 LOP(OP_SPLICE,XTERM); 8209 8210 case KEY_sqrt: 8211 UNI(OP_SQRT); 8212 8213 case KEY_srand: 8214 UNI(OP_SRAND); 8215 8216 case KEY_stat: 8217 UNI(OP_STAT); 8218 8219 case KEY_study: 8220 UNI(OP_STUDY); 8221 8222 case KEY_substr: 8223 LOP(OP_SUBSTR,XTERM); 8224 8225 case KEY_format: 8226 case KEY_sub: 8227 really_sub: 8228 { 8229 char * const tmpbuf = PL_tokenbuf + 1; 8230 expectation attrful; 8231 bool have_name, have_proto; 8232 const int key = tmp; 8233 SV *format_name = NULL; 8234 8235 d = s; 8236 s = skipspace(s); 8237 8238 if (isIDFIRST_lazy_if(s,UTF) 8239 || *s == '\'' 8240 || (*s == ':' && s[1] == ':')) 8241 { 8242 8243 PL_expect = XBLOCK; 8244 attrful = XATTRBLOCK; 8245 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, 8246 &len); 8247 if (key == KEY_format) 8248 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); 8249 *PL_tokenbuf = '&'; 8250 if (memchr(tmpbuf, ':', len) || key != KEY_sub 8251 || pad_findmy_pvn( 8252 PL_tokenbuf, len + 1, 0 8253 ) != NOT_IN_PAD) 8254 sv_setpvn(PL_subname, tmpbuf, len); 8255 else { 8256 sv_setsv(PL_subname,PL_curstname); 8257 sv_catpvs(PL_subname,"::"); 8258 sv_catpvn(PL_subname,tmpbuf,len); 8259 } 8260 if (SvUTF8(PL_linestr)) 8261 SvUTF8_on(PL_subname); 8262 have_name = TRUE; 8263 8264 8265 s = skipspace(d); 8266 } 8267 else { 8268 if (key == KEY_my || key == KEY_our || key==KEY_state) 8269 { 8270 *d = '\0'; 8271 /* diag_listed_as: Missing name in "%s sub" */ 8272 Perl_croak(aTHX_ 8273 "Missing name in \"%s\"", PL_bufptr); 8274 } 8275 PL_expect = XTERMBLOCK; 8276 attrful = XATTRTERM; 8277 sv_setpvs(PL_subname,"?"); 8278 have_name = FALSE; 8279 } 8280 8281 if (key == KEY_format) { 8282 if (format_name) { 8283 NEXTVAL_NEXTTOKE.opval 8284 = (OP*)newSVOP(OP_CONST,0, format_name); 8285 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 8286 force_next(WORD); 8287 } 8288 PREBLOCK(FORMAT); 8289 } 8290 8291 /* Look for a prototype */ 8292 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) { 8293 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8294 COPLINE_SET_FROM_MULTI_END; 8295 if (!s) 8296 Perl_croak(aTHX_ "Prototype not terminated"); 8297 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO)); 8298 have_proto = TRUE; 8299 8300 s = skipspace(s); 8301 } 8302 else 8303 have_proto = FALSE; 8304 8305 if (*s == ':' && s[1] != ':') 8306 PL_expect = attrful; 8307 else if ((*s != '{' && *s != '(') && key != KEY_format) { 8308 assert(key == KEY_sub || key == KEY_AUTOLOAD || 8309 key == KEY_DESTROY || key == KEY_BEGIN || 8310 key == KEY_UNITCHECK || key == KEY_CHECK || 8311 key == KEY_INIT || key == KEY_END || 8312 key == KEY_my || key == KEY_state || 8313 key == KEY_our); 8314 if (!have_name) 8315 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); 8316 else if (*s != ';' && *s != '}') 8317 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname)); 8318 } 8319 8320 if (have_proto) { 8321 NEXTVAL_NEXTTOKE.opval = 8322 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff); 8323 PL_lex_stuff = NULL; 8324 force_next(THING); 8325 } 8326 if (!have_name) { 8327 if (PL_curstash) 8328 sv_setpvs(PL_subname, "__ANON__"); 8329 else 8330 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 8331 TOKEN(ANONSUB); 8332 } 8333 force_ident_maybe_lex('&'); 8334 TOKEN(SUB); 8335 } 8336 8337 case KEY_system: 8338 LOP(OP_SYSTEM,XREF); 8339 8340 case KEY_symlink: 8341 LOP(OP_SYMLINK,XTERM); 8342 8343 case KEY_syscall: 8344 LOP(OP_SYSCALL,XTERM); 8345 8346 case KEY_sysopen: 8347 LOP(OP_SYSOPEN,XTERM); 8348 8349 case KEY_sysseek: 8350 LOP(OP_SYSSEEK,XTERM); 8351 8352 case KEY_sysread: 8353 LOP(OP_SYSREAD,XTERM); 8354 8355 case KEY_syswrite: 8356 LOP(OP_SYSWRITE,XTERM); 8357 8358 case KEY_tr: 8359 case KEY_y: 8360 s = scan_trans(s); 8361 TERM(sublex_start()); 8362 8363 case KEY_tell: 8364 UNI(OP_TELL); 8365 8366 case KEY_telldir: 8367 UNI(OP_TELLDIR); 8368 8369 case KEY_tie: 8370 LOP(OP_TIE,XTERM); 8371 8372 case KEY_tied: 8373 UNI(OP_TIED); 8374 8375 case KEY_time: 8376 FUN0(OP_TIME); 8377 8378 case KEY_times: 8379 FUN0(OP_TMS); 8380 8381 case KEY_truncate: 8382 LOP(OP_TRUNCATE,XTERM); 8383 8384 case KEY_uc: 8385 UNI(OP_UC); 8386 8387 case KEY_ucfirst: 8388 UNI(OP_UCFIRST); 8389 8390 case KEY_untie: 8391 UNI(OP_UNTIE); 8392 8393 case KEY_until: 8394 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8395 return REPORT(0); 8396 pl_yylval.ival = CopLINE(PL_curcop); 8397 OPERATOR(UNTIL); 8398 8399 case KEY_unless: 8400 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8401 return REPORT(0); 8402 pl_yylval.ival = CopLINE(PL_curcop); 8403 OPERATOR(UNLESS); 8404 8405 case KEY_unlink: 8406 LOP(OP_UNLINK,XTERM); 8407 8408 case KEY_undef: 8409 UNIDOR(OP_UNDEF); 8410 8411 case KEY_unpack: 8412 LOP(OP_UNPACK,XTERM); 8413 8414 case KEY_utime: 8415 LOP(OP_UTIME,XTERM); 8416 8417 case KEY_umask: 8418 UNIDOR(OP_UMASK); 8419 8420 case KEY_unshift: 8421 LOP(OP_UNSHIFT,XTERM); 8422 8423 case KEY_use: 8424 s = tokenize_use(1, s); 8425 TOKEN(USE); 8426 8427 case KEY_values: 8428 UNI(OP_VALUES); 8429 8430 case KEY_vec: 8431 LOP(OP_VEC,XTERM); 8432 8433 case KEY_when: 8434 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8435 return REPORT(0); 8436 pl_yylval.ival = CopLINE(PL_curcop); 8437 Perl_ck_warner_d(aTHX_ 8438 packWARN(WARN_EXPERIMENTAL__SMARTMATCH), 8439 "when is experimental"); 8440 OPERATOR(WHEN); 8441 8442 case KEY_while: 8443 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8444 return REPORT(0); 8445 pl_yylval.ival = CopLINE(PL_curcop); 8446 OPERATOR(WHILE); 8447 8448 case KEY_warn: 8449 PL_hints |= HINT_BLOCK_SCOPE; 8450 LOP(OP_WARN,XTERM); 8451 8452 case KEY_wait: 8453 FUN0(OP_WAIT); 8454 8455 case KEY_waitpid: 8456 LOP(OP_WAITPID,XTERM); 8457 8458 case KEY_wantarray: 8459 FUN0(OP_WANTARRAY); 8460 8461 case KEY_write: 8462 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and 8463 * we use the same number on EBCDIC */ 8464 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); 8465 UNI(OP_ENTERWRITE); 8466 8467 case KEY_x: 8468 if (PL_expect == XOPERATOR) { 8469 if (*s == '=' && !PL_lex_allbrackets 8470 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 8471 { 8472 return REPORT(0); 8473 } 8474 Mop(OP_REPEAT); 8475 } 8476 check_uni(); 8477 goto just_a_word; 8478 8479 case KEY_xor: 8480 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8481 return REPORT(0); 8482 pl_yylval.ival = OP_XOR; 8483 OPERATOR(OROP); 8484 } 8485 }} 8486} 8487 8488/* 8489 S_pending_ident 8490 8491 Looks up an identifier in the pad or in a package 8492 8493 Returns: 8494 PRIVATEREF if this is a lexical name. 8495 WORD if this belongs to a package. 8496 8497 Structure: 8498 if we're in a my declaration 8499 croak if they tried to say my($foo::bar) 8500 build the ops for a my() declaration 8501 if it's an access to a my() variable 8502 build ops for access to a my() variable 8503 if in a dq string, and they've said @foo and we can't find @foo 8504 warn 8505 build ops for a bareword 8506*/ 8507 8508static int 8509S_pending_ident(pTHX) 8510{ 8511 PADOFFSET tmp = 0; 8512 const char pit = (char)pl_yylval.ival; 8513 const STRLEN tokenbuf_len = strlen(PL_tokenbuf); 8514 /* All routes through this function want to know if there is a colon. */ 8515 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); 8516 8517 DEBUG_T({ PerlIO_printf(Perl_debug_log, 8518 "### Pending identifier '%s'\n", PL_tokenbuf); }); 8519 8520 /* if we're in a my(), we can't allow dynamics here. 8521 $foo'bar has already been turned into $foo::bar, so 8522 just check for colons. 8523 8524 if it's a legal name, the OP is a PADANY. 8525 */ 8526 if (PL_in_my) { 8527 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 8528 if (has_colon) 8529 yyerror_pv(Perl_form(aTHX_ "No package name allowed for " 8530 "variable %s in \"our\"", 8531 PL_tokenbuf), UTF ? SVf_UTF8 : 0); 8532 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); 8533 } 8534 else { 8535 if (has_colon) { 8536 /* "my" variable %s can't be in a package */ 8537 /* PL_no_myglob is constant */ 8538 GCC_DIAG_IGNORE(-Wformat-nonliteral); 8539 yyerror_pv(Perl_form(aTHX_ PL_no_myglob, 8540 PL_in_my == KEY_my ? "my" : "state", 8541 *PL_tokenbuf == '&' ? "subroutin" : "variabl", 8542 PL_tokenbuf), 8543 UTF ? SVf_UTF8 : 0); 8544 GCC_DIAG_RESTORE; 8545 } 8546 8547 pl_yylval.opval = newOP(OP_PADANY, 0); 8548 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 8549 UTF ? SVf_UTF8 : 0); 8550 return PRIVATEREF; 8551 } 8552 } 8553 8554 /* 8555 build the ops for accesses to a my() variable. 8556 */ 8557 8558 if (!has_colon) { 8559 if (!PL_in_my) 8560 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, 8561 0); 8562 if (tmp != NOT_IN_PAD) { 8563 /* might be an "our" variable" */ 8564 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 8565 /* build ops for a bareword */ 8566 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 8567 HEK * const stashname = HvNAME_HEK(stash); 8568 SV * const sym = newSVhek(stashname); 8569 sv_catpvs(sym, "::"); 8570 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES )); 8571 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); 8572 pl_yylval.opval->op_private = OPpCONST_ENTERED; 8573 if (pit != '&') 8574 gv_fetchsv(sym, 8575 GV_ADDMULTI, 8576 ((PL_tokenbuf[0] == '$') ? SVt_PV 8577 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 8578 : SVt_PVHV)); 8579 return WORD; 8580 } 8581 8582 pl_yylval.opval = newOP(OP_PADANY, 0); 8583 pl_yylval.opval->op_targ = tmp; 8584 return PRIVATEREF; 8585 } 8586 } 8587 8588 /* 8589 Whine if they've said @foo in a doublequoted string, 8590 and @foo isn't a variable we can find in the symbol 8591 table. 8592 */ 8593 if (ckWARN(WARN_AMBIGUOUS) 8594 && pit == '@' 8595 && PL_lex_state != LEX_NORMAL 8596 && !PL_lex_brackets) 8597 { 8598 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 8599 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV); 8600 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 8601 /* DO NOT warn for @- and @+ */ 8602 && !( PL_tokenbuf[2] == '\0' 8603 && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' )) 8604 ) 8605 { 8606 /* Downgraded from fatal to warning 20000522 mjd */ 8607 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 8608 "Possible unintended interpolation of %"UTF8f 8609 " in string", 8610 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); 8611 } 8612 } 8613 8614 /* build ops for a bareword */ 8615 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, 8616 newSVpvn_flags(PL_tokenbuf + 1, 8617 tokenbuf_len - 1, 8618 UTF ? SVf_UTF8 : 0 )); 8619 pl_yylval.opval->op_private = OPpCONST_ENTERED; 8620 if (pit != '&') 8621 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, 8622 (PL_in_eval ? GV_ADDMULTI : GV_ADD) 8623 | ( UTF ? SVf_UTF8 : 0 ), 8624 ((PL_tokenbuf[0] == '$') ? SVt_PV 8625 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 8626 : SVt_PVHV)); 8627 return WORD; 8628} 8629 8630STATIC void 8631S_checkcomma(pTHX_ const char *s, const char *name, const char *what) 8632{ 8633 PERL_ARGS_ASSERT_CHECKCOMMA; 8634 8635 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 8636 if (ckWARN(WARN_SYNTAX)) { 8637 int level = 1; 8638 const char *w; 8639 for (w = s+2; *w && level; w++) { 8640 if (*w == '(') 8641 ++level; 8642 else if (*w == ')') 8643 --level; 8644 } 8645 while (isSPACE(*w)) 8646 ++w; 8647 /* the list of chars below is for end of statements or 8648 * block / parens, boolean operators (&&, ||, //) and branch 8649 * constructs (or, and, if, until, unless, while, err, for). 8650 * Not a very solid hack... */ 8651 if (!*w || !strchr(";&/|})]oaiuwef!=", *w)) 8652 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 8653 "%s (...) interpreted as function",name); 8654 } 8655 } 8656 while (s < PL_bufend && isSPACE(*s)) 8657 s++; 8658 if (*s == '(') 8659 s++; 8660 while (s < PL_bufend && isSPACE(*s)) 8661 s++; 8662 if (isIDFIRST_lazy_if(s,UTF)) { 8663 const char * const w = s; 8664 s += UTF ? UTF8SKIP(s) : 1; 8665 while (isWORDCHAR_lazy_if(s,UTF)) 8666 s += UTF ? UTF8SKIP(s) : 1; 8667 while (s < PL_bufend && isSPACE(*s)) 8668 s++; 8669 if (*s == ',') { 8670 GV* gv; 8671 PADOFFSET off; 8672 if (keyword(w, s - w, 0)) 8673 return; 8674 8675 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 8676 if (gv && GvCVu(gv)) 8677 return; 8678 if (s - w <= 254) { 8679 char tmpbuf[256]; 8680 Copy(w, tmpbuf+1, s - w, char); 8681 *tmpbuf = '&'; 8682 off = pad_findmy_pvn(tmpbuf, s-w+1, 0); 8683 if (off != NOT_IN_PAD) return; 8684 } 8685 Perl_croak(aTHX_ "No comma allowed after %s", what); 8686 } 8687 } 8688} 8689 8690/* S_new_constant(): do any overload::constant lookup. 8691 8692 Either returns sv, or mortalizes/frees sv and returns a new SV*. 8693 Best used as sv=new_constant(..., sv, ...). 8694 If s, pv are NULL, calls subroutine with one argument, 8695 and <type> is used with error messages only. 8696 <type> is assumed to be well formed UTF-8 */ 8697 8698STATIC SV * 8699S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, 8700 SV *sv, SV *pv, const char *type, STRLEN typelen) 8701{ 8702 dSP; 8703 HV * table = GvHV(PL_hintgv); /* ^H */ 8704 SV *res; 8705 SV *errsv = NULL; 8706 SV **cvp; 8707 SV *cv, *typesv; 8708 const char *why1 = "", *why2 = "", *why3 = ""; 8709 8710 PERL_ARGS_ASSERT_NEW_CONSTANT; 8711 /* We assume that this is true: */ 8712 if (*key == 'c') { assert (strEQ(key, "charnames")); } 8713 assert(type || s); 8714 8715 /* charnames doesn't work well if there have been errors found */ 8716 if (PL_error_count > 0 && *key == 'c') 8717 { 8718 SvREFCNT_dec_NN(sv); 8719 return &PL_sv_undef; 8720 } 8721 8722 sv_2mortal(sv); /* Parent created it permanently */ 8723 if (!table 8724 || ! (PL_hints & HINT_LOCALIZE_HH) 8725 || ! (cvp = hv_fetch(table, key, keylen, FALSE)) 8726 || ! SvOK(*cvp)) 8727 { 8728 char *msg; 8729 8730 /* Here haven't found what we're looking for. If it is charnames, 8731 * perhaps it needs to be loaded. Try doing that before giving up */ 8732 if (*key == 'c') { 8733 Perl_load_module(aTHX_ 8734 0, 8735 newSVpvs("_charnames"), 8736 /* version parameter; no need to specify it, as if 8737 * we get too early a version, will fail anyway, 8738 * not being able to find '_charnames' */ 8739 NULL, 8740 newSVpvs(":full"), 8741 newSVpvs(":short"), 8742 NULL); 8743 assert(sp == PL_stack_sp); 8744 table = GvHV(PL_hintgv); 8745 if (table 8746 && (PL_hints & HINT_LOCALIZE_HH) 8747 && (cvp = hv_fetch(table, key, keylen, FALSE)) 8748 && SvOK(*cvp)) 8749 { 8750 goto now_ok; 8751 } 8752 } 8753 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { 8754 msg = Perl_form(aTHX_ 8755 "Constant(%.*s) unknown", 8756 (int)(type ? typelen : len), 8757 (type ? type: s)); 8758 } 8759 else { 8760 why1 = "$^H{"; 8761 why2 = key; 8762 why3 = "} is not defined"; 8763 report: 8764 if (*key == 'c') { 8765 msg = Perl_form(aTHX_ 8766 /* The +3 is for '\N{'; -4 for that, plus '}' */ 8767 "Unknown charname '%.*s'", (int)typelen - 4, type + 3 8768 ); 8769 } 8770 else { 8771 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s", 8772 (int)(type ? typelen : len), 8773 (type ? type: s), why1, why2, why3); 8774 } 8775 } 8776 yyerror_pv(msg, UTF ? SVf_UTF8 : 0); 8777 return SvREFCNT_inc_simple_NN(sv); 8778 } 8779 now_ok: 8780 cv = *cvp; 8781 if (!pv && s) 8782 pv = newSVpvn_flags(s, len, SVs_TEMP); 8783 if (type && pv) 8784 typesv = newSVpvn_flags(type, typelen, SVs_TEMP); 8785 else 8786 typesv = &PL_sv_undef; 8787 8788 PUSHSTACKi(PERLSI_OVERLOAD); 8789 ENTER ; 8790 SAVETMPS; 8791 8792 PUSHMARK(SP) ; 8793 EXTEND(sp, 3); 8794 if (pv) 8795 PUSHs(pv); 8796 PUSHs(sv); 8797 if (pv) 8798 PUSHs(typesv); 8799 PUTBACK; 8800 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 8801 8802 SPAGAIN ; 8803 8804 /* Check the eval first */ 8805 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) { 8806 STRLEN errlen; 8807 const char * errstr; 8808 sv_catpvs(errsv, "Propagated"); 8809 errstr = SvPV_const(errsv, errlen); 8810 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ 8811 (void)POPs; 8812 res = SvREFCNT_inc_simple_NN(sv); 8813 } 8814 else { 8815 res = POPs; 8816 SvREFCNT_inc_simple_void_NN(res); 8817 } 8818 8819 PUTBACK ; 8820 FREETMPS ; 8821 LEAVE ; 8822 POPSTACK; 8823 8824 if (!SvOK(res)) { 8825 why1 = "Call to &{$^H{"; 8826 why2 = key; 8827 why3 = "}} did not return a defined value"; 8828 sv = res; 8829 (void)sv_2mortal(sv); 8830 goto report; 8831 } 8832 8833 return res; 8834} 8835 8836PERL_STATIC_INLINE void 8837S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) { 8838 PERL_ARGS_ASSERT_PARSE_IDENT; 8839 8840 for (;;) { 8841 if (*d >= e) 8842 Perl_croak(aTHX_ "%s", ident_too_long); 8843 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) { 8844 /* The UTF-8 case must come first, otherwise things 8845 * like c\N{COMBINING TILDE} would start failing, as the 8846 * isWORDCHAR_A case below would gobble the 'c' up. 8847 */ 8848 8849 char *t = *s + UTF8SKIP(*s); 8850 while (isIDCONT_utf8((U8*)t)) 8851 t += UTF8SKIP(t); 8852 if (*d + (t - *s) > e) 8853 Perl_croak(aTHX_ "%s", ident_too_long); 8854 Copy(*s, *d, t - *s, char); 8855 *d += t - *s; 8856 *s = t; 8857 } 8858 else if ( isWORDCHAR_A(**s) ) { 8859 do { 8860 *(*d)++ = *(*s)++; 8861 } while (isWORDCHAR_A(**s) && *d < e); 8862 } 8863 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) { 8864 *(*d)++ = ':'; 8865 *(*d)++ = ':'; 8866 (*s)++; 8867 } 8868 else if (allow_package && **s == ':' && (*s)[1] == ':' 8869 /* Disallow things like Foo::$bar. For the curious, this is 8870 * the code path that triggers the "Bad name after" warning 8871 * when looking for barewords. 8872 */ 8873 && (*s)[2] != '$') { 8874 *(*d)++ = *(*s)++; 8875 *(*d)++ = *(*s)++; 8876 } 8877 else 8878 break; 8879 } 8880 return; 8881} 8882 8883/* Returns a NUL terminated string, with the length of the string written to 8884 *slp 8885 */ 8886STATIC char * 8887S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 8888{ 8889 char *d = dest; 8890 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 8891 bool is_utf8 = cBOOL(UTF); 8892 8893 PERL_ARGS_ASSERT_SCAN_WORD; 8894 8895 parse_ident(&s, &d, e, allow_package, is_utf8); 8896 *d = '\0'; 8897 *slp = d - dest; 8898 return s; 8899} 8900 8901/* Is the byte 'd' a legal single character identifier name? 'u' is true 8902 * iff Unicode semantics are to be used. The legal ones are any of: 8903 * a) all ASCII characters except: 8904 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE; 8905 * 2) '{' 8906 * The final case currently doesn't get this far in the program, so we 8907 * don't test for it. If that were to change, it would be ok to allow it. 8908 * c) When not under Unicode rules, any upper Latin1 character 8909 * d) Otherwise, when unicode rules are used, all XIDS characters. 8910 * 8911 * Because all ASCII characters have the same representation whether 8912 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and 8913 * '{' without knowing if is UTF-8 or not. 8914 * EBCDIC already uses the rules that ASCII platforms will use after the 8915 * deprecation cycle; see comment below about the deprecation. */ 8916#ifdef EBCDIC 8917# define VALID_LEN_ONE_IDENT(s, is_utf8) \ 8918 (isGRAPH_A(*(s)) || ((is_utf8) \ 8919 ? isIDFIRST_utf8((U8*) (s)) \ 8920 : (isGRAPH_L1(*s) \ 8921 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD))))) 8922#else 8923# define VALID_LEN_ONE_IDENT(s, is_utf8) \ 8924 (isGRAPH_A(*(s)) || ((is_utf8) \ 8925 ? isIDFIRST_utf8((U8*) (s)) \ 8926 : ! isASCII_utf8((U8*) (s)))) 8927#endif 8928 8929STATIC char * 8930S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) 8931{ 8932 I32 herelines = PL_parser->herelines; 8933 SSize_t bracket = -1; 8934 char funny = *s++; 8935 char *d = dest; 8936 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 8937 bool is_utf8 = cBOOL(UTF); 8938 I32 orig_copline = 0, tmp_copline = 0; 8939 8940 PERL_ARGS_ASSERT_SCAN_IDENT; 8941 8942 if (isSPACE(*s) || !*s) 8943 s = skipspace(s); 8944 if (isDIGIT(*s)) { 8945 while (isDIGIT(*s)) { 8946 if (d >= e) 8947 Perl_croak(aTHX_ "%s", ident_too_long); 8948 *d++ = *s++; 8949 } 8950 } 8951 else { /* See if it is a "normal" identifier */ 8952 parse_ident(&s, &d, e, 1, is_utf8); 8953 } 8954 *d = '\0'; 8955 d = dest; 8956 if (*d) { 8957 /* Either a digit variable, or parse_ident() found an identifier 8958 (anything valid as a bareword), so job done and return. */ 8959 if (PL_lex_state != LEX_NORMAL) 8960 PL_lex_state = LEX_INTERPENDMAYBE; 8961 return s; 8962 } 8963 8964 /* Here, it is not a run-of-the-mill identifier name */ 8965 8966 if (*s == '$' && s[1] 8967 && (isIDFIRST_lazy_if(s+1,is_utf8) 8968 || isDIGIT_A((U8)s[1]) 8969 || s[1] == '$' 8970 || s[1] == '{' 8971 || strnEQ(s+1,"::",2)) ) 8972 { 8973 /* Dereferencing a value in a scalar variable. 8974 The alternatives are different syntaxes for a scalar variable. 8975 Using ' as a leading package separator isn't allowed. :: is. */ 8976 return s; 8977 } 8978 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ 8979 if (*s == '{') { 8980 bracket = s - SvPVX(PL_linestr); 8981 s++; 8982 orig_copline = CopLINE(PL_curcop); 8983 if (s < PL_bufend && isSPACE(*s)) { 8984 s = skipspace(s); 8985 } 8986 } 8987 if ((s <= PL_bufend - (is_utf8) 8988 ? UTF8SKIP(s) 8989 : 1) 8990 && VALID_LEN_ONE_IDENT(s, is_utf8)) 8991 { 8992 /* Deprecate all non-graphic characters. Include SHY as a non-graphic, 8993 * because often it has no graphic representation. (We can't get to 8994 * here with SHY when 'is_utf8' is true, so no need to include a UTF-8 8995 * test for it.) */ 8996 if ((is_utf8) 8997 ? ! isGRAPH_utf8( (U8*) s) 8998 : (! isGRAPH_L1( (U8) *s) 8999 || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD)))) 9000 { 9001 deprecate("literal non-graphic characters in variable names"); 9002 } 9003 9004 if (is_utf8) { 9005 const STRLEN skip = UTF8SKIP(s); 9006 STRLEN i; 9007 d[skip] = '\0'; 9008 for ( i = 0; i < skip; i++ ) 9009 d[i] = *s++; 9010 } 9011 else { 9012 *d = *s++; 9013 d[1] = '\0'; 9014 } 9015 } 9016 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ 9017 if (*d == '^' && *s && isCONTROLVAR(*s)) { 9018 *d = toCTRL(*s); 9019 s++; 9020 } 9021 /* Warn about ambiguous code after unary operators if {...} notation isn't 9022 used. There's no difference in ambiguity; it's merely a heuristic 9023 about when not to warn. */ 9024 else if (ck_uni && bracket == -1) 9025 check_uni(); 9026 if (bracket != -1) { 9027 bool skip; 9028 char *s2; 9029 /* If we were processing {...} notation then... */ 9030 if (isIDFIRST_lazy_if(d,is_utf8)) { 9031 /* if it starts as a valid identifier, assume that it is one. 9032 (the later check for } being at the expected point will trap 9033 cases where this doesn't pan out.) */ 9034 d += is_utf8 ? UTF8SKIP(d) : 1; 9035 parse_ident(&s, &d, e, 1, is_utf8); 9036 *d = '\0'; 9037 tmp_copline = CopLINE(PL_curcop); 9038 if (s < PL_bufend && isSPACE(*s)) { 9039 s = skipspace(s); 9040 } 9041 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 9042 /* ${foo[0]} and ${foo{bar}} notation. */ 9043 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { 9044 const char * const brack = 9045 (const char *) 9046 ((*s == '[') ? "[...]" : "{...}"); 9047 orig_copline = CopLINE(PL_curcop); 9048 CopLINE_set(PL_curcop, tmp_copline); 9049 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */ 9050 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9051 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 9052 funny, dest, brack, funny, dest, brack); 9053 CopLINE_set(PL_curcop, orig_copline); 9054 } 9055 bracket++; 9056 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 9057 PL_lex_allbrackets++; 9058 return s; 9059 } 9060 } 9061 /* Handle extended ${^Foo} variables 9062 * 1999-02-27 mjd-perl-patch@plover.com */ 9063 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ 9064 && isWORDCHAR(*s)) 9065 { 9066 d++; 9067 while (isWORDCHAR(*s) && d < e) { 9068 *d++ = *s++; 9069 } 9070 if (d >= e) 9071 Perl_croak(aTHX_ "%s", ident_too_long); 9072 *d = '\0'; 9073 } 9074 9075 if ( !tmp_copline ) 9076 tmp_copline = CopLINE(PL_curcop); 9077 if ((skip = s < PL_bufend && isSPACE(*s))) 9078 /* Avoid incrementing line numbers or resetting PL_linestart, 9079 in case we have to back up. */ 9080 s2 = skipspace_flags(s, LEX_NO_INCLINE); 9081 else 9082 s2 = s; 9083 9084 /* Expect to find a closing } after consuming any trailing whitespace. 9085 */ 9086 if (*s2 == '}') { 9087 /* Now increment line numbers if applicable. */ 9088 if (skip) 9089 s = skipspace(s); 9090 s++; 9091 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 9092 PL_lex_state = LEX_INTERPEND; 9093 PL_expect = XREF; 9094 } 9095 if (PL_lex_state == LEX_NORMAL) { 9096 if (ckWARN(WARN_AMBIGUOUS) 9097 && (keyword(dest, d - dest, 0) 9098 || get_cvn_flags(dest, d - dest, is_utf8 9099 ? SVf_UTF8 9100 : 0))) 9101 { 9102 SV *tmp = newSVpvn_flags( dest, d - dest, 9103 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); 9104 if (funny == '#') 9105 funny = '@'; 9106 orig_copline = CopLINE(PL_curcop); 9107 CopLINE_set(PL_curcop, tmp_copline); 9108 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9109 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf, 9110 funny, SVfARG(tmp), funny, SVfARG(tmp)); 9111 CopLINE_set(PL_curcop, orig_copline); 9112 } 9113 } 9114 } 9115 else { 9116 /* Didn't find the closing } at the point we expected, so restore 9117 state such that the next thing to process is the opening { and */ 9118 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ 9119 CopLINE_set(PL_curcop, orig_copline); 9120 PL_parser->herelines = herelines; 9121 *dest = '\0'; 9122 } 9123 } 9124 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s)) 9125 PL_lex_state = LEX_INTERPEND; 9126 return s; 9127} 9128 9129static bool 9130S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) { 9131 9132 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag 9133 * found in the parse starting at 's', based on the subset that are valid 9134 * in this context input to this routine in 'valid_flags'. Advances s. 9135 * Returns TRUE if the input should be treated as a valid flag, so the next 9136 * char may be as well; otherwise FALSE. 'charset' should point to a NUL 9137 * upon first call on the current regex. This routine will set it to any 9138 * charset modifier found. The caller shouldn't change it. This way, 9139 * another charset modifier encountered in the parse can be detected as an 9140 * error, as we have decided to allow only one */ 9141 9142 const char c = **s; 9143 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; 9144 9145 if ( charlen != 1 || ! strchr(valid_flags, c) ) { 9146 if (isWORDCHAR_lazy_if(*s, UTF)) { 9147 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s), 9148 UTF ? SVf_UTF8 : 0); 9149 (*s) += charlen; 9150 /* Pretend that it worked, so will continue processing before 9151 * dieing */ 9152 return TRUE; 9153 } 9154 return FALSE; 9155 } 9156 9157 switch (c) { 9158 9159 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count); 9160 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; 9161 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; 9162 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; 9163 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; 9164 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; 9165 case LOCALE_PAT_MOD: 9166 if (*charset) { 9167 goto multiple_charsets; 9168 } 9169 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); 9170 *charset = c; 9171 break; 9172 case UNICODE_PAT_MOD: 9173 if (*charset) { 9174 goto multiple_charsets; 9175 } 9176 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); 9177 *charset = c; 9178 break; 9179 case ASCII_RESTRICT_PAT_MOD: 9180 if (! *charset) { 9181 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); 9182 } 9183 else { 9184 9185 /* Error if previous modifier wasn't an 'a', but if it was, see 9186 * if, and accept, a second occurrence (only) */ 9187 if (*charset != 'a' 9188 || get_regex_charset(*pmfl) 9189 != REGEX_ASCII_RESTRICTED_CHARSET) 9190 { 9191 goto multiple_charsets; 9192 } 9193 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); 9194 } 9195 *charset = c; 9196 break; 9197 case DEPENDS_PAT_MOD: 9198 if (*charset) { 9199 goto multiple_charsets; 9200 } 9201 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); 9202 *charset = c; 9203 break; 9204 } 9205 9206 (*s)++; 9207 return TRUE; 9208 9209 multiple_charsets: 9210 if (*charset != c) { 9211 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); 9212 } 9213 else if (c == 'a') { 9214 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */ 9215 yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); 9216 } 9217 else { 9218 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); 9219 } 9220 9221 /* Pretend that it worked, so will continue processing before dieing */ 9222 (*s)++; 9223 return TRUE; 9224} 9225 9226STATIC char * 9227S_scan_pat(pTHX_ char *start, I32 type) 9228{ 9229 PMOP *pm; 9230 char *s; 9231 const char * const valid_flags = 9232 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); 9233 char charset = '\0'; /* character set modifier */ 9234 unsigned int x_mod_count = 0; 9235 9236 PERL_ARGS_ASSERT_SCAN_PAT; 9237 9238 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL); 9239 if (!s) 9240 Perl_croak(aTHX_ "Search pattern not terminated"); 9241 9242 pm = (PMOP*)newPMOP(type, 0); 9243 if (PL_multi_open == '?') { 9244 /* This is the only point in the code that sets PMf_ONCE: */ 9245 pm->op_pmflags |= PMf_ONCE; 9246 9247 /* Hence it's safe to do this bit of PMOP book-keeping here, which 9248 allows us to restrict the list needed by reset to just the ?? 9249 matches. */ 9250 assert(type != OP_TRANS); 9251 if (PL_curstash) { 9252 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); 9253 U32 elements; 9254 if (!mg) { 9255 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 9256 0); 9257 } 9258 elements = mg->mg_len / sizeof(PMOP**); 9259 Renewc(mg->mg_ptr, elements + 1, PMOP*, char); 9260 ((PMOP**)mg->mg_ptr) [elements++] = pm; 9261 mg->mg_len = elements * sizeof(PMOP**); 9262 PmopSTASH_set(pm,PL_curstash); 9263 } 9264 } 9265 9266 /* if qr/...(?{..}).../, then need to parse the pattern within a new 9267 * anon CV. False positives like qr/[(?{]/ are harmless */ 9268 9269 if (type == OP_QR) { 9270 STRLEN len; 9271 char *e, *p = SvPV(PL_lex_stuff, len); 9272 e = p + len; 9273 for (; p < e; p++) { 9274 if (p[0] == '(' && p[1] == '?' 9275 && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) 9276 { 9277 pm->op_pmflags |= PMf_HAS_CV; 9278 break; 9279 } 9280 } 9281 pm->op_pmflags |= PMf_IS_QR; 9282 } 9283 9284 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), 9285 &s, &charset, &x_mod_count)) 9286 {}; 9287 /* issue a warning if /c is specified,but /g is not */ 9288 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 9289 { 9290 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 9291 "Use of /c modifier is meaningless without /g" ); 9292 } 9293 9294 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count); 9295 9296 PL_lex_op = (OP*)pm; 9297 pl_yylval.ival = OP_MATCH; 9298 return s; 9299} 9300 9301STATIC char * 9302S_scan_subst(pTHX_ char *start) 9303{ 9304 char *s; 9305 PMOP *pm; 9306 I32 first_start; 9307 line_t first_line; 9308 I32 es = 0; 9309 char charset = '\0'; /* character set modifier */ 9310 unsigned int x_mod_count = 0; 9311 char *t; 9312 9313 PERL_ARGS_ASSERT_SCAN_SUBST; 9314 9315 pl_yylval.ival = OP_NULL; 9316 9317 s = scan_str(start, TRUE, FALSE, FALSE, &t); 9318 9319 if (!s) 9320 Perl_croak(aTHX_ "Substitution pattern not terminated"); 9321 9322 s = t; 9323 9324 first_start = PL_multi_start; 9325 first_line = CopLINE(PL_curcop); 9326 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 9327 if (!s) { 9328 SvREFCNT_dec_NN(PL_lex_stuff); 9329 PL_lex_stuff = NULL; 9330 Perl_croak(aTHX_ "Substitution replacement not terminated"); 9331 } 9332 PL_multi_start = first_start; /* so whole substitution is taken together */ 9333 9334 pm = (PMOP*)newPMOP(OP_SUBST, 0); 9335 9336 9337 while (*s) { 9338 if (*s == EXEC_PAT_MOD) { 9339 s++; 9340 es++; 9341 } 9342 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), 9343 &s, &charset, &x_mod_count)) 9344 { 9345 break; 9346 } 9347 } 9348 9349 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count); 9350 9351 if ((pm->op_pmflags & PMf_CONTINUE)) { 9352 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); 9353 } 9354 9355 if (es) { 9356 SV * const repl = newSVpvs(""); 9357 9358 PL_multi_end = 0; 9359 pm->op_pmflags |= PMf_EVAL; 9360 while (es-- > 0) { 9361 if (es) 9362 sv_catpvs(repl, "eval "); 9363 else 9364 sv_catpvs(repl, "do "); 9365 } 9366 sv_catpvs(repl, "{"); 9367 sv_catsv(repl, PL_sublex_info.repl); 9368 sv_catpvs(repl, "}"); 9369 SvEVALED_on(repl); 9370 SvREFCNT_dec(PL_sublex_info.repl); 9371 PL_sublex_info.repl = repl; 9372 } 9373 if (CopLINE(PL_curcop) != first_line) { 9374 sv_upgrade(PL_sublex_info.repl, SVt_PVNV); 9375 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow = 9376 CopLINE(PL_curcop) - first_line; 9377 CopLINE_set(PL_curcop, first_line); 9378 } 9379 9380 PL_lex_op = (OP*)pm; 9381 pl_yylval.ival = OP_SUBST; 9382 return s; 9383} 9384 9385STATIC char * 9386S_scan_trans(pTHX_ char *start) 9387{ 9388 char* s; 9389 OP *o; 9390 U8 squash; 9391 U8 del; 9392 U8 complement; 9393 bool nondestruct = 0; 9394 char *t; 9395 9396 PERL_ARGS_ASSERT_SCAN_TRANS; 9397 9398 pl_yylval.ival = OP_NULL; 9399 9400 s = scan_str(start,FALSE,FALSE,FALSE,&t); 9401 if (!s) 9402 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 9403 9404 s = t; 9405 9406 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 9407 if (!s) { 9408 SvREFCNT_dec_NN(PL_lex_stuff); 9409 PL_lex_stuff = NULL; 9410 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 9411 } 9412 9413 complement = del = squash = 0; 9414 while (1) { 9415 switch (*s) { 9416 case 'c': 9417 complement = OPpTRANS_COMPLEMENT; 9418 break; 9419 case 'd': 9420 del = OPpTRANS_DELETE; 9421 break; 9422 case 's': 9423 squash = OPpTRANS_SQUASH; 9424 break; 9425 case 'r': 9426 nondestruct = 1; 9427 break; 9428 default: 9429 goto no_more; 9430 } 9431 s++; 9432 } 9433 no_more: 9434 9435 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL); 9436 o->op_private &= ~OPpTRANS_ALL; 9437 o->op_private |= del|squash|complement| 9438 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| 9439 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0); 9440 9441 PL_lex_op = o; 9442 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS; 9443 9444 9445 return s; 9446} 9447 9448/* scan_heredoc 9449 Takes a pointer to the first < in <<FOO. 9450 Returns a pointer to the byte following <<FOO. 9451 9452 This function scans a heredoc, which involves different methods 9453 depending on whether we are in a string eval, quoted construct, etc. 9454 This is because PL_linestr could containing a single line of input, or 9455 a whole string being evalled, or the contents of the current quote- 9456 like operator. 9457 9458 The two basic methods are: 9459 - Steal lines from the input stream 9460 - Scan the heredoc in PL_linestr and remove it therefrom 9461 9462 In a file scope or filtered eval, the first method is used; in a 9463 string eval, the second. 9464 9465 In a quote-like operator, we have to choose between the two, 9466 depending on where we can find a newline. We peek into outer lex- 9467 ing scopes until we find one with a newline in it. If we reach the 9468 outermost lexing scope and it is a file, we use the stream method. 9469 Otherwise it is treated as an eval. 9470*/ 9471 9472STATIC char * 9473S_scan_heredoc(pTHX_ char *s) 9474{ 9475 I32 op_type = OP_SCALAR; 9476 I32 len; 9477 SV *tmpstr; 9478 char term; 9479 char *d; 9480 char *e; 9481 char *peek; 9482 const bool infile = PL_rsfp || PL_parser->filtered; 9483 const line_t origline = CopLINE(PL_curcop); 9484 LEXSHARED *shared = PL_parser->lex_shared; 9485 9486 PERL_ARGS_ASSERT_SCAN_HEREDOC; 9487 9488 s += 2; 9489 d = PL_tokenbuf + 1; 9490 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 9491 *PL_tokenbuf = '\n'; 9492 peek = s; 9493 while (SPACE_OR_TAB(*peek)) 9494 peek++; 9495 if (*peek == '`' || *peek == '\'' || *peek =='"') { 9496 s = peek; 9497 term = *s++; 9498 s = delimcpy(d, e, s, PL_bufend, term, &len); 9499 if (s == PL_bufend) 9500 Perl_croak(aTHX_ "Unterminated delimiter for here document"); 9501 d += len; 9502 s++; 9503 } 9504 else { 9505 if (*s == '\\') 9506 /* <<\FOO is equivalent to <<'FOO' */ 9507 s++, term = '\''; 9508 else 9509 term = '"'; 9510 if (!isWORDCHAR_lazy_if(s,UTF)) 9511 deprecate("bare << to mean <<\"\""); 9512 peek = s; 9513 while (isWORDCHAR_lazy_if(peek,UTF)) { 9514 peek += UTF ? UTF8SKIP(peek) : 1; 9515 } 9516 len = (peek - s >= e - d) ? (e - d) : (peek - s); 9517 Copy(s, d, len, char); 9518 s += len; 9519 d += len; 9520 } 9521 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 9522 Perl_croak(aTHX_ "Delimiter for here document is too long"); 9523 *d++ = '\n'; 9524 *d = '\0'; 9525 len = d - PL_tokenbuf; 9526 9527#ifndef PERL_STRICT_CR 9528 d = strchr(s, '\r'); 9529 if (d) { 9530 char * const olds = s; 9531 s = d; 9532 while (s < PL_bufend) { 9533 if (*s == '\r') { 9534 *d++ = '\n'; 9535 if (*++s == '\n') 9536 s++; 9537 } 9538 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 9539 *d++ = *s++; 9540 s++; 9541 } 9542 else 9543 *d++ = *s++; 9544 } 9545 *d = '\0'; 9546 PL_bufend = d; 9547 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 9548 s = olds; 9549 } 9550#endif 9551 9552 tmpstr = newSV_type(SVt_PVIV); 9553 SvGROW(tmpstr, 80); 9554 if (term == '\'') { 9555 op_type = OP_CONST; 9556 SvIV_set(tmpstr, -1); 9557 } 9558 else if (term == '`') { 9559 op_type = OP_BACKTICK; 9560 SvIV_set(tmpstr, '\\'); 9561 } 9562 9563 PL_multi_start = origline + 1 + PL_parser->herelines; 9564 PL_multi_open = PL_multi_close = '<'; 9565 /* inside a string eval or quote-like operator */ 9566 if (!infile || PL_lex_inwhat) { 9567 SV *linestr; 9568 char *bufend; 9569 char * const olds = s; 9570 PERL_CONTEXT * const cx = CX_CUR(); 9571 /* These two fields are not set until an inner lexing scope is 9572 entered. But we need them set here. */ 9573 shared->ls_bufptr = s; 9574 shared->ls_linestr = PL_linestr; 9575 if (PL_lex_inwhat) 9576 /* Look for a newline. If the current buffer does not have one, 9577 peek into the line buffer of the parent lexing scope, going 9578 up as many levels as necessary to find one with a newline 9579 after bufptr. 9580 */ 9581 while (!(s = (char *)memchr( 9582 (void *)shared->ls_bufptr, '\n', 9583 SvEND(shared->ls_linestr)-shared->ls_bufptr 9584 ))) { 9585 shared = shared->ls_prev; 9586 /* shared is only null if we have gone beyond the outermost 9587 lexing scope. In a file, we will have broken out of the 9588 loop in the previous iteration. In an eval, the string buf- 9589 fer ends with "\n;", so the while condition above will have 9590 evaluated to false. So shared can never be null. Or so you 9591 might think. Odd syntax errors like s;@{<<; can gobble up 9592 the implicit semicolon at the end of a flie, causing the 9593 file handle to be closed even when we are not in a string 9594 eval. So shared may be null in that case. */ 9595 if (UNLIKELY(!shared)) 9596 goto interminable; 9597 /* A LEXSHARED struct with a null ls_prev pointer is the outer- 9598 most lexing scope. In a file, shared->ls_linestr at that 9599 level is just one line, so there is no body to steal. */ 9600 if (infile && !shared->ls_prev) { 9601 s = olds; 9602 goto streaming; 9603 } 9604 } 9605 else { /* eval or we've already hit EOF */ 9606 s = (char*)memchr((void*)s, '\n', PL_bufend - s); 9607 if (!s) 9608 goto interminable; 9609 } 9610 linestr = shared->ls_linestr; 9611 bufend = SvEND(linestr); 9612 d = s; 9613 while (s < bufend - len + 1 9614 && memNE(s,PL_tokenbuf,len) ) 9615 { 9616 if (*s++ == '\n') 9617 ++PL_parser->herelines; 9618 } 9619 if (s >= bufend - len + 1) { 9620 goto interminable; 9621 } 9622 sv_setpvn(tmpstr,d+1,s-d); 9623 s += len - 1; 9624 /* the preceding stmt passes a newline */ 9625 PL_parser->herelines++; 9626 9627 /* s now points to the newline after the heredoc terminator. 9628 d points to the newline before the body of the heredoc. 9629 */ 9630 9631 /* We are going to modify linestr in place here, so set 9632 aside copies of the string if necessary for re-evals or 9633 (caller $n)[6]. */ 9634 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we 9635 check shared->re_eval_str. */ 9636 if (shared->re_eval_start || shared->re_eval_str) { 9637 /* Set aside the rest of the regexp */ 9638 if (!shared->re_eval_str) 9639 shared->re_eval_str = 9640 newSVpvn(shared->re_eval_start, 9641 bufend - shared->re_eval_start); 9642 shared->re_eval_start -= s-d; 9643 } 9644 if (cxstack_ix >= 0 9645 && CxTYPE(cx) == CXt_EVAL 9646 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL 9647 && cx->blk_eval.cur_text == linestr) 9648 { 9649 cx->blk_eval.cur_text = newSVsv(linestr); 9650 SvSCREAM_on(cx->blk_eval.cur_text); 9651 } 9652 /* Copy everything from s onwards back to d. */ 9653 Move(s,d,bufend-s + 1,char); 9654 SvCUR_set(linestr, SvCUR(linestr) - (s-d)); 9655 /* Setting PL_bufend only applies when we have not dug deeper 9656 into other scopes, because sublex_done sets PL_bufend to 9657 SvEND(PL_linestr). */ 9658 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr); 9659 s = olds; 9660 } 9661 else 9662 { 9663 SV *linestr_save; 9664 char *oldbufptr_save; 9665 streaming: 9666 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ 9667 term = PL_tokenbuf[1]; 9668 len--; 9669 linestr_save = PL_linestr; /* must restore this afterwards */ 9670 d = s; /* and this */ 9671 oldbufptr_save = PL_oldbufptr; 9672 PL_linestr = newSVpvs(""); 9673 PL_bufend = SvPVX(PL_linestr); 9674 while (1) { 9675 PL_bufptr = PL_bufend; 9676 CopLINE_set(PL_curcop, 9677 origline + 1 + PL_parser->herelines); 9678 if (!lex_next_chunk(LEX_NO_TERM) 9679 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) { 9680 /* Simply freeing linestr_save might seem simpler here, as it 9681 does not matter what PL_linestr points to, since we are 9682 about to croak; but in a quote-like op, linestr_save 9683 will have been prospectively freed already, via 9684 SAVEFREESV(PL_linestr) in sublex_push, so it���s easier to 9685 restore PL_linestr. */ 9686 SvREFCNT_dec_NN(PL_linestr); 9687 PL_linestr = linestr_save; 9688 PL_oldbufptr = oldbufptr_save; 9689 goto interminable; 9690 } 9691 CopLINE_set(PL_curcop, origline); 9692 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { 9693 s = lex_grow_linestr(SvLEN(PL_linestr) + 3); 9694 /* ^That should be enough to avoid this needing to grow: */ 9695 sv_catpvs(PL_linestr, "\n\0"); 9696 assert(s == SvPVX(PL_linestr)); 9697 PL_bufend = SvEND(PL_linestr); 9698 } 9699 s = PL_bufptr; 9700 PL_parser->herelines++; 9701 PL_last_lop = PL_last_uni = NULL; 9702#ifndef PERL_STRICT_CR 9703 if (PL_bufend - PL_linestart >= 2) { 9704 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') 9705 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 9706 { 9707 PL_bufend[-2] = '\n'; 9708 PL_bufend--; 9709 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 9710 } 9711 else if (PL_bufend[-1] == '\r') 9712 PL_bufend[-1] = '\n'; 9713 } 9714 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 9715 PL_bufend[-1] = '\n'; 9716#endif 9717 if (*s == term && PL_bufend-s >= len 9718 && memEQ(s,PL_tokenbuf + 1,len)) { 9719 SvREFCNT_dec(PL_linestr); 9720 PL_linestr = linestr_save; 9721 PL_linestart = SvPVX(linestr_save); 9722 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 9723 PL_oldbufptr = oldbufptr_save; 9724 s = d; 9725 break; 9726 } 9727 else { 9728 sv_catsv(tmpstr,PL_linestr); 9729 } 9730 } 9731 } 9732 PL_multi_end = origline + PL_parser->herelines; 9733 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 9734 SvPV_shrink_to_cur(tmpstr); 9735 } 9736 if (!IN_BYTES) { 9737 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) 9738 SvUTF8_on(tmpstr); 9739 else if (IN_ENCODING) 9740 sv_recode_to_utf8(tmpstr, _get_encoding()); 9741 } 9742 PL_lex_stuff = tmpstr; 9743 pl_yylval.ival = op_type; 9744 return s; 9745 9746 interminable: 9747 SvREFCNT_dec(tmpstr); 9748 CopLINE_set(PL_curcop, origline); 9749 missingterm(PL_tokenbuf + 1); 9750} 9751 9752/* scan_inputsymbol 9753 takes: current position in input buffer 9754 returns: new position in input buffer 9755 side-effects: pl_yylval and lex_op are set. 9756 9757 This code handles: 9758 9759 <> read from ARGV 9760 <<>> read from ARGV without magic open 9761 <FH> read from filehandle 9762 <pkg::FH> read from package qualified filehandle 9763 <pkg'FH> read from package qualified filehandle 9764 <$fh> read from filehandle in $fh 9765 <*.h> filename glob 9766 9767*/ 9768 9769STATIC char * 9770S_scan_inputsymbol(pTHX_ char *start) 9771{ 9772 char *s = start; /* current position in buffer */ 9773 char *end; 9774 I32 len; 9775 bool nomagicopen = FALSE; 9776 char *d = PL_tokenbuf; /* start of temp holding space */ 9777 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 9778 9779 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; 9780 9781 end = strchr(s, '\n'); 9782 if (!end) 9783 end = PL_bufend; 9784 if (s[1] == '<' && s[2] == '>' && s[3] == '>') { 9785 nomagicopen = TRUE; 9786 *d = '\0'; 9787 len = 0; 9788 s += 3; 9789 } 9790 else 9791 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 9792 9793 /* die if we didn't have space for the contents of the <>, 9794 or if it didn't end, or if we see a newline 9795 */ 9796 9797 if (len >= (I32)sizeof PL_tokenbuf) 9798 Perl_croak(aTHX_ "Excessively long <> operator"); 9799 if (s >= end) 9800 Perl_croak(aTHX_ "Unterminated <> operator"); 9801 9802 s++; 9803 9804 /* check for <$fh> 9805 Remember, only scalar variables are interpreted as filehandles by 9806 this code. Anything more complex (e.g., <$fh{$num}>) will be 9807 treated as a glob() call. 9808 This code makes use of the fact that except for the $ at the front, 9809 a scalar variable and a filehandle look the same. 9810 */ 9811 if (*d == '$' && d[1]) d++; 9812 9813 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 9814 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':')) 9815 d += UTF ? UTF8SKIP(d) : 1; 9816 9817 /* If we've tried to read what we allow filehandles to look like, and 9818 there's still text left, then it must be a glob() and not a getline. 9819 Use scan_str to pull out the stuff between the <> and treat it 9820 as nothing more than a string. 9821 */ 9822 9823 if (d - PL_tokenbuf != len) { 9824 pl_yylval.ival = OP_GLOB; 9825 s = scan_str(start,FALSE,FALSE,FALSE,NULL); 9826 if (!s) 9827 Perl_croak(aTHX_ "Glob not terminated"); 9828 return s; 9829 } 9830 else { 9831 bool readline_overriden = FALSE; 9832 GV *gv_readline; 9833 /* we're in a filehandle read situation */ 9834 d = PL_tokenbuf; 9835 9836 /* turn <> into <ARGV> */ 9837 if (!len) 9838 Copy("ARGV",d,5,char); 9839 9840 /* Check whether readline() is overriden */ 9841 if ((gv_readline = gv_override("readline",8))) 9842 readline_overriden = TRUE; 9843 9844 /* if <$fh>, create the ops to turn the variable into a 9845 filehandle 9846 */ 9847 if (*d == '$') { 9848 /* try to find it in the pad for this block, otherwise find 9849 add symbol table ops 9850 */ 9851 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); 9852 if (tmp != NOT_IN_PAD) { 9853 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 9854 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 9855 HEK * const stashname = HvNAME_HEK(stash); 9856 SV * const sym = sv_2mortal(newSVhek(stashname)); 9857 sv_catpvs(sym, "::"); 9858 sv_catpv(sym, d+1); 9859 d = SvPVX(sym); 9860 goto intro_sym; 9861 } 9862 else { 9863 OP * const o = newOP(OP_PADSV, 0); 9864 o->op_targ = tmp; 9865 PL_lex_op = readline_overriden 9866 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 9867 op_append_elem(OP_LIST, o, 9868 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 9869 : (OP*)newUNOP(OP_READLINE, 0, o); 9870 } 9871 } 9872 else { 9873 GV *gv; 9874 ++d; 9875 intro_sym: 9876 gv = gv_fetchpv(d, 9877 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), 9878 SVt_PV); 9879 PL_lex_op = readline_overriden 9880 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 9881 op_append_elem(OP_LIST, 9882 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 9883 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 9884 : (OP*)newUNOP(OP_READLINE, 0, 9885 newUNOP(OP_RV2SV, 0, 9886 newGVOP(OP_GV, 0, gv))); 9887 } 9888 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ 9889 pl_yylval.ival = OP_NULL; 9890 } 9891 9892 /* If it's none of the above, it must be a literal filehandle 9893 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 9894 else { 9895 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); 9896 PL_lex_op = readline_overriden 9897 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, 9898 op_append_elem(OP_LIST, 9899 newGVOP(OP_GV, 0, gv), 9900 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 9901 : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv)); 9902 pl_yylval.ival = OP_NULL; 9903 } 9904 } 9905 9906 return s; 9907} 9908 9909 9910/* scan_str 9911 takes: 9912 start position in buffer 9913 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but 9914 only if they are of the open/close form 9915 keep_delims preserve the delimiters around the string 9916 re_reparse compiling a run-time /(?{})/: 9917 collapse // to /, and skip encoding src 9918 delimp if non-null, this is set to the position of 9919 the closing delimiter, or just after it if 9920 the closing and opening delimiters differ 9921 (i.e., the opening delimiter of a substitu- 9922 tion replacement) 9923 returns: position to continue reading from buffer 9924 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 9925 updates the read buffer. 9926 9927 This subroutine pulls a string out of the input. It is called for: 9928 q single quotes q(literal text) 9929 ' single quotes 'literal text' 9930 qq double quotes qq(interpolate $here please) 9931 " double quotes "interpolate $here please" 9932 qx backticks qx(/bin/ls -l) 9933 ` backticks `/bin/ls -l` 9934 qw quote words @EXPORT_OK = qw( func() $spam ) 9935 m// regexp match m/this/ 9936 s/// regexp substitute s/this/that/ 9937 tr/// string transliterate tr/this/that/ 9938 y/// string transliterate y/this/that/ 9939 ($*@) sub prototypes sub foo ($) 9940 (stuff) sub attr parameters sub foo : attr(stuff) 9941 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 9942 9943 In most of these cases (all but <>, patterns and transliterate) 9944 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 9945 calls scan_str(). s/// makes yylex() call scan_subst() which calls 9946 scan_str(). tr/// and y/// make yylex() call scan_trans() which 9947 calls scan_str(). 9948 9949 It skips whitespace before the string starts, and treats the first 9950 character as the delimiter. If the delimiter is one of ([{< then 9951 the corresponding "close" character )]}> is used as the closing 9952 delimiter. It allows quoting of delimiters, and if the string has 9953 balanced delimiters ([{<>}]) it allows nesting. 9954 9955 On success, the SV with the resulting string is put into lex_stuff or, 9956 if that is already non-NULL, into lex_repl. The second case occurs only 9957 when parsing the RHS of the special constructs s/// and tr/// (y///). 9958 For convenience, the terminating delimiter character is stuffed into 9959 SvIVX of the SV. 9960*/ 9961 9962STATIC char * 9963S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse, 9964 char **delimp 9965 ) 9966{ 9967 SV *sv; /* scalar value: string */ 9968 const char *tmps; /* temp string, used for delimiter matching */ 9969 char *s = start; /* current position in the buffer */ 9970 char term; /* terminating character */ 9971 char *to; /* current position in the sv's data */ 9972 I32 brackets = 1; /* bracket nesting level */ 9973 bool has_utf8 = FALSE; /* is there any utf8 content? */ 9974 I32 termcode; /* terminating char. code */ 9975 U8 termstr[UTF8_MAXBYTES]; /* terminating string */ 9976 STRLEN termlen; /* length of terminating string */ 9977 int last_off = 0; /* last position for nesting bracket */ 9978 line_t herelines; 9979 9980 PERL_ARGS_ASSERT_SCAN_STR; 9981 9982 /* skip space before the delimiter */ 9983 if (isSPACE(*s)) { 9984 s = skipspace(s); 9985 } 9986 9987 /* mark where we are, in case we need to report errors */ 9988 CLINE; 9989 9990 /* after skipping whitespace, the next character is the terminator */ 9991 term = *s; 9992 if (!UTF) { 9993 termcode = termstr[0] = term; 9994 termlen = 1; 9995 } 9996 else { 9997 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen); 9998 Copy(s, termstr, termlen, U8); 9999 if (!UTF8_IS_INVARIANT(term)) 10000 has_utf8 = TRUE; 10001 } 10002 10003 /* mark where we are */ 10004 PL_multi_start = CopLINE(PL_curcop); 10005 PL_multi_open = term; 10006 herelines = PL_parser->herelines; 10007 10008 /* find corresponding closing delimiter */ 10009 if (term && (tmps = strchr("([{< )]}> )]}>",term))) 10010 termcode = termstr[0] = term = tmps[5]; 10011 10012 PL_multi_close = term; 10013 10014 if (PL_multi_open == PL_multi_close) { 10015 keep_bracketed_quoted = FALSE; 10016 } 10017 10018 /* create a new SV to hold the contents. 79 is the SV's initial length. 10019 What a random number. */ 10020 sv = newSV_type(SVt_PVIV); 10021 SvGROW(sv, 80); 10022 SvIV_set(sv, termcode); 10023 (void)SvPOK_only(sv); /* validate pointer */ 10024 10025 /* move past delimiter and try to read a complete string */ 10026 if (keep_delims) 10027 sv_catpvn(sv, s, termlen); 10028 s += termlen; 10029 for (;;) { 10030 if (IN_ENCODING && !UTF && !re_reparse) { 10031 bool cont = TRUE; 10032 10033 while (cont) { 10034 int offset = s - SvPVX_const(PL_linestr); 10035 const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr, 10036 &offset, (char*)termstr, termlen); 10037 const char *ns; 10038 char *svlast; 10039 10040 if (SvIsCOW(PL_linestr)) { 10041 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos; 10042 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos; 10043 STRLEN last_lop_pos, re_eval_start_pos, s_pos; 10044 char *buf = SvPVX(PL_linestr); 10045 bufend_pos = PL_parser->bufend - buf; 10046 bufptr_pos = PL_parser->bufptr - buf; 10047 oldbufptr_pos = PL_parser->oldbufptr - buf; 10048 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 10049 linestart_pos = PL_parser->linestart - buf; 10050 last_uni_pos = PL_parser->last_uni 10051 ? PL_parser->last_uni - buf 10052 : 0; 10053 last_lop_pos = PL_parser->last_lop 10054 ? PL_parser->last_lop - buf 10055 : 0; 10056 re_eval_start_pos = 10057 PL_parser->lex_shared->re_eval_start ? 10058 PL_parser->lex_shared->re_eval_start - buf : 0; 10059 s_pos = s - buf; 10060 10061 sv_force_normal(PL_linestr); 10062 10063 buf = SvPVX(PL_linestr); 10064 PL_parser->bufend = buf + bufend_pos; 10065 PL_parser->bufptr = buf + bufptr_pos; 10066 PL_parser->oldbufptr = buf + oldbufptr_pos; 10067 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 10068 PL_parser->linestart = buf + linestart_pos; 10069 if (PL_parser->last_uni) 10070 PL_parser->last_uni = buf + last_uni_pos; 10071 if (PL_parser->last_lop) 10072 PL_parser->last_lop = buf + last_lop_pos; 10073 if (PL_parser->lex_shared->re_eval_start) 10074 PL_parser->lex_shared->re_eval_start = 10075 buf + re_eval_start_pos; 10076 s = buf + s_pos; 10077 } 10078 ns = SvPVX_const(PL_linestr) + offset; 10079 svlast = SvEND(sv) - 1; 10080 10081 for (; s < ns; s++) { 10082 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10083 COPLINE_INC_WITH_HERELINES; 10084 } 10085 if (!found) 10086 goto read_more_line; 10087 else { 10088 /* handle quoted delimiters */ 10089 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') { 10090 const char *t; 10091 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';) 10092 t--; 10093 if ((svlast-1 - t) % 2) { 10094 if (!keep_bracketed_quoted) { 10095 *(svlast-1) = term; 10096 *svlast = '\0'; 10097 SvCUR_set(sv, SvCUR(sv) - 1); 10098 } 10099 continue; 10100 } 10101 } 10102 if (PL_multi_open == PL_multi_close) { 10103 cont = FALSE; 10104 } 10105 else { 10106 const char *t; 10107 char *w; 10108 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) { 10109 /* At here, all closes are "was quoted" one, 10110 so we don't check PL_multi_close. */ 10111 if (*t == '\\') { 10112 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open) 10113 t++; 10114 else 10115 *w++ = *t++; 10116 } 10117 else if (*t == PL_multi_open) 10118 brackets++; 10119 10120 *w = *t; 10121 } 10122 if (w < t) { 10123 *w++ = term; 10124 *w = '\0'; 10125 SvCUR_set(sv, w - SvPVX_const(sv)); 10126 } 10127 last_off = w - SvPVX(sv); 10128 if (--brackets <= 0) 10129 cont = FALSE; 10130 } 10131 } 10132 } 10133 if (!keep_delims) { 10134 SvCUR_set(sv, SvCUR(sv) - 1); 10135 *SvEND(sv) = '\0'; 10136 } 10137 break; 10138 } 10139 10140 /* extend sv if need be */ 10141 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 10142 /* set 'to' to the next character in the sv's string */ 10143 to = SvPVX(sv)+SvCUR(sv); 10144 10145 /* if open delimiter is the close delimiter read unbridle */ 10146 if (PL_multi_open == PL_multi_close) { 10147 for (; s < PL_bufend; s++,to++) { 10148 /* embedded newlines increment the current line number */ 10149 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10150 COPLINE_INC_WITH_HERELINES; 10151 /* handle quoted delimiters */ 10152 if (*s == '\\' && s+1 < PL_bufend && term != '\\') { 10153 if (!keep_bracketed_quoted 10154 && (s[1] == term 10155 || (re_reparse && s[1] == '\\')) 10156 ) 10157 s++; 10158 else /* any other quotes are simply copied straight through */ 10159 *to++ = *s++; 10160 } 10161 /* terminate when run out of buffer (the for() condition), or 10162 have found the terminator */ 10163 else if (*s == term) { 10164 if (termlen == 1) 10165 break; 10166 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) 10167 break; 10168 } 10169 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 10170 has_utf8 = TRUE; 10171 *to = *s; 10172 } 10173 } 10174 10175 /* if the terminator isn't the same as the start character (e.g., 10176 matched brackets), we have to allow more in the quoting, and 10177 be prepared for nested brackets. 10178 */ 10179 else { 10180 /* read until we run out of string, or we find the terminator */ 10181 for (; s < PL_bufend; s++,to++) { 10182 /* embedded newlines increment the line count */ 10183 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 10184 COPLINE_INC_WITH_HERELINES; 10185 /* backslashes can escape the open or closing characters */ 10186 if (*s == '\\' && s+1 < PL_bufend) { 10187 if (!keep_bracketed_quoted 10188 && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) 10189 { 10190 s++; 10191 } 10192 else 10193 *to++ = *s++; 10194 } 10195 /* allow nested opens and closes */ 10196 else if (*s == PL_multi_close && --brackets <= 0) 10197 break; 10198 else if (*s == PL_multi_open) 10199 brackets++; 10200 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) 10201 has_utf8 = TRUE; 10202 *to = *s; 10203 } 10204 } 10205 /* terminate the copied string and update the sv's end-of-string */ 10206 *to = '\0'; 10207 SvCUR_set(sv, to - SvPVX_const(sv)); 10208 10209 /* 10210 * this next chunk reads more into the buffer if we're not done yet 10211 */ 10212 10213 if (s < PL_bufend) 10214 break; /* handle case where we are done yet :-) */ 10215 10216#ifndef PERL_STRICT_CR 10217 if (to - SvPVX_const(sv) >= 2) { 10218 if ( (to[-2] == '\r' && to[-1] == '\n') 10219 || (to[-2] == '\n' && to[-1] == '\r')) 10220 { 10221 to[-2] = '\n'; 10222 to--; 10223 SvCUR_set(sv, to - SvPVX_const(sv)); 10224 } 10225 else if (to[-1] == '\r') 10226 to[-1] = '\n'; 10227 } 10228 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') 10229 to[-1] = '\n'; 10230#endif 10231 10232 read_more_line: 10233 /* if we're out of file, or a read fails, bail and reset the current 10234 line marker so we can report where the unterminated string began 10235 */ 10236 COPLINE_INC_WITH_HERELINES; 10237 PL_bufptr = PL_bufend; 10238 if (!lex_next_chunk(0)) { 10239 sv_free(sv); 10240 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 10241 return NULL; 10242 } 10243 s = PL_bufptr; 10244 } 10245 10246 /* at this point, we have successfully read the delimited string */ 10247 10248 if (!IN_ENCODING || UTF || re_reparse) { 10249 10250 if (keep_delims) 10251 sv_catpvn(sv, s, termlen); 10252 s += termlen; 10253 } 10254 if (has_utf8 || (IN_ENCODING && !re_reparse)) 10255 SvUTF8_on(sv); 10256 10257 PL_multi_end = CopLINE(PL_curcop); 10258 CopLINE_set(PL_curcop, PL_multi_start); 10259 PL_parser->herelines = herelines; 10260 10261 /* if we allocated too much space, give some back */ 10262 if (SvCUR(sv) + 5 < SvLEN(sv)) { 10263 SvLEN_set(sv, SvCUR(sv) + 1); 10264 SvPV_renew(sv, SvLEN(sv)); 10265 } 10266 10267 /* decide whether this is the first or second quoted string we've read 10268 for this op 10269 */ 10270 10271 if (PL_lex_stuff) 10272 PL_sublex_info.repl = sv; 10273 else 10274 PL_lex_stuff = sv; 10275 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s; 10276 return s; 10277} 10278 10279/* 10280 scan_num 10281 takes: pointer to position in buffer 10282 returns: pointer to new position in buffer 10283 side-effects: builds ops for the constant in pl_yylval.op 10284 10285 Read a number in any of the formats that Perl accepts: 10286 10287 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 10288 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 10289 0b[01](_?[01])* binary integers 10290 0[0-7](_?[0-7])* octal integers 10291 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers 10292 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats 10293 10294 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 10295 thing it reads. 10296 10297 If it reads a number without a decimal point or an exponent, it will 10298 try converting the number to an integer and see if it can do so 10299 without loss of precision. 10300*/ 10301 10302char * 10303Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) 10304{ 10305 const char *s = start; /* current position in buffer */ 10306 char *d; /* destination in temp buffer */ 10307 char *e; /* end of temp buffer */ 10308 NV nv; /* number read, as a double */ 10309 SV *sv = NULL; /* place to put the converted number */ 10310 bool floatit; /* boolean: int or float? */ 10311 const char *lastub = NULL; /* position of last underbar */ 10312 static const char* const number_too_long = "Number too long"; 10313 /* Hexadecimal floating point. 10314 * 10315 * In many places (where we have quads and NV is IEEE 754 double) 10316 * we can fit the mantissa bits of a NV into an unsigned quad. 10317 * (Note that UVs might not be quads even when we have quads.) 10318 * This will not work everywhere, though (either no quads, or 10319 * using long doubles), in which case we have to resort to NV, 10320 * which will probably mean horrible loss of precision due to 10321 * multiple fp operations. */ 10322 bool hexfp = FALSE; 10323 int total_bits = 0; 10324 int significant_bits = 0; 10325#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t) 10326# define HEXFP_UQUAD 10327 Uquad_t hexfp_uquad = 0; 10328 int hexfp_frac_bits = 0; 10329#else 10330# define HEXFP_NV 10331 NV hexfp_nv = 0.0; 10332#endif 10333 NV hexfp_mult = 1.0; 10334 UV high_non_zero = 0; /* highest digit */ 10335 int non_zero_integer_digits = 0; 10336 10337 PERL_ARGS_ASSERT_SCAN_NUM; 10338 10339 /* We use the first character to decide what type of number this is */ 10340 10341 switch (*s) { 10342 default: 10343 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); 10344 10345 /* if it starts with a 0, it could be an octal number, a decimal in 10346 0.13 disguise, or a hexadecimal number, or a binary number. */ 10347 case '0': 10348 { 10349 /* variables: 10350 u holds the "number so far" 10351 shift the power of 2 of the base 10352 (hex == 4, octal == 3, binary == 1) 10353 overflowed was the number more than we can hold? 10354 10355 Shift is used when we add a digit. It also serves as an "are 10356 we in octal/hex/binary?" indicator to disallow hex characters 10357 when in octal mode. 10358 */ 10359 NV n = 0.0; 10360 UV u = 0; 10361 I32 shift; 10362 bool overflowed = FALSE; 10363 bool just_zero = TRUE; /* just plain 0 or binary number? */ 10364 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 10365 static const char* const bases[5] = 10366 { "", "binary", "", "octal", "hexadecimal" }; 10367 static const char* const Bases[5] = 10368 { "", "Binary", "", "Octal", "Hexadecimal" }; 10369 static const char* const maxima[5] = 10370 { "", 10371 "0b11111111111111111111111111111111", 10372 "", 10373 "037777777777", 10374 "0xffffffff" }; 10375 const char *base, *Base, *max; 10376 10377 /* check for hex */ 10378 if (isALPHA_FOLD_EQ(s[1], 'x')) { 10379 shift = 4; 10380 s += 2; 10381 just_zero = FALSE; 10382 } else if (isALPHA_FOLD_EQ(s[1], 'b')) { 10383 shift = 1; 10384 s += 2; 10385 just_zero = FALSE; 10386 } 10387 /* check for a decimal in disguise */ 10388 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) 10389 goto decimal; 10390 /* so it must be octal */ 10391 else { 10392 shift = 3; 10393 s++; 10394 } 10395 10396 if (*s == '_') { 10397 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10398 "Misplaced _ in number"); 10399 lastub = s++; 10400 } 10401 10402 base = bases[shift]; 10403 Base = Bases[shift]; 10404 max = maxima[shift]; 10405 10406 /* read the rest of the number */ 10407 for (;;) { 10408 /* x is used in the overflow test, 10409 b is the digit we're adding on. */ 10410 UV x, b; 10411 10412 switch (*s) { 10413 10414 /* if we don't mention it, we're done */ 10415 default: 10416 goto out; 10417 10418 /* _ are ignored -- but warned about if consecutive */ 10419 case '_': 10420 if (lastub && s == lastub + 1) 10421 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10422 "Misplaced _ in number"); 10423 lastub = s++; 10424 break; 10425 10426 /* 8 and 9 are not octal */ 10427 case '8': case '9': 10428 if (shift == 3) 10429 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 10430 /* FALLTHROUGH */ 10431 10432 /* octal digits */ 10433 case '2': case '3': case '4': 10434 case '5': case '6': case '7': 10435 if (shift == 1) 10436 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 10437 /* FALLTHROUGH */ 10438 10439 case '0': case '1': 10440 b = *s++ & 15; /* ASCII digit -> value of digit */ 10441 goto digit; 10442 10443 /* hex digits */ 10444 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 10445 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 10446 /* make sure they said 0x */ 10447 if (shift != 4) 10448 goto out; 10449 b = (*s++ & 7) + 9; 10450 10451 /* Prepare to put the digit we have onto the end 10452 of the number so far. We check for overflows. 10453 */ 10454 10455 digit: 10456 just_zero = FALSE; 10457 if (!overflowed) { 10458 x = u << shift; /* make room for the digit */ 10459 10460 total_bits += shift; 10461 10462 if ((x >> shift) != u 10463 && !(PL_hints & HINT_NEW_BINARY)) { 10464 overflowed = TRUE; 10465 n = (NV) u; 10466 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 10467 "Integer overflow in %s number", 10468 base); 10469 } else 10470 u = x | b; /* add the digit to the end */ 10471 } 10472 if (overflowed) { 10473 n *= nvshift[shift]; 10474 /* If an NV has not enough bits in its 10475 * mantissa to represent an UV this summing of 10476 * small low-order numbers is a waste of time 10477 * (because the NV cannot preserve the 10478 * low-order bits anyway): we could just 10479 * remember when did we overflow and in the 10480 * end just multiply n by the right 10481 * amount. */ 10482 n += (NV) b; 10483 } 10484 10485 if (high_non_zero == 0 && b > 0) 10486 high_non_zero = b; 10487 10488 if (high_non_zero) 10489 non_zero_integer_digits++; 10490 10491 /* this could be hexfp, but peek ahead 10492 * to avoid matching ".." */ 10493 if (UNLIKELY(HEXFP_PEEK(s))) { 10494 goto out; 10495 } 10496 10497 break; 10498 } 10499 } 10500 10501 /* if we get here, we had success: make a scalar value from 10502 the number. 10503 */ 10504 out: 10505 10506 /* final misplaced underbar check */ 10507 if (s[-1] == '_') { 10508 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 10509 } 10510 10511 if (UNLIKELY(HEXFP_PEEK(s))) { 10512 /* Do sloppy (on the underbars) but quick detection 10513 * (and value construction) for hexfp, the decimal 10514 * detection will shortly be more thorough with the 10515 * underbar checks. */ 10516 const char* h = s; 10517 significant_bits = non_zero_integer_digits * shift; 10518#ifdef HEXFP_UQUAD 10519 hexfp_uquad = u; 10520#else /* HEXFP_NV */ 10521 hexfp_nv = u; 10522#endif 10523 /* Ignore the leading zero bits of 10524 * the high (first) non-zero digit. */ 10525 if (high_non_zero) { 10526 if (high_non_zero < 0x8) 10527 significant_bits--; 10528 if (high_non_zero < 0x4) 10529 significant_bits--; 10530 if (high_non_zero < 0x2) 10531 significant_bits--; 10532 } 10533 10534 if (*h == '.') { 10535#ifdef HEXFP_NV 10536 NV nv_mult = 1.0; 10537#endif 10538 bool accumulate = TRUE; 10539 for (h++; (isXDIGIT(*h) || *h == '_'); h++) { 10540 if (isXDIGIT(*h)) { 10541 U8 b = XDIGIT_VALUE(*h); 10542 significant_bits += shift; 10543#ifdef HEXFP_UQUAD 10544 if (accumulate) { 10545 if (significant_bits < NV_MANT_DIG) { 10546 /* We are in the long "run" of xdigits, 10547 * accumulate the full four bits. */ 10548 hexfp_uquad <<= shift; 10549 hexfp_uquad |= b; 10550 hexfp_frac_bits += shift; 10551 } else { 10552 /* We are at a hexdigit either at, 10553 * or straddling, the edge of mantissa. 10554 * We will try grabbing as many as 10555 * possible bits. */ 10556 int tail = 10557 significant_bits - NV_MANT_DIG; 10558 if (tail <= 0) 10559 tail += shift; 10560 hexfp_uquad <<= tail; 10561 hexfp_uquad |= b >> (shift - tail); 10562 hexfp_frac_bits += tail; 10563 10564 /* Ignore the trailing zero bits 10565 * of the last non-zero xdigit. 10566 * 10567 * The assumption here is that if 10568 * one has input of e.g. the xdigit 10569 * eight (0x8), there is only one 10570 * bit being input, not the full 10571 * four bits. Conversely, if one 10572 * specifies a zero xdigit, the 10573 * assumption is that one really 10574 * wants all those bits to be zero. */ 10575 if (b) { 10576 if ((b & 0x1) == 0x0) { 10577 significant_bits--; 10578 if ((b & 0x2) == 0x0) { 10579 significant_bits--; 10580 if ((b & 0x4) == 0x0) { 10581 significant_bits--; 10582 } 10583 } 10584 } 10585 } 10586 10587 accumulate = FALSE; 10588 } 10589 } else { 10590 /* Keep skipping the xdigits, and 10591 * accumulating the significant bits, 10592 * but do not shift the uquad 10593 * (which would catastrophically drop 10594 * high-order bits) or accumulate the 10595 * xdigits anymore. */ 10596 } 10597#else /* HEXFP_NV */ 10598 if (accumulate) { 10599 nv_mult /= 16.0; 10600 if (nv_mult > 0.0) 10601 hexfp_nv += b * nv_mult; 10602 else 10603 accumulate = FALSE; 10604 } 10605#endif 10606 } 10607 if (significant_bits >= NV_MANT_DIG) 10608 accumulate = FALSE; 10609 } 10610 } 10611 10612 if ((total_bits > 0 || significant_bits > 0) && 10613 isALPHA_FOLD_EQ(*h, 'p')) { 10614 bool negexp = FALSE; 10615 h++; 10616 if (*h == '+') 10617 h++; 10618 else if (*h == '-') { 10619 negexp = TRUE; 10620 h++; 10621 } 10622 if (isDIGIT(*h)) { 10623 I32 hexfp_exp = 0; 10624 while (isDIGIT(*h) || *h == '_') { 10625 if (isDIGIT(*h)) { 10626 hexfp_exp *= 10; 10627 hexfp_exp += *h - '0'; 10628#ifdef NV_MIN_EXP 10629 if (negexp 10630 && -hexfp_exp < NV_MIN_EXP - 1) { 10631 /* NOTE: this means that the exponent 10632 * underflow warning happens for 10633 * the IEEE 754 subnormals (denormals), 10634 * because DBL_MIN_EXP etc are the lowest 10635 * possible binary (or, rather, DBL_RADIX-base) 10636 * exponent for normals, not subnormals. 10637 * 10638 * This may or may not be a good thing. */ 10639 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 10640 "Hexadecimal float: exponent underflow"); 10641 break; 10642 } 10643#endif 10644#ifdef NV_MAX_EXP 10645 if (!negexp 10646 && hexfp_exp > NV_MAX_EXP - 1) { 10647 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 10648 "Hexadecimal float: exponent overflow"); 10649 break; 10650 } 10651#endif 10652 } 10653 h++; 10654 } 10655 if (negexp) 10656 hexfp_exp = -hexfp_exp; 10657#ifdef HEXFP_UQUAD 10658 hexfp_exp -= hexfp_frac_bits; 10659#endif 10660 hexfp_mult = Perl_pow(2.0, hexfp_exp); 10661 hexfp = TRUE; 10662 goto decimal; 10663 } 10664 } 10665 } 10666 10667 if (overflowed) { 10668 if (n > 4294967295.0) 10669 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 10670 "%s number > %s non-portable", 10671 Base, max); 10672 sv = newSVnv(n); 10673 } 10674 else { 10675#if UVSIZE > 4 10676 if (u > 0xffffffff) 10677 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 10678 "%s number > %s non-portable", 10679 Base, max); 10680#endif 10681 sv = newSVuv(u); 10682 } 10683 if (just_zero && (PL_hints & HINT_NEW_INTEGER)) 10684 sv = new_constant(start, s - start, "integer", 10685 sv, NULL, NULL, 0); 10686 else if (PL_hints & HINT_NEW_BINARY) 10687 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0); 10688 } 10689 break; 10690 10691 /* 10692 handle decimal numbers. 10693 we're also sent here when we read a 0 as the first digit 10694 */ 10695 case '1': case '2': case '3': case '4': case '5': 10696 case '6': case '7': case '8': case '9': case '.': 10697 decimal: 10698 d = PL_tokenbuf; 10699 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 10700 floatit = FALSE; 10701 if (hexfp) { 10702 floatit = TRUE; 10703 *d++ = '0'; 10704 *d++ = 'x'; 10705 s = start + 2; 10706 } 10707 10708 /* read next group of digits and _ and copy into d */ 10709 while (isDIGIT(*s) 10710 || *s == '_' 10711 || UNLIKELY(hexfp && isXDIGIT(*s))) 10712 { 10713 /* skip underscores, checking for misplaced ones 10714 if -w is on 10715 */ 10716 if (*s == '_') { 10717 if (lastub && s == lastub + 1) 10718 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10719 "Misplaced _ in number"); 10720 lastub = s++; 10721 } 10722 else { 10723 /* check for end of fixed-length buffer */ 10724 if (d >= e) 10725 Perl_croak(aTHX_ "%s", number_too_long); 10726 /* if we're ok, copy the character */ 10727 *d++ = *s++; 10728 } 10729 } 10730 10731 /* final misplaced underbar check */ 10732 if (lastub && s == lastub + 1) { 10733 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); 10734 } 10735 10736 /* read a decimal portion if there is one. avoid 10737 3..5 being interpreted as the number 3. followed 10738 by .5 10739 */ 10740 if (*s == '.' && s[1] != '.') { 10741 floatit = TRUE; 10742 *d++ = *s++; 10743 10744 if (*s == '_') { 10745 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10746 "Misplaced _ in number"); 10747 lastub = s; 10748 } 10749 10750 /* copy, ignoring underbars, until we run out of digits. 10751 */ 10752 for (; isDIGIT(*s) 10753 || *s == '_' 10754 || UNLIKELY(hexfp && isXDIGIT(*s)); 10755 s++) 10756 { 10757 /* fixed length buffer check */ 10758 if (d >= e) 10759 Perl_croak(aTHX_ "%s", number_too_long); 10760 if (*s == '_') { 10761 if (lastub && s == lastub + 1) 10762 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10763 "Misplaced _ in number"); 10764 lastub = s; 10765 } 10766 else 10767 *d++ = *s; 10768 } 10769 /* fractional part ending in underbar? */ 10770 if (s[-1] == '_') { 10771 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10772 "Misplaced _ in number"); 10773 } 10774 if (*s == '.' && isDIGIT(s[1])) { 10775 /* oops, it's really a v-string, but without the "v" */ 10776 s = start; 10777 goto vstring; 10778 } 10779 } 10780 10781 /* read exponent part, if present */ 10782 if ((isALPHA_FOLD_EQ(*s, 'e') 10783 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) 10784 && strchr("+-0123456789_", s[1])) 10785 { 10786 floatit = TRUE; 10787 10788 /* regardless of whether user said 3E5 or 3e5, use lower 'e', 10789 ditto for p (hexfloats) */ 10790 if ((isALPHA_FOLD_EQ(*s, 'e'))) { 10791 /* At least some Mach atof()s don't grok 'E' */ 10792 *d++ = 'e'; 10793 } 10794 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) { 10795 *d++ = 'p'; 10796 } 10797 10798 s++; 10799 10800 10801 /* stray preinitial _ */ 10802 if (*s == '_') { 10803 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10804 "Misplaced _ in number"); 10805 lastub = s++; 10806 } 10807 10808 /* allow positive or negative exponent */ 10809 if (*s == '+' || *s == '-') 10810 *d++ = *s++; 10811 10812 /* stray initial _ */ 10813 if (*s == '_') { 10814 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10815 "Misplaced _ in number"); 10816 lastub = s++; 10817 } 10818 10819 /* read digits of exponent */ 10820 while (isDIGIT(*s) || *s == '_') { 10821 if (isDIGIT(*s)) { 10822 if (d >= e) 10823 Perl_croak(aTHX_ "%s", number_too_long); 10824 *d++ = *s++; 10825 } 10826 else { 10827 if (((lastub && s == lastub + 1) 10828 || (!isDIGIT(s[1]) && s[1] != '_'))) 10829 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 10830 "Misplaced _ in number"); 10831 lastub = s++; 10832 } 10833 } 10834 } 10835 10836 10837 /* 10838 We try to do an integer conversion first if no characters 10839 indicating "float" have been found. 10840 */ 10841 10842 if (!floatit) { 10843 UV uv; 10844 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 10845 10846 if (flags == IS_NUMBER_IN_UV) { 10847 if (uv <= IV_MAX) 10848 sv = newSViv(uv); /* Prefer IVs over UVs. */ 10849 else 10850 sv = newSVuv(uv); 10851 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 10852 if (uv <= (UV) IV_MIN) 10853 sv = newSViv(-(IV)uv); 10854 else 10855 floatit = TRUE; 10856 } else 10857 floatit = TRUE; 10858 } 10859 if (floatit) { 10860 STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD(); 10861 /* terminate the string */ 10862 *d = '\0'; 10863 if (UNLIKELY(hexfp)) { 10864# ifdef NV_MANT_DIG 10865 if (significant_bits > NV_MANT_DIG) 10866 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 10867 "Hexadecimal float: mantissa overflow"); 10868# endif 10869#ifdef HEXFP_UQUAD 10870 nv = hexfp_uquad * hexfp_mult; 10871#else /* HEXFP_NV */ 10872 nv = hexfp_nv * hexfp_mult; 10873#endif 10874 } else { 10875 nv = Atof(PL_tokenbuf); 10876 } 10877 RESTORE_LC_NUMERIC_UNDERLYING(); 10878 sv = newSVnv(nv); 10879 } 10880 10881 if ( floatit 10882 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { 10883 const char *const key = floatit ? "float" : "integer"; 10884 const STRLEN keylen = floatit ? 5 : 7; 10885 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, 10886 key, keylen, sv, NULL, NULL, 0); 10887 } 10888 break; 10889 10890 /* if it starts with a v, it could be a v-string */ 10891 case 'v': 10892 vstring: 10893 sv = newSV(5); /* preallocate storage space */ 10894 ENTER_with_name("scan_vstring"); 10895 SAVEFREESV(sv); 10896 s = scan_vstring(s, PL_bufend, sv); 10897 SvREFCNT_inc_simple_void_NN(sv); 10898 LEAVE_with_name("scan_vstring"); 10899 break; 10900 } 10901 10902 /* make the op for the constant and return */ 10903 10904 if (sv) 10905 lvalp->opval = newSVOP(OP_CONST, 0, sv); 10906 else 10907 lvalp->opval = NULL; 10908 10909 return (char *)s; 10910} 10911 10912STATIC char * 10913S_scan_formline(pTHX_ char *s) 10914{ 10915 char *eol; 10916 char *t; 10917 SV * const stuff = newSVpvs(""); 10918 bool needargs = FALSE; 10919 bool eofmt = FALSE; 10920 10921 PERL_ARGS_ASSERT_SCAN_FORMLINE; 10922 10923 while (!needargs) { 10924 if (*s == '.') { 10925 t = s+1; 10926#ifdef PERL_STRICT_CR 10927 while (SPACE_OR_TAB(*t)) 10928 t++; 10929#else 10930 while (SPACE_OR_TAB(*t) || *t == '\r') 10931 t++; 10932#endif 10933 if (*t == '\n' || t == PL_bufend) { 10934 eofmt = TRUE; 10935 break; 10936 } 10937 } 10938 eol = (char *) memchr(s,'\n',PL_bufend-s); 10939 if (!eol++) 10940 eol = PL_bufend; 10941 if (*s != '#') { 10942 for (t = s; t < eol; t++) { 10943 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 10944 needargs = FALSE; 10945 goto enough; /* ~~ must be first line in formline */ 10946 } 10947 if (*t == '@' || *t == '^') 10948 needargs = TRUE; 10949 } 10950 if (eol > s) { 10951 sv_catpvn(stuff, s, eol-s); 10952#ifndef PERL_STRICT_CR 10953 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 10954 char *end = SvPVX(stuff) + SvCUR(stuff); 10955 end[-2] = '\n'; 10956 end[-1] = '\0'; 10957 SvCUR_set(stuff, SvCUR(stuff) - 1); 10958 } 10959#endif 10960 } 10961 else 10962 break; 10963 } 10964 s = (char*)eol; 10965 if ((PL_rsfp || PL_parser->filtered) 10966 && PL_parser->form_lex_state == LEX_NORMAL) { 10967 bool got_some; 10968 PL_bufptr = PL_bufend; 10969 COPLINE_INC_WITH_HERELINES; 10970 got_some = lex_next_chunk(0); 10971 CopLINE_dec(PL_curcop); 10972 s = PL_bufptr; 10973 if (!got_some) 10974 break; 10975 } 10976 incline(s); 10977 } 10978 enough: 10979 if (!SvCUR(stuff) || needargs) 10980 PL_lex_state = PL_parser->form_lex_state; 10981 if (SvCUR(stuff)) { 10982 PL_expect = XSTATE; 10983 if (needargs) { 10984 const char *s2 = s; 10985 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f' 10986 || *s2 == '\v') 10987 s2++; 10988 if (*s2 == '{') { 10989 PL_expect = XTERMBLOCK; 10990 NEXTVAL_NEXTTOKE.ival = 0; 10991 force_next(DO); 10992 } 10993 NEXTVAL_NEXTTOKE.ival = 0; 10994 force_next(FORMLBRACK); 10995 } 10996 if (!IN_BYTES) { 10997 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) 10998 SvUTF8_on(stuff); 10999 else if (IN_ENCODING) 11000 sv_recode_to_utf8(stuff, _get_encoding()); 11001 } 11002 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff); 11003 force_next(THING); 11004 } 11005 else { 11006 SvREFCNT_dec(stuff); 11007 if (eofmt) 11008 PL_lex_formbrack = 0; 11009 } 11010 return s; 11011} 11012 11013I32 11014Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 11015{ 11016 const I32 oldsavestack_ix = PL_savestack_ix; 11017 CV* const outsidecv = PL_compcv; 11018 11019 SAVEI32(PL_subline); 11020 save_item(PL_subname); 11021 SAVESPTR(PL_compcv); 11022 11023 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); 11024 CvFLAGS(PL_compcv) |= flags; 11025 11026 PL_subline = CopLINE(PL_curcop); 11027 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); 11028 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); 11029 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; 11030 if (outsidecv && CvPADLIST(outsidecv)) 11031 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; 11032 11033 return oldsavestack_ix; 11034} 11035 11036static int 11037S_yywarn(pTHX_ const char *const s, U32 flags) 11038{ 11039 PERL_ARGS_ASSERT_YYWARN; 11040 11041 PL_in_eval |= EVAL_WARNONLY; 11042 yyerror_pv(s, flags); 11043 return 0; 11044} 11045 11046int 11047Perl_yyerror(pTHX_ const char *const s) 11048{ 11049 PERL_ARGS_ASSERT_YYERROR; 11050 return yyerror_pvn(s, strlen(s), 0); 11051} 11052 11053int 11054Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) 11055{ 11056 PERL_ARGS_ASSERT_YYERROR_PV; 11057 return yyerror_pvn(s, strlen(s), flags); 11058} 11059 11060int 11061Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) 11062{ 11063 const char *context = NULL; 11064 int contlen = -1; 11065 SV *msg; 11066 SV * const where_sv = newSVpvs_flags("", SVs_TEMP); 11067 int yychar = PL_parser->yychar; 11068 11069 PERL_ARGS_ASSERT_YYERROR_PVN; 11070 11071 if (!yychar || (yychar == ';' && !PL_rsfp)) 11072 sv_catpvs(where_sv, "at EOF"); 11073 else if ( PL_oldoldbufptr 11074 && PL_bufptr > PL_oldoldbufptr 11075 && PL_bufptr - PL_oldoldbufptr < 200 11076 && PL_oldoldbufptr != PL_oldbufptr 11077 && PL_oldbufptr != PL_bufptr) 11078 { 11079 /* 11080 Only for NetWare: 11081 The code below is removed for NetWare because it abends/crashes on NetWare 11082 when the script has error such as not having the closing quotes like: 11083 if ($var eq "value) 11084 Checking of white spaces is anyway done in NetWare code. 11085 */ 11086#ifndef NETWARE 11087 while (isSPACE(*PL_oldoldbufptr)) 11088 PL_oldoldbufptr++; 11089#endif 11090 context = PL_oldoldbufptr; 11091 contlen = PL_bufptr - PL_oldoldbufptr; 11092 } 11093 else if ( PL_oldbufptr 11094 && PL_bufptr > PL_oldbufptr 11095 && PL_bufptr - PL_oldbufptr < 200 11096 && PL_oldbufptr != PL_bufptr) { 11097 /* 11098 Only for NetWare: 11099 The code below is removed for NetWare because it abends/crashes on NetWare 11100 when the script has error such as not having the closing quotes like: 11101 if ($var eq "value) 11102 Checking of white spaces is anyway done in NetWare code. 11103 */ 11104#ifndef NETWARE 11105 while (isSPACE(*PL_oldbufptr)) 11106 PL_oldbufptr++; 11107#endif 11108 context = PL_oldbufptr; 11109 contlen = PL_bufptr - PL_oldbufptr; 11110 } 11111 else if (yychar > 255) 11112 sv_catpvs(where_sv, "next token ???"); 11113 else if (yychar == YYEMPTY) { 11114 if ( PL_lex_state == LEX_NORMAL 11115 || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL)) 11116 sv_catpvs(where_sv, "at end of line"); 11117 else if (PL_lex_inpat) 11118 sv_catpvs(where_sv, "within pattern"); 11119 else 11120 sv_catpvs(where_sv, "within string"); 11121 } 11122 else { 11123 sv_catpvs(where_sv, "next char "); 11124 if (yychar < 32) 11125 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 11126 else if (isPRINT_LC(yychar)) { 11127 const char string = yychar; 11128 sv_catpvn(where_sv, &string, 1); 11129 } 11130 else 11131 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 11132 } 11133 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); 11134 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", 11135 OutCopFILE(PL_curcop), 11136 (IV)(PL_parser->preambling == NOLINE 11137 ? CopLINE(PL_curcop) 11138 : PL_parser->preambling)); 11139 if (context) 11140 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n", 11141 UTF8fARG(UTF, contlen, context)); 11142 else 11143 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv)); 11144 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { 11145 Perl_sv_catpvf(aTHX_ msg, 11146 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n", 11147 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); 11148 PL_multi_end = 0; 11149 } 11150 if (PL_in_eval & EVAL_WARNONLY) { 11151 PL_in_eval &= ~EVAL_WARNONLY; 11152 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); 11153 } 11154 else 11155 qerror(msg); 11156 if (PL_error_count >= 10) { 11157 SV * errsv; 11158 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv))) 11159 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n", 11160 SVfARG(errsv), OutCopFILE(PL_curcop)); 11161 else 11162 Perl_croak(aTHX_ "%s has too many errors.\n", 11163 OutCopFILE(PL_curcop)); 11164 } 11165 PL_in_my = 0; 11166 PL_in_my_stash = NULL; 11167 return 0; 11168} 11169 11170STATIC char* 11171S_swallow_bom(pTHX_ U8 *s) 11172{ 11173 const STRLEN slen = SvCUR(PL_linestr); 11174 11175 PERL_ARGS_ASSERT_SWALLOW_BOM; 11176 11177 switch (s[0]) { 11178 case 0xFF: 11179 if (s[1] == 0xFE) { 11180 /* UTF-16 little-endian? (or UTF-32LE?) */ 11181 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 11182 /* diag_listed_as: Unsupported script encoding %s */ 11183 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); 11184#ifndef PERL_NO_UTF16_FILTER 11185 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); 11186 s += 2; 11187 if (PL_bufend > (char*)s) { 11188 s = add_utf16_textfilter(s, TRUE); 11189 } 11190#else 11191 /* diag_listed_as: Unsupported script encoding %s */ 11192 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 11193#endif 11194 } 11195 break; 11196 case 0xFE: 11197 if (s[1] == 0xFF) { /* UTF-16 big-endian? */ 11198#ifndef PERL_NO_UTF16_FILTER 11199 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); 11200 s += 2; 11201 if (PL_bufend > (char *)s) { 11202 s = add_utf16_textfilter(s, FALSE); 11203 } 11204#else 11205 /* diag_listed_as: Unsupported script encoding %s */ 11206 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 11207#endif 11208 } 11209 break; 11210 case BOM_UTF8_FIRST_BYTE: { 11211 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */ 11212 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) { 11213 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 11214 s += len + 1; /* UTF-8 */ 11215 } 11216 break; 11217 } 11218 case 0: 11219 if (slen > 3) { 11220 if (s[1] == 0) { 11221 if (s[2] == 0xFE && s[3] == 0xFF) { 11222 /* UTF-32 big-endian */ 11223 /* diag_listed_as: Unsupported script encoding %s */ 11224 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); 11225 } 11226 } 11227 else if (s[2] == 0 && s[3] != 0) { 11228 /* Leading bytes 11229 * 00 xx 00 xx 11230 * are a good indicator of UTF-16BE. */ 11231#ifndef PERL_NO_UTF16_FILTER 11232 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); 11233 s = add_utf16_textfilter(s, FALSE); 11234#else 11235 /* diag_listed_as: Unsupported script encoding %s */ 11236 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 11237#endif 11238 } 11239 } 11240 break; 11241 11242 default: 11243 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { 11244 /* Leading bytes 11245 * xx 00 xx 00 11246 * are a good indicator of UTF-16LE. */ 11247#ifndef PERL_NO_UTF16_FILTER 11248 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); 11249 s = add_utf16_textfilter(s, TRUE); 11250#else 11251 /* diag_listed_as: Unsupported script encoding %s */ 11252 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 11253#endif 11254 } 11255 } 11256 return (char*)s; 11257} 11258 11259 11260#ifndef PERL_NO_UTF16_FILTER 11261static I32 11262S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 11263{ 11264 SV *const filter = FILTER_DATA(idx); 11265 /* We re-use this each time round, throwing the contents away before we 11266 return. */ 11267 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); 11268 SV *const utf8_buffer = filter; 11269 IV status = IoPAGE(filter); 11270 const bool reverse = cBOOL(IoLINES(filter)); 11271 I32 retval; 11272 11273 PERL_ARGS_ASSERT_UTF16_TEXTFILTER; 11274 11275 /* As we're automatically added, at the lowest level, and hence only called 11276 from this file, we can be sure that we're not called in block mode. Hence 11277 don't bother writing code to deal with block mode. */ 11278 if (maxlen) { 11279 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); 11280 } 11281 if (status < 0) { 11282 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status); 11283 } 11284 DEBUG_P(PerlIO_printf(Perl_debug_log, 11285 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 11286 FPTR2DPTR(void *, S_utf16_textfilter), 11287 reverse ? 'l' : 'b', idx, maxlen, status, 11288 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 11289 11290 while (1) { 11291 STRLEN chars; 11292 STRLEN have; 11293 I32 newlen; 11294 U8 *end; 11295 /* First, look in our buffer of existing UTF-8 data: */ 11296 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); 11297 11298 if (nl) { 11299 ++nl; 11300 } else if (status == 0) { 11301 /* EOF */ 11302 IoPAGE(filter) = 0; 11303 nl = SvEND(utf8_buffer); 11304 } 11305 if (nl) { 11306 STRLEN got = nl - SvPVX(utf8_buffer); 11307 /* Did we have anything to append? */ 11308 retval = got != 0; 11309 sv_catpvn(sv, SvPVX(utf8_buffer), got); 11310 /* Everything else in this code works just fine if SVp_POK isn't 11311 set. This, however, needs it, and we need it to work, else 11312 we loop infinitely because the buffer is never consumed. */ 11313 sv_chop(utf8_buffer, nl); 11314 break; 11315 } 11316 11317 /* OK, not a complete line there, so need to read some more UTF-16. 11318 Read an extra octect if the buffer currently has an odd number. */ 11319 while (1) { 11320 if (status <= 0) 11321 break; 11322 if (SvCUR(utf16_buffer) >= 2) { 11323 /* Location of the high octet of the last complete code point. 11324 Gosh, UTF-16 is a pain. All the benefits of variable length, 11325 *coupled* with all the benefits of partial reads and 11326 endianness. */ 11327 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) 11328 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); 11329 11330 if (*last_hi < 0xd8 || *last_hi > 0xdb) { 11331 break; 11332 } 11333 11334 /* We have the first half of a surrogate. Read more. */ 11335 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); 11336 } 11337 11338 status = FILTER_READ(idx + 1, utf16_buffer, 11339 160 + (SvCUR(utf16_buffer) & 1)); 11340 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer))); 11341 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); 11342 if (status < 0) { 11343 /* Error */ 11344 IoPAGE(filter) = status; 11345 return status; 11346 } 11347 } 11348 11349 chars = SvCUR(utf16_buffer) >> 1; 11350 have = SvCUR(utf8_buffer); 11351 SvGROW(utf8_buffer, have + chars * 3 + 1); 11352 11353 if (reverse) { 11354 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), 11355 (U8*)SvPVX_const(utf8_buffer) + have, 11356 chars * 2, &newlen); 11357 } else { 11358 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), 11359 (U8*)SvPVX_const(utf8_buffer) + have, 11360 chars * 2, &newlen); 11361 } 11362 SvCUR_set(utf8_buffer, have + newlen); 11363 *end = '\0'; 11364 11365 /* No need to keep this SV "well-formed" with a '\0' after the end, as 11366 it's private to us, and utf16_to_utf8{,reversed} take a 11367 (pointer,length) pair, rather than a NUL-terminated string. */ 11368 if(SvCUR(utf16_buffer) & 1) { 11369 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; 11370 SvCUR_set(utf16_buffer, 1); 11371 } else { 11372 SvCUR_set(utf16_buffer, 0); 11373 } 11374 } 11375 DEBUG_P(PerlIO_printf(Perl_debug_log, 11376 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", 11377 status, 11378 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 11379 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); 11380 return retval; 11381} 11382 11383static U8 * 11384S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) 11385{ 11386 SV *filter = filter_add(S_utf16_textfilter, NULL); 11387 11388 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER; 11389 11390 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); 11391 sv_setpvs(filter, ""); 11392 IoLINES(filter) = reversed; 11393 IoPAGE(filter) = 1; /* Not EOF */ 11394 11395 /* Sadly, we have to return a valid pointer, come what may, so we have to 11396 ignore any error return from this. */ 11397 SvCUR_set(PL_linestr, 0); 11398 if (FILTER_READ(0, PL_linestr, 0)) { 11399 SvUTF8_on(PL_linestr); 11400 } else { 11401 SvUTF8_on(PL_linestr); 11402 } 11403 PL_bufend = SvEND(PL_linestr); 11404 return (U8*)SvPVX(PL_linestr); 11405} 11406#endif 11407 11408/* 11409Returns a pointer to the next character after the parsed 11410vstring, as well as updating the passed in sv. 11411 11412Function must be called like 11413 11414 sv = sv_2mortal(newSV(5)); 11415 s = scan_vstring(s,e,sv); 11416 11417where s and e are the start and end of the string. 11418The sv should already be large enough to store the vstring 11419passed in, for performance reasons. 11420 11421This function may croak if fatal warnings are enabled in the 11422calling scope, hence the sv_2mortal in the example (to prevent 11423a leak). Make sure to do SvREFCNT_inc afterwards if you use 11424sv_2mortal. 11425 11426*/ 11427 11428char * 11429Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) 11430{ 11431 const char *pos = s; 11432 const char *start = s; 11433 11434 PERL_ARGS_ASSERT_SCAN_VSTRING; 11435 11436 if (*pos == 'v') pos++; /* get past 'v' */ 11437 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 11438 pos++; 11439 if ( *pos != '.') { 11440 /* this may not be a v-string if followed by => */ 11441 const char *next = pos; 11442 while (next < e && isSPACE(*next)) 11443 ++next; 11444 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { 11445 /* return string not v-string */ 11446 sv_setpvn(sv,(char *)s,pos-s); 11447 return (char *)pos; 11448 } 11449 } 11450 11451 if (!isALPHA(*pos)) { 11452 U8 tmpbuf[UTF8_MAXBYTES+1]; 11453 11454 if (*s == 'v') 11455 s++; /* get past 'v' */ 11456 11457 sv_setpvs(sv, ""); 11458 11459 for (;;) { 11460 /* this is atoi() that tolerates underscores */ 11461 U8 *tmpend; 11462 UV rev = 0; 11463 const char *end = pos; 11464 UV mult = 1; 11465 while (--end >= s) { 11466 if (*end != '_') { 11467 const UV orev = rev; 11468 rev += (*end - '0') * mult; 11469 mult *= 10; 11470 if (orev > rev) 11471 /* diag_listed_as: Integer overflow in %s number */ 11472 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 11473 "Integer overflow in decimal number"); 11474 } 11475 } 11476 11477 /* Append native character for the rev point */ 11478 tmpend = uvchr_to_utf8(tmpbuf, rev); 11479 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); 11480 if (!UVCHR_IS_INVARIANT(rev)) 11481 SvUTF8_on(sv); 11482 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) 11483 s = ++pos; 11484 else { 11485 s = pos; 11486 break; 11487 } 11488 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 11489 pos++; 11490 } 11491 SvPOK_on(sv); 11492 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); 11493 SvRMAGICAL_on(sv); 11494 } 11495 return (char *)s; 11496} 11497 11498int 11499Perl_keyword_plugin_standard(pTHX_ 11500 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 11501{ 11502 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; 11503 PERL_UNUSED_CONTEXT; 11504 PERL_UNUSED_ARG(keyword_ptr); 11505 PERL_UNUSED_ARG(keyword_len); 11506 PERL_UNUSED_ARG(op_ptr); 11507 return KEYWORD_PLUGIN_DECLINE; 11508} 11509 11510#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) 11511static void 11512S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof) 11513{ 11514 SAVEI32(PL_lex_brackets); 11515 if (PL_lex_brackets > 100) 11516 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 11517 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; 11518 SAVEI32(PL_lex_allbrackets); 11519 PL_lex_allbrackets = 0; 11520 SAVEI8(PL_lex_fakeeof); 11521 PL_lex_fakeeof = (U8)fakeeof; 11522 if(yyparse(gramtype) && !PL_parser->error_count) 11523 qerror(Perl_mess(aTHX_ "Parse error")); 11524} 11525 11526#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p) 11527static OP * 11528S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof) 11529{ 11530 OP *o; 11531 ENTER; 11532 SAVEVPTR(PL_eval_root); 11533 PL_eval_root = NULL; 11534 parse_recdescent(gramtype, fakeeof); 11535 o = PL_eval_root; 11536 LEAVE; 11537 return o; 11538} 11539 11540#define parse_expr(p,f) S_parse_expr(aTHX_ p,f) 11541static OP * 11542S_parse_expr(pTHX_ I32 fakeeof, U32 flags) 11543{ 11544 OP *exprop; 11545 if (flags & ~PARSE_OPTIONAL) 11546 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); 11547 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof); 11548 if (!exprop && !(flags & PARSE_OPTIONAL)) { 11549 if (!PL_parser->error_count) 11550 qerror(Perl_mess(aTHX_ "Parse error")); 11551 exprop = newOP(OP_NULL, 0); 11552 } 11553 return exprop; 11554} 11555 11556/* 11557=for apidoc Amx|OP *|parse_arithexpr|U32 flags 11558 11559Parse a Perl arithmetic expression. This may contain operators of precedence 11560down to the bit shift operators. The expression must be followed (and thus 11561terminated) either by a comparison or lower-precedence operator or by 11562something that would normally terminate an expression such as semicolon. 11563If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 11564otherwise it is mandatory. It is up to the caller to ensure that the 11565dynamic parser state (L</PL_parser> et al) is correctly set to reflect 11566the source of the code to be parsed and the lexical context for the 11567expression. 11568 11569The op tree representing the expression is returned. If an optional 11570expression is absent, a null pointer is returned, otherwise the pointer 11571will be non-null. 11572 11573If an error occurs in parsing or compilation, in most cases a valid op 11574tree is returned anyway. The error is reflected in the parser state, 11575normally resulting in a single exception at the top level of parsing 11576which covers all the compilation errors that occurred. Some compilation 11577errors, however, will throw an exception immediately. 11578 11579=cut 11580*/ 11581 11582OP * 11583Perl_parse_arithexpr(pTHX_ U32 flags) 11584{ 11585 return parse_expr(LEX_FAKEEOF_COMPARE, flags); 11586} 11587 11588/* 11589=for apidoc Amx|OP *|parse_termexpr|U32 flags 11590 11591Parse a Perl term expression. This may contain operators of precedence 11592down to the assignment operators. The expression must be followed (and thus 11593terminated) either by a comma or lower-precedence operator or by 11594something that would normally terminate an expression such as semicolon. 11595If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 11596otherwise it is mandatory. It is up to the caller to ensure that the 11597dynamic parser state (L</PL_parser> et al) is correctly set to reflect 11598the source of the code to be parsed and the lexical context for the 11599expression. 11600 11601The op tree representing the expression is returned. If an optional 11602expression is absent, a null pointer is returned, otherwise the pointer 11603will be non-null. 11604 11605If an error occurs in parsing or compilation, in most cases a valid op 11606tree is returned anyway. The error is reflected in the parser state, 11607normally resulting in a single exception at the top level of parsing 11608which covers all the compilation errors that occurred. Some compilation 11609errors, however, will throw an exception immediately. 11610 11611=cut 11612*/ 11613 11614OP * 11615Perl_parse_termexpr(pTHX_ U32 flags) 11616{ 11617 return parse_expr(LEX_FAKEEOF_COMMA, flags); 11618} 11619 11620/* 11621=for apidoc Amx|OP *|parse_listexpr|U32 flags 11622 11623Parse a Perl list expression. This may contain operators of precedence 11624down to the comma operator. The expression must be followed (and thus 11625terminated) either by a low-precedence logic operator such as C<or> or by 11626something that would normally terminate an expression such as semicolon. 11627If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 11628otherwise it is mandatory. It is up to the caller to ensure that the 11629dynamic parser state (L</PL_parser> et al) is correctly set to reflect 11630the source of the code to be parsed and the lexical context for the 11631expression. 11632 11633The op tree representing the expression is returned. If an optional 11634expression is absent, a null pointer is returned, otherwise the pointer 11635will be non-null. 11636 11637If an error occurs in parsing or compilation, in most cases a valid op 11638tree is returned anyway. The error is reflected in the parser state, 11639normally resulting in a single exception at the top level of parsing 11640which covers all the compilation errors that occurred. Some compilation 11641errors, however, will throw an exception immediately. 11642 11643=cut 11644*/ 11645 11646OP * 11647Perl_parse_listexpr(pTHX_ U32 flags) 11648{ 11649 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags); 11650} 11651 11652/* 11653=for apidoc Amx|OP *|parse_fullexpr|U32 flags 11654 11655Parse a single complete Perl expression. This allows the full 11656expression grammar, including the lowest-precedence operators such 11657as C<or>. The expression must be followed (and thus terminated) by a 11658token that an expression would normally be terminated by: end-of-file, 11659closing bracketing punctuation, semicolon, or one of the keywords that 11660signals a postfix expression-statement modifier. If C<flags> has the 11661C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is 11662mandatory. It is up to the caller to ensure that the dynamic parser 11663state (L</PL_parser> et al) is correctly set to reflect the source of 11664the code to be parsed and the lexical context for the expression. 11665 11666The op tree representing the expression is returned. If an optional 11667expression is absent, a null pointer is returned, otherwise the pointer 11668will be non-null. 11669 11670If an error occurs in parsing or compilation, in most cases a valid op 11671tree is returned anyway. The error is reflected in the parser state, 11672normally resulting in a single exception at the top level of parsing 11673which covers all the compilation errors that occurred. Some compilation 11674errors, however, will throw an exception immediately. 11675 11676=cut 11677*/ 11678 11679OP * 11680Perl_parse_fullexpr(pTHX_ U32 flags) 11681{ 11682 return parse_expr(LEX_FAKEEOF_NONEXPR, flags); 11683} 11684 11685/* 11686=for apidoc Amx|OP *|parse_block|U32 flags 11687 11688Parse a single complete Perl code block. This consists of an opening 11689brace, a sequence of statements, and a closing brace. The block 11690constitutes a lexical scope, so C<my> variables and various compile-time 11691effects can be contained within it. It is up to the caller to ensure 11692that the dynamic parser state (L</PL_parser> et al) is correctly set to 11693reflect the source of the code to be parsed and the lexical context for 11694the statement. 11695 11696The op tree representing the code block is returned. This is always a 11697real op, never a null pointer. It will normally be a C<lineseq> list, 11698including C<nextstate> or equivalent ops. No ops to construct any kind 11699of runtime scope are included by virtue of it being a block. 11700 11701If an error occurs in parsing or compilation, in most cases a valid op 11702tree (most likely null) is returned anyway. The error is reflected in 11703the parser state, normally resulting in a single exception at the top 11704level of parsing which covers all the compilation errors that occurred. 11705Some compilation errors, however, will throw an exception immediately. 11706 11707The C<flags> parameter is reserved for future use, and must always 11708be zero. 11709 11710=cut 11711*/ 11712 11713OP * 11714Perl_parse_block(pTHX_ U32 flags) 11715{ 11716 if (flags) 11717 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); 11718 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER); 11719} 11720 11721/* 11722=for apidoc Amx|OP *|parse_barestmt|U32 flags 11723 11724Parse a single unadorned Perl statement. This may be a normal imperative 11725statement or a declaration that has compile-time effect. It does not 11726include any label or other affixture. It is up to the caller to ensure 11727that the dynamic parser state (L</PL_parser> et al) is correctly set to 11728reflect the source of the code to be parsed and the lexical context for 11729the statement. 11730 11731The op tree representing the statement is returned. This may be a 11732null pointer if the statement is null, for example if it was actually 11733a subroutine definition (which has compile-time side effects). If not 11734null, it will be ops directly implementing the statement, suitable to 11735pass to L</newSTATEOP>. It will not normally include a C<nextstate> or 11736equivalent op (except for those embedded in a scope contained entirely 11737within the statement). 11738 11739If an error occurs in parsing or compilation, in most cases a valid op 11740tree (most likely null) is returned anyway. The error is reflected in 11741the parser state, normally resulting in a single exception at the top 11742level of parsing which covers all the compilation errors that occurred. 11743Some compilation errors, however, will throw an exception immediately. 11744 11745The C<flags> parameter is reserved for future use, and must always 11746be zero. 11747 11748=cut 11749*/ 11750 11751OP * 11752Perl_parse_barestmt(pTHX_ U32 flags) 11753{ 11754 if (flags) 11755 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); 11756 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER); 11757} 11758 11759/* 11760=for apidoc Amx|SV *|parse_label|U32 flags 11761 11762Parse a single label, possibly optional, of the type that may prefix a 11763Perl statement. It is up to the caller to ensure that the dynamic parser 11764state (L</PL_parser> et al) is correctly set to reflect the source of 11765the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the 11766label is optional, otherwise it is mandatory. 11767 11768The name of the label is returned in the form of a fresh scalar. If an 11769optional label is absent, a null pointer is returned. 11770 11771If an error occurs in parsing, which can only occur if the label is 11772mandatory, a valid label is returned anyway. The error is reflected in 11773the parser state, normally resulting in a single exception at the top 11774level of parsing which covers all the compilation errors that occurred. 11775 11776=cut 11777*/ 11778 11779SV * 11780Perl_parse_label(pTHX_ U32 flags) 11781{ 11782 if (flags & ~PARSE_OPTIONAL) 11783 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); 11784 if (PL_lex_state == LEX_KNOWNEXT) { 11785 PL_parser->yychar = yylex(); 11786 if (PL_parser->yychar == LABEL) { 11787 char * const lpv = pl_yylval.pval; 11788 STRLEN llen = strlen(lpv); 11789 PL_parser->yychar = YYEMPTY; 11790 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0); 11791 } else { 11792 yyunlex(); 11793 goto no_label; 11794 } 11795 } else { 11796 char *s, *t; 11797 STRLEN wlen, bufptr_pos; 11798 lex_read_space(0); 11799 t = s = PL_bufptr; 11800 if (!isIDFIRST_lazy_if(s, UTF)) 11801 goto no_label; 11802 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); 11803 if (word_takes_any_delimeter(s, wlen)) 11804 goto no_label; 11805 bufptr_pos = s - SvPVX(PL_linestr); 11806 PL_bufptr = t; 11807 lex_read_space(LEX_KEEP_PREVIOUS); 11808 t = PL_bufptr; 11809 s = SvPVX(PL_linestr) + bufptr_pos; 11810 if (t[0] == ':' && t[1] != ':') { 11811 PL_oldoldbufptr = PL_oldbufptr; 11812 PL_oldbufptr = s; 11813 PL_bufptr = t+1; 11814 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); 11815 } else { 11816 PL_bufptr = s; 11817 no_label: 11818 if (flags & PARSE_OPTIONAL) { 11819 return NULL; 11820 } else { 11821 qerror(Perl_mess(aTHX_ "Parse error")); 11822 return newSVpvs("x"); 11823 } 11824 } 11825 } 11826} 11827 11828/* 11829=for apidoc Amx|OP *|parse_fullstmt|U32 flags 11830 11831Parse a single complete Perl statement. This may be a normal imperative 11832statement or a declaration that has compile-time effect, and may include 11833optional labels. It is up to the caller to ensure that the dynamic 11834parser state (L</PL_parser> et al) is correctly set to reflect the source 11835of the code to be parsed and the lexical context for the statement. 11836 11837The op tree representing the statement is returned. This may be a 11838null pointer if the statement is null, for example if it was actually 11839a subroutine definition (which has compile-time side effects). If not 11840null, it will be the result of a L</newSTATEOP> call, normally including 11841a C<nextstate> or equivalent op. 11842 11843If an error occurs in parsing or compilation, in most cases a valid op 11844tree (most likely null) is returned anyway. The error is reflected in 11845the parser state, normally resulting in a single exception at the top 11846level of parsing which covers all the compilation errors that occurred. 11847Some compilation errors, however, will throw an exception immediately. 11848 11849The C<flags> parameter is reserved for future use, and must always 11850be zero. 11851 11852=cut 11853*/ 11854 11855OP * 11856Perl_parse_fullstmt(pTHX_ U32 flags) 11857{ 11858 if (flags) 11859 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); 11860 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER); 11861} 11862 11863/* 11864=for apidoc Amx|OP *|parse_stmtseq|U32 flags 11865 11866Parse a sequence of zero or more Perl statements. These may be normal 11867imperative statements, including optional labels, or declarations 11868that have compile-time effect, or any mixture thereof. The statement 11869sequence ends when a closing brace or end-of-file is encountered in a 11870place where a new statement could have validly started. It is up to 11871the caller to ensure that the dynamic parser state (L</PL_parser> et al) 11872is correctly set to reflect the source of the code to be parsed and the 11873lexical context for the statements. 11874 11875The op tree representing the statement sequence is returned. This may 11876be a null pointer if the statements were all null, for example if there 11877were no statements or if there were only subroutine definitions (which 11878have compile-time side effects). If not null, it will be a C<lineseq> 11879list, normally including C<nextstate> or equivalent ops. 11880 11881If an error occurs in parsing or compilation, in most cases a valid op 11882tree is returned anyway. The error is reflected in the parser state, 11883normally resulting in a single exception at the top level of parsing 11884which covers all the compilation errors that occurred. Some compilation 11885errors, however, will throw an exception immediately. 11886 11887The C<flags> parameter is reserved for future use, and must always 11888be zero. 11889 11890=cut 11891*/ 11892 11893OP * 11894Perl_parse_stmtseq(pTHX_ U32 flags) 11895{ 11896 OP *stmtseqop; 11897 I32 c; 11898 if (flags) 11899 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); 11900 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING); 11901 c = lex_peek_unichar(0); 11902 if (c != -1 && c != /*{*/'}') 11903 qerror(Perl_mess(aTHX_ "Parse error")); 11904 return stmtseqop; 11905} 11906 11907#define lex_token_boundary() S_lex_token_boundary(aTHX) 11908static void 11909S_lex_token_boundary(pTHX) 11910{ 11911 PL_oldoldbufptr = PL_oldbufptr; 11912 PL_oldbufptr = PL_bufptr; 11913} 11914 11915#define parse_opt_lexvar() S_parse_opt_lexvar(aTHX) 11916static OP * 11917S_parse_opt_lexvar(pTHX) 11918{ 11919 I32 sigil, c; 11920 char *s, *d; 11921 OP *var; 11922 lex_token_boundary(); 11923 sigil = lex_read_unichar(0); 11924 if (lex_peek_unichar(0) == '#') { 11925 qerror(Perl_mess(aTHX_ "Parse error")); 11926 return NULL; 11927 } 11928 lex_read_space(0); 11929 c = lex_peek_unichar(0); 11930 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c))) 11931 return NULL; 11932 s = PL_bufptr; 11933 d = PL_tokenbuf + 1; 11934 PL_tokenbuf[0] = (char)sigil; 11935 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF)); 11936 PL_bufptr = s; 11937 if (d == PL_tokenbuf+1) 11938 return NULL; 11939 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV, 11940 OPf_MOD | (OPpLVAL_INTRO<<8)); 11941 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0); 11942 return var; 11943} 11944 11945OP * 11946Perl_parse_subsignature(pTHX) 11947{ 11948 I32 c; 11949 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0; 11950 OP *initops = NULL; 11951 lex_read_space(0); 11952 c = lex_peek_unichar(0); 11953 while (c != /*(*/')') { 11954 switch (c) { 11955 case '$': { 11956 OP *var, *expr; 11957 if (prev_type == 2) 11958 qerror(Perl_mess(aTHX_ "Slurpy parameter not last")); 11959 var = parse_opt_lexvar(); 11960 expr = var ? 11961 newBINOP(OP_AELEM, 0, 11962 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)), 11963 OP_RV2AV), 11964 newSVOP(OP_CONST, 0, newSViv(pos))) : 11965 NULL; 11966 lex_read_space(0); 11967 c = lex_peek_unichar(0); 11968 if (c == '=') { 11969 lex_token_boundary(); 11970 lex_read_unichar(0); 11971 lex_read_space(0); 11972 c = lex_peek_unichar(0); 11973 if (c == ',' || c == /*(*/')') { 11974 if (var) 11975 qerror(Perl_mess(aTHX_ "Optional parameter " 11976 "lacks default expression")); 11977 } else { 11978 OP *defexpr = parse_termexpr(0); 11979 if (defexpr->op_type == OP_UNDEF 11980 && !(defexpr->op_flags & OPf_KIDS)) 11981 { 11982 op_free(defexpr); 11983 } else { 11984 OP *ifop = 11985 newBINOP(OP_GE, 0, 11986 scalar(newUNOP(OP_RV2AV, 0, 11987 newGVOP(OP_GV, 0, PL_defgv))), 11988 newSVOP(OP_CONST, 0, newSViv(pos+1))); 11989 expr = var ? 11990 newCONDOP(0, ifop, expr, defexpr) : 11991 newLOGOP(OP_OR, 0, ifop, defexpr); 11992 } 11993 } 11994 prev_type = 1; 11995 } else { 11996 if (prev_type == 1) 11997 qerror(Perl_mess(aTHX_ "Mandatory parameter " 11998 "follows optional parameter")); 11999 prev_type = 0; 12000 min_arity = pos + 1; 12001 } 12002 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr); 12003 if (expr) 12004 initops = op_append_list(OP_LINESEQ, initops, 12005 newSTATEOP(0, NULL, expr)); 12006 max_arity = ++pos; 12007 } break; 12008 case '@': 12009 case '%': { 12010 OP *var; 12011 if (prev_type == 2) 12012 qerror(Perl_mess(aTHX_ "Slurpy parameter not last")); 12013 var = parse_opt_lexvar(); 12014 if (c == '%') { 12015 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0, 12016 newBINOP(OP_BIT_AND, 0, 12017 scalar(newUNOP(OP_RV2AV, 0, 12018 newGVOP(OP_GV, 0, PL_defgv))), 12019 newSVOP(OP_CONST, 0, newSViv(1))), 12020 op_convert_list(OP_DIE, 0, 12021 op_convert_list(OP_SPRINTF, 0, 12022 op_append_list(OP_LIST, 12023 newSVOP(OP_CONST, 0, 12024 newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")), 12025 newSLICEOP(0, 12026 op_append_list(OP_LIST, 12027 newSVOP(OP_CONST, 0, newSViv(1)), 12028 newSVOP(OP_CONST, 0, newSViv(2))), 12029 newOP(OP_CALLER, 0)))))); 12030 if (pos != min_arity) 12031 chkop = newLOGOP(OP_AND, 0, 12032 newBINOP(OP_GT, 0, 12033 scalar(newUNOP(OP_RV2AV, 0, 12034 newGVOP(OP_GV, 0, PL_defgv))), 12035 newSVOP(OP_CONST, 0, newSViv(pos))), 12036 chkop); 12037 initops = op_append_list(OP_LINESEQ, 12038 newSTATEOP(0, NULL, chkop), 12039 initops); 12040 } 12041 if (var) { 12042 OP *slice = pos ? 12043 op_prepend_elem(OP_ASLICE, 12044 newOP(OP_PUSHMARK, 0), 12045 newLISTOP(OP_ASLICE, 0, 12046 list(newRANGE(0, 12047 newSVOP(OP_CONST, 0, newSViv(pos)), 12048 newUNOP(OP_AV2ARYLEN, 0, 12049 ref(newUNOP(OP_RV2AV, 0, 12050 newGVOP(OP_GV, 0, PL_defgv)), 12051 OP_AV2ARYLEN)))), 12052 ref(newUNOP(OP_RV2AV, 0, 12053 newGVOP(OP_GV, 0, PL_defgv)), 12054 OP_ASLICE))) : 12055 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)); 12056 initops = op_append_list(OP_LINESEQ, initops, 12057 newSTATEOP(0, NULL, 12058 newASSIGNOP(OPf_STACKED, var, 0, slice))); 12059 } 12060 prev_type = 2; 12061 max_arity = -1; 12062 } break; 12063 default: 12064 parse_error: 12065 qerror(Perl_mess(aTHX_ "Parse error")); 12066 return NULL; 12067 } 12068 lex_read_space(0); 12069 c = lex_peek_unichar(0); 12070 switch (c) { 12071 case /*(*/')': break; 12072 case ',': 12073 do { 12074 lex_token_boundary(); 12075 lex_read_unichar(0); 12076 lex_read_space(0); 12077 c = lex_peek_unichar(0); 12078 } while (c == ','); 12079 break; 12080 default: 12081 goto parse_error; 12082 } 12083 } 12084 if (min_arity != 0) { 12085 initops = op_append_list(OP_LINESEQ, 12086 newSTATEOP(0, NULL, 12087 newLOGOP(OP_OR, 0, 12088 newBINOP(OP_GE, 0, 12089 scalar(newUNOP(OP_RV2AV, 0, 12090 newGVOP(OP_GV, 0, PL_defgv))), 12091 newSVOP(OP_CONST, 0, newSViv(min_arity))), 12092 op_convert_list(OP_DIE, 0, 12093 op_convert_list(OP_SPRINTF, 0, 12094 op_append_list(OP_LIST, 12095 newSVOP(OP_CONST, 0, 12096 newSVpvs("Too few arguments for subroutine at %s line %d.\n")), 12097 newSLICEOP(0, 12098 op_append_list(OP_LIST, 12099 newSVOP(OP_CONST, 0, newSViv(1)), 12100 newSVOP(OP_CONST, 0, newSViv(2))), 12101 newOP(OP_CALLER, 0))))))), 12102 initops); 12103 } 12104 if (max_arity != -1) { 12105 initops = op_append_list(OP_LINESEQ, 12106 newSTATEOP(0, NULL, 12107 newLOGOP(OP_OR, 0, 12108 newBINOP(OP_LE, 0, 12109 scalar(newUNOP(OP_RV2AV, 0, 12110 newGVOP(OP_GV, 0, PL_defgv))), 12111 newSVOP(OP_CONST, 0, newSViv(max_arity))), 12112 op_convert_list(OP_DIE, 0, 12113 op_convert_list(OP_SPRINTF, 0, 12114 op_append_list(OP_LIST, 12115 newSVOP(OP_CONST, 0, 12116 newSVpvs("Too many arguments for subroutine at %s line %d.\n")), 12117 newSLICEOP(0, 12118 op_append_list(OP_LIST, 12119 newSVOP(OP_CONST, 0, newSViv(1)), 12120 newSVOP(OP_CONST, 0, newSViv(2))), 12121 newOP(OP_CALLER, 0))))))), 12122 initops); 12123 } 12124 return initops; 12125} 12126 12127/* 12128 * ex: set ts=8 sts=4 sw=4 et: 12129 */ 12130