1/* toke.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11/* 12 * 'It all comes from here, the stench and the peril.' --Frodo 13 * 14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] 15 */ 16 17/* 18 * This file is the lexer for Perl. It's closely linked to the 19 * parser, perly.y. 20 * 21 * The main routine is yylex(), which returns the next token. 22 */ 23 24/* 25=head1 Lexer interface 26This is the lower layer of the Perl parser, managing characters and tokens. 27 28=for apidoc AmnU|yy_parser *|PL_parser 29 30Pointer to a structure encapsulating the state of the parsing operation 31currently in progress. The pointer can be locally changed to perform 32a nested parse without interfering with the state of an outer parse. 33Individual members of C<PL_parser> have their own documentation. 34 35=cut 36*/ 37 38#include "EXTERN.h" 39#define PERL_IN_TOKE_C 40#include "perl.h" 41#include "invlist_inline.h" 42 43#define new_constant(a,b,c,d,e,f,g, h) \ 44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h) 45 46#define pl_yylval (PL_parser->yylval) 47 48/* XXX temporary backwards compatibility */ 49#define PL_lex_brackets (PL_parser->lex_brackets) 50#define PL_lex_allbrackets (PL_parser->lex_allbrackets) 51#define PL_lex_fakeeof (PL_parser->lex_fakeeof) 52#define PL_lex_brackstack (PL_parser->lex_brackstack) 53#define PL_lex_casemods (PL_parser->lex_casemods) 54#define PL_lex_casestack (PL_parser->lex_casestack) 55#define PL_lex_dojoin (PL_parser->lex_dojoin) 56#define PL_lex_formbrack (PL_parser->lex_formbrack) 57#define PL_lex_inpat (PL_parser->lex_inpat) 58#define PL_lex_inwhat (PL_parser->lex_inwhat) 59#define PL_lex_op (PL_parser->lex_op) 60#define PL_lex_repl (PL_parser->lex_repl) 61#define PL_lex_starts (PL_parser->lex_starts) 62#define PL_lex_stuff (PL_parser->lex_stuff) 63#define PL_multi_start (PL_parser->multi_start) 64#define PL_multi_open (PL_parser->multi_open) 65#define PL_multi_close (PL_parser->multi_close) 66#define PL_preambled (PL_parser->preambled) 67#define PL_linestr (PL_parser->linestr) 68#define PL_expect (PL_parser->expect) 69#define PL_copline (PL_parser->copline) 70#define PL_bufptr (PL_parser->bufptr) 71#define PL_oldbufptr (PL_parser->oldbufptr) 72#define PL_oldoldbufptr (PL_parser->oldoldbufptr) 73#define PL_linestart (PL_parser->linestart) 74#define PL_bufend (PL_parser->bufend) 75#define PL_last_uni (PL_parser->last_uni) 76#define PL_last_lop (PL_parser->last_lop) 77#define PL_last_lop_op (PL_parser->last_lop_op) 78#define PL_lex_state (PL_parser->lex_state) 79#define PL_rsfp (PL_parser->rsfp) 80#define PL_rsfp_filters (PL_parser->rsfp_filters) 81#define PL_in_my (PL_parser->in_my) 82#define PL_in_my_stash (PL_parser->in_my_stash) 83#define PL_tokenbuf (PL_parser->tokenbuf) 84#define PL_multi_end (PL_parser->multi_end) 85#define PL_error_count (PL_parser->error_count) 86 87# define PL_nexttoke (PL_parser->nexttoke) 88# define PL_nexttype (PL_parser->nexttype) 89# define PL_nextval (PL_parser->nextval) 90 91 92#define SvEVALED(sv) \ 93 (SvTYPE(sv) >= SVt_PVNV \ 94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen) 95 96static const char ident_too_long[] = "Identifier too long"; 97static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'"; 98 99# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] 100 101#define XENUMMASK 0x3f 102#define XFAKEEOF 0x40 103#define XFAKEBRACK 0x80 104 105#ifdef USE_UTF8_SCRIPTS 106# define UTF cBOOL(!IN_BYTES) 107#else 108# define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) 109#endif 110 111/* The maximum number of characters preceding the unrecognized one to display */ 112#define UNRECOGNIZED_PRECEDE_COUNT 10 113 114/* In variables named $^X, these are the legal values for X. 115 * 1999-02-27 mjd-perl-patch@plover.com */ 116#define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x))) 117 118/* Non-identifier plugin infix operators are allowed any printing character 119 * except spaces, digits, or identifier chars 120 */ 121#define isPLUGINFIX(c) (c && !isSPACE(c) && !isDIGIT(c) && !isALPHA(c)) 122/* Plugin infix operators may not begin with a quote symbol */ 123#define isPLUGINFIX_FIRST(c) (isPLUGINFIX(c) && c != '"' && c != '\'') 124 125#define PLUGINFIX_IS_ENABLED UNLIKELY(PL_infix_plugin != &Perl_infix_plugin_standard) 126 127#define SPACE_OR_TAB(c) isBLANK_A(c) 128 129#define HEXFP_PEEK(s) \ 130 (((s[0] == '.') && \ 131 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \ 132 isALPHA_FOLD_EQ(s[0], 'p')) 133 134/* LEX_* are values for PL_lex_state, the state of the lexer. 135 * They are arranged oddly so that the guard on the switch statement 136 * can get by with a single comparison (if the compiler is smart enough). 137 * 138 * These values refer to the various states within a sublex parse, 139 * i.e. within a double quotish string 140 */ 141 142/* #define LEX_NOTPARSING 11 is done in perl.h. */ 143 144#define LEX_NORMAL 10 /* normal code (ie not within "...") */ 145#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */ 146#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */ 147#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ 148#define LEX_INTERPSTART 6 /* expecting the start of a $var */ 149 150 /* at end of code, eg "$x" followed by: */ 151#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ 152#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ 153 154#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of 155 string or after \E, $foo, etc */ 156#define LEX_INTERPCONST 2 /* NOT USED */ 157#define LEX_FORMLINE 1 /* expecting a format line */ 158 159/* returned to yyl_try() to request it to retry the parse loop, expected to only 160 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof() 161 can also return it. 162 163 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1, 164 other token values are 258 or higher (see perly.h), so -1 should be 165 a safe value here. 166*/ 167#define YYL_RETRY (-1) 168 169#ifdef DEBUGGING 170static const char* const lex_state_names[] = { 171 "KNOWNEXT", 172 "FORMLINE", 173 "INTERPCONST", 174 "INTERPCONCAT", 175 "INTERPENDMAYBE", 176 "INTERPEND", 177 "INTERPSTART", 178 "INTERPPUSH", 179 "INTERPCASEMOD", 180 "INTERPNORMAL", 181 "NORMAL" 182}; 183#endif 184 185#include "keywords.h" 186 187/* CLINE is a macro that ensures PL_copline has a sane value */ 188 189#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) 190 191/* 192 * Convenience functions to return different tokens and prime the 193 * lexer for the next token. They all take an argument. 194 * 195 * TOKEN : generic token (used for '(', DOLSHARP, etc) 196 * OPERATOR : generic operator 197 * AOPERATOR : assignment operator 198 * PREBLOCK : beginning the block after an if, while, foreach, ... 199 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref) 200 * PREREF : *EXPR where EXPR is not a simple identifier 201 * TERM : expression term 202 * POSTDEREF : postfix dereference (->$* ->@[...] etc.) 203 * LOOPX : loop exiting command (goto, last, dump, etc) 204 * FTST : file test operator 205 * FUN0 : zero-argument function 206 * FUN0OP : zero-argument function, with its op created in this file 207 * FUN1 : not used, except for not, which isn't a UNIOP 208 * BOop : bitwise or or xor 209 * BAop : bitwise and 210 * BCop : bitwise complement 211 * SHop : shift operator 212 * PWop : power operator 213 * PMop : pattern-matching operator 214 * Aop : addition-level operator 215 * AopNOASSIGN : addition-level operator that is never part of .= 216 * Mop : multiplication-level operator 217 * ChEop : chaining equality-testing operator 218 * NCEop : non-chaining comparison operator at equality precedence 219 * ChRop : chaining relational operator <= != gt 220 * NCRop : non-chaining relational operator isa 221 * 222 * Also see LOP and lop() below. 223 */ 224 225#ifdef DEBUGGING /* Serve -DT. */ 226# define REPORT(retval) tokereport((I32)retval, &pl_yylval) 227#else 228# define REPORT(retval) (retval) 229#endif 230 231#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) 232#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) 233#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval)) 234#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) 235#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) 236#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) 237#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) 238#define PHASERBLOCK(f) return (pl_yylval.ival=f, PL_expect = XBLOCK, PL_bufptr = s, REPORT((int)PHASER)) 239#define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) 240#define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \ 241 pl_yylval.ival=f, \ 242 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ 243 REPORT((int)LOOPEX)) 244#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) 245#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) 246#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) 247#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) 248#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) 249#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP)) 250#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \ 251 REPORT(PERLY_TILDE) 252#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP)) 253#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP)) 254#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) 255#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) 256#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) 257#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP)) 258#define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP)) 259#define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP)) 260#define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP)) 261#define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP)) 262 263/* This bit of chicanery makes a unary function followed by 264 * a parenthesis into a function with one argument, highest precedence. 265 * The UNIDOR macro is for unary functions that can be followed by the // 266 * operator (such as C<shift // 0>). 267 */ 268#define UNI3(f,x,have_x) { \ 269 pl_yylval.ival = f; \ 270 if (have_x) PL_expect = x; \ 271 PL_bufptr = s; \ 272 PL_last_uni = PL_oldbufptr; \ 273 PL_last_lop_op = (f) < 0 ? -(f) : (f); \ 274 if (*s == '(') \ 275 return REPORT( (int)FUNC1 ); \ 276 s = skipspace(s); \ 277 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ 278 } 279#define UNI(f) UNI3(f,XTERM,1) 280#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) 281#define UNIPROTO(f,optional) { \ 282 if (optional) PL_last_uni = PL_oldbufptr; \ 283 OPERATOR(f); \ 284 } 285 286#define UNIBRACK(f) UNI3(f,0,0) 287 288/* return has special case parsing. 289 * 290 * List operators have low precedence. Functions have high precedence. 291 * Every built in, *except return*, if written with () around its arguments, is 292 * parsed as a function. Hence every other list built in: 293 * 294 * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9 295 * 429 296 * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5 297 * 639 298 * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()' 299 * Useless use of a constant (2) in void context at -e line 1. 300 * Useless use of a constant (4) in void context at -e line 1. 301 * 302 * $ 303 * 304 * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a 305 * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string. 306 * 307 * Whereas return: 308 * 309 * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()' 310 * 2 311 * 4 312 * 9 313 * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()' 314 * Useless use of a constant (2) in void context at -e line 1. 315 * Useless use of a constant (4) in void context at -e line 1. 316 * 9 317 * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()' 318 * Useless use of a constant (2) in void context at -e line 1. 319 * Useless use of a constant (4) in void context at -e line 1. 320 * 9 321 * $ 322 * 323 * and: 324 * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()' 325 * 2 326 * 4 327 * 6 328 * 329 * This last example is what we expect, but it's clearly inconsistent with how 330 * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently 331 * followed. 332 * 333 * 334 * Perl 3 attempted to be consistent: 335 * 336 * The rules are more consistent about where parens are needed and 337 * where they are not. In particular, unary operators and list operators now 338 * behave like functions if they're called like functions. 339 * 340 * However, the behaviour for return was reverted to the "old" parsing with 341 * patches 9-12: 342 * 343 * The construct 344 * return (1,2,3); 345 * did not do what was expected, since return was swallowing the 346 * parens in order to consider itself a function. The solution, 347 * since return never wants any trailing expression such as 348 * return (1,2,3) + 2; 349 * is to simply make return an exception to the paren-makes-a-function 350 * rule, and treat it the way it always was, so that it doesn't 351 * strip the parens. 352 * 353 * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with 354 * LOP(OP_RETURN, XTERM); 355 * 356 * and constructs such as 357 * 358 * return (Internals::V())[2] 359 * 360 * turn into syntax errors 361 */ 362 363#define OLDLOP(f) \ 364 do { \ 365 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ 366 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ 367 pl_yylval.ival = (f); \ 368 PL_expect = XTERM; \ 369 PL_bufptr = s; \ 370 return (int)LSTOP; \ 371 } while(0) 372 373#define COPLINE_INC_WITH_HERELINES \ 374 STMT_START { \ 375 CopLINE_inc(PL_curcop); \ 376 if (PL_parser->herelines) \ 377 CopLINE(PL_curcop) += PL_parser->herelines, \ 378 PL_parser->herelines = 0; \ 379 } STMT_END 380/* Called after scan_str to update CopLINE(PL_curcop), but only when there 381 * is no sublex_push to follow. */ 382#define COPLINE_SET_FROM_MULTI_END \ 383 STMT_START { \ 384 CopLINE_set(PL_curcop, PL_multi_end); \ 385 if (PL_multi_end != PL_multi_start) \ 386 PL_parser->herelines = 0; \ 387 } STMT_END 388 389 390/* A file-local structure for passing around information about subroutines and 391 * related definable words */ 392struct code { 393 SV *sv; 394 CV *cv; 395 GV *gv, **gvp; 396 OP *rv2cv_op; 397 PADOFFSET off; 398 bool lex; 399}; 400 401static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE }; 402 403#ifdef DEBUGGING 404 405/* how to interpret the pl_yylval associated with the token */ 406enum token_type { 407 TOKENTYPE_NONE, 408 TOKENTYPE_IVAL, 409 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */ 410 TOKENTYPE_PVAL, 411 TOKENTYPE_OPVAL 412}; 413 414#define DEBUG_TOKEN(Type, Name) \ 415 { Name, TOKENTYPE_##Type, #Name } 416 417static struct debug_tokens { 418 const int token; 419 enum token_type type; 420 const char *name; 421} const debug_tokens[] = 422{ 423 DEBUG_TOKEN (OPNUM, ADDOP), 424 DEBUG_TOKEN (NONE, ANDAND), 425 DEBUG_TOKEN (NONE, ANDOP), 426 DEBUG_TOKEN (NONE, ARROW), 427 DEBUG_TOKEN (OPNUM, ASSIGNOP), 428 DEBUG_TOKEN (OPNUM, BITANDOP), 429 DEBUG_TOKEN (OPNUM, BITOROP), 430 DEBUG_TOKEN (OPNUM, CHEQOP), 431 DEBUG_TOKEN (OPNUM, CHRELOP), 432 DEBUG_TOKEN (NONE, COLONATTR), 433 DEBUG_TOKEN (NONE, DOLSHARP), 434 DEBUG_TOKEN (NONE, DORDOR), 435 DEBUG_TOKEN (IVAL, DOTDOT), 436 DEBUG_TOKEN (NONE, FORMLBRACK), 437 DEBUG_TOKEN (NONE, FORMRBRACK), 438 DEBUG_TOKEN (OPNUM, FUNC), 439 DEBUG_TOKEN (OPNUM, FUNC0), 440 DEBUG_TOKEN (OPVAL, FUNC0OP), 441 DEBUG_TOKEN (OPVAL, FUNC0SUB), 442 DEBUG_TOKEN (OPNUM, FUNC1), 443 DEBUG_TOKEN (NONE, HASHBRACK), 444 DEBUG_TOKEN (IVAL, KW_CATCH), 445 DEBUG_TOKEN (IVAL, KW_CLASS), 446 DEBUG_TOKEN (IVAL, KW_CONTINUE), 447 DEBUG_TOKEN (IVAL, KW_DEFAULT), 448 DEBUG_TOKEN (IVAL, KW_DO), 449 DEBUG_TOKEN (IVAL, KW_ELSE), 450 DEBUG_TOKEN (IVAL, KW_ELSIF), 451 DEBUG_TOKEN (IVAL, KW_FIELD), 452 DEBUG_TOKEN (IVAL, KW_GIVEN), 453 DEBUG_TOKEN (IVAL, KW_FOR), 454 DEBUG_TOKEN (IVAL, KW_FORMAT), 455 DEBUG_TOKEN (IVAL, KW_IF), 456 DEBUG_TOKEN (IVAL, KW_LOCAL), 457 DEBUG_TOKEN (IVAL, KW_METHOD_anon), 458 DEBUG_TOKEN (IVAL, KW_METHOD_named), 459 DEBUG_TOKEN (IVAL, KW_MY), 460 DEBUG_TOKEN (IVAL, KW_PACKAGE), 461 DEBUG_TOKEN (IVAL, KW_REQUIRE), 462 DEBUG_TOKEN (IVAL, KW_SUB_anon), 463 DEBUG_TOKEN (IVAL, KW_SUB_anon_sig), 464 DEBUG_TOKEN (IVAL, KW_SUB_named), 465 DEBUG_TOKEN (IVAL, KW_SUB_named_sig), 466 DEBUG_TOKEN (IVAL, KW_TRY), 467 DEBUG_TOKEN (IVAL, KW_USE_or_NO), 468 DEBUG_TOKEN (IVAL, KW_UNLESS), 469 DEBUG_TOKEN (IVAL, KW_UNTIL), 470 DEBUG_TOKEN (IVAL, KW_WHEN), 471 DEBUG_TOKEN (IVAL, KW_WHILE), 472 DEBUG_TOKEN (OPVAL, LABEL), 473 DEBUG_TOKEN (OPNUM, LOOPEX), 474 DEBUG_TOKEN (OPNUM, LSTOP), 475 DEBUG_TOKEN (OPVAL, LSTOPSUB), 476 DEBUG_TOKEN (OPNUM, MATCHOP), 477 DEBUG_TOKEN (OPVAL, METHCALL), 478 DEBUG_TOKEN (OPVAL, METHCALL0), 479 DEBUG_TOKEN (OPNUM, MULOP), 480 DEBUG_TOKEN (OPNUM, NCEQOP), 481 DEBUG_TOKEN (OPNUM, NCRELOP), 482 DEBUG_TOKEN (NONE, NOAMP), 483 DEBUG_TOKEN (NONE, NOTOP), 484 DEBUG_TOKEN (IVAL, OROP), 485 DEBUG_TOKEN (NONE, OROR), 486 DEBUG_TOKEN (IVAL, PERLY_AMPERSAND), 487 DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE), 488 DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN), 489 DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE), 490 DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN), 491 DEBUG_TOKEN (IVAL, PERLY_COLON), 492 DEBUG_TOKEN (IVAL, PERLY_COMMA), 493 DEBUG_TOKEN (IVAL, PERLY_DOT), 494 DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN), 495 DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK), 496 DEBUG_TOKEN (IVAL, PERLY_MINUS), 497 DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN), 498 DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN), 499 DEBUG_TOKEN (IVAL, PERLY_PLUS), 500 DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK), 501 DEBUG_TOKEN (IVAL, PERLY_SEMICOLON), 502 DEBUG_TOKEN (IVAL, PERLY_SLASH), 503 DEBUG_TOKEN (IVAL, PERLY_SNAIL), 504 DEBUG_TOKEN (IVAL, PERLY_STAR), 505 DEBUG_TOKEN (IVAL, PERLY_TILDE), 506 DEBUG_TOKEN (OPVAL, PLUGEXPR), 507 DEBUG_TOKEN (OPVAL, PLUGSTMT), 508 DEBUG_TOKEN (PVAL, PLUGIN_ADD_OP), 509 DEBUG_TOKEN (PVAL, PLUGIN_ASSIGN_OP), 510 DEBUG_TOKEN (PVAL, PLUGIN_HIGH_OP), 511 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_OP), 512 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_OP), 513 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_LOW_OP), 514 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_LOW_OP), 515 DEBUG_TOKEN (PVAL, PLUGIN_LOW_OP), 516 DEBUG_TOKEN (PVAL, PLUGIN_MUL_OP), 517 DEBUG_TOKEN (PVAL, PLUGIN_POW_OP), 518 DEBUG_TOKEN (PVAL, PLUGIN_REL_OP), 519 DEBUG_TOKEN (OPVAL, PMFUNC), 520 DEBUG_TOKEN (NONE, POSTJOIN), 521 DEBUG_TOKEN (NONE, POSTDEC), 522 DEBUG_TOKEN (NONE, POSTINC), 523 DEBUG_TOKEN (OPNUM, POWOP), 524 DEBUG_TOKEN (NONE, PREDEC), 525 DEBUG_TOKEN (NONE, PREINC), 526 DEBUG_TOKEN (OPVAL, PRIVATEREF), 527 DEBUG_TOKEN (OPVAL, QWLIST), 528 DEBUG_TOKEN (NONE, REFGEN), 529 DEBUG_TOKEN (OPNUM, SHIFTOP), 530 DEBUG_TOKEN (NONE, SUBLEXEND), 531 DEBUG_TOKEN (NONE, SUBLEXSTART), 532 DEBUG_TOKEN (OPVAL, THING), 533 DEBUG_TOKEN (NONE, UMINUS), 534 DEBUG_TOKEN (OPNUM, UNIOP), 535 DEBUG_TOKEN (OPVAL, UNIOPSUB), 536 DEBUG_TOKEN (OPVAL, BAREWORD), 537 DEBUG_TOKEN (IVAL, YADAYADA), 538 { 0, TOKENTYPE_NONE, NULL } 539}; 540 541#undef DEBUG_TOKEN 542 543/* dump the returned token in rv, plus any optional arg in pl_yylval */ 544 545STATIC int 546S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) 547{ 548 PERL_ARGS_ASSERT_TOKEREPORT; 549 550 if (DEBUG_T_TEST) { 551 const char *name = NULL; 552 enum token_type type = TOKENTYPE_NONE; 553 const struct debug_tokens *p; 554 SV* const report = newSVpvs("<== "); 555 556 for (p = debug_tokens; p->token; p++) { 557 if (p->token == (int)rv) { 558 name = p->name; 559 type = p->type; 560 break; 561 } 562 } 563 if (name) 564 Perl_sv_catpv(aTHX_ report, name); 565 else if (isGRAPH(rv)) 566 { 567 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); 568 if ((char)rv == 'p') 569 sv_catpvs(report, " (pending identifier)"); 570 } 571 else if (!rv) 572 sv_catpvs(report, "EOF"); 573 else 574 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv); 575 switch (type) { 576 case TOKENTYPE_NONE: 577 break; 578 case TOKENTYPE_IVAL: 579 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival); 580 break; 581 case TOKENTYPE_OPNUM: 582 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", 583 PL_op_name[lvalp->ival]); 584 break; 585 case TOKENTYPE_PVAL: 586 Perl_sv_catpvf(aTHX_ report, "(pval=%p)", lvalp->pval); 587 break; 588 case TOKENTYPE_OPVAL: 589 if (lvalp->opval) { 590 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", 591 PL_op_name[lvalp->opval->op_type]); 592 if (lvalp->opval->op_type == OP_CONST) { 593 Perl_sv_catpvf(aTHX_ report, " %s", 594 SvPEEK(cSVOPx_sv(lvalp->opval))); 595 } 596 597 } 598 else 599 sv_catpvs(report, "(opval=null)"); 600 break; 601 } 602 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); 603 }; 604 return (int)rv; 605} 606 607 608/* print the buffer with suitable escapes */ 609 610STATIC void 611S_printbuf(pTHX_ const char *const fmt, const char *const s) 612{ 613 SV* const tmp = newSVpvs(""); 614 615 PERL_ARGS_ASSERT_PRINTBUF; 616 617 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */ 618 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); 619 GCC_DIAG_RESTORE_STMT; 620 SvREFCNT_dec(tmp); 621} 622 623#endif 624 625/* 626 * S_ao 627 * 628 * This subroutine looks for an '=' next to the operator that has just been 629 * parsed and turns it into an ASSIGNOP if it finds one. 630 */ 631 632STATIC int 633S_ao(pTHX_ int toketype) 634{ 635 if (*PL_bufptr == '=') { 636 PL_bufptr++; 637 638 switch (toketype) { 639 case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break; 640 case OROR: pl_yylval.ival = OP_ORASSIGN; break; 641 case DORDOR: pl_yylval.ival = OP_DORASSIGN; break; 642 } 643 644 toketype = ASSIGNOP; 645 } 646 return REPORT(toketype); 647} 648 649/* 650 * S_no_op 651 * When Perl expects an operator and finds something else, no_op 652 * prints the warning. It always prints "<something> found where 653 * operator expected. It prints "Missing semicolon on previous line?" 654 * if the surprise occurs at the start of the line. "do you need to 655 * predeclare ..." is printed out for code like "sub bar; foo bar $x" 656 * where the compiler doesn't know if foo is a method call or a function. 657 * It prints "Missing operator before end of line" if there's nothing 658 * after the missing operator, or "... before <...>" if there is something 659 * after the missing operator. 660 * 661 * PL_bufptr is expected to point to the start of the thing that was found, 662 * and s after the next token or partial token. 663 */ 664 665STATIC void 666S_no_op(pTHX_ const char *const what, char *s) 667{ 668 char * const oldbp = PL_bufptr; 669 const bool is_first = (PL_oldbufptr == PL_linestart); 670 SV *message = sv_2mortal( newSVpvf( 671 PERL_DIAG_WARN_SYNTAX("%s found where operator expected"), 672 what 673 ) ); 674 675 PERL_ARGS_ASSERT_NO_OP; 676 677 if (!s) 678 s = oldbp; 679 else 680 PL_bufptr = s; 681 682 if (ckWARN_d(WARN_SYNTAX)) { 683 bool has_more = FALSE; 684 if (is_first) { 685 has_more = TRUE; 686 sv_catpvs(message, 687 " (Missing semicolon on previous line?)"); 688 } 689 else if (PL_oldoldbufptr) { 690 /* yyerror (via yywarn) would do this itself, so we should too */ 691 const char *t; 692 for (t = PL_oldoldbufptr; 693 t < PL_bufptr && isSPACE(*t); 694 t += UTF ? UTF8SKIP(t) : 1) 695 { 696 NOOP; 697 } 698 /* see if we can identify the cause of the warning */ 699 if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF)) 700 { 701 const char *t_start= t; 702 for ( ; 703 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':'); 704 t += UTF ? UTF8SKIP(t) : 1) 705 { 706 NOOP; 707 } 708 if (t < PL_bufptr && isSPACE(*t)) { 709 has_more = TRUE; 710 sv_catpvf( message, 711 " (Do you need to predeclare \"%" UTF8f "\"?)", 712 UTF8fARG(UTF, t - t_start, t_start)); 713 } 714 } 715 } 716 if (!has_more) { 717 const char *t= oldbp; 718 assert(s >= oldbp); 719 while (t < s && isSPACE(*t)) { 720 t += UTF ? UTF8SKIP(t) : 1; 721 } 722 723 sv_catpvf(message, 724 " (Missing operator before \"%" UTF8f "\"?)", 725 UTF8fARG(UTF, s - t, t)); 726 } 727 } 728 yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0); 729 PL_bufptr = oldbp; 730} 731 732/* 733 * S_missingterm 734 * Complain about missing quote/regexp/heredoc terminator. 735 * If it's called with NULL then it cauterizes the line buffer. 736 * If we're in a delimited string and the delimiter is a control 737 * character, it's reformatted into a two-char sequence like ^C. 738 * This is fatal. 739 */ 740 741STATIC void 742S_missingterm(pTHX_ char *s, STRLEN len) 743{ 744 char tmpbuf[UTF8_MAXBYTES + 1]; 745 char q; 746 bool uni = FALSE; 747 if (s) { 748 char * const nl = (char *) my_memrchr(s, '\n', len); 749 if (nl) { 750 *nl = '\0'; 751 len = nl - s; 752 } 753 uni = UTF; 754 } 755 else if (PL_multi_close < 32) { 756 *tmpbuf = '^'; 757 tmpbuf[1] = (char)toCTRL(PL_multi_close); 758 tmpbuf[2] = '\0'; 759 s = tmpbuf; 760 len = 2; 761 } 762 else { 763 if (! UTF && LIKELY(PL_multi_close < 256)) { 764 *tmpbuf = (char)PL_multi_close; 765 tmpbuf[1] = '\0'; 766 len = 1; 767 } 768 else { 769 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close); 770 *end = '\0'; 771 len = end - tmpbuf; 772 uni = TRUE; 773 } 774 s = tmpbuf; 775 } 776 q = memchr(s, '"', len) ? '\'' : '"'; 777 Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c" 778 " anywhere before EOF", q, UTF8fARG(uni, len, s), q); 779} 780 781#include "feature.h" 782 783/* 784 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and 785 * utf16-to-utf8-reversed. 786 */ 787 788#ifdef PERL_CR_FILTER 789static void 790strip_return(SV *sv) 791{ 792 const char *s = SvPVX_const(sv); 793 const char * const e = s + SvCUR(sv); 794 795 PERL_ARGS_ASSERT_STRIP_RETURN; 796 797 /* outer loop optimized to do nothing if there are no CR-LFs */ 798 while (s < e) { 799 if (*s++ == '\r' && *s == '\n') { 800 /* hit a CR-LF, need to copy the rest */ 801 char *d = s - 1; 802 *d++ = *s++; 803 while (s < e) { 804 if (*s == '\r' && s[1] == '\n') 805 s++; 806 *d++ = *s++; 807 } 808 SvCUR(sv) -= s - d; 809 return; 810 } 811 } 812} 813 814STATIC I32 815S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) 816{ 817 const I32 count = FILTER_READ(idx+1, sv, maxlen); 818 if (count > 0 && !maxlen) 819 strip_return(sv); 820 return count; 821} 822#endif 823 824/* 825=for apidoc lex_start 826 827Creates and initialises a new lexer/parser state object, supplying 828a context in which to lex and parse from a new source of Perl code. 829A pointer to the new state object is placed in L</PL_parser>. An entry 830is made on the save stack so that upon unwinding, the new state object 831will be destroyed and the former value of L</PL_parser> will be restored. 832Nothing else need be done to clean up the parsing context. 833 834The code to be parsed comes from C<line> and C<rsfp>. C<line>, if 835non-null, provides a string (in SV form) containing code to be parsed. 836A copy of the string is made, so subsequent modification of C<line> 837does not affect parsing. C<rsfp>, if non-null, provides an input stream 838from which code will be read to be parsed. If both are non-null, the 839code in C<line> comes first and must consist of complete lines of input, 840and C<rsfp> supplies the remainder of the source. 841 842The C<flags> parameter is reserved for future use. Currently it is only 843used by perl internally, so extensions should always pass zero. 844 845=cut 846*/ 847 848/* LEX_START_SAME_FILTER indicates that this is not a new file, so it 849 can share filters with the current parser. 850 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the 851 caller, hence isn't owned by the parser, so shouldn't be closed on parser 852 destruction. This is used to handle the case of defaulting to reading the 853 script from the standard input because no filename was given on the command 854 line (without getting confused by situation where STDIN has been closed, so 855 the script handle is opened on fd 0) */ 856 857void 858Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) 859{ 860 const char *s = NULL; 861 yy_parser *parser, *oparser; 862 863 if (flags && flags & ~LEX_START_FLAGS) 864 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); 865 866 /* create and initialise a parser */ 867 868 Newxz(parser, 1, yy_parser); 869 parser->old_parser = oparser = PL_parser; 870 PL_parser = parser; 871 872 parser->stack = NULL; 873 parser->stack_max1 = NULL; 874 parser->ps = NULL; 875 876 /* on scope exit, free this parser and restore any outer one */ 877 SAVEPARSER(parser); 878 parser->saved_curcop = PL_curcop; 879 880 /* initialise lexer state */ 881 882 parser->nexttoke = 0; 883 parser->error_count = oparser ? oparser->error_count : 0; 884 parser->copline = parser->preambling = NOLINE; 885 parser->lex_state = LEX_NORMAL; 886 parser->expect = XSTATE; 887 parser->rsfp = rsfp; 888 parser->recheck_utf8_validity = TRUE; 889 parser->rsfp_filters = 890 !(flags & LEX_START_SAME_FILTER) || !oparser 891 ? NULL 892 : MUTABLE_AV(SvREFCNT_inc( 893 oparser->rsfp_filters 894 ? oparser->rsfp_filters 895 : (oparser->rsfp_filters = newAV()) 896 )); 897 898 Newx(parser->lex_brackstack, 120, char); 899 Newx(parser->lex_casestack, 12, char); 900 *parser->lex_casestack = '\0'; 901 Newxz(parser->lex_shared, 1, LEXSHARED); 902 903 if (line) { 904 Size_t len; 905 const U8* first_bad_char_loc; 906 907 s = SvPV_const(line, len); 908 909 if ( SvUTF8(line) 910 && UNLIKELY(! is_utf8_string_loc((U8 *) s, 911 SvCUR(line), 912 &first_bad_char_loc))) 913 { 914 _force_out_malformed_utf8_message(first_bad_char_loc, 915 (U8 *) s + SvCUR(line), 916 0, 917 1 /* 1 means die */ ); 918 NOT_REACHED; /* NOTREACHED */ 919 } 920 921 parser->linestr = flags & LEX_START_COPIED 922 ? SvREFCNT_inc_simple_NN(line) 923 : newSVpvn_flags(s, len, SvUTF8(line)); 924 if (!rsfp) 925 sv_catpvs(parser->linestr, "\n;"); 926 } else { 927 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); 928 } 929 930 parser->oldoldbufptr = 931 parser->oldbufptr = 932 parser->bufptr = 933 parser->linestart = SvPVX(parser->linestr); 934 parser->bufend = parser->bufptr + SvCUR(parser->linestr); 935 parser->last_lop = parser->last_uni = NULL; 936 937 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 938 |LEX_DONT_CLOSE_RSFP)); 939 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES 940 |LEX_DONT_CLOSE_RSFP)); 941 942 parser->in_pod = parser->filtered = 0; 943} 944 945 946/* delete a parser object */ 947 948void 949Perl_parser_free(pTHX_ const yy_parser *parser) 950{ 951 PERL_ARGS_ASSERT_PARSER_FREE; 952 953 PL_curcop = parser->saved_curcop; 954 SvREFCNT_dec(parser->linestr); 955 956 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 957 PerlIO_clearerr(parser->rsfp); 958 else if (parser->rsfp && (!parser->old_parser 959 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) 960 PerlIO_close(parser->rsfp); 961 SvREFCNT_dec(parser->rsfp_filters); 962 SvREFCNT_dec(parser->lex_stuff); 963 SvREFCNT_dec(parser->lex_sub_repl); 964 965 Safefree(parser->lex_brackstack); 966 Safefree(parser->lex_casestack); 967 Safefree(parser->lex_shared); 968 PL_parser = parser->old_parser; 969 Safefree(parser); 970} 971 972void 973Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab) 974{ 975 I32 nexttoke = parser->nexttoke; 976 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS; 977 while (nexttoke--) { 978 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) 979 && parser->nextval[nexttoke].opval 980 && parser->nextval[nexttoke].opval->op_slabbed 981 && OpSLAB(parser->nextval[nexttoke].opval) == slab) { 982 op_free(parser->nextval[nexttoke].opval); 983 parser->nextval[nexttoke].opval = NULL; 984 } 985 } 986} 987 988 989/* 990=for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr 991 992Buffer scalar containing the chunk currently under consideration of the 993text currently being lexed. This is always a plain string scalar (for 994which C<SvPOK> is true). It is not intended to be used as a scalar by 995normal scalar means; instead refer to the buffer directly by the pointer 996variables described below. 997 998The lexer maintains various C<char*> pointers to things in the 999C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever 1000reallocated, all of these pointers must be updated. Don't attempt to 1001do this manually, but rather use L</lex_grow_linestr> if you need to 1002reallocate the buffer. 1003 1004The content of the text chunk in the buffer is commonly exactly one 1005complete line of input, up to and including a newline terminator, 1006but there are situations where it is otherwise. The octets of the 1007buffer may be intended to be interpreted as either UTF-8 or Latin-1. 1008The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8> 1009flag on this scalar, which may disagree with it. 1010 1011For direct examination of the buffer, the variable 1012L</PL_parser-E<gt>bufend> points to the end of the buffer. The current 1013lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use 1014of these pointers is usually preferable to examination of the scalar 1015through normal scalar means. 1016 1017=for apidoc AmnxUN|char *|PL_parser-E<gt>bufend 1018 1019Direct pointer to the end of the chunk of text currently being lexed, the 1020end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr) 1021+ SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is 1022always located at the end of the buffer, and does not count as part of 1023the buffer's contents. 1024 1025=for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr 1026 1027Points to the current position of lexing inside the lexer buffer. 1028Characters around this point may be freely examined, within 1029the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and 1030L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be 1031interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>. 1032 1033Lexing code (whether in the Perl core or not) moves this pointer past 1034the characters that it consumes. It is also expected to perform some 1035bookkeeping whenever a newline character is consumed. This movement 1036can be more conveniently performed by the function L</lex_read_to>, 1037which handles newlines appropriately. 1038 1039Interpretation of the buffer's octets can be abstracted out by 1040using the slightly higher-level functions L</lex_peek_unichar> and 1041L</lex_read_unichar>. 1042 1043=for apidoc AmnxUN|char *|PL_parser-E<gt>linestart 1044 1045Points to the start of the current line inside the lexer buffer. 1046This is useful for indicating at which column an error occurred, and 1047not much else. This must be updated by any lexing code that consumes 1048a newline; the function L</lex_read_to> handles this detail. 1049 1050=cut 1051*/ 1052 1053/* 1054=for apidoc lex_bufutf8 1055 1056Indicates whether the octets in the lexer buffer 1057(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding 1058of Unicode characters. If not, they should be interpreted as Latin-1 1059characters. This is analogous to the C<SvUTF8> flag for scalars. 1060 1061In UTF-8 mode, it is not guaranteed that the lexer buffer actually 1062contains valid UTF-8. Lexing code must be robust in the face of invalid 1063encoding. 1064 1065The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar 1066is significant, but not the whole story regarding the input character 1067encoding. Normally, when a file is being read, the scalar contains octets 1068and its C<SvUTF8> flag is off, but the octets should be interpreted as 1069UTF-8 if the C<use utf8> pragma is in effect. During a string eval, 1070however, the scalar may have the C<SvUTF8> flag on, and in this case its 1071octets should be interpreted as UTF-8 unless the C<use bytes> pragma 1072is in effect. This logic may change in the future; use this function 1073instead of implementing the logic yourself. 1074 1075=cut 1076*/ 1077 1078bool 1079Perl_lex_bufutf8(pTHX) 1080{ 1081 return UTF; 1082} 1083 1084/* 1085=for apidoc lex_grow_linestr 1086 1087Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate 1088at least C<len> octets (including terminating C<NUL>). Returns a 1089pointer to the reallocated buffer. This is necessary before making 1090any direct modification of the buffer that would increase its length. 1091L</lex_stuff_pvn> provides a more convenient way to insert text into 1092the buffer. 1093 1094Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>; 1095this function updates all of the lexer's variables that point directly 1096into the buffer. 1097 1098=cut 1099*/ 1100 1101char * 1102Perl_lex_grow_linestr(pTHX_ STRLEN len) 1103{ 1104 SV *linestr; 1105 char *buf; 1106 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1107 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos; 1108 bool current; 1109 1110 linestr = PL_parser->linestr; 1111 buf = SvPVX(linestr); 1112 if (len <= SvLEN(linestr)) 1113 return buf; 1114 1115 /* Is the lex_shared linestr SV the same as the current linestr SV? 1116 * Only in this case does re_eval_start need adjusting, since it 1117 * points within lex_shared->ls_linestr's buffer */ 1118 current = ( !PL_parser->lex_shared->ls_linestr 1119 || linestr == PL_parser->lex_shared->ls_linestr); 1120 1121 bufend_pos = PL_parser->bufend - buf; 1122 bufptr_pos = PL_parser->bufptr - buf; 1123 oldbufptr_pos = PL_parser->oldbufptr - buf; 1124 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1125 linestart_pos = PL_parser->linestart - buf; 1126 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1127 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1128 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ? 1129 PL_parser->lex_shared->re_eval_start - buf : 0; 1130 1131 buf = sv_grow(linestr, len); 1132 1133 PL_parser->bufend = buf + bufend_pos; 1134 PL_parser->bufptr = buf + bufptr_pos; 1135 PL_parser->oldbufptr = buf + oldbufptr_pos; 1136 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1137 PL_parser->linestart = buf + linestart_pos; 1138 if (PL_parser->last_uni) 1139 PL_parser->last_uni = buf + last_uni_pos; 1140 if (PL_parser->last_lop) 1141 PL_parser->last_lop = buf + last_lop_pos; 1142 if (current && PL_parser->lex_shared->re_eval_start) 1143 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; 1144 return buf; 1145} 1146 1147/* 1148=for apidoc lex_stuff_pvn 1149 1150Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1151immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1152reallocating the buffer if necessary. This means that lexing code that 1153runs later will see the characters as if they had appeared in the input. 1154It is not recommended to do this as part of normal parsing, and most 1155uses of this facility run the risk of the inserted characters being 1156interpreted in an unintended manner. 1157 1158The string to be inserted is represented by C<len> octets starting 1159at C<pv>. These octets are interpreted as either UTF-8 or Latin-1, 1160according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>. 1161The characters are recoded for the lexer buffer, according to how the 1162buffer is currently being interpreted (L</lex_bufutf8>). If a string 1163to be inserted is available as a Perl scalar, the L</lex_stuff_sv> 1164function is more convenient. 1165 1166=for apidoc Amnh||LEX_STUFF_UTF8 1167 1168=cut 1169*/ 1170 1171void 1172Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) 1173{ 1174 char *bufptr; 1175 PERL_ARGS_ASSERT_LEX_STUFF_PVN; 1176 if (flags & ~(LEX_STUFF_UTF8)) 1177 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); 1178 if (UTF) { 1179 if (flags & LEX_STUFF_UTF8) { 1180 goto plain_copy; 1181 } else { 1182 STRLEN highhalf = variant_under_utf8_count((U8 *) pv, 1183 (U8 *) pv + len); 1184 const char *p, *e = pv+len;; 1185 if (!highhalf) 1186 goto plain_copy; 1187 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); 1188 bufptr = PL_parser->bufptr; 1189 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); 1190 SvCUR_set(PL_parser->linestr, 1191 SvCUR(PL_parser->linestr) + len+highhalf); 1192 PL_parser->bufend += len+highhalf; 1193 for (p = pv; p != e; p++) { 1194 append_utf8_from_native_byte(*p, (U8 **) &bufptr); 1195 } 1196 } 1197 } else { 1198 if (flags & LEX_STUFF_UTF8) { 1199 STRLEN highhalf = 0; 1200 const char *p, *e = pv+len; 1201 for (p = pv; p != e; p++) { 1202 U8 c = (U8)*p; 1203 if (UTF8_IS_ABOVE_LATIN1(c)) { 1204 Perl_croak(aTHX_ "Lexing code attempted to stuff " 1205 "non-Latin-1 character into Latin-1 input"); 1206 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { 1207 p++; 1208 highhalf++; 1209 } else assert(UTF8_IS_INVARIANT(c)); 1210 } 1211 if (!highhalf) 1212 goto plain_copy; 1213 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); 1214 bufptr = PL_parser->bufptr; 1215 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); 1216 SvCUR_set(PL_parser->linestr, 1217 SvCUR(PL_parser->linestr) + len-highhalf); 1218 PL_parser->bufend += len-highhalf; 1219 p = pv; 1220 while (p < e) { 1221 if (UTF8_IS_INVARIANT(*p)) { 1222 *bufptr++ = *p; 1223 p++; 1224 } 1225 else { 1226 assert(p < e -1 ); 1227 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)); 1228 p += 2; 1229 } 1230 } 1231 } else { 1232 plain_copy: 1233 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); 1234 bufptr = PL_parser->bufptr; 1235 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); 1236 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); 1237 PL_parser->bufend += len; 1238 Copy(pv, bufptr, len, char); 1239 } 1240 } 1241} 1242 1243/* 1244=for apidoc lex_stuff_pv 1245 1246Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1247immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1248reallocating the buffer if necessary. This means that lexing code that 1249runs later will see the characters as if they had appeared in the input. 1250It is not recommended to do this as part of normal parsing, and most 1251uses of this facility run the risk of the inserted characters being 1252interpreted in an unintended manner. 1253 1254The string to be inserted is represented by octets starting at C<pv> 1255and continuing to the first nul. These octets are interpreted as either 1256UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set 1257in C<flags>. The characters are recoded for the lexer buffer, according 1258to how the buffer is currently being interpreted (L</lex_bufutf8>). 1259If it is not convenient to nul-terminate a string to be inserted, the 1260L</lex_stuff_pvn> function is more appropriate. 1261 1262=cut 1263*/ 1264 1265void 1266Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags) 1267{ 1268 PERL_ARGS_ASSERT_LEX_STUFF_PV; 1269 lex_stuff_pvn(pv, strlen(pv), flags); 1270} 1271 1272/* 1273=for apidoc lex_stuff_sv 1274 1275Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>), 1276immediately after the current lexing point (L</PL_parser-E<gt>bufptr>), 1277reallocating the buffer if necessary. This means that lexing code that 1278runs later will see the characters as if they had appeared in the input. 1279It is not recommended to do this as part of normal parsing, and most 1280uses of this facility run the risk of the inserted characters being 1281interpreted in an unintended manner. 1282 1283The string to be inserted is the string value of C<sv>. The characters 1284are recoded for the lexer buffer, according to how the buffer is currently 1285being interpreted (L</lex_bufutf8>). If a string to be inserted is 1286not already a Perl scalar, the L</lex_stuff_pvn> function avoids the 1287need to construct a scalar. 1288 1289=cut 1290*/ 1291 1292void 1293Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) 1294{ 1295 char *pv; 1296 STRLEN len; 1297 PERL_ARGS_ASSERT_LEX_STUFF_SV; 1298 if (flags) 1299 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); 1300 pv = SvPV(sv, len); 1301 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); 1302} 1303 1304/* 1305=for apidoc lex_unstuff 1306 1307Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to 1308C<ptr>. Text following C<ptr> will be moved, and the buffer shortened. 1309This hides the discarded text from any lexing code that runs later, 1310as if the text had never appeared. 1311 1312This is not the normal way to consume lexed text. For that, use 1313L</lex_read_to>. 1314 1315=cut 1316*/ 1317 1318void 1319Perl_lex_unstuff(pTHX_ char *ptr) 1320{ 1321 char *buf, *bufend; 1322 STRLEN unstuff_len; 1323 PERL_ARGS_ASSERT_LEX_UNSTUFF; 1324 buf = PL_parser->bufptr; 1325 if (ptr < buf) 1326 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1327 if (ptr == buf) 1328 return; 1329 bufend = PL_parser->bufend; 1330 if (ptr > bufend) 1331 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); 1332 unstuff_len = ptr - buf; 1333 Move(ptr, buf, bufend+1-ptr, char); 1334 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); 1335 PL_parser->bufend = bufend - unstuff_len; 1336} 1337 1338/* 1339=for apidoc lex_read_to 1340 1341Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up 1342to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>, 1343performing the correct bookkeeping whenever a newline character is passed. 1344This is the normal way to consume lexed text. 1345 1346Interpretation of the buffer's octets can be abstracted out by 1347using the slightly higher-level functions L</lex_peek_unichar> and 1348L</lex_read_unichar>. 1349 1350=cut 1351*/ 1352 1353void 1354Perl_lex_read_to(pTHX_ char *ptr) 1355{ 1356 char *s; 1357 PERL_ARGS_ASSERT_LEX_READ_TO; 1358 s = PL_parser->bufptr; 1359 if (ptr < s || ptr > PL_parser->bufend) 1360 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); 1361 for (; s != ptr; s++) 1362 if (*s == '\n') { 1363 COPLINE_INC_WITH_HERELINES; 1364 PL_parser->linestart = s+1; 1365 } 1366 PL_parser->bufptr = ptr; 1367} 1368 1369/* 1370=for apidoc lex_discard_to 1371 1372Discards the first part of the L</PL_parser-E<gt>linestr> buffer, 1373up to C<ptr>. The remaining content of the buffer will be moved, and 1374all pointers into the buffer updated appropriately. C<ptr> must not 1375be later in the buffer than the position of L</PL_parser-E<gt>bufptr>: 1376it is not permitted to discard text that has yet to be lexed. 1377 1378Normally it is not necessarily to do this directly, because it suffices to 1379use the implicit discarding behaviour of L</lex_next_chunk> and things 1380based on it. However, if a token stretches across multiple lines, 1381and the lexing code has kept multiple lines of text in the buffer for 1382that purpose, then after completion of the token it would be wise to 1383explicitly discard the now-unneeded earlier lines, to avoid future 1384multi-line tokens growing the buffer without bound. 1385 1386=cut 1387*/ 1388 1389void 1390Perl_lex_discard_to(pTHX_ char *ptr) 1391{ 1392 char *buf; 1393 STRLEN discard_len; 1394 PERL_ARGS_ASSERT_LEX_DISCARD_TO; 1395 buf = SvPVX(PL_parser->linestr); 1396 if (ptr < buf) 1397 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1398 if (ptr == buf) 1399 return; 1400 if (ptr > PL_parser->bufptr) 1401 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); 1402 discard_len = ptr - buf; 1403 if (PL_parser->oldbufptr < ptr) 1404 PL_parser->oldbufptr = ptr; 1405 if (PL_parser->oldoldbufptr < ptr) 1406 PL_parser->oldoldbufptr = ptr; 1407 if (PL_parser->last_uni && PL_parser->last_uni < ptr) 1408 PL_parser->last_uni = NULL; 1409 if (PL_parser->last_lop && PL_parser->last_lop < ptr) 1410 PL_parser->last_lop = NULL; 1411 Move(ptr, buf, PL_parser->bufend+1-ptr, char); 1412 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); 1413 PL_parser->bufend -= discard_len; 1414 PL_parser->bufptr -= discard_len; 1415 PL_parser->oldbufptr -= discard_len; 1416 PL_parser->oldoldbufptr -= discard_len; 1417 if (PL_parser->last_uni) 1418 PL_parser->last_uni -= discard_len; 1419 if (PL_parser->last_lop) 1420 PL_parser->last_lop -= discard_len; 1421} 1422 1423void 1424Perl_notify_parser_that_changed_to_utf8(pTHX) 1425{ 1426 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from 1427 * off to on. At compile time, this has the effect of entering a 'use 1428 * utf8' section. This means that any input was not previously checked for 1429 * UTF-8 (because it was off), but now we do need to check it, or our 1430 * assumptions about the input being sane could be wrong, and we could 1431 * segfault. This routine just sets a flag so that the next time we look 1432 * at the input we do the well-formed UTF-8 check. If we aren't in the 1433 * proper phase, there may not be a parser object, but if there is, setting 1434 * the flag is harmless */ 1435 1436 if (PL_parser) { 1437 PL_parser->recheck_utf8_validity = TRUE; 1438 } 1439} 1440 1441/* 1442=for apidoc lex_next_chunk 1443 1444Reads in the next chunk of text to be lexed, appending it to 1445L</PL_parser-E<gt>linestr>. This should be called when lexing code has 1446looked to the end of the current chunk and wants to know more. It is 1447usual, but not necessary, for lexing to have consumed the entirety of 1448the current chunk at this time. 1449 1450If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current 1451chunk (i.e., the current chunk has been entirely consumed), normally the 1452current chunk will be discarded at the same time that the new chunk is 1453read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk 1454will not be discarded. If the current chunk has not been entirely 1455consumed, then it will not be discarded regardless of the flag. 1456 1457Returns true if some new text was added to the buffer, or false if the 1458buffer has reached the end of the input text. 1459 1460=for apidoc Amnh||LEX_KEEP_PREVIOUS 1461 1462=cut 1463*/ 1464 1465#define LEX_FAKE_EOF 0x80000000 1466#define LEX_NO_TERM 0x40000000 /* here-doc */ 1467 1468bool 1469Perl_lex_next_chunk(pTHX_ U32 flags) 1470{ 1471 SV *linestr; 1472 char *buf; 1473 STRLEN old_bufend_pos, new_bufend_pos; 1474 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos; 1475 STRLEN linestart_pos, last_uni_pos, last_lop_pos; 1476 bool got_some_for_debugger = 0; 1477 bool got_some; 1478 1479 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) 1480 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); 1481 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat) 1482 return FALSE; 1483 linestr = PL_parser->linestr; 1484 buf = SvPVX(linestr); 1485 if (!(flags & LEX_KEEP_PREVIOUS) 1486 && PL_parser->bufptr == PL_parser->bufend) 1487 { 1488 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; 1489 linestart_pos = 0; 1490 if (PL_parser->last_uni != PL_parser->bufend) 1491 PL_parser->last_uni = NULL; 1492 if (PL_parser->last_lop != PL_parser->bufend) 1493 PL_parser->last_lop = NULL; 1494 last_uni_pos = last_lop_pos = 0; 1495 *buf = 0; 1496 SvCUR_set(linestr, 0); 1497 } else { 1498 old_bufend_pos = PL_parser->bufend - buf; 1499 bufptr_pos = PL_parser->bufptr - buf; 1500 oldbufptr_pos = PL_parser->oldbufptr - buf; 1501 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; 1502 linestart_pos = PL_parser->linestart - buf; 1503 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 1504 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 1505 } 1506 if (flags & LEX_FAKE_EOF) { 1507 goto eof; 1508 } else if (!PL_parser->rsfp && !PL_parser->filtered) { 1509 got_some = 0; 1510 } else if (filter_gets(linestr, old_bufend_pos)) { 1511 got_some = 1; 1512 got_some_for_debugger = 1; 1513 } else if (flags & LEX_NO_TERM) { 1514 got_some = 0; 1515 } else { 1516 if (!SvPOK(linestr)) /* can get undefined by filter_gets */ 1517 SvPVCLEAR(linestr); 1518 eof: 1519 /* End of real input. Close filehandle (unless it was STDIN), 1520 * then add implicit termination. 1521 */ 1522 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) 1523 PerlIO_clearerr(PL_parser->rsfp); 1524 else if (PL_parser->rsfp) 1525 (void)PerlIO_close(PL_parser->rsfp); 1526 PL_parser->rsfp = NULL; 1527 PL_parser->in_pod = PL_parser->filtered = 0; 1528 if (!PL_in_eval && PL_minus_p) { 1529 sv_catpvs(linestr, 1530 /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); 1531 PL_minus_n = PL_minus_p = 0; 1532 } else if (!PL_in_eval && PL_minus_n) { 1533 sv_catpvs(linestr, /*{*/";}"); 1534 PL_minus_n = 0; 1535 } else 1536 sv_catpvs(linestr, ";"); 1537 got_some = 1; 1538 } 1539 buf = SvPVX(linestr); 1540 new_bufend_pos = SvCUR(linestr); 1541 PL_parser->bufend = buf + new_bufend_pos; 1542 PL_parser->bufptr = buf + bufptr_pos; 1543 1544 if (UTF) { 1545 const U8* first_bad_char_loc; 1546 if (UNLIKELY(! is_utf8_string_loc( 1547 (U8 *) PL_parser->bufptr, 1548 PL_parser->bufend - PL_parser->bufptr, 1549 &first_bad_char_loc))) 1550 { 1551 _force_out_malformed_utf8_message(first_bad_char_loc, 1552 (U8 *) PL_parser->bufend, 1553 0, 1554 1 /* 1 means die */ ); 1555 NOT_REACHED; /* NOTREACHED */ 1556 } 1557 } 1558 1559 PL_parser->oldbufptr = buf + oldbufptr_pos; 1560 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 1561 PL_parser->linestart = buf + linestart_pos; 1562 if (PL_parser->last_uni) 1563 PL_parser->last_uni = buf + last_uni_pos; 1564 if (PL_parser->last_lop) 1565 PL_parser->last_lop = buf + last_lop_pos; 1566 if (PL_parser->preambling != NOLINE) { 1567 CopLINE_set(PL_curcop, PL_parser->preambling + 1); 1568 PL_parser->preambling = NOLINE; 1569 } 1570 if ( got_some_for_debugger 1571 && PERLDB_LINE_OR_SAVESRC 1572 && PL_curstash != PL_debstash) 1573 { 1574 /* debugger active and we're not compiling the debugger code, 1575 * so store the line into the debugger's array of lines 1576 */ 1577 update_debugger_info(NULL, buf+old_bufend_pos, 1578 new_bufend_pos-old_bufend_pos); 1579 } 1580 return got_some; 1581} 1582 1583/* 1584=for apidoc lex_peek_unichar 1585 1586Looks ahead one (Unicode) character in the text currently being lexed. 1587Returns the codepoint (unsigned integer value) of the next character, 1588or -1 if lexing has reached the end of the input text. To consume the 1589peeked character, use L</lex_read_unichar>. 1590 1591If the next character is in (or extends into) the next chunk of input 1592text, the next chunk will be read in. Normally the current chunk will be 1593discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1594bit set, then the current chunk will not be discarded. 1595 1596If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1597is encountered, an exception is generated. 1598 1599=cut 1600*/ 1601 1602I32 1603Perl_lex_peek_unichar(pTHX_ U32 flags) 1604{ 1605 char *s, *bufend; 1606 if (flags & ~(LEX_KEEP_PREVIOUS)) 1607 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); 1608 s = PL_parser->bufptr; 1609 bufend = PL_parser->bufend; 1610 if (UTF) { 1611 U8 head; 1612 I32 unichar; 1613 STRLEN len, retlen; 1614 if (s == bufend) { 1615 if (!lex_next_chunk(flags)) 1616 return -1; 1617 s = PL_parser->bufptr; 1618 bufend = PL_parser->bufend; 1619 } 1620 head = (U8)*s; 1621 if (UTF8_IS_INVARIANT(head)) 1622 return head; 1623 if (UTF8_IS_START(head)) { 1624 len = UTF8SKIP(&head); 1625 while ((STRLEN)(bufend-s) < len) { 1626 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) 1627 break; 1628 s = PL_parser->bufptr; 1629 bufend = PL_parser->bufend; 1630 } 1631 } 1632 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); 1633 if (retlen == (STRLEN)-1) { 1634 _force_out_malformed_utf8_message((U8 *) s, 1635 (U8 *) bufend, 1636 0, 1637 1 /* 1 means die */ ); 1638 NOT_REACHED; /* NOTREACHED */ 1639 } 1640 return unichar; 1641 } else { 1642 if (s == bufend) { 1643 if (!lex_next_chunk(flags)) 1644 return -1; 1645 s = PL_parser->bufptr; 1646 } 1647 return (U8)*s; 1648 } 1649} 1650 1651/* 1652=for apidoc lex_read_unichar 1653 1654Reads the next (Unicode) character in the text currently being lexed. 1655Returns the codepoint (unsigned integer value) of the character read, 1656and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1 1657if lexing has reached the end of the input text. To non-destructively 1658examine the next character, use L</lex_peek_unichar> instead. 1659 1660If the next character is in (or extends into) the next chunk of input 1661text, the next chunk will be read in. Normally the current chunk will be 1662discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> 1663bit set, then the current chunk will not be discarded. 1664 1665If the input is being interpreted as UTF-8 and a UTF-8 encoding error 1666is encountered, an exception is generated. 1667 1668=cut 1669*/ 1670 1671I32 1672Perl_lex_read_unichar(pTHX_ U32 flags) 1673{ 1674 I32 c; 1675 if (flags & ~(LEX_KEEP_PREVIOUS)) 1676 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); 1677 c = lex_peek_unichar(flags); 1678 if (c != -1) { 1679 if (c == '\n') 1680 COPLINE_INC_WITH_HERELINES; 1681 if (UTF) 1682 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); 1683 else 1684 ++(PL_parser->bufptr); 1685 } 1686 return c; 1687} 1688 1689/* 1690=for apidoc lex_read_space 1691 1692Reads optional spaces, in Perl style, in the text currently being 1693lexed. The spaces may include ordinary whitespace characters and 1694Perl-style comments. C<#line> directives are processed if encountered. 1695L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points 1696at a non-space character (or the end of the input text). 1697 1698If spaces extend into the next chunk of input text, the next chunk will 1699be read in. Normally the current chunk will be discarded at the same 1700time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current 1701chunk will not be discarded. 1702 1703=cut 1704*/ 1705 1706#define LEX_NO_INCLINE 0x40000000 1707#define LEX_NO_NEXT_CHUNK 0x80000000 1708 1709void 1710Perl_lex_read_space(pTHX_ U32 flags) 1711{ 1712 char *s, *bufend; 1713 const bool can_incline = !(flags & LEX_NO_INCLINE); 1714 bool need_incline = 0; 1715 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) 1716 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); 1717 s = PL_parser->bufptr; 1718 bufend = PL_parser->bufend; 1719 while (1) { 1720 char c = *s; 1721 if (c == '#') { 1722 do { 1723 c = *++s; 1724 } while (!(c == '\n' || (c == 0 && s == bufend))); 1725 } else if (c == '\n') { 1726 s++; 1727 if (can_incline) { 1728 PL_parser->linestart = s; 1729 if (s == bufend) 1730 need_incline = 1; 1731 else 1732 incline(s, bufend); 1733 } 1734 } else if (isSPACE(c)) { 1735 s++; 1736 } else if (c == 0 && s == bufend) { 1737 bool got_more; 1738 line_t l; 1739 if (flags & LEX_NO_NEXT_CHUNK) 1740 break; 1741 PL_parser->bufptr = s; 1742 l = CopLINE(PL_curcop); 1743 CopLINE(PL_curcop) += PL_parser->herelines + 1; 1744 got_more = lex_next_chunk(flags); 1745 CopLINE_set(PL_curcop, l); 1746 s = PL_parser->bufptr; 1747 bufend = PL_parser->bufend; 1748 if (!got_more) 1749 break; 1750 if (can_incline && need_incline && PL_parser->rsfp) { 1751 incline(s, bufend); 1752 need_incline = 0; 1753 } 1754 } else if (!c) { 1755 s++; 1756 } else { 1757 break; 1758 } 1759 } 1760 PL_parser->bufptr = s; 1761} 1762 1763/* 1764 1765=for apidoc validate_proto 1766 1767This function performs syntax checking on a prototype, C<proto>. 1768If C<warn> is true, any illegal characters or mismatched brackets 1769will trigger illegalproto warnings, declaring that they were 1770detected in the prototype for C<name>. 1771 1772The return value is C<true> if this is a valid prototype, and 1773C<false> if it is not, regardless of whether C<warn> was C<true> or 1774C<false>. 1775 1776Note that C<NULL> is a valid C<proto> and will always return C<true>. 1777 1778=cut 1779 1780 */ 1781 1782bool 1783Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash) 1784{ 1785 STRLEN len, origlen; 1786 char *p; 1787 bool bad_proto = FALSE; 1788 bool in_brackets = FALSE; 1789 bool after_slash = FALSE; 1790 char greedy_proto = ' '; 1791 bool proto_after_greedy_proto = FALSE; 1792 bool must_be_last = FALSE; 1793 bool underscore = FALSE; 1794 bool bad_proto_after_underscore = FALSE; 1795 1796 PERL_ARGS_ASSERT_VALIDATE_PROTO; 1797 1798 if (!proto) 1799 return TRUE; 1800 1801 p = SvPV(proto, len); 1802 origlen = len; 1803 for (; len--; p++) { 1804 if (!isSPACE(*p)) { 1805 if (must_be_last) 1806 proto_after_greedy_proto = TRUE; 1807 if (underscore) { 1808 if (!memCHRs(";@%", *p)) 1809 bad_proto_after_underscore = TRUE; 1810 underscore = FALSE; 1811 } 1812 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') { 1813 bad_proto = TRUE; 1814 } 1815 else { 1816 if (*p == '[') 1817 in_brackets = TRUE; 1818 else if (*p == ']') 1819 in_brackets = FALSE; 1820 else if ((*p == '@' || *p == '%') 1821 && !after_slash 1822 && !in_brackets ) 1823 { 1824 must_be_last = TRUE; 1825 greedy_proto = *p; 1826 } 1827 else if (*p == '_') 1828 underscore = TRUE; 1829 } 1830 if (*p == '\\') 1831 after_slash = TRUE; 1832 else 1833 after_slash = FALSE; 1834 } 1835 } 1836 1837 if (warn) { 1838 SV *tmpsv = newSVpvs_flags("", SVs_TEMP); 1839 p -= origlen; 1840 p = SvUTF8(proto) 1841 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), 1842 origlen, UNI_DISPLAY_ISPRINT) 1843 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); 1844 1845 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) { 1846 SV *name2 = sv_2mortal(newSVsv(PL_curstname)); 1847 sv_catpvs(name2, "::"); 1848 sv_catsv(name2, (SV *)name); 1849 name = name2; 1850 } 1851 1852 if (proto_after_greedy_proto) 1853 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1854 "Prototype after '%c' for %" SVf " : %s", 1855 greedy_proto, SVfARG(name), p); 1856 if (in_brackets) 1857 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1858 "Missing ']' in prototype for %" SVf " : %s", 1859 SVfARG(name), p); 1860 if (bad_proto) 1861 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1862 "Illegal character in prototype for %" SVf " : %s", 1863 SVfARG(name), p); 1864 if (bad_proto_after_underscore) 1865 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), 1866 "Illegal character after '_' in prototype for %" SVf " : %s", 1867 SVfARG(name), p); 1868 } 1869 1870 return (! (proto_after_greedy_proto || bad_proto) ); 1871} 1872 1873/* 1874 * S_incline 1875 * This subroutine has nothing to do with tilting, whether at windmills 1876 * or pinball tables. Its name is short for "increment line". It 1877 * increments the current line number in CopLINE(PL_curcop) and checks 1878 * to see whether the line starts with a comment of the form 1879 * # line 500 "foo.pm" 1880 * If so, it sets the current line number and file to the values in the comment. 1881 */ 1882 1883STATIC void 1884S_incline(pTHX_ const char *s, const char *end) 1885{ 1886 const char *t; 1887 const char *n; 1888 const char *e; 1889 line_t line_num; 1890 UV uv; 1891 1892 PERL_ARGS_ASSERT_INCLINE; 1893 1894 assert(end >= s); 1895 1896 COPLINE_INC_WITH_HERELINES; 1897 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL 1898 && s+1 == PL_bufend && *s == ';') { 1899 /* fake newline in string eval */ 1900 CopLINE_dec(PL_curcop); 1901 return; 1902 } 1903 if (*s++ != '#') 1904 return; 1905 while (SPACE_OR_TAB(*s)) 1906 s++; 1907 if (memBEGINs(s, (STRLEN) (end - s), "line")) 1908 s += sizeof("line") - 1; 1909 else 1910 return; 1911 if (SPACE_OR_TAB(*s)) 1912 s++; 1913 else 1914 return; 1915 while (SPACE_OR_TAB(*s)) 1916 s++; 1917 if (!isDIGIT(*s)) 1918 return; 1919 1920 n = s; 1921 while (isDIGIT(*s)) 1922 s++; 1923 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') 1924 return; 1925 while (SPACE_OR_TAB(*s)) 1926 s++; 1927 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) { 1928 s++; 1929 e = t + 1; 1930 } 1931 else { 1932 t = s; 1933 while (*t && !isSPACE(*t)) 1934 t++; 1935 e = t; 1936 } 1937 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') 1938 e++; 1939 if (*e != '\n' && *e != '\0') 1940 return; /* false alarm */ 1941 1942 if (!grok_atoUV(n, &uv, &e)) 1943 return; 1944 line_num = ((line_t)uv) - 1; 1945 1946 if (t - s > 0) { 1947 const STRLEN len = t - s; 1948 1949 if (!PL_rsfp && !PL_parser->filtered) { 1950 /* must copy *{"::_<(eval N)[oldfilename:L]"} 1951 * to *{"::_<newfilename"} */ 1952 /* However, the long form of evals is only turned on by the 1953 debugger - usually they're "(eval %lu)" */ 1954 GV * const cfgv = CopFILEGV(PL_curcop); 1955 if (cfgv) { 1956 char smallbuf[128]; 1957 STRLEN tmplen2 = len; 1958 char *tmpbuf2; 1959 GV *gv2; 1960 1961 if (tmplen2 + 2 <= sizeof smallbuf) 1962 tmpbuf2 = smallbuf; 1963 else 1964 Newx(tmpbuf2, tmplen2 + 2, char); 1965 1966 tmpbuf2[0] = '_'; 1967 tmpbuf2[1] = '<'; 1968 1969 memcpy(tmpbuf2 + 2, s, tmplen2); 1970 tmplen2 += 2; 1971 1972 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE); 1973 if (!isGV(gv2)) { 1974 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); 1975 /* adjust ${"::_<newfilename"} to store the new file name */ 1976 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2); 1977 /* The line number may differ. If that is the case, 1978 alias the saved lines that are in the array. 1979 Otherwise alias the whole array. */ 1980 if (CopLINE(PL_curcop) == line_num) { 1981 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv))); 1982 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv))); 1983 } 1984 else if (GvAV(cfgv)) { 1985 AV * const av = GvAV(cfgv); 1986 const line_t start = CopLINE(PL_curcop)+1; 1987 SSize_t items = AvFILLp(av) - start; 1988 if (items > 0) { 1989 AV * const av2 = GvAVn(gv2); 1990 SV **svp = AvARRAY(av) + start; 1991 Size_t l = line_num+1; 1992 while (items-- && l < SSize_t_MAX && l == (line_t)l) 1993 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++)); 1994 } 1995 } 1996 } 1997 1998 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); 1999 } 2000 } 2001 CopFILE_free(PL_curcop); 2002 CopFILE_setn(PL_curcop, s, len); 2003 } 2004 CopLINE_set(PL_curcop, line_num); 2005} 2006 2007STATIC void 2008S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) 2009{ 2010 AV *av = CopFILEAVx(PL_curcop); 2011 if (av) { 2012 SV * sv; 2013 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG); 2014 else { 2015 sv = *av_fetch(av, 0, 1); 2016 SvUPGRADE(sv, SVt_PVMG); 2017 } 2018 if (!SvPOK(sv)) SvPVCLEAR(sv); 2019 if (orig_sv) 2020 sv_catsv(sv, orig_sv); 2021 else 2022 sv_catpvn(sv, buf, len); 2023 if (!SvIOK(sv)) { 2024 (void)SvIOK_on(sv); 2025 SvIV_set(sv, 0); 2026 } 2027 if (PL_parser->preambling == NOLINE) 2028 av_store(av, CopLINE(PL_curcop), sv); 2029 } 2030} 2031 2032/* 2033 * skipspace 2034 * Called to gobble the appropriate amount and type of whitespace. 2035 * Skips comments as well. 2036 * Returns the next character after the whitespace that is skipped. 2037 * 2038 * peekspace 2039 * Same thing, but look ahead without incrementing line numbers or 2040 * adjusting PL_linestart. 2041 */ 2042 2043#define skipspace(s) skipspace_flags(s, 0) 2044#define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE) 2045 2046char * 2047Perl_skipspace_flags(pTHX_ char *s, U32 flags) 2048{ 2049 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; 2050 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 2051 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s)) 2052 s++; 2053 } else { 2054 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); 2055 PL_bufptr = s; 2056 lex_read_space(flags | LEX_KEEP_PREVIOUS | 2057 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ? 2058 LEX_NO_NEXT_CHUNK : 0)); 2059 s = PL_bufptr; 2060 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; 2061 if (PL_linestart > PL_bufptr) 2062 PL_bufptr = PL_linestart; 2063 return s; 2064 } 2065 return s; 2066} 2067 2068/* 2069 * S_check_uni 2070 * Check the unary operators to ensure there's no ambiguity in how they're 2071 * used. An ambiguous piece of code would be: 2072 * rand + 5 2073 * This doesn't mean rand() + 5. Because rand() is a unary operator, 2074 * the +5 is its argument. 2075 */ 2076 2077STATIC void 2078S_check_uni(pTHX) 2079{ 2080 const char *s; 2081 2082 if (PL_oldoldbufptr != PL_last_uni) 2083 return; 2084 while (isSPACE(*PL_last_uni)) 2085 PL_last_uni++; 2086 s = PL_last_uni; 2087 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-') 2088 s += UTF ? UTF8SKIP(s) : 1; 2089 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s)) 2090 return; 2091 2092 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 2093 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous", 2094 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni)); 2095} 2096 2097/* 2098 * LOP : macro to build a list operator. Its behaviour has been replaced 2099 * with a subroutine, S_lop() for which LOP is just another name. 2100 */ 2101 2102#define LOP(f,x) return lop(f,x,s) 2103 2104/* 2105 * S_lop 2106 * Build a list operator (or something that might be one). The rules: 2107 * - if we have a next token, then it's a list operator (no parens) for 2108 * which the next token has already been parsed; e.g., 2109 * sort foo @args 2110 * sort foo (@args) 2111 * - if the next thing is an opening paren, then it's a function 2112 * - else it's a list operator 2113 */ 2114 2115STATIC I32 2116S_lop(pTHX_ I32 f, U8 x, char *s) 2117{ 2118 PERL_ARGS_ASSERT_LOP; 2119 2120 pl_yylval.ival = f; 2121 CLINE; 2122 PL_bufptr = s; 2123 PL_last_lop = PL_oldbufptr; 2124 PL_last_lop_op = (OPCODE)f; 2125 if (PL_nexttoke) 2126 goto lstop; 2127 PL_expect = x; 2128 if (*s == '(') 2129 return REPORT(FUNC); 2130 s = skipspace(s); 2131 if (*s == '(') 2132 return REPORT(FUNC); 2133 else { 2134 lstop: 2135 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 2136 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 2137 return REPORT(LSTOP); 2138 } 2139} 2140 2141/* 2142 * S_force_next 2143 * When the lexer realizes it knows the next token (for instance, 2144 * it is reordering tokens for the parser) then it can call S_force_next 2145 * to know what token to return the next time the lexer is called. Caller 2146 * will need to set PL_nextval[] and possibly PL_expect to ensure 2147 * the lexer handles the token correctly. 2148 */ 2149 2150STATIC void 2151S_force_next(pTHX_ I32 type) 2152{ 2153#ifdef DEBUGGING 2154 if (DEBUG_T_TEST) { 2155 PerlIO_printf(Perl_debug_log, "### forced token:\n"); 2156 tokereport(type, &NEXTVAL_NEXTTOKE); 2157 } 2158#endif 2159 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype)); 2160 PL_nexttype[PL_nexttoke] = type; 2161 PL_nexttoke++; 2162} 2163 2164/* 2165 * S_postderef 2166 * 2167 * This subroutine handles postfix deref syntax after the arrow has already 2168 * been emitted. @* $* etc. are emitted as two separate tokens right here. 2169 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits 2170 * only the first, leaving yylex to find the next. 2171 */ 2172 2173static int 2174S_postderef(pTHX_ int const funny, char const next) 2175{ 2176 assert(funny == DOLSHARP 2177 || funny == PERLY_DOLLAR 2178 || funny == PERLY_SNAIL 2179 || funny == PERLY_PERCENT_SIGN 2180 || funny == PERLY_AMPERSAND 2181 || funny == PERLY_STAR 2182 ); 2183 if (next == '*') { 2184 PL_expect = XOPERATOR; 2185 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 2186 assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny); 2187 PL_lex_state = LEX_INTERPEND; 2188 if (PERLY_SNAIL == funny) 2189 force_next(POSTJOIN); 2190 } 2191 force_next(PERLY_STAR); 2192 PL_bufptr+=2; 2193 } 2194 else { 2195 if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL 2196 && !PL_lex_brackets) 2197 PL_lex_dojoin = 2; 2198 PL_expect = XOPERATOR; 2199 PL_bufptr++; 2200 } 2201 return funny; 2202} 2203 2204void 2205Perl_yyunlex(pTHX) 2206{ 2207 int yyc = PL_parser->yychar; 2208 if (yyc != YYEMPTY) { 2209 if (yyc) { 2210 NEXTVAL_NEXTTOKE = PL_parser->yylval; 2211 if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) { 2212 PL_lex_allbrackets--; 2213 PL_lex_brackets--; 2214 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); 2215 } else if (yyc == PERLY_PAREN_OPEN) { 2216 PL_lex_allbrackets--; 2217 yyc |= (2<<24); 2218 } 2219 force_next(yyc); 2220 } 2221 PL_parser->yychar = YYEMPTY; 2222 } 2223} 2224 2225STATIC SV * 2226S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) 2227{ 2228 SV * const sv = newSVpvn_utf8(start, len, 2229 ! IN_BYTES 2230 && UTF 2231 && len != 0 2232 && is_utf8_non_invariant_string((const U8*)start, len)); 2233 return sv; 2234} 2235 2236/* 2237 * S_force_word 2238 * When the lexer knows the next thing is a word (for instance, it has 2239 * just seen -> and it knows that the next char is a word char, then 2240 * it calls S_force_word to stick the next word into the PL_nexttoke/val 2241 * lookahead. 2242 * 2243 * Arguments: 2244 * char *start : buffer position (must be within PL_linestr) 2245 * int token : PL_next* will be this type of bare word 2246 * (e.g., METHCALL0,BAREWORD) 2247 * int check_keyword : if true, Perl checks to make sure the word isn't 2248 * a keyword (do this if the word is a label, e.g. goto FOO) 2249 * int allow_pack : if true, : characters will also be allowed (require, 2250 * use, etc. do this) 2251 */ 2252 2253STATIC char * 2254S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) 2255{ 2256 char *s; 2257 STRLEN len; 2258 2259 PERL_ARGS_ASSERT_FORCE_WORD; 2260 2261 start = skipspace(start); 2262 s = start; 2263 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 2264 || (allow_pack && *s == ':' && s[1] == ':') ) 2265 { 2266 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len, allow_pack); 2267 if (check_keyword) { 2268 char *s2 = PL_tokenbuf; 2269 STRLEN len2 = len; 2270 if (allow_pack && memBEGINPs(s2, len, "CORE::")) { 2271 s2 += sizeof("CORE::") - 1; 2272 len2 -= sizeof("CORE::") - 1; 2273 } 2274 if (keyword(s2, len2, 0)) 2275 return start; 2276 } 2277 if (token == METHCALL0) { 2278 s = skipspace(s); 2279 if (*s == '(') 2280 PL_expect = XTERM; 2281 else { 2282 PL_expect = XOPERATOR; 2283 } 2284 } 2285 NEXTVAL_NEXTTOKE.opval 2286 = newSVOP(OP_CONST,0, 2287 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 2288 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 2289 force_next(token); 2290 } 2291 return s; 2292} 2293 2294/* 2295 * S_force_ident 2296 * Called when the lexer wants $foo *foo &foo etc, but the program 2297 * text only contains the "foo" portion. The first argument is a pointer 2298 * to the "foo", and the second argument is the type symbol to prefix. 2299 * Forces the next token to be a "BAREWORD". 2300 * Creates the symbol if it didn't already exist (via gv_fetchpv()). 2301 */ 2302 2303STATIC void 2304S_force_ident(pTHX_ const char *s, int kind) 2305{ 2306 PERL_ARGS_ASSERT_FORCE_IDENT; 2307 2308 if (s[0]) { 2309 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ 2310 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len, 2311 UTF ? SVf_UTF8 : 0)); 2312 NEXTVAL_NEXTTOKE.opval = o; 2313 force_next(BAREWORD); 2314 if (kind) { 2315 o->op_private = OPpCONST_ENTERED; 2316 /* XXX see note in pp_entereval() for why we forgo typo 2317 warnings if the symbol must be introduced in an eval. 2318 GSAR 96-10-12 */ 2319 gv_fetchpvn_flags(s, len, 2320 (PL_in_eval ? GV_ADDMULTI 2321 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), 2322 kind == PERLY_DOLLAR ? SVt_PV : 2323 kind == PERLY_SNAIL ? SVt_PVAV : 2324 kind == PERLY_PERCENT_SIGN ? SVt_PVHV : 2325 SVt_PVGV 2326 ); 2327 } 2328 } 2329} 2330 2331static void 2332S_force_ident_maybe_lex(pTHX_ char pit) 2333{ 2334 NEXTVAL_NEXTTOKE.ival = pit; 2335 force_next('p'); 2336} 2337 2338NV 2339Perl_str_to_version(pTHX_ SV *sv) 2340{ 2341 NV retval = 0.0; 2342 NV nshift = 1.0; 2343 STRLEN len; 2344 const char *start = SvPV_const(sv,len); 2345 const char * const end = start + len; 2346 const bool utf = cBOOL(SvUTF8(sv)); 2347 2348 PERL_ARGS_ASSERT_STR_TO_VERSION; 2349 2350 while (start < end) { 2351 STRLEN skip; 2352 UV n; 2353 if (utf) 2354 n = utf8n_to_uvchr((U8*)start, len, &skip, 0); 2355 else { 2356 n = *(U8*)start; 2357 skip = 1; 2358 } 2359 retval += ((NV)n)/nshift; 2360 start += skip; 2361 nshift *= 1000; 2362 } 2363 return retval; 2364} 2365 2366/* 2367 * S_force_version 2368 * Forces the next token to be a version number. 2369 * If the next token appears to be an invalid version number, (e.g. "v2b"), 2370 * and if "guessing" is TRUE, then no new token is created (and the caller 2371 * must use an alternative parsing method). 2372 */ 2373 2374STATIC char * 2375S_force_version(pTHX_ char *s, int guessing) 2376{ 2377 OP *version = NULL; 2378 char *d; 2379 2380 PERL_ARGS_ASSERT_FORCE_VERSION; 2381 2382 s = skipspace(s); 2383 2384 d = s; 2385 if (*d == 'v') 2386 d++; 2387 if (isDIGIT(*d)) { 2388 while (isDIGIT(*d) || *d == '_' || *d == '.') 2389 d++; 2390 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { 2391 SV *ver; 2392 s = scan_num(s, &pl_yylval); 2393 version = pl_yylval.opval; 2394 ver = cSVOPx(version)->op_sv; 2395 if (SvPOK(ver) && !SvNIOK(ver)) { 2396 SvUPGRADE(ver, SVt_PVNV); 2397 SvNV_set(ver, str_to_version(ver)); 2398 SvNOK_on(ver); /* hint that it is a version */ 2399 } 2400 } 2401 else if (guessing) { 2402 return s; 2403 } 2404 } 2405 2406 /* NOTE: The parser sees the package name and the VERSION swapped */ 2407 NEXTVAL_NEXTTOKE.opval = version; 2408 force_next(BAREWORD); 2409 2410 return s; 2411} 2412 2413/* 2414 * S_force_strict_version 2415 * Forces the next token to be a version number using strict syntax rules. 2416 */ 2417 2418STATIC char * 2419S_force_strict_version(pTHX_ char *s) 2420{ 2421 OP *version = NULL; 2422 const char *errstr = NULL; 2423 2424 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; 2425 2426 while (isSPACE(*s)) /* leading whitespace */ 2427 s++; 2428 2429 if (is_STRICT_VERSION(s,&errstr)) { 2430 SV *ver = newSV_type(SVt_NULL); 2431 s = (char *)scan_version(s, ver, 0); 2432 version = newSVOP(OP_CONST, 0, ver); 2433 } 2434 else if ((*s != ';' && *s != ':' && *s != '{' && *s != '}' ) 2435 && (s = skipspace(s), (*s != ';' && *s != ':' && *s != '{' && *s != '}' ))) 2436 { 2437 PL_bufptr = s; 2438 if (errstr) 2439 yyerror(errstr); /* version required */ 2440 return s; 2441 } 2442 2443 /* NOTE: The parser sees the package name and the VERSION swapped */ 2444 NEXTVAL_NEXTTOKE.opval = version; 2445 force_next(BAREWORD); 2446 2447 return s; 2448} 2449 2450/* 2451 * S_tokeq 2452 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv', 2453 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is 2454 * unchanged, and a new SV containing the modified input is returned. 2455 */ 2456 2457STATIC SV * 2458S_tokeq(pTHX_ SV *sv) 2459{ 2460 char *s; 2461 char *send; 2462 char *d; 2463 SV *pv = sv; 2464 2465 PERL_ARGS_ASSERT_TOKEQ; 2466 2467 assert (SvPOK(sv)); 2468 assert (SvLEN(sv)); 2469 assert (!SvIsCOW(sv)); 2470 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */ 2471 goto finish; 2472 s = SvPVX(sv); 2473 send = SvEND(sv); 2474 /* This is relying on the SV being "well formed" with a trailing '\0' */ 2475 while (s < send && !(*s == '\\' && s[1] == '\\')) 2476 s++; 2477 if (s == send) 2478 goto finish; 2479 d = s; 2480 if ( PL_hints & HINT_NEW_STRING ) { 2481 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv), 2482 SVs_TEMP | SvUTF8(sv)); 2483 } 2484 while (s < send) { 2485 if (*s == '\\') { 2486 if (s + 1 < send && (s[1] == '\\')) 2487 s++; /* all that, just for this */ 2488 } 2489 *d++ = *s++; 2490 } 2491 *d = '\0'; 2492 SvCUR_set(sv, d - SvPVX_const(sv)); 2493 finish: 2494 if ( PL_hints & HINT_NEW_STRING ) 2495 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL); 2496 return sv; 2497} 2498 2499/* 2500 * Now come three functions related to double-quote context, 2501 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when 2502 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They 2503 * interact with PL_lex_state, and create fake ( ... ) argument lists 2504 * to handle functions and concatenation. 2505 * For example, 2506 * "foo\lbar" 2507 * is tokenised as 2508 * stringify ( const[foo] concat lcfirst ( const[bar] ) ) 2509 */ 2510 2511/* 2512 * S_sublex_start 2513 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST). 2514 * 2515 * Pattern matching will set PL_lex_op to the pattern-matching op to 2516 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise). 2517 * 2518 * OP_CONST is easy--just make the new op and return. 2519 * 2520 * Everything else becomes a FUNC. 2521 * 2522 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we 2523 * had an OP_CONST. This just sets us up for a 2524 * call to S_sublex_push(). 2525 */ 2526 2527STATIC I32 2528S_sublex_start(pTHX) 2529{ 2530 const I32 op_type = pl_yylval.ival; 2531 2532 if (op_type == OP_NULL) { 2533 pl_yylval.opval = PL_lex_op; 2534 PL_lex_op = NULL; 2535 return THING; 2536 } 2537 if (op_type == OP_CONST) { 2538 SV *sv = PL_lex_stuff; 2539 PL_lex_stuff = NULL; 2540 sv = tokeq(sv); 2541 2542 if (SvTYPE(sv) == SVt_PVIV) { 2543 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ 2544 STRLEN len; 2545 const char * const p = SvPV_const(sv, len); 2546 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); 2547 SvREFCNT_dec(sv); 2548 sv = nsv; 2549 } 2550 pl_yylval.opval = newSVOP(op_type, 0, sv); 2551 return THING; 2552 } 2553 2554 PL_parser->lex_super_state = PL_lex_state; 2555 PL_parser->lex_sub_inwhat = (U16)op_type; 2556 PL_parser->lex_sub_op = PL_lex_op; 2557 PL_parser->sub_no_recover = FALSE; 2558 PL_parser->sub_error_count = PL_error_count; 2559 PL_lex_state = LEX_INTERPPUSH; 2560 2561 PL_expect = XTERM; 2562 if (PL_lex_op) { 2563 pl_yylval.opval = PL_lex_op; 2564 PL_lex_op = NULL; 2565 return PMFUNC; 2566 } 2567 else 2568 return FUNC; 2569} 2570 2571/* 2572 * S_sublex_push 2573 * Create a new scope to save the lexing state. The scope will be 2574 * ended in S_sublex_done. Returns a '(', starting the function arguments 2575 * to the uc, lc, etc. found before. 2576 * Sets PL_lex_state to LEX_INTERPCONCAT. 2577 */ 2578 2579STATIC I32 2580S_sublex_push(pTHX) 2581{ 2582 LEXSHARED *shared; 2583 const bool is_heredoc = PL_multi_close == '<'; 2584 ENTER; 2585 2586 PL_lex_state = PL_parser->lex_super_state; 2587 SAVEI8(PL_lex_dojoin); 2588 SAVEI32(PL_lex_brackets); 2589 SAVEI32(PL_lex_allbrackets); 2590 SAVEI32(PL_lex_formbrack); 2591 SAVEI8(PL_lex_fakeeof); 2592 SAVEI32(PL_lex_casemods); 2593 SAVEI32(PL_lex_starts); 2594 SAVEI8(PL_lex_state); 2595 SAVESPTR(PL_lex_repl); 2596 SAVEVPTR(PL_lex_inpat); 2597 SAVEI16(PL_lex_inwhat); 2598 if (is_heredoc) 2599 { 2600 SAVECOPLINE(PL_curcop); 2601 SAVEI32(PL_multi_end); 2602 SAVEI32(PL_parser->herelines); 2603 PL_parser->herelines = 0; 2604 } 2605 SAVEIV(PL_multi_close); 2606 SAVEPPTR(PL_bufptr); 2607 SAVEPPTR(PL_bufend); 2608 SAVEPPTR(PL_oldbufptr); 2609 SAVEPPTR(PL_oldoldbufptr); 2610 SAVEPPTR(PL_last_lop); 2611 SAVEPPTR(PL_last_uni); 2612 SAVEPPTR(PL_linestart); 2613 SAVESPTR(PL_linestr); 2614 SAVEGENERICPV(PL_lex_brackstack); 2615 SAVEGENERICPV(PL_lex_casestack); 2616 SAVEGENERICPV(PL_parser->lex_shared); 2617 SAVEBOOL(PL_parser->lex_re_reparsing); 2618 SAVEI32(PL_copline); 2619 2620 /* The here-doc parser needs to be able to peek into outer lexing 2621 scopes to find the body of the here-doc. So we put PL_linestr and 2622 PL_bufptr into lex_shared, to 'share' those values. 2623 */ 2624 PL_parser->lex_shared->ls_linestr = PL_linestr; 2625 PL_parser->lex_shared->ls_bufptr = PL_bufptr; 2626 2627 PL_linestr = PL_lex_stuff; 2628 PL_lex_repl = PL_parser->lex_sub_repl; 2629 PL_lex_stuff = NULL; 2630 PL_parser->lex_sub_repl = NULL; 2631 2632 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets 2633 set for an inner quote-like operator and then an error causes scope- 2634 popping. We must not have a PL_lex_stuff value left dangling, as 2635 that breaks assumptions elsewhere. See bug #123617. */ 2636 SAVEGENERICSV(PL_lex_stuff); 2637 SAVEGENERICSV(PL_parser->lex_sub_repl); 2638 2639 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart 2640 = SvPVX(PL_linestr); 2641 PL_bufend += SvCUR(PL_linestr); 2642 PL_last_lop = PL_last_uni = NULL; 2643 SAVEFREESV(PL_linestr); 2644 if (PL_lex_repl) SAVEFREESV(PL_lex_repl); 2645 2646 PL_lex_dojoin = FALSE; 2647 PL_lex_brackets = PL_lex_formbrack = 0; 2648 PL_lex_allbrackets = 0; 2649 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2650 Newx(PL_lex_brackstack, 120, char); 2651 Newx(PL_lex_casestack, 12, char); 2652 PL_lex_casemods = 0; 2653 *PL_lex_casestack = '\0'; 2654 PL_lex_starts = 0; 2655 PL_lex_state = LEX_INTERPCONCAT; 2656 if (is_heredoc) 2657 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 2658 PL_copline = NOLINE; 2659 2660 Newxz(shared, 1, LEXSHARED); 2661 shared->ls_prev = PL_parser->lex_shared; 2662 PL_parser->lex_shared = shared; 2663 2664 PL_lex_inwhat = PL_parser->lex_sub_inwhat; 2665 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; 2666 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) 2667 PL_lex_inpat = PL_parser->lex_sub_op; 2668 else 2669 PL_lex_inpat = NULL; 2670 2671 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); 2672 PL_in_eval &= ~EVAL_RE_REPARSING; 2673 2674 return SUBLEXSTART; 2675} 2676 2677/* 2678 * S_sublex_done 2679 * Restores lexer state after a S_sublex_push. 2680 */ 2681 2682STATIC I32 2683S_sublex_done(pTHX) 2684{ 2685 if (!PL_lex_starts++) { 2686 SV * const sv = newSVpvs(""); 2687 if (SvUTF8(PL_linestr)) 2688 SvUTF8_on(sv); 2689 PL_expect = XOPERATOR; 2690 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 2691 return THING; 2692 } 2693 2694 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ 2695 PL_lex_state = LEX_INTERPCASEMOD; 2696 return yylex(); 2697 } 2698 2699 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ 2700 assert(PL_lex_inwhat != OP_TRANSR); 2701 if (PL_lex_repl) { 2702 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS); 2703 PL_linestr = PL_lex_repl; 2704 PL_lex_inpat = 0; 2705 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); 2706 PL_bufend += SvCUR(PL_linestr); 2707 PL_last_lop = PL_last_uni = NULL; 2708 PL_lex_dojoin = FALSE; 2709 PL_lex_brackets = 0; 2710 PL_lex_allbrackets = 0; 2711 PL_lex_fakeeof = LEX_FAKEEOF_NEVER; 2712 PL_lex_casemods = 0; 2713 *PL_lex_casestack = '\0'; 2714 PL_lex_starts = 0; 2715 if (SvEVALED(PL_lex_repl)) { 2716 PL_lex_state = LEX_INTERPNORMAL; 2717 PL_lex_starts++; 2718 /* we don't clear PL_lex_repl here, so that we can check later 2719 whether this is an evalled subst; that means we rely on the 2720 logic to ensure sublex_done() is called again only via the 2721 branch (in yylex()) that clears PL_lex_repl, else we'll loop */ 2722 } 2723 else { 2724 PL_lex_state = LEX_INTERPCONCAT; 2725 PL_lex_repl = NULL; 2726 } 2727 if (SvTYPE(PL_linestr) >= SVt_PVNV) { 2728 CopLINE(PL_curcop) += 2729 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines 2730 + PL_parser->herelines; 2731 PL_parser->herelines = 0; 2732 } 2733 return PERLY_SLASH; 2734 } 2735 else { 2736 const line_t l = CopLINE(PL_curcop); 2737 LEAVE; 2738 if (PL_parser->sub_error_count != PL_error_count) { 2739 if (PL_parser->sub_no_recover) { 2740 yyquit(); 2741 NOT_REACHED; 2742 } 2743 } 2744 if (PL_multi_close == '<') 2745 PL_parser->herelines += l - PL_multi_end; 2746 PL_bufend = SvPVX(PL_linestr); 2747 PL_bufend += SvCUR(PL_linestr); 2748 PL_expect = XOPERATOR; 2749 return SUBLEXEND; 2750 } 2751} 2752 2753HV * 2754Perl_load_charnames(pTHX_ SV * char_name, const char * context, 2755 const STRLEN context_len, const char ** error_msg) 2756{ 2757 /* Load the official _charnames module if not already there. The 2758 * parameters are just to give info for any error messages generated: 2759 * char_name a name to look up which is the reason for loading this 2760 * context 'char_name' in the context in the input in which it appears 2761 * context_len how many bytes 'context' occupies 2762 * error_msg *error_msg will be set to any error 2763 * 2764 * Returns the ^H table if success; otherwise NULL */ 2765 2766 unsigned int i; 2767 HV * table; 2768 SV **cvp; 2769 SV * res; 2770 2771 PERL_ARGS_ASSERT_LOAD_CHARNAMES; 2772 2773 /* This loop is executed 1 1/2 times. On the first time through, if it 2774 * isn't already loaded, try loading it, and iterate just once to see if it 2775 * worked. */ 2776 for (i = 0; i < 2; i++) { 2777 table = GvHV(PL_hintgv); /* ^H */ 2778 2779 if ( table 2780 && (PL_hints & HINT_LOCALIZE_HH) 2781 && (cvp = hv_fetchs(table, "charnames", FALSE)) 2782 && SvOK(*cvp)) 2783 { 2784 return table; /* Quit if already loaded */ 2785 } 2786 2787 if (i == 0) { 2788 Perl_load_module(aTHX_ 2789 0, 2790 newSVpvs("_charnames"), 2791 2792 /* version parameter; no need to specify it, as if we get too early 2793 * a version, will fail anyway, not being able to find 'charnames' 2794 * */ 2795 NULL, 2796 newSVpvs(":full"), 2797 newSVpvs(":short"), 2798 NULL); 2799 } 2800 } 2801 2802 /* Here, it failed; new_constant will give appropriate error messages */ 2803 *error_msg = NULL; 2804 res = new_constant( NULL, 0, "charnames", char_name, NULL, 2805 context, context_len, error_msg); 2806 SvREFCNT_dec(res); 2807 2808 return NULL; 2809} 2810 2811STATIC SV* 2812S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e) 2813{ 2814 /* This justs wraps get_and_check_backslash_N_name() to output any error 2815 * message it returns. */ 2816 2817 const char * error_msg = NULL; 2818 SV * result; 2819 2820 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER; 2821 2822 /* charnames doesn't work well if there have been errors found */ 2823 if (PL_error_count > 0) { 2824 return NULL; 2825 } 2826 2827 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg); 2828 2829 if (error_msg) { 2830 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0); 2831 } 2832 2833 return result; 2834} 2835 2836SV* 2837Perl_get_and_check_backslash_N_name(pTHX_ const char* s, 2838 const char* e, 2839 const bool is_utf8, 2840 const char ** error_msg) 2841{ 2842 /* <s> points to first character of interior of \N{}, <e> to one beyond the 2843 * interior, hence to the "}". Finds what the name resolves to, returning 2844 * an SV* containing it; NULL if no valid one found. 2845 * 2846 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it 2847 * doesn't have to be. */ 2848 2849 SV* char_name; 2850 SV* res; 2851 HV * table; 2852 SV **cvp; 2853 SV *cv; 2854 SV *rv; 2855 HV *stash; 2856 2857 /* Points to the beginning of the \N{... so that any messages include the 2858 * context of what's failing*/ 2859 const char* context = s - 3; 2860 STRLEN context_len = e - context + 1; /* include all of \N{...} */ 2861 2862 2863 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; 2864 2865 assert(e >= s); 2866 assert(s > (char *) 3); 2867 2868 while (s < e && isBLANK(*s)) { 2869 s++; 2870 } 2871 2872 while (s < e && isBLANK(*(e - 1))) { 2873 e--; 2874 } 2875 2876 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0); 2877 2878 if (!SvCUR(char_name)) { 2879 SvREFCNT_dec_NN(char_name); 2880 /* diag_listed_as: Unknown charname '%s' */ 2881 *error_msg = Perl_form(aTHX_ "Unknown charname ''"); 2882 return NULL; 2883 } 2884 2885 /* Autoload the charnames module */ 2886 2887 table = load_charnames(char_name, context, context_len, error_msg); 2888 if (table == NULL) { 2889 return NULL; 2890 } 2891 2892 *error_msg = NULL; 2893 res = new_constant( NULL, 0, "charnames", char_name, NULL, 2894 context, context_len, error_msg); 2895 if (*error_msg) { 2896 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name)); 2897 2898 SvREFCNT_dec(res); 2899 return NULL; 2900 } 2901 2902 /* See if the charnames handler is the Perl core's, and if so, we can skip 2903 * the validation needed for a user-supplied one, as Perl's does its own 2904 * validation. */ 2905 cvp = hv_fetchs(table, "charnames", FALSE); 2906 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv), 2907 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) 2908 { 2909 const char * const name = HvNAME(stash); 2910 if (memEQs(name, HvNAMELEN(stash), "_charnames")) { 2911 return res; 2912 } 2913 } 2914 2915 /* Here, it isn't Perl's charname handler. We can't rely on a 2916 * user-supplied handler to validate the input name. For non-ut8 input, 2917 * look to see that the first character is legal. Then loop through the 2918 * rest checking that each is a continuation */ 2919 2920 /* This code makes the reasonable assumption that the only Latin1-range 2921 * characters that begin a character name alias are alphabetic, otherwise 2922 * would have to create a isCHARNAME_BEGIN macro */ 2923 2924 if (! is_utf8) { 2925 if (! isALPHAU(*s)) { 2926 goto bad_charname; 2927 } 2928 s++; 2929 while (s < e) { 2930 if (! isCHARNAME_CONT(*s)) { 2931 goto bad_charname; 2932 } 2933 if (*s == ' ' && *(s-1) == ' ') { 2934 goto multi_spaces; 2935 } 2936 s++; 2937 } 2938 } 2939 else { 2940 /* Similarly for utf8. For invariants can check directly; for other 2941 * Latin1, can calculate their code point and check; otherwise use an 2942 * inversion list */ 2943 if (UTF8_IS_INVARIANT(*s)) { 2944 if (! isALPHAU(*s)) { 2945 goto bad_charname; 2946 } 2947 s++; 2948 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2949 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) { 2950 goto bad_charname; 2951 } 2952 s += 2; 2953 } 2954 else { 2955 if (! _invlist_contains_cp(PL_utf8_charname_begin, 2956 utf8_to_uvchr_buf((U8 *) s, 2957 (U8 *) e, 2958 NULL))) 2959 { 2960 goto bad_charname; 2961 } 2962 s += UTF8SKIP(s); 2963 } 2964 2965 while (s < e) { 2966 if (UTF8_IS_INVARIANT(*s)) { 2967 if (! isCHARNAME_CONT(*s)) { 2968 goto bad_charname; 2969 } 2970 if (*s == ' ' && *(s-1) == ' ') { 2971 goto multi_spaces; 2972 } 2973 s++; 2974 } 2975 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 2976 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) 2977 { 2978 goto bad_charname; 2979 } 2980 s += 2; 2981 } 2982 else { 2983 if (! _invlist_contains_cp(PL_utf8_charname_continue, 2984 utf8_to_uvchr_buf((U8 *) s, 2985 (U8 *) e, 2986 NULL))) 2987 { 2988 goto bad_charname; 2989 } 2990 s += UTF8SKIP(s); 2991 } 2992 } 2993 } 2994 if (*(s-1) == ' ') { 2995 /* diag_listed_as: charnames alias definitions may not contain 2996 trailing white-space; marked by <-- HERE in %s 2997 */ 2998 *error_msg = Perl_form(aTHX_ 2999 "charnames alias definitions may not contain trailing " 3000 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s", 3001 (int)(s - context + 1), context, 3002 (int)(e - s + 1), s + 1); 3003 return NULL; 3004 } 3005 3006 if (SvUTF8(res)) { /* Don't accept malformed charname value */ 3007 const U8* first_bad_char_loc; 3008 STRLEN len; 3009 const char* const str = SvPV_const(res, len); 3010 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len, 3011 &first_bad_char_loc))) 3012 { 3013 _force_out_malformed_utf8_message(first_bad_char_loc, 3014 (U8 *) PL_parser->bufend, 3015 0, 3016 0 /* 0 means don't die */ ); 3017 /* diag_listed_as: Malformed UTF-8 returned by \N{%s} 3018 immediately after '%s' */ 3019 *error_msg = Perl_form(aTHX_ 3020 "Malformed UTF-8 returned by %.*s immediately after '%.*s'", 3021 (int) context_len, context, 3022 (int) ((char *) first_bad_char_loc - str), str); 3023 return NULL; 3024 } 3025 } 3026 3027 return res; 3028 3029 bad_charname: { 3030 3031 /* The final %.*s makes sure that should the trailing NUL be missing 3032 * that this print won't run off the end of the string */ 3033 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE 3034 in \N{%s} */ 3035 *error_msg = Perl_form(aTHX_ 3036 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", 3037 (int)(s - context + 1), context, 3038 (int)(e - s + 1), s + 1); 3039 return NULL; 3040 } 3041 3042 multi_spaces: 3043 /* diag_listed_as: charnames alias definitions may not contain a 3044 sequence of multiple spaces; marked by <-- HERE 3045 in %s */ 3046 *error_msg = Perl_form(aTHX_ 3047 "charnames alias definitions may not contain a sequence of " 3048 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s", 3049 (int)(s - context + 1), context, 3050 (int)(e - s + 1), s + 1); 3051 return NULL; 3052} 3053 3054/* 3055 scan_const 3056 3057 Extracts the next constant part of a pattern, double-quoted string, 3058 or transliteration. This is terrifying code. 3059 3060 For example, in parsing the double-quoted string "ab\x63$d", it would 3061 stop at the '$' and return an OP_CONST containing 'abc'. 3062 3063 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's 3064 processing a pattern (PL_lex_inpat is true), a transliteration 3065 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string. 3066 3067 Returns a pointer to the character scanned up to. If this is 3068 advanced from the start pointer supplied (i.e. if anything was 3069 successfully parsed), will leave an OP_CONST for the substring scanned 3070 in pl_yylval. Caller must intuit reason for not parsing further 3071 by looking at the next characters herself. 3072 3073 In patterns: 3074 expand: 3075 \N{FOO} => \N{U+hex_for_character_FOO} 3076 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) 3077 3078 pass through: 3079 all other \-char, including \N and \N{ apart from \N{ABC} 3080 3081 stops on: 3082 @ and $ where it appears to be a var, but not for $ as tail anchor 3083 \l \L \u \U \Q \E 3084 (?{ or (??{ or (*{ 3085 3086 In transliterations: 3087 characters are VERY literal, except for - not at the start or end 3088 of the string, which indicates a range. However some backslash sequences 3089 are recognized: \r, \n, and the like 3090 \007 \o{}, \x{}, \N{} 3091 If all elements in the transliteration are below 256, 3092 scan_const expands the range to the full set of intermediate 3093 characters. If the range is in utf8, the hyphen is replaced with 3094 a certain range mark which will be handled by pmtrans() in op.c. 3095 3096 In double-quoted strings: 3097 backslashes: 3098 all those recognized in transliterations 3099 deprecated backrefs: \1 (in substitution replacements) 3100 case and quoting: \U \Q \E 3101 stops on @ and $ 3102 3103 scan_const does *not* construct ops to handle interpolated strings. 3104 It stops processing as soon as it finds an embedded $ or @ variable 3105 and leaves it to the caller to work out what's going on. 3106 3107 embedded arrays (whether in pattern or not) could be: 3108 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-. 3109 3110 $ in double-quoted strings must be the symbol of an embedded scalar. 3111 3112 $ in pattern could be $foo or could be tail anchor. Assumption: 3113 it's a tail anchor if $ is the last thing in the string, or if it's 3114 followed by one of "()| \r\n\t" 3115 3116 \1 (backreferences) are turned into $1 in substitutions 3117 3118 The structure of the code is 3119 while (there's a character to process) { 3120 handle transliteration ranges 3121 skip regexp comments /(?#comment)/ and codes /(?{code})/ ((*{code})/ 3122 skip #-initiated comments in //x patterns 3123 check for embedded arrays 3124 check for embedded scalars 3125 if (backslash) { 3126 deprecate \1 in substitution replacements 3127 handle string-changing backslashes \l \U \Q \E, etc. 3128 switch (what was escaped) { 3129 handle \- in a transliteration (becomes a literal -) 3130 if a pattern and not \N{, go treat as regular character 3131 handle \132 (octal characters) 3132 handle \x15 and \x{1234} (hex characters) 3133 handle \N{name} (named characters, also \N{3,5} in a pattern) 3134 handle \cV (control characters) 3135 handle printf-style backslashes (\f, \r, \n, etc) 3136 } (end switch) 3137 continue 3138 } (end if backslash) 3139 handle regular character 3140 } (end while character to read) 3141 3142*/ 3143 3144STATIC char * 3145S_scan_const(pTHX_ char *start) 3146{ 3147 const char * const send = PL_bufend;/* end of the constant */ 3148 SV *sv = newSV(send - start); /* sv for the constant. See note below 3149 on sizing. */ 3150 char *s = start; /* start of the constant */ 3151 char *d = SvPVX(sv); /* destination for copies */ 3152 bool dorange = FALSE; /* are we in a translit range? */ 3153 bool didrange = FALSE; /* did we just finish a range? */ 3154 bool in_charclass = FALSE; /* within /[...]/ */ 3155 const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be 3156 UTF8? But, this can show as true 3157 when the source isn't utf8, as for 3158 example when it is entirely composed 3159 of hex constants */ 3160 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */ 3161 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the 3162 number of characters found so far 3163 that will expand (into 2 bytes) 3164 should we have to convert to 3165 UTF-8) */ 3166 SV *res; /* result from charnames */ 3167 STRLEN offset_to_max = 0; /* The offset in the output to where the range 3168 high-end character is temporarily placed */ 3169 3170 /* Does something require special handling in tr/// ? This avoids extra 3171 * work in a less likely case. As such, khw didn't feel it was worth 3172 * adding any branches to the more mainline code to handle this, which 3173 * means that this doesn't get set in some circumstances when things like 3174 * \x{100} get expanded out. As a result there needs to be extra testing 3175 * done in the tr code */ 3176 bool has_above_latin1 = FALSE; 3177 3178 /* Note on sizing: The scanned constant is placed into sv, which is 3179 * initialized by newSV() assuming one byte of output for every byte of 3180 * input. This routine expects newSV() to allocate an extra byte for a 3181 * trailing NUL, which this routine will append if it gets to the end of 3182 * the input. There may be more bytes of input than output (eg., \N{LATIN 3183 * CAPITAL LETTER A}), or more output than input if the constant ends up 3184 * recoded to utf8, but each time a construct is found that might increase 3185 * the needed size, SvGROW() is called. Its size parameter each time is 3186 * based on the best guess estimate at the time, namely the length used so 3187 * far, plus the length the current construct will occupy, plus room for 3188 * the trailing NUL, plus one byte for every input byte still unscanned */ 3189 3190 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses 3191 before set */ 3192#ifdef EBCDIC 3193 int backslash_N = 0; /* ? was the character from \N{} */ 3194 int non_portable_endpoint = 0; /* ? In a range is an endpoint 3195 platform-specific like \x65 */ 3196#endif 3197 3198 PERL_ARGS_ASSERT_SCAN_CONST; 3199 3200 assert(PL_lex_inwhat != OP_TRANSR); 3201 3202 /* Protect sv from errors and fatal warnings. */ 3203 ENTER_with_name("scan_const"); 3204 SAVEFREESV(sv); 3205 3206 /* A bunch of code in the loop below assumes that if s[n] exists and is not 3207 * NUL, then s[n+1] exists. This assertion makes sure that assumption is 3208 * valid */ 3209 assert(*send == '\0'); 3210 3211 while (s < send 3212 || dorange /* Handle tr/// range at right edge of input */ 3213 ) { 3214 3215 /* get transliterations out of the way (they're most literal) */ 3216 if (PL_lex_inwhat == OP_TRANS) { 3217 3218 /* But there isn't any special handling necessary unless there is a 3219 * range, so for most cases we just drop down and handle the value 3220 * as any other. There are two exceptions. 3221 * 3222 * 1. A hyphen indicates that we are actually going to have a 3223 * range. In this case, skip the '-', set a flag, then drop 3224 * down to handle what should be the end range value. 3225 * 2. After we've handled that value, the next time through, that 3226 * flag is set and we fix up the range. 3227 * 3228 * Ranges entirely within Latin1 are expanded out entirely, in 3229 * order to make the transliteration a simple table look-up. 3230 * Ranges that extend above Latin1 have to be done differently, so 3231 * there is no advantage to expanding them here, so they are 3232 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is 3233 * a byte that can't occur in legal UTF-8, and hence can signify a 3234 * hyphen without any possible ambiguity. On EBCDIC machines, if 3235 * the range is expressed as Unicode, the Latin1 portion is 3236 * expanded out even if the range extends above Latin1. This is 3237 * because each code point in it has to be processed here 3238 * individually to get its native translation */ 3239 3240 if (! dorange) { 3241 3242 /* Here, we don't think we're in a range. If the new character 3243 * is not a hyphen; or if it is a hyphen, but it's too close to 3244 * either edge to indicate a range, or if we haven't output any 3245 * characters yet then it's a regular character. */ 3246 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) 3247 { 3248 3249 /* A regular character. Process like any other, but first 3250 * clear any flags */ 3251 didrange = FALSE; 3252 dorange = FALSE; 3253#ifdef EBCDIC 3254 non_portable_endpoint = 0; 3255 backslash_N = 0; 3256#endif 3257 /* The tests here for being above Latin1 and similar ones 3258 * in the following 'else' suffice to find all such 3259 * occurences in the constant, except those added by a 3260 * backslash escape sequence, like \x{100}. Mostly, those 3261 * set 'has_above_latin1' as appropriate */ 3262 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { 3263 has_above_latin1 = TRUE; 3264 } 3265 3266 /* Drops down to generic code to process current byte */ 3267 } 3268 else { /* Is a '-' in the context where it means a range */ 3269 if (didrange) { /* Something like y/A-C-Z// */ 3270 Perl_croak(aTHX_ "Ambiguous range in transliteration" 3271 " operator"); 3272 } 3273 3274 dorange = TRUE; 3275 3276 s++; /* Skip past the hyphen */ 3277 3278 /* d now points to where the end-range character will be 3279 * placed. Drop down to get that character. We'll finish 3280 * processing the range the next time through the loop */ 3281 3282 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { 3283 has_above_latin1 = TRUE; 3284 } 3285 3286 /* Drops down to generic code to process current byte */ 3287 } 3288 } /* End of not a range */ 3289 else { 3290 /* Here we have parsed a range. Now must handle it. At this 3291 * point: 3292 * 'sv' is a SV* that contains the output string we are 3293 * constructing. The final two characters in that string 3294 * are the range start and range end, in order. 3295 * 'd' points to just beyond the range end in the 'sv' string, 3296 * where we would next place something 3297 */ 3298 char * max_ptr; 3299 char * min_ptr; 3300 IV range_min; 3301 IV range_max; /* last character in range */ 3302 STRLEN grow; 3303 Size_t offset_to_min = 0; 3304 Size_t extras = 0; 3305#ifdef EBCDIC 3306 bool convert_unicode; 3307 IV real_range_max = 0; 3308#endif 3309 /* Get the code point values of the range ends. */ 3310 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1; 3311 offset_to_max = max_ptr - SvPVX_const(sv); 3312 if (d_is_utf8) { 3313 /* We know the utf8 is valid, because we just constructed 3314 * it ourselves in previous loop iterations */ 3315 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1); 3316 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL); 3317 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL); 3318 3319 /* This compensates for not all code setting 3320 * 'has_above_latin1', so that we don't skip stuff that 3321 * should be executed */ 3322 if (range_max > 255) { 3323 has_above_latin1 = TRUE; 3324 } 3325 } 3326 else { 3327 min_ptr = max_ptr - 1; 3328 range_min = * (U8*) min_ptr; 3329 range_max = * (U8*) max_ptr; 3330 } 3331 3332 /* If the range is just a single code point, like tr/a-a/.../, 3333 * that code point is already in the output, twice. We can 3334 * just back up over the second instance and avoid all the rest 3335 * of the work. But if it is a variant character, it's been 3336 * counted twice, so decrement. (This unlikely scenario is 3337 * special cased, like the one for a range of 2 code points 3338 * below, only because the main-line code below needs a range 3339 * of 3 or more to work without special casing. Might as well 3340 * get it out of the way now.) */ 3341 if (UNLIKELY(range_max == range_min)) { 3342 d = max_ptr; 3343 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) { 3344 utf8_variant_count--; 3345 } 3346 goto range_done; 3347 } 3348 3349#ifdef EBCDIC 3350 /* On EBCDIC platforms, we may have to deal with portable 3351 * ranges. These happen if at least one range endpoint is a 3352 * Unicode value (\N{...}), or if the range is a subset of 3353 * [A-Z] or [a-z], and both ends are literal characters, 3354 * like 'A', and not like \x{C1} */ 3355 convert_unicode = 3356 cBOOL(backslash_N) /* \N{} forces Unicode, 3357 hence portable range */ 3358 || ( ! non_portable_endpoint 3359 && (( isLOWER_A(range_min) && isLOWER_A(range_max)) 3360 || (isUPPER_A(range_min) && isUPPER_A(range_max)))); 3361 if (convert_unicode) { 3362 3363 /* Special handling is needed for these portable ranges. 3364 * They are defined to be in Unicode terms, which includes 3365 * all the Unicode code points between the end points. 3366 * Convert to Unicode to get the Unicode range. Later we 3367 * will convert each code point in the range back to 3368 * native. */ 3369 range_min = NATIVE_TO_UNI(range_min); 3370 range_max = NATIVE_TO_UNI(range_max); 3371 } 3372#endif 3373 3374 if (range_min > range_max) { 3375#ifdef EBCDIC 3376 if (convert_unicode) { 3377 /* Need to convert back to native for meaningful 3378 * messages for this platform */ 3379 range_min = UNI_TO_NATIVE(range_min); 3380 range_max = UNI_TO_NATIVE(range_max); 3381 } 3382#endif 3383 /* Use the characters themselves for the error message if 3384 * ASCII printables; otherwise some visible representation 3385 * of them */ 3386 if (isPRINT_A(range_min) && isPRINT_A(range_max)) { 3387 Perl_croak(aTHX_ 3388 "Invalid range \"%c-%c\" in transliteration operator", 3389 (char)range_min, (char)range_max); 3390 } 3391#ifdef EBCDIC 3392 else if (convert_unicode) { 3393 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3394 Perl_croak(aTHX_ 3395 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" 3396 UVXf "}\" in transliteration operator", 3397 range_min, range_max); 3398 } 3399#endif 3400 else { 3401 /* diag_listed_as: Invalid range "%s" in transliteration operator */ 3402 Perl_croak(aTHX_ 3403 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\"" 3404 " in transliteration operator", 3405 range_min, range_max); 3406 } 3407 } 3408 3409 /* If the range is exactly two code points long, they are 3410 * already both in the output */ 3411 if (UNLIKELY(range_min + 1 == range_max)) { 3412 goto range_done; 3413 } 3414 3415 /* Here the range contains at least 3 code points */ 3416 3417 if (d_is_utf8) { 3418 3419 /* If everything in the transliteration is below 256, we 3420 * can avoid special handling later. A translation table 3421 * for each of those bytes is created by op.c. So we 3422 * expand out all ranges to their constituent code points. 3423 * But if we've encountered something above 255, the 3424 * expanding won't help, so skip doing that. But if it's 3425 * EBCDIC, we may have to look at each character below 256 3426 * if we have to convert to/from Unicode values */ 3427 if ( has_above_latin1 3428#ifdef EBCDIC 3429 && (range_min > 255 || ! convert_unicode) 3430#endif 3431 ) { 3432 const STRLEN off = d - SvPVX(sv); 3433 const STRLEN extra = 1 + (send - s) + 1; 3434 char *e; 3435 3436 /* Move the high character one byte to the right; then 3437 * insert between it and the range begin, an illegal 3438 * byte which serves to indicate this is a range (using 3439 * a '-' would be ambiguous). */ 3440 3441 if (off + extra > SvLEN(sv)) { 3442 d = off + SvGROW(sv, off + extra); 3443 max_ptr = d - off + offset_to_max; 3444 } 3445 3446 e = d++; 3447 while (e-- > max_ptr) { 3448 *(e + 1) = *e; 3449 } 3450 *(e + 1) = (char) RANGE_INDICATOR; 3451 goto range_done; 3452 } 3453 3454 /* Here, we're going to expand out the range. For EBCDIC 3455 * the range can extend above 255 (not so in ASCII), so 3456 * for EBCDIC, split it into the parts above and below 3457 * 255/256 */ 3458#ifdef EBCDIC 3459 if (range_max > 255) { 3460 real_range_max = range_max; 3461 range_max = 255; 3462 } 3463#endif 3464 } 3465 3466 /* Here we need to expand out the string to contain each 3467 * character in the range. Grow the output to handle this. 3468 * For non-UTF8, we need a byte for each code point in the 3469 * range, minus the three that we've already allocated for: the 3470 * hyphen, the min, and the max. For UTF-8, we need this 3471 * plus an extra byte for each code point that occupies two 3472 * bytes (is variant) when in UTF-8 (except we've already 3473 * allocated for the end points, including if they are 3474 * variants). For ASCII platforms and Unicode ranges on EBCDIC 3475 * platforms, it's easy to calculate a precise number. To 3476 * start, we count the variants in the range, which we need 3477 * elsewhere in this function anyway. (For the case where it 3478 * isn't easy to calculate, 'extras' has been initialized to 0, 3479 * and the calculation is done in a loop further down.) */ 3480#ifdef EBCDIC 3481 if (convert_unicode) 3482#endif 3483 { 3484 /* This is executed unconditionally on ASCII, and for 3485 * Unicode ranges on EBCDIC. Under these conditions, all 3486 * code points above a certain value are variant; and none 3487 * under that value are. We just need to find out how much 3488 * of the range is above that value. We don't count the 3489 * end points here, as they will already have been counted 3490 * as they were parsed. */ 3491 if (range_min >= UTF_CONTINUATION_MARK) { 3492 3493 /* The whole range is made up of variants */ 3494 extras = (range_max - 1) - (range_min + 1) + 1; 3495 } 3496 else if (range_max >= UTF_CONTINUATION_MARK) { 3497 3498 /* Only the higher portion of the range is variants */ 3499 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1; 3500 } 3501 3502 utf8_variant_count += extras; 3503 } 3504 3505 /* The base growth is the number of code points in the range, 3506 * not including the endpoints, which have already been sized 3507 * for (and output). We don't subtract for the hyphen, as it 3508 * has been parsed but not output, and the SvGROW below is 3509 * based only on what's been output plus what's left to parse. 3510 * */ 3511 grow = (range_max - 1) - (range_min + 1) + 1; 3512 3513 if (d_is_utf8) { 3514#ifdef EBCDIC 3515 /* In some cases in EBCDIC, we haven't yet calculated a 3516 * precise amount needed for the UTF-8 variants. Just 3517 * assume the worst case, that everything will expand by a 3518 * byte */ 3519 if (! convert_unicode) { 3520 grow *= 2; 3521 } 3522 else 3523#endif 3524 { 3525 /* Otherwise we know exactly how many variants there 3526 * are in the range. */ 3527 grow += extras; 3528 } 3529 } 3530 3531 /* Grow, but position the output to overwrite the range min end 3532 * point, because in some cases we overwrite that */ 3533 SvCUR_set(sv, d - SvPVX_const(sv)); 3534 offset_to_min = min_ptr - SvPVX_const(sv); 3535 3536 /* See Note on sizing above. */ 3537 d = offset_to_min + SvGROW(sv, SvCUR(sv) 3538 + (send - s) 3539 + grow 3540 + 1 /* Trailing NUL */ ); 3541 3542 /* Now, we can expand out the range. */ 3543#ifdef EBCDIC 3544 if (convert_unicode) { 3545 SSize_t i; 3546 3547 /* Recall that the min and max are now in Unicode terms, so 3548 * we have to convert each character to its native 3549 * equivalent */ 3550 if (d_is_utf8) { 3551 for (i = range_min; i <= range_max; i++) { 3552 append_utf8_from_native_byte( 3553 LATIN1_TO_NATIVE((U8) i), 3554 (U8 **) &d); 3555 } 3556 } 3557 else { 3558 for (i = range_min; i <= range_max; i++) { 3559 *d++ = (char)LATIN1_TO_NATIVE((U8) i); 3560 } 3561 } 3562 } 3563 else 3564#endif 3565 /* Always gets run for ASCII, and sometimes for EBCDIC. */ 3566 { 3567 /* Here, no conversions are necessary, which means that the 3568 * first character in the range is already in 'd' and 3569 * valid, so we can skip overwriting it */ 3570 if (d_is_utf8) { 3571 SSize_t i; 3572 d += UTF8SKIP(d); 3573 for (i = range_min + 1; i <= range_max; i++) { 3574 append_utf8_from_native_byte((U8) i, (U8 **) &d); 3575 } 3576 } 3577 else { 3578 SSize_t i; 3579 d++; 3580 assert(range_min + 1 <= range_max); 3581 for (i = range_min + 1; i < range_max; i++) { 3582#ifdef EBCDIC 3583 /* In this case on EBCDIC, we haven't calculated 3584 * the variants. Do it here, as we go along */ 3585 if (! UVCHR_IS_INVARIANT(i)) { 3586 utf8_variant_count++; 3587 } 3588#endif 3589 *d++ = (char)i; 3590 } 3591 3592 /* The range_max is done outside the loop so as to 3593 * avoid having to special case not incrementing 3594 * 'utf8_variant_count' on EBCDIC (it's already been 3595 * counted when originally parsed) */ 3596 *d++ = (char) range_max; 3597 } 3598 } 3599 3600#ifdef EBCDIC 3601 /* If the original range extended above 255, add in that 3602 * portion. */ 3603 if (real_range_max) { 3604 *d++ = (char) UTF8_TWO_BYTE_HI(0x100); 3605 *d++ = (char) UTF8_TWO_BYTE_LO(0x100); 3606 if (real_range_max > 0x100) { 3607 if (real_range_max > 0x101) { 3608 *d++ = (char) RANGE_INDICATOR; 3609 } 3610 d = (char*)uvchr_to_utf8((U8*)d, real_range_max); 3611 } 3612 } 3613#endif 3614 3615 range_done: 3616 /* mark the range as done, and continue */ 3617 didrange = TRUE; 3618 dorange = FALSE; 3619#ifdef EBCDIC 3620 non_portable_endpoint = 0; 3621 backslash_N = 0; 3622#endif 3623 continue; 3624 } /* End of is a range */ 3625 } /* End of transliteration. Joins main code after these else's */ 3626 else if (*s == '[' && PL_lex_inpat && !in_charclass) { 3627 char *s1 = s-1; 3628 int esc = 0; 3629 while (s1 >= start && *s1-- == '\\') 3630 esc = !esc; 3631 if (!esc) 3632 in_charclass = TRUE; 3633 } 3634 else if (*s == ']' && PL_lex_inpat && in_charclass) { 3635 char *s1 = s-1; 3636 int esc = 0; 3637 while (s1 >= start && *s1-- == '\\') 3638 esc = !esc; 3639 if (!esc) 3640 in_charclass = FALSE; 3641 } 3642 /* skip for regexp comments /(?#comment)/, except for the last 3643 * char, which will be done separately. Stop on (?{..}) and 3644 * friends (??{ ... }) or (*{ ... }) */ 3645 else if (*s == '(' && PL_lex_inpat && (s[1] == '?' || s[1] == '*') && !in_charclass) { 3646 if (s[1] == '?' && s[2] == '#') { 3647 if (s_is_utf8) { 3648 PERL_UINT_FAST8_T len = UTF8SKIP(s); 3649 3650 while (s + len < send && *s != ')') { 3651 Copy(s, d, len, U8); 3652 d += len; 3653 s += len; 3654 len = UTF8_SAFE_SKIP(s, send); 3655 } 3656 } 3657 else while (s+1 < send && *s != ')') { 3658 *d++ = *s++; 3659 } 3660 } 3661 else 3662 if (!PL_lex_casemods && 3663 /* The following should match regcomp.c */ 3664 ((s[1] == '?' && (s[2] == '{' /* (?{ ... }) */ 3665 || (s[2] == '?' && s[3] == '{'))) || /* (??{ ... }) */ 3666 (s[1] == '*' && (s[2] == '{' ))) /* (*{ ... }) */ 3667 ){ 3668 break; 3669 } 3670 } 3671 /* likewise skip #-initiated comments in //x patterns */ 3672 else if (*s == '#' 3673 && PL_lex_inpat 3674 && !in_charclass 3675 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) 3676 { 3677 while (s < send && *s != '\n') 3678 *d++ = *s++; 3679 } 3680 /* no further processing of single-quoted regex */ 3681 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') 3682 goto default_action; 3683 3684 /* check for embedded arrays 3685 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) 3686 */ 3687 else if (*s == '@' && s[1]) { 3688 if (UTF 3689 ? isIDFIRST_utf8_safe(s+1, send) 3690 : isWORDCHAR_A(s[1])) 3691 { 3692 break; 3693 } 3694 if (memCHRs(":'{$", s[1])) 3695 break; 3696 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) 3697 break; /* in regexp, neither @+ nor @- are interpolated */ 3698 } 3699 /* check for embedded scalars. only stop if we're sure it's a 3700 * variable. */ 3701 else if (*s == '$') { 3702 if (!PL_lex_inpat) /* not a regexp, so $ must be var */ 3703 break; 3704 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) { 3705 if (s[1] == '\\') { 3706 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 3707 "Possible unintended interpolation of $\\ in regex"); 3708 } 3709 break; /* in regexp, $ might be tail anchor */ 3710 } 3711 } 3712 3713 /* End of else if chain - OP_TRANS rejoin rest */ 3714 3715 if (UNLIKELY(s >= send)) { 3716 assert(s == send); 3717 break; 3718 } 3719 3720 /* backslashes */ 3721 if (*s == '\\' && s+1 < send) { 3722 char* bslash = s; /* point to beginning \ */ 3723 char* rbrace; /* point to ending '}' */ 3724 char* e; /* 1 past the meat (non-blanks) before the 3725 brace */ 3726 s++; 3727 3728 /* warn on \1 - \9 in substitution replacements, but note that \11 3729 * is an octal; and \19 is \1 followed by '9' */ 3730 if (PL_lex_inwhat == OP_SUBST 3731 && !PL_lex_inpat 3732 && isDIGIT(*s) 3733 && *s != '0' 3734 && !isDIGIT(s[1])) 3735 { 3736 /* diag_listed_as: \%d better written as $%d */ 3737 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); 3738 s = bslash; 3739 *s = '$'; 3740 break; 3741 } 3742 3743 /* string-change backslash escapes */ 3744 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) { 3745 s = bslash; 3746 break; 3747 } 3748 /* In a pattern, process \N, but skip any other backslash escapes. 3749 * This is because we don't want to translate an escape sequence 3750 * into a meta symbol and have the regex compiler use the meta 3751 * symbol meaning, e.g. \x{2E} would be confused with a dot. But 3752 * in spite of this, we do have to process \N here while the proper 3753 * charnames handler is in scope. See bugs #56444 and #62056. 3754 * 3755 * There is a complication because \N in a pattern may also stand 3756 * for 'match a non-nl', and not mean a charname, in which case its 3757 * processing should be deferred to the regex compiler. To be a 3758 * charname it must be followed immediately by a '{', and not look 3759 * like \N followed by a curly quantifier, i.e., not something like 3760 * \N{3,}. regcurly returns a boolean indicating if it is a legal 3761 * quantifier */ 3762 else if (PL_lex_inpat 3763 && (*s != 'N' 3764 || s[1] != '{' 3765 || regcurly(s + 1, send, NULL))) 3766 { 3767 *d++ = '\\'; 3768 goto default_action; 3769 } 3770 3771 switch (*s) { 3772 default: 3773 { 3774 if ((isALPHANUMERIC(*s))) 3775 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 3776 "Unrecognized escape \\%c passed through", 3777 *s); 3778 /* default action is to copy the quoted character */ 3779 goto default_action; 3780 } 3781 3782 /* eg. \132 indicates the octal constant 0132 */ 3783 case '0': case '1': case '2': case '3': 3784 case '4': case '5': case '6': case '7': 3785 { 3786 I32 flags = PERL_SCAN_SILENT_ILLDIGIT 3787 | PERL_SCAN_NOTIFY_ILLDIGIT; 3788 STRLEN len = 3; 3789 uv = grok_oct(s, &len, &flags, NULL); 3790 s += len; 3791 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT) 3792 && s < send 3793 && isDIGIT(*s) /* like \08, \178 */ 3794 && ckWARN(WARN_MISC)) 3795 { 3796 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s", 3797 form_alien_digit_msg(8, len, s, send, UTF, FALSE)); 3798 } 3799 } 3800 goto NUM_ESCAPE_INSERT; 3801 3802 /* eg. \o{24} indicates the octal constant \024 */ 3803 case 'o': 3804 { 3805 const char* error; 3806 3807 if (! grok_bslash_o(&s, send, 3808 &uv, &error, 3809 NULL, 3810 FALSE, /* Not strict */ 3811 FALSE, /* No illegal cp's */ 3812 UTF)) 3813 { 3814 yyerror(error); 3815 uv = 0; /* drop through to ensure range ends are set */ 3816 } 3817 goto NUM_ESCAPE_INSERT; 3818 } 3819 3820 /* eg. \x24 indicates the hex constant 0x24 */ 3821 case 'x': 3822 { 3823 const char* error; 3824 3825 if (! grok_bslash_x(&s, send, 3826 &uv, &error, 3827 NULL, 3828 FALSE, /* Not strict */ 3829 FALSE, /* No illegal cp's */ 3830 UTF)) 3831 { 3832 yyerror(error); 3833 uv = 0; /* drop through to ensure range ends are set */ 3834 } 3835 } 3836 3837 NUM_ESCAPE_INSERT: 3838 /* Insert oct or hex escaped character. */ 3839 3840 /* Here uv is the ordinal of the next character being added */ 3841 if (UVCHR_IS_INVARIANT(uv)) { 3842 *d++ = (char) uv; 3843 } 3844 else { 3845 if (!d_is_utf8 && uv > 255) { 3846 3847 /* Here, 'uv' won't fit unless we convert to UTF-8. 3848 * If we've only seen invariants so far, all we have to 3849 * do is turn on the flag */ 3850 if (utf8_variant_count == 0) { 3851 SvUTF8_on(sv); 3852 } 3853 else { 3854 SvCUR_set(sv, d - SvPVX_const(sv)); 3855 SvPOK_on(sv); 3856 *d = '\0'; 3857 3858 sv_utf8_upgrade_flags_grow( 3859 sv, 3860 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3861 3862 /* Since we're having to grow here, 3863 * make sure we have enough room for 3864 * this escape and a NUL, so the 3865 * code immediately below won't have 3866 * to actually grow again */ 3867 UVCHR_SKIP(uv) 3868 + (STRLEN)(send - s) + 1); 3869 d = SvPVX(sv) + SvCUR(sv); 3870 } 3871 3872 has_above_latin1 = TRUE; 3873 d_is_utf8 = TRUE; 3874 } 3875 3876 if (! d_is_utf8) { 3877 *d++ = (char)uv; 3878 utf8_variant_count++; 3879 } 3880 else { 3881 /* Usually, there will already be enough room in 'sv' 3882 * since such escapes are likely longer than any UTF-8 3883 * sequence they can end up as. This isn't the case on 3884 * EBCDIC where \x{40000000} contains 12 bytes, and the 3885 * UTF-8 for it contains 14. And, we have to allow for 3886 * a trailing NUL. It probably can't happen on ASCII 3887 * platforms, but be safe. See Note on sizing above. */ 3888 const STRLEN needed = d - SvPVX(sv) 3889 + UVCHR_SKIP(uv) 3890 + (send - s) 3891 + 1; 3892 if (UNLIKELY(needed > SvLEN(sv))) { 3893 SvCUR_set(sv, d - SvPVX_const(sv)); 3894 d = SvCUR(sv) + SvGROW(sv, needed); 3895 } 3896 3897 d = (char*) uvchr_to_utf8_flags((U8*)d, uv, 3898 (ckWARN(WARN_PORTABLE)) 3899 ? UNICODE_WARN_PERL_EXTENDED 3900 : 0); 3901 } 3902 } 3903#ifdef EBCDIC 3904 non_portable_endpoint++; 3905#endif 3906 continue; 3907 3908 case 'N': 3909 /* In a non-pattern \N must be like \N{U+0041}, or it can be a 3910 * named character, like \N{LATIN SMALL LETTER A}, or a named 3911 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND 3912 * GRAVE} (except y/// can't handle the latter, croaking). For 3913 * convenience all three forms are referred to as "named 3914 * characters" below. 3915 * 3916 * For patterns, \N also can mean to match a non-newline. Code 3917 * before this 'switch' statement should already have handled 3918 * this situation, and hence this code only has to deal with 3919 * the named character cases. 3920 * 3921 * For non-patterns, the named characters are converted to 3922 * their string equivalents. In patterns, named characters are 3923 * not converted to their ultimate forms for the same reasons 3924 * that other escapes aren't (mainly that the ultimate 3925 * character could be considered a meta-symbol by the regex 3926 * compiler). Instead, they are converted to the \N{U+...} 3927 * form to get the value from the charnames that is in effect 3928 * right now, while preserving the fact that it was a named 3929 * character, so that the regex compiler knows this. 3930 * 3931 * The structure of this section of code (besides checking for 3932 * errors and upgrading to utf8) is: 3933 * If the named character is of the form \N{U+...}, pass it 3934 * through if a pattern; otherwise convert the code point 3935 * to utf8 3936 * Otherwise must be some \N{NAME}: convert to 3937 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8 3938 * 3939 * Transliteration is an exception. The conversion to utf8 is 3940 * only done if the code point requires it to be representable. 3941 * 3942 * Here, 's' points to the 'N'; the test below is guaranteed to 3943 * succeed if we are being called on a pattern, as we already 3944 * know from a test above that the next character is a '{'. A 3945 * non-pattern \N must mean 'named character', which requires 3946 * braces */ 3947 s++; 3948 if (*s != '{') { 3949 yyerror("Missing braces on \\N{}"); 3950 *d++ = '\0'; 3951 continue; 3952 } 3953 s++; 3954 3955 /* If there is no matching '}', it is an error. */ 3956 if (! (rbrace = (char *) memchr(s, '}', send - s))) { 3957 if (! PL_lex_inpat) { 3958 yyerror("Missing right brace on \\N{}"); 3959 } else { 3960 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); 3961 } 3962 yyquit(); /* Have exhausted the input. */ 3963 } 3964 3965 /* Here it looks like a named character */ 3966 while (s < rbrace && isBLANK(*s)) { 3967 s++; 3968 } 3969 3970 e = rbrace; 3971 while (s < e && isBLANK(*(e - 1))) { 3972 e--; 3973 } 3974 3975 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ 3976 s += 2; /* Skip to next char after the 'U+' */ 3977 if (PL_lex_inpat) { 3978 3979 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */ 3980 /* Check the syntax. */ 3981 if (!isXDIGIT(*s)) { 3982 bad_NU: 3983 yyerror( 3984 "Invalid hexadecimal number in \\N{U+...}" 3985 ); 3986 s = rbrace + 1; 3987 *d++ = '\0'; 3988 continue; 3989 } 3990 while (++s < e) { 3991 if (isXDIGIT(*s)) 3992 continue; 3993 else if ((*s == '.' || *s == '_') 3994 && isXDIGIT(s[1])) 3995 continue; 3996 goto bad_NU; 3997 } 3998 3999 /* Pass everything through unchanged. 4000 * +1 is to include the '}' */ 4001 Copy(bslash, d, rbrace - bslash + 1, char); 4002 d += rbrace - bslash + 1; 4003 } 4004 else { /* Not a pattern: convert the hex to string */ 4005 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 4006 | PERL_SCAN_SILENT_ILLDIGIT 4007 | PERL_SCAN_SILENT_OVERFLOW 4008 | PERL_SCAN_DISALLOW_PREFIX; 4009 STRLEN len = e - s; 4010 4011 uv = grok_hex(s, &len, &flags, NULL); 4012 if (len == 0 || (len != (STRLEN)(e - s))) 4013 goto bad_NU; 4014 4015 if ( uv > MAX_LEGAL_CP 4016 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX)) 4017 { 4018 yyerror(form_cp_too_large_msg(16, s, len, 0)); 4019 uv = 0; /* drop through to ensure range ends are 4020 set */ 4021 } 4022 4023 /* For non-tr///, if the destination is not in utf8, 4024 * unconditionally recode it to be so. This is 4025 * because \N{} implies Unicode semantics, and scalars 4026 * have to be in utf8 to guarantee those semantics. 4027 * tr/// doesn't care about Unicode rules, so no need 4028 * there to upgrade to UTF-8 for small enough code 4029 * points */ 4030 if (! d_is_utf8 && ( uv > 0xFF 4031 || PL_lex_inwhat != OP_TRANS)) 4032 { 4033 /* See Note on sizing above. */ 4034 const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1; 4035 4036 SvCUR_set(sv, d - SvPVX_const(sv)); 4037 SvPOK_on(sv); 4038 *d = '\0'; 4039 4040 if (utf8_variant_count == 0) { 4041 SvUTF8_on(sv); 4042 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra); 4043 } 4044 else { 4045 sv_utf8_upgrade_flags_grow( 4046 sv, 4047 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4048 extra); 4049 d = SvPVX(sv) + SvCUR(sv); 4050 } 4051 4052 d_is_utf8 = TRUE; 4053 has_above_latin1 = TRUE; 4054 } 4055 4056 /* Add the (Unicode) code point to the output. */ 4057 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) { 4058 *d++ = (char) LATIN1_TO_NATIVE(uv); 4059 } 4060 else { 4061 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 4062 (ckWARN(WARN_PORTABLE)) 4063 ? UNICODE_WARN_PERL_EXTENDED 4064 : 0); 4065 } 4066 } 4067 } 4068 else /* Here is \N{NAME} but not \N{U+...}. */ 4069 if (! (res = get_and_check_backslash_N_name_wrapper(s, e))) 4070 { /* Failed. We should die eventually, but for now use a NUL 4071 to keep parsing */ 4072 *d++ = '\0'; 4073 } 4074 else { /* Successfully evaluated the name */ 4075 STRLEN len; 4076 const char *str = SvPV_const(res, len); 4077 if (PL_lex_inpat) { 4078 4079 if (! len) { /* The name resolved to an empty string */ 4080 const char empty_N[] = "\\N{_}"; 4081 Copy(empty_N, d, sizeof(empty_N) - 1, char); 4082 d += sizeof(empty_N) - 1; 4083 } 4084 else { 4085 /* In order to not lose information for the regex 4086 * compiler, pass the result in the specially made 4087 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are 4088 * the code points in hex of each character 4089 * returned by charnames */ 4090 4091 const char *str_end = str + len; 4092 const STRLEN off = d - SvPVX_const(sv); 4093 4094 if (! SvUTF8(res)) { 4095 /* For the non-UTF-8 case, we can determine the 4096 * exact length needed without having to parse 4097 * through the string. Each character takes up 4098 * 2 hex digits plus either a trailing dot or 4099 * the "}" */ 4100 const char initial_text[] = "\\N{U+"; 4101 const STRLEN initial_len = sizeof(initial_text) 4102 - 1; 4103 d = off + SvGROW(sv, off 4104 + 3 * len 4105 4106 /* +1 for trailing NUL */ 4107 + initial_len + 1 4108 4109 + (STRLEN)(send - rbrace)); 4110 Copy(initial_text, d, initial_len, char); 4111 d += initial_len; 4112 while (str < str_end) { 4113 char hex_string[4]; 4114 int len = 4115 my_snprintf(hex_string, 4116 sizeof(hex_string), 4117 "%02X.", 4118 4119 /* The regex compiler is 4120 * expecting Unicode, not 4121 * native */ 4122 NATIVE_TO_LATIN1(*str)); 4123 PERL_MY_SNPRINTF_POST_GUARD(len, 4124 sizeof(hex_string)); 4125 Copy(hex_string, d, 3, char); 4126 d += 3; 4127 str++; 4128 } 4129 d--; /* Below, we will overwrite the final 4130 dot with a right brace */ 4131 } 4132 else { 4133 STRLEN char_length; /* cur char's byte length */ 4134 4135 /* and the number of bytes after this is 4136 * translated into hex digits */ 4137 STRLEN output_length; 4138 4139 /* 2 hex per byte; 2 chars for '\N'; 2 chars 4140 * for max('U+', '.'); and 1 for NUL */ 4141 char hex_string[2 * UTF8_MAXBYTES + 5]; 4142 4143 /* Get the first character of the result. */ 4144 U32 uv = utf8n_to_uvchr((U8 *) str, 4145 len, 4146 &char_length, 4147 UTF8_ALLOW_ANYUV); 4148 /* Convert first code point to Unicode hex, 4149 * including the boiler plate before it. */ 4150 output_length = 4151 my_snprintf(hex_string, sizeof(hex_string), 4152 "\\N{U+%X", 4153 (unsigned int) NATIVE_TO_UNI(uv)); 4154 4155 /* Make sure there is enough space to hold it */ 4156 d = off + SvGROW(sv, off 4157 + output_length 4158 + (STRLEN)(send - rbrace) 4159 + 2); /* '}' + NUL */ 4160 /* And output it */ 4161 Copy(hex_string, d, output_length, char); 4162 d += output_length; 4163 4164 /* For each subsequent character, append dot and 4165 * its Unicode code point in hex */ 4166 while ((str += char_length) < str_end) { 4167 const STRLEN off = d - SvPVX_const(sv); 4168 U32 uv = utf8n_to_uvchr((U8 *) str, 4169 str_end - str, 4170 &char_length, 4171 UTF8_ALLOW_ANYUV); 4172 output_length = 4173 my_snprintf(hex_string, 4174 sizeof(hex_string), 4175 ".%X", 4176 (unsigned int) NATIVE_TO_UNI(uv)); 4177 4178 d = off + SvGROW(sv, off 4179 + output_length 4180 + (STRLEN)(send - rbrace) 4181 + 2); /* '}' + NUL */ 4182 Copy(hex_string, d, output_length, char); 4183 d += output_length; 4184 } 4185 } 4186 4187 *d++ = '}'; /* Done. Add the trailing brace */ 4188 } 4189 } 4190 else { /* Here, not in a pattern. Convert the name to a 4191 * string. */ 4192 4193 if (PL_lex_inwhat == OP_TRANS) { 4194 str = SvPV_const(res, len); 4195 if (len > ((SvUTF8(res)) 4196 ? UTF8SKIP(str) 4197 : 1U)) 4198 { 4199 yyerror(Perl_form(aTHX_ 4200 "%.*s must not be a named sequence" 4201 " in transliteration operator", 4202 /* +1 to include the "}" */ 4203 (int) (rbrace + 1 - start), start)); 4204 *d++ = '\0'; 4205 goto end_backslash_N; 4206 } 4207 4208 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) { 4209 has_above_latin1 = TRUE; 4210 } 4211 4212 } 4213 else if (! SvUTF8(res)) { 4214 /* Make sure \N{} return is UTF-8. This is because 4215 * \N{} implies Unicode semantics, and scalars have 4216 * to be in utf8 to guarantee those semantics; but 4217 * not needed in tr/// */ 4218 sv_utf8_upgrade_flags(res, 0); 4219 str = SvPV_const(res, len); 4220 } 4221 4222 /* Upgrade destination to be utf8 if this new 4223 * component is */ 4224 if (! d_is_utf8 && SvUTF8(res)) { 4225 /* See Note on sizing above. */ 4226 const STRLEN extra = len + (send - s) + 1; 4227 4228 SvCUR_set(sv, d - SvPVX_const(sv)); 4229 SvPOK_on(sv); 4230 *d = '\0'; 4231 4232 if (utf8_variant_count == 0) { 4233 SvUTF8_on(sv); 4234 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra); 4235 } 4236 else { 4237 sv_utf8_upgrade_flags_grow(sv, 4238 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4239 extra); 4240 d = SvPVX(sv) + SvCUR(sv); 4241 } 4242 d_is_utf8 = TRUE; 4243 } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */ 4244 4245 /* See Note on sizing above. (NOTE: SvCUR() is not 4246 * set correctly here). */ 4247 const STRLEN extra = len + (send - rbrace) + 1; 4248 const STRLEN off = d - SvPVX_const(sv); 4249 d = off + SvGROW(sv, off + extra); 4250 } 4251 Copy(str, d, len, char); 4252 d += len; 4253 } 4254 4255 SvREFCNT_dec(res); 4256 4257 } /* End \N{NAME} */ 4258 4259 end_backslash_N: 4260#ifdef EBCDIC 4261 backslash_N++; /* \N{} is defined to be Unicode */ 4262#endif 4263 s = rbrace + 1; /* Point to just after the '}' */ 4264 continue; 4265 4266 /* \c is a control character */ 4267 case 'c': 4268 s++; 4269 if (s < send) { 4270 const char * message; 4271 4272 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) { 4273 yyerror(message); 4274 yyquit(); /* Have always immediately croaked on 4275 errors in this */ 4276 } 4277 d++; 4278 } 4279 else { 4280 yyerror("Missing control char name in \\c"); 4281 yyquit(); /* Are at end of input, no sense continuing */ 4282 } 4283#ifdef EBCDIC 4284 non_portable_endpoint++; 4285#endif 4286 break; 4287 4288 /* printf-style backslashes, formfeeds, newlines, etc */ 4289 case 'b': 4290 *d++ = '\b'; 4291 break; 4292 case 'n': 4293 *d++ = '\n'; 4294 break; 4295 case 'r': 4296 *d++ = '\r'; 4297 break; 4298 case 'f': 4299 *d++ = '\f'; 4300 break; 4301 case 't': 4302 *d++ = '\t'; 4303 break; 4304 case 'e': 4305 *d++ = ESC_NATIVE; 4306 break; 4307 case 'a': 4308 *d++ = '\a'; 4309 break; 4310 } /* end switch */ 4311 4312 s++; 4313 continue; 4314 } /* end if (backslash) */ 4315 4316 default_action: 4317 /* Just copy the input to the output, though we may have to convert 4318 * to/from UTF-8. 4319 * 4320 * If the input has the same representation in UTF-8 as not, it will be 4321 * a single byte, and we don't care about UTF8ness; just copy the byte */ 4322 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) { 4323 *d++ = *s++; 4324 } 4325 else if (! s_is_utf8 && ! d_is_utf8) { 4326 /* If neither source nor output is UTF-8, is also a single byte, 4327 * just copy it; but this byte counts should we later have to 4328 * convert to UTF-8 */ 4329 *d++ = *s++; 4330 utf8_variant_count++; 4331 } 4332 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */ 4333 const STRLEN len = UTF8SKIP(s); 4334 4335 /* We expect the source to have already been checked for 4336 * malformedness */ 4337 assert(isUTF8_CHAR((U8 *) s, (U8 *) send)); 4338 4339 Copy(s, d, len, U8); 4340 d += len; 4341 s += len; 4342 } 4343 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */ 4344 STRLEN need = send - s + 1; /* See Note on sizing above. */ 4345 4346 SvCUR_set(sv, d - SvPVX_const(sv)); 4347 SvPOK_on(sv); 4348 *d = '\0'; 4349 4350 if (utf8_variant_count == 0) { 4351 SvUTF8_on(sv); 4352 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need); 4353 } 4354 else { 4355 sv_utf8_upgrade_flags_grow(sv, 4356 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4357 need); 4358 d = SvPVX(sv) + SvCUR(sv); 4359 } 4360 d_is_utf8 = TRUE; 4361 goto default_action; /* Redo, having upgraded so both are UTF-8 */ 4362 } 4363 else { /* UTF8ness matters: convert this non-UTF8 source char to 4364 UTF-8 for output. It will occupy 2 bytes, but don't include 4365 the input byte since we haven't incremented 's' yet. See 4366 Note on sizing above. */ 4367 const STRLEN off = d - SvPVX(sv); 4368 const STRLEN extra = 2 + (send - s - 1) + 1; 4369 if (off + extra > SvLEN(sv)) { 4370 d = off + SvGROW(sv, off + extra); 4371 } 4372 *d++ = UTF8_EIGHT_BIT_HI(*s); 4373 *d++ = UTF8_EIGHT_BIT_LO(*s); 4374 s++; 4375 } 4376 } /* while loop to process each character */ 4377 4378 { 4379 const STRLEN off = d - SvPVX(sv); 4380 4381 /* See if room for the terminating NUL */ 4382 if (UNLIKELY(off >= SvLEN(sv))) { 4383 4384#ifndef DEBUGGING 4385 4386 if (off > SvLEN(sv)) 4387#endif 4388 Perl_croak(aTHX_ "panic: constant overflowed allocated space," 4389 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv)); 4390 4391 /* Whew! Here we don't have room for the terminating NUL, but 4392 * everything else so far has fit. It's not too late to grow 4393 * to fit the NUL and continue on. But it is a bug, as the code 4394 * above was supposed to have made room for this, so under 4395 * DEBUGGING builds, we panic anyway. */ 4396 d = off + SvGROW(sv, off + 1); 4397 } 4398 } 4399 4400 /* terminate the string and set up the sv */ 4401 *d = '\0'; 4402 SvCUR_set(sv, d - SvPVX_const(sv)); 4403 4404 SvPOK_on(sv); 4405 if (d_is_utf8) { 4406 SvUTF8_on(sv); 4407 } 4408 4409 /* shrink the sv if we allocated more than we used */ 4410 if (SvCUR(sv) + 5 < SvLEN(sv)) { 4411 SvPV_shrink_to_cur(sv); 4412 } 4413 4414 /* return the substring (via pl_yylval) only if we parsed anything */ 4415 if (s > start) { 4416 char *s2 = start; 4417 for (; s2 < s; s2++) { 4418 if (*s2 == '\n') 4419 COPLINE_INC_WITH_HERELINES; 4420 } 4421 SvREFCNT_inc_simple_void_NN(sv); 4422 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) 4423 && ! PL_parser->lex_re_reparsing) 4424 { 4425 const char *const key = PL_lex_inpat ? "qr" : "q"; 4426 const STRLEN keylen = PL_lex_inpat ? 2 : 1; 4427 const char *type; 4428 STRLEN typelen; 4429 4430 if (PL_lex_inwhat == OP_TRANS) { 4431 type = "tr"; 4432 typelen = 2; 4433 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { 4434 type = "s"; 4435 typelen = 1; 4436 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { 4437 type = "q"; 4438 typelen = 1; 4439 } else { 4440 type = "qq"; 4441 typelen = 2; 4442 } 4443 4444 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, 4445 type, typelen, NULL); 4446 } 4447 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 4448 } 4449 LEAVE_with_name("scan_const"); 4450 return s; 4451} 4452 4453/* S_intuit_more 4454 * Returns TRUE if there's more to the expression (e.g., a subscript), 4455 * FALSE otherwise. 4456 * 4457 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/ 4458 * 4459 * ->[ and ->{ return TRUE 4460 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled 4461 * { and [ outside a pattern are always subscripts, so return TRUE 4462 * if we're outside a pattern and it's not { or [, then return FALSE 4463 * if we're in a pattern and the first char is a { 4464 * {4,5} (any digits around the comma) returns FALSE 4465 * if we're in a pattern and the first char is a [ 4466 * [] returns FALSE 4467 * [SOMETHING] has a funky algorithm to decide whether it's a 4468 * character class or not. It has to deal with things like 4469 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/ 4470 * anything else returns TRUE 4471 */ 4472 4473/* This is the one truly awful dwimmer necessary to conflate C and sed. */ 4474 4475STATIC int 4476S_intuit_more(pTHX_ char *s, char *e) 4477{ 4478 PERL_ARGS_ASSERT_INTUIT_MORE; 4479 4480 if (PL_lex_brackets) 4481 return TRUE; 4482 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) 4483 return TRUE; 4484 if (*s == '-' && s[1] == '>' 4485 && FEATURE_POSTDEREF_QQ_IS_ENABLED 4486 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*'))) 4487 ||(s[2] == '@' && memCHRs("*[{",s[3])) )) 4488 return TRUE; 4489 if (*s != '{' && *s != '[') 4490 return FALSE; 4491 PL_parser->sub_no_recover = TRUE; 4492 if (!PL_lex_inpat) 4493 return TRUE; 4494 4495 /* In a pattern, so maybe we have {n,m}. */ 4496 if (*s == '{') { 4497 if (regcurly(s, e, NULL)) { 4498 return FALSE; 4499 } 4500 return TRUE; 4501 } 4502 4503 /* On the other hand, maybe we have a character class */ 4504 4505 s++; 4506 if (*s == ']' || *s == '^') 4507 return FALSE; 4508 else { 4509 /* this is terrifying, and it works */ 4510 int weight; 4511 char seen[256]; 4512 const char * const send = (char *) memchr(s, ']', e - s); 4513 unsigned char un_char, last_un_char; 4514 char tmpbuf[sizeof PL_tokenbuf * 4]; 4515 4516 if (!send) /* has to be an expression */ 4517 return TRUE; 4518 weight = 2; /* let's weigh the evidence */ 4519 4520 if (*s == '$') 4521 weight -= 3; 4522 else if (isDIGIT(*s)) { 4523 if (s[1] != ']') { 4524 if (isDIGIT(s[1]) && s[2] == ']') 4525 weight -= 10; 4526 } 4527 else 4528 weight -= 100; 4529 } 4530 Zero(seen,256,char); 4531 un_char = 255; 4532 for (; s < send; s++) { 4533 last_un_char = un_char; 4534 un_char = (unsigned char)*s; 4535 switch (*s) { 4536 case '@': 4537 case '&': 4538 case '$': 4539 weight -= seen[un_char] * 10; 4540 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { 4541 int len; 4542 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); 4543 len = (int)strlen(tmpbuf); 4544 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 4545 UTF ? SVf_UTF8 : 0, SVt_PV)) 4546 weight -= 100; 4547 else 4548 weight -= 10; 4549 } 4550 else if (*s == '$' 4551 && s[1] 4552 && memCHRs("[#!%*<>()-=",s[1])) 4553 { 4554 if (/*{*/ memCHRs("])} =",s[2])) 4555 weight -= 10; 4556 else 4557 weight -= 1; 4558 } 4559 break; 4560 case '\\': 4561 un_char = 254; 4562 if (s[1]) { 4563 if (memCHRs("wds]",s[1])) 4564 weight += 100; 4565 else if (seen[(U8)'\''] || seen[(U8)'"']) 4566 weight += 1; 4567 else if (memCHRs("rnftbxcav",s[1])) 4568 weight += 40; 4569 else if (isDIGIT(s[1])) { 4570 weight += 40; 4571 while (s[1] && isDIGIT(s[1])) 4572 s++; 4573 } 4574 } 4575 else 4576 weight += 100; 4577 break; 4578 case '-': 4579 if (s[1] == '\\') 4580 weight += 50; 4581 if (memCHRs("aA01! ",last_un_char)) 4582 weight += 30; 4583 if (memCHRs("zZ79~",s[1])) 4584 weight += 30; 4585 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) 4586 weight -= 5; /* cope with negative subscript */ 4587 break; 4588 default: 4589 if (!isWORDCHAR(last_un_char) 4590 && !(last_un_char == '$' || last_un_char == '@' 4591 || last_un_char == '&') 4592 && isALPHA(*s) && s[1] && isALPHA(s[1])) { 4593 char *d = s; 4594 while (isALPHA(*s)) 4595 s++; 4596 if (keyword(d, s - d, 0)) 4597 weight -= 150; 4598 } 4599 if (un_char == last_un_char + 1) 4600 weight += 5; 4601 weight -= seen[un_char]; 4602 break; 4603 } 4604 seen[un_char]++; 4605 } 4606 if (weight >= 0) /* probably a character class */ 4607 return FALSE; 4608 } 4609 4610 return TRUE; 4611} 4612 4613/* 4614 * S_intuit_method 4615 * 4616 * Does all the checking to disambiguate 4617 * foo bar 4618 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise 4619 * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args). 4620 * 4621 * First argument is the stuff after the first token, e.g. "bar". 4622 * 4623 * Not a method if foo is a filehandle. 4624 * Not a method if foo is a subroutine prototyped to take a filehandle. 4625 * Not a method if it's really "Foo $bar" 4626 * Method if it's "foo $bar" 4627 * Not a method if it's really "print foo $bar" 4628 * Method if it's really "foo package::" (interpreted as package->foo) 4629 * Not a method if bar is known to be a subroutine ("sub bar; foo bar") 4630 * Not a method if bar is a filehandle or package, but is quoted with 4631 * => 4632 */ 4633 4634STATIC int 4635S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) 4636{ 4637 char *s = start + (*start == '$'); 4638 char tmpbuf[sizeof PL_tokenbuf]; 4639 STRLEN len; 4640 GV* indirgv; 4641 /* Mustn't actually add anything to a symbol table. 4642 But also don't want to "initialise" any placeholder 4643 constants that might already be there into full 4644 blown PVGVs with attached PVCV. */ 4645 GV * const gv = 4646 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL; 4647 4648 PERL_ARGS_ASSERT_INTUIT_METHOD; 4649 4650 if (!FEATURE_INDIRECT_IS_ENABLED) 4651 return 0; 4652 4653 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) 4654 return 0; 4655 if (cv && SvPOK(cv)) { 4656 const char *proto = CvPROTO(cv); 4657 if (proto) { 4658 while (*proto && (isSPACE(*proto) || *proto == ';')) 4659 proto++; 4660 if (*proto == '*') 4661 return 0; 4662 } 4663 } 4664 4665 if (*start == '$') { 4666 SSize_t start_off = start - SvPVX(PL_linestr); 4667 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY 4668 || isUPPER(*PL_tokenbuf)) 4669 return 0; 4670 /* this could be $# */ 4671 if (isSPACE(*s)) 4672 s = skipspace(s); 4673 PL_bufptr = SvPVX(PL_linestr) + start_off; 4674 PL_expect = XREF; 4675 return *s == '(' ? METHCALL : METHCALL0; 4676 } 4677 4678 s = scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE); 4679 /* start is the beginning of the possible filehandle/object, 4680 * and s is the end of it 4681 * tmpbuf is a copy of it (but with single quotes as double colons) 4682 */ 4683 4684 if (!keyword(tmpbuf, len, 0)) { 4685 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { 4686 len -= 2; 4687 tmpbuf[len] = '\0'; 4688 goto bare_package; 4689 } 4690 indirgv = gv_fetchpvn_flags(tmpbuf, len, 4691 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ), 4692 SVt_PVCV); 4693 if (indirgv && SvTYPE(indirgv) != SVt_NULL 4694 && (!isGV(indirgv) || GvCVu(indirgv))) 4695 return 0; 4696 /* filehandle or package name makes it a method */ 4697 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { 4698 s = skipspace(s); 4699 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') 4700 return 0; /* no assumptions -- "=>" quotes bareword */ 4701 bare_package: 4702 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, 4703 S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); 4704 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; 4705 PL_expect = XTERM; 4706 force_next(BAREWORD); 4707 PL_bufptr = s; 4708 return *s == '(' ? METHCALL : METHCALL0; 4709 } 4710 } 4711 return 0; 4712} 4713 4714/* Encoded script support. filter_add() effectively inserts a 4715 * 'pre-processing' function into the current source input stream. 4716 * Note that the filter function only applies to the current source file 4717 * (e.g., it will not affect files 'require'd or 'use'd by this one). 4718 * 4719 * The datasv parameter (which may be NULL) can be used to pass 4720 * private data to this instance of the filter. The filter function 4721 * can recover the SV using the FILTER_DATA macro and use it to 4722 * store private buffers and state information. 4723 * 4724 * The supplied datasv parameter is upgraded to a PVIO type 4725 * and the IoDIRP/IoANY field is used to store the function pointer, 4726 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such. 4727 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for 4728 * private use must be set using malloc'd pointers. 4729 */ 4730 4731SV * 4732Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) 4733{ 4734 if (!funcp) 4735 return NULL; 4736 4737 if (!PL_parser) 4738 return NULL; 4739 4740 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) 4741 Perl_croak(aTHX_ "Source filters apply only to byte streams"); 4742 4743 if (!PL_rsfp_filters) 4744 PL_rsfp_filters = newAV(); 4745 if (!datasv) 4746 datasv = newSV(0); 4747 SvUPGRADE(datasv, SVt_PVIO); 4748 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ 4749 IoFLAGS(datasv) |= IOf_FAKE_DIRP; 4750 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", 4751 FPTR2DPTR(void *, IoANY(datasv)), 4752 SvPV_nolen(datasv))); 4753 av_unshift(PL_rsfp_filters, 1); 4754 av_store(PL_rsfp_filters, 0, datasv) ; 4755 if ( 4756 !PL_parser->filtered 4757 && PL_parser->lex_flags & LEX_EVALBYTES 4758 && PL_bufptr < PL_bufend 4759 ) { 4760 const char *s = PL_bufptr; 4761 while (s < PL_bufend) { 4762 if (*s == '\n') { 4763 SV *linestr = PL_parser->linestr; 4764 char *buf = SvPVX(linestr); 4765 STRLEN const bufptr_pos = PL_parser->bufptr - buf; 4766 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; 4767 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; 4768 STRLEN const linestart_pos = PL_parser->linestart - buf; 4769 STRLEN const last_uni_pos = 4770 PL_parser->last_uni ? PL_parser->last_uni - buf : 0; 4771 STRLEN const last_lop_pos = 4772 PL_parser->last_lop ? PL_parser->last_lop - buf : 0; 4773 av_push(PL_rsfp_filters, linestr); 4774 PL_parser->linestr = 4775 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); 4776 buf = SvPVX(PL_parser->linestr); 4777 PL_parser->bufend = buf + SvCUR(PL_parser->linestr); 4778 PL_parser->bufptr = buf + bufptr_pos; 4779 PL_parser->oldbufptr = buf + oldbufptr_pos; 4780 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; 4781 PL_parser->linestart = buf + linestart_pos; 4782 if (PL_parser->last_uni) 4783 PL_parser->last_uni = buf + last_uni_pos; 4784 if (PL_parser->last_lop) 4785 PL_parser->last_lop = buf + last_lop_pos; 4786 SvLEN_set(linestr, SvCUR(linestr)); 4787 SvCUR_set(linestr, s - SvPVX(linestr)); 4788 PL_parser->filtered = 1; 4789 break; 4790 } 4791 s++; 4792 } 4793 } 4794 return(datasv); 4795} 4796 4797/* 4798=for apidoc_section $filters 4799=for apidoc filter_del 4800 4801Delete most recently added instance of the filter function argument 4802 4803=cut 4804*/ 4805 4806void 4807Perl_filter_del(pTHX_ filter_t funcp) 4808{ 4809 SV *datasv; 4810 4811 PERL_ARGS_ASSERT_FILTER_DEL; 4812 4813#ifdef DEBUGGING 4814 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", 4815 FPTR2DPTR(void*, funcp))); 4816#endif 4817 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) 4818 return; 4819 /* if filter is on top of stack (usual case) just pop it off */ 4820 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); 4821 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { 4822 SvREFCNT_dec(av_pop(PL_rsfp_filters)); 4823 4824 return; 4825 } 4826 /* we need to search for the correct entry and clear it */ 4827 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)"); 4828} 4829 4830 4831/* Invoke the idxth filter function for the current rsfp. */ 4832/* maxlen 0 = read one text line */ 4833I32 4834Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) 4835{ 4836 filter_t funcp; 4837 I32 ret; 4838 SV *datasv = NULL; 4839 /* This API is bad. It should have been using unsigned int for maxlen. 4840 Not sure if we want to change the API, but if not we should sanity 4841 check the value here. */ 4842 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen; 4843 4844 PERL_ARGS_ASSERT_FILTER_READ; 4845 4846 if (!PL_parser || !PL_rsfp_filters) 4847 return -1; 4848 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ 4849 /* Provide a default input filter to make life easy. */ 4850 /* Note that we append to the line. This is handy. */ 4851 DEBUG_P(PerlIO_printf(Perl_debug_log, 4852 "filter_read %d: from rsfp\n", idx)); 4853 if (correct_length) { 4854 /* Want a block */ 4855 int len ; 4856 const int old_len = SvCUR(buf_sv); 4857 4858 /* ensure buf_sv is large enough */ 4859 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; 4860 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, 4861 correct_length)) <= 0) { 4862 if (PerlIO_error(PL_rsfp)) 4863 return -1; /* error */ 4864 else 4865 return 0 ; /* end of file */ 4866 } 4867 SvCUR_set(buf_sv, old_len + len) ; 4868 SvPVX(buf_sv)[old_len + len] = '\0'; 4869 } else { 4870 /* Want a line */ 4871 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { 4872 if (PerlIO_error(PL_rsfp)) 4873 return -1; /* error */ 4874 else 4875 return 0 ; /* end of file */ 4876 } 4877 } 4878 return SvCUR(buf_sv); 4879 } 4880 /* Skip this filter slot if filter has been deleted */ 4881 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { 4882 DEBUG_P(PerlIO_printf(Perl_debug_log, 4883 "filter_read %d: skipped (filter deleted)\n", 4884 idx)); 4885 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ 4886 } 4887 if (SvTYPE(datasv) != SVt_PVIO) { 4888 if (correct_length) { 4889 /* Want a block */ 4890 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); 4891 if (!remainder) return 0; /* eof */ 4892 if (correct_length > remainder) correct_length = remainder; 4893 sv_catpvn(buf_sv, SvEND(datasv), correct_length); 4894 SvCUR_set(datasv, SvCUR(datasv) + correct_length); 4895 } else { 4896 /* Want a line */ 4897 const char *s = SvEND(datasv); 4898 const char *send = SvPVX(datasv) + SvLEN(datasv); 4899 while (s < send) { 4900 if (*s == '\n') { 4901 s++; 4902 break; 4903 } 4904 s++; 4905 } 4906 if (s == send) return 0; /* eof */ 4907 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); 4908 SvCUR_set(datasv, s-SvPVX(datasv)); 4909 } 4910 return SvCUR(buf_sv); 4911 } 4912 /* Get function pointer hidden within datasv */ 4913 funcp = DPTR2FPTR(filter_t, IoANY(datasv)); 4914 DEBUG_P(PerlIO_printf(Perl_debug_log, 4915 "filter_read %d: via function %p (%s)\n", 4916 idx, (void*)datasv, SvPV_nolen_const(datasv))); 4917 /* Call function. The function is expected to */ 4918 /* call "FILTER_READ(idx+1, buf_sv)" first. */ 4919 /* Return: <0:error, =0:eof, >0:not eof */ 4920 ENTER; 4921 save_scalar(PL_errgv); 4922 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length); 4923 LEAVE; 4924 return ret; 4925} 4926 4927STATIC char * 4928S_filter_gets(pTHX_ SV *sv, STRLEN append) 4929{ 4930 PERL_ARGS_ASSERT_FILTER_GETS; 4931 4932#ifdef PERL_CR_FILTER 4933 if (!PL_rsfp_filters) { 4934 filter_add(S_cr_textfilter,NULL); 4935 } 4936#endif 4937 if (PL_rsfp_filters) { 4938 if (!append) 4939 SvCUR_set(sv, 0); /* start with empty line */ 4940 if (FILTER_READ(0, sv, 0) > 0) 4941 return ( SvPVX(sv) ) ; 4942 else 4943 return NULL ; 4944 } 4945 else 4946 return (sv_gets(sv, PL_rsfp, append)); 4947} 4948 4949STATIC HV * 4950S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) 4951{ 4952 GV *gv; 4953 4954 PERL_ARGS_ASSERT_FIND_IN_MY_STASH; 4955 4956 if (memEQs(pkgname, len, "__PACKAGE__")) 4957 return PL_curstash; 4958 4959 if (len > 2 4960 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') 4961 && (gv = gv_fetchpvn_flags(pkgname, 4962 len, 4963 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV))) 4964 { 4965 return GvHV(gv); /* Foo:: */ 4966 } 4967 4968 /* use constant CLASS => 'MyClass' */ 4969 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV); 4970 if (gv && GvCV(gv)) { 4971 SV * const sv = cv_const_sv(GvCV(gv)); 4972 if (sv) 4973 return gv_stashsv(sv, 0); 4974 } 4975 4976 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); 4977} 4978 4979 4980STATIC char * 4981S_tokenize_use(pTHX_ int is_use, char *s) { 4982 PERL_ARGS_ASSERT_TOKENIZE_USE; 4983 4984 if (PL_expect != XSTATE) 4985 /* diag_listed_as: "use" not allowed in expression */ 4986 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", 4987 is_use ? "use" : "no")); 4988 PL_expect = XTERM; 4989 s = skipspace(s); 4990 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { 4991 s = force_version(s, TRUE); 4992 if (*s == ';' || *s == '}' 4993 || (s = skipspace(s), (*s == ';' || *s == '}'))) { 4994 NEXTVAL_NEXTTOKE.opval = NULL; 4995 force_next(BAREWORD); 4996 } 4997 else if (*s == 'v') { 4998 s = force_word(s,BAREWORD,FALSE,TRUE); 4999 s = force_version(s, FALSE); 5000 } 5001 } 5002 else { 5003 s = force_word(s,BAREWORD,FALSE,TRUE); 5004 s = force_version(s, FALSE); 5005 } 5006 pl_yylval.ival = is_use; 5007 return s; 5008} 5009#ifdef DEBUGGING 5010 static const char* const exp_name[] = 5011 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", 5012 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", 5013 "SIGVAR", "TERMORDORDOR" 5014 }; 5015#endif 5016 5017#define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l) 5018STATIC bool 5019S_word_takes_any_delimiter(char *p, STRLEN len) 5020{ 5021 return (len == 1 && memCHRs("msyq", p[0])) 5022 || (len == 2 5023 && ((p[0] == 't' && p[1] == 'r') 5024 || (p[0] == 'q' && memCHRs("qwxr", p[1])))); 5025} 5026 5027static void 5028S_check_scalar_slice(pTHX_ char *s) 5029{ 5030 s++; 5031 while (SPACE_OR_TAB(*s)) s++; 5032 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2, 5033 PL_bufend, 5034 UTF)) 5035 { 5036 return; 5037 } 5038 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) 5039 || (*s && memCHRs(" \t$#+-'\"", *s))) 5040 { 5041 s += UTF ? UTF8SKIP(s) : 1; 5042 } 5043 if (*s == '}' || *s == ']') 5044 pl_yylval.ival = OPpSLICEWARNING; 5045} 5046 5047#define lex_token_boundary() S_lex_token_boundary(aTHX) 5048static void 5049S_lex_token_boundary(pTHX) 5050{ 5051 PL_oldoldbufptr = PL_oldbufptr; 5052 PL_oldbufptr = PL_bufptr; 5053} 5054 5055#define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s) 5056static char * 5057S_vcs_conflict_marker(pTHX_ char *s) 5058{ 5059 lex_token_boundary(); 5060 PL_bufptr = s; 5061 yyerror("Version control conflict marker"); 5062 while (s < PL_bufend && *s != '\n') 5063 s++; 5064 return s; 5065} 5066 5067static int 5068yyl_sigvar(pTHX_ char *s) 5069{ 5070 /* we expect the sigil and optional var name part of a 5071 * signature element here. Since a '$' is not necessarily 5072 * followed by a var name, handle it specially here; the general 5073 * yylex code would otherwise try to interpret whatever follows 5074 * as a var; e.g. ($, ...) would be seen as the var '$,' 5075 */ 5076 5077 U8 sigil; 5078 5079 s = skipspace(s); 5080 sigil = *s++; 5081 PL_bufptr = s; /* for error reporting */ 5082 switch (sigil) { 5083 case '$': 5084 case '@': 5085 case '%': 5086 /* spot stuff that looks like an prototype */ 5087 if (memCHRs("$:@%&*;\\[]", *s)) { 5088 yyerror("Illegal character following sigil in a subroutine signature"); 5089 break; 5090 } 5091 /* '$#' is banned, while '$ # comment' isn't */ 5092 if (*s == '#') { 5093 yyerror("'#' not allowed immediately following a sigil in a subroutine signature"); 5094 break; 5095 } 5096 s = skipspace(s); 5097 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5098 char *dest = PL_tokenbuf + 1; 5099 /* read var name, including sigil, into PL_tokenbuf */ 5100 PL_tokenbuf[0] = sigil; 5101 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1, 5102 0, cBOOL(UTF), FALSE, FALSE); 5103 *dest = '\0'; 5104 assert(PL_tokenbuf[1]); /* we have a variable name */ 5105 } 5106 else { 5107 *PL_tokenbuf = 0; 5108 PL_in_my = 0; 5109 } 5110 5111 s = skipspace(s); 5112 /* parse the = for the default ourselves to avoid '+=' etc being accepted here 5113 * as the ASSIGNOP, and exclude other tokens that start with = 5114 */ 5115 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) { 5116 /* save now to report with the same context as we did when 5117 * all ASSIGNOPS were accepted */ 5118 PL_oldbufptr = s; 5119 5120 ++s; 5121 NEXTVAL_NEXTTOKE.ival = OP_SASSIGN; 5122 force_next(ASSIGNOP); 5123 PL_expect = XTERM; 5124 } 5125 else if(*s == '/' && s[1] == '/' && s[2] == '=') { 5126 PL_oldbufptr = s; 5127 5128 s += 3; 5129 NEXTVAL_NEXTTOKE.ival = OP_DORASSIGN; 5130 force_next(ASSIGNOP); 5131 PL_expect = XTERM; 5132 } 5133 else if(*s == '|' && s[1] == '|' && s[2] == '=') { 5134 PL_oldbufptr = s; 5135 5136 s += 3; 5137 NEXTVAL_NEXTTOKE.ival = OP_ORASSIGN; 5138 force_next(ASSIGNOP); 5139 PL_expect = XTERM; 5140 } 5141 else if (*s == ',' || *s == ')') { 5142 PL_expect = XOPERATOR; 5143 } 5144 else { 5145 /* make sure the context shows the unexpected character and 5146 * hopefully a bit more */ 5147 if (*s) ++s; 5148 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')') 5149 s++; 5150 PL_bufptr = s; /* for error reporting */ 5151 yyerror("Illegal operator following parameter in a subroutine signature"); 5152 PL_in_my = 0; 5153 } 5154 if (*PL_tokenbuf) { 5155 NEXTVAL_NEXTTOKE.ival = sigil; 5156 force_next('p'); /* force a signature pending identifier */ 5157 } 5158 break; 5159 5160 case ')': 5161 PL_expect = XBLOCK; 5162 break; 5163 case ',': /* handle ($a,,$b) */ 5164 break; 5165 5166 default: 5167 PL_in_my = 0; 5168 yyerror("A signature parameter must start with '$', '@' or '%'"); 5169 /* very crude error recovery: skip to likely next signature 5170 * element */ 5171 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')') 5172 s++; 5173 break; 5174 } 5175 5176 switch (sigil) { 5177 case ',': TOKEN (PERLY_COMMA); 5178 case '$': TOKEN (PERLY_DOLLAR); 5179 case '@': TOKEN (PERLY_SNAIL); 5180 case '%': TOKEN (PERLY_PERCENT_SIGN); 5181 case ')': TOKEN (PERLY_PAREN_CLOSE); 5182 default: TOKEN (sigil); 5183 } 5184} 5185 5186static int 5187yyl_dollar(pTHX_ char *s) 5188{ 5189 CLINE; 5190 5191 if (PL_expect == XPOSTDEREF) { 5192 if (s[1] == '#') { 5193 s++; 5194 POSTDEREF(DOLSHARP); 5195 } 5196 POSTDEREF(PERLY_DOLLAR); 5197 } 5198 5199 if ( s[1] == '#' 5200 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF) 5201 || memCHRs("{$:+-@", s[2]))) 5202 { 5203 PL_tokenbuf[0] = '@'; 5204 s = scan_ident(s + 1, PL_tokenbuf + 1, 5205 sizeof PL_tokenbuf - 1, FALSE); 5206 if (PL_expect == XOPERATOR) { 5207 char *d = s; 5208 if (PL_bufptr > s) { 5209 d = PL_bufptr-1; 5210 PL_bufptr = PL_oldbufptr; 5211 } 5212 no_op("Array length", d); 5213 } 5214 if (!PL_tokenbuf[1]) 5215 PREREF(DOLSHARP); 5216 PL_expect = XOPERATOR; 5217 force_ident_maybe_lex('#'); 5218 TOKEN(DOLSHARP); 5219 } 5220 5221 PL_tokenbuf[0] = '$'; 5222 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 5223 if (PL_expect == XOPERATOR) { 5224 char *d = s; 5225 if (PL_bufptr > s) { 5226 d = PL_bufptr-1; 5227 PL_bufptr = PL_oldbufptr; 5228 } 5229 no_op("Scalar", d); 5230 } 5231 if (!PL_tokenbuf[1]) { 5232 if (s == PL_bufend) 5233 yyerror("Final $ should be \\$ or $name"); 5234 PREREF(PERLY_DOLLAR); 5235 } 5236 5237 { 5238 const char tmp = *s; 5239 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 5240 s = skipspace(s); 5241 5242 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 5243 && intuit_more(s, PL_bufend)) { 5244 if (*s == '[') { 5245 PL_tokenbuf[0] = '@'; 5246 if (ckWARN(WARN_SYNTAX)) { 5247 char *t = s+1; 5248 5249 while ( t < PL_bufend ) { 5250 if (isSPACE(*t)) { 5251 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t)); 5252 /* consumed one or more space chars */ 5253 } else if (*t == '$' || *t == '@') { 5254 /* could be more than one '$' like $$ref or @$ref */ 5255 do { t++; } while (t < PL_bufend && *t == '$'); 5256 5257 /* could be an abigail style identifier like $ foo */ 5258 while (t < PL_bufend && *t == ' ') t++; 5259 5260 /* strip off the name of the var */ 5261 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 5262 t += UTF ? UTF8SKIP(t) : 1; 5263 /* consumed a varname */ 5264 } else if (isDIGIT(*t)) { 5265 /* deal with hex constants like 0x11 */ 5266 if (t[0] == '0' && t[1] == 'x') { 5267 t += 2; 5268 while (t < PL_bufend && isXDIGIT(*t)) t++; 5269 } else { 5270 /* deal with decimal/octal constants like 1 and 0123 */ 5271 do { t++; } while (isDIGIT(*t)); 5272 if (t<PL_bufend && *t == '.') { 5273 do { t++; } while (isDIGIT(*t)); 5274 } 5275 } 5276 /* consumed a number */ 5277 } else { 5278 /* not a var nor a space nor a number */ 5279 break; 5280 } 5281 } 5282 if (t < PL_bufend && *t++ == ',') { 5283 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */ 5284 while (t < PL_bufend && *t != ']') 5285 t++; 5286 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5287 "Multidimensional syntax %" UTF8f " not supported", 5288 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr)); 5289 } 5290 } 5291 } 5292 else if (*s == '{') { 5293 char *t; 5294 PL_tokenbuf[0] = '%'; 5295 if ( strEQ(PL_tokenbuf+1, "SIG") 5296 && ckWARN(WARN_SYNTAX) 5297 && (t = (char *) memchr(s, '}', PL_bufend - s)) 5298 && (t = (char *) memchr(t, '=', PL_bufend - t))) 5299 { 5300 char tmpbuf[sizeof PL_tokenbuf]; 5301 do { 5302 t++; 5303 } while (isSPACE(*t)); 5304 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) { 5305 STRLEN len; 5306 t = scan_word6(t, tmpbuf, sizeof tmpbuf, TRUE, 5307 &len, TRUE); 5308 while (isSPACE(*t)) 5309 t++; 5310 if ( *t == ';' 5311 && get_cvn_flags(tmpbuf, len, UTF 5312 ? SVf_UTF8 5313 : 0)) 5314 { 5315 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 5316 "You need to quote \"%" UTF8f "\"", 5317 UTF8fARG(UTF, len, tmpbuf)); 5318 } 5319 } 5320 } 5321 } 5322 } 5323 5324 PL_expect = XOPERATOR; 5325 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) { 5326 const bool islop = (PL_last_lop == PL_oldoldbufptr); 5327 if (!islop || PL_last_lop_op == OP_GREPSTART) 5328 PL_expect = XOPERATOR; 5329 else if (memCHRs("$@\"'`q", *s)) 5330 PL_expect = XTERM; /* e.g. print $fh "foo" */ 5331 else if ( memCHRs("&*<%", *s) 5332 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF)) 5333 { 5334 PL_expect = XTERM; /* e.g. print $fh &sub */ 5335 } 5336 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5337 char tmpbuf[sizeof PL_tokenbuf]; 5338 int t2; 5339 STRLEN len; 5340 scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE); 5341 if ((t2 = keyword(tmpbuf, len, 0))) { 5342 /* binary operators exclude handle interpretations */ 5343 switch (t2) { 5344 case -KEY_x: 5345 case -KEY_eq: 5346 case -KEY_ne: 5347 case -KEY_gt: 5348 case -KEY_lt: 5349 case -KEY_ge: 5350 case -KEY_le: 5351 case -KEY_cmp: 5352 break; 5353 default: 5354 PL_expect = XTERM; /* e.g. print $fh length() */ 5355 break; 5356 } 5357 } 5358 else { 5359 PL_expect = XTERM; /* e.g. print $fh subr() */ 5360 } 5361 } 5362 else if (isDIGIT(*s)) 5363 PL_expect = XTERM; /* e.g. print $fh 3 */ 5364 else if (*s == '.' && isDIGIT(s[1])) 5365 PL_expect = XTERM; /* e.g. print $fh .3 */ 5366 else if ((*s == '?' || *s == '-' || *s == '+') 5367 && !isSPACE(s[1]) && s[1] != '=') 5368 PL_expect = XTERM; /* e.g. print $fh -1 */ 5369 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' 5370 && s[1] != '/') 5371 PL_expect = XTERM; /* e.g. print $fh /.../ 5372 XXX except DORDOR operator 5373 */ 5374 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) 5375 && s[2] != '=') 5376 PL_expect = XTERM; /* print $fh <<"EOF" */ 5377 } 5378 } 5379 force_ident_maybe_lex('$'); 5380 TOKEN(PERLY_DOLLAR); 5381} 5382 5383static int 5384yyl_sub(pTHX_ char *s, const int key) 5385{ 5386 char * const tmpbuf = PL_tokenbuf + 1; 5387 bool have_name, have_proto; 5388 STRLEN len; 5389 SV *format_name = NULL; 5390 bool is_method = (key == KEY_method); 5391 5392 /* method always implies signatures */ 5393 bool is_sigsub = is_method || FEATURE_SIGNATURES_IS_ENABLED; 5394 5395 SSize_t off = s-SvPVX(PL_linestr); 5396 char *d; 5397 5398 s = skipspace(s); /* can move PL_linestr */ 5399 5400 d = SvPVX(PL_linestr)+off; 5401 5402 SAVEBOOL(PL_parser->sig_seen); 5403 PL_parser->sig_seen = FALSE; 5404 5405 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 5406 || *s == '\'' 5407 || (*s == ':' && s[1] == ':')) 5408 { 5409 5410 PL_expect = XATTRBLOCK; 5411 d = scan_word6(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, 5412 &len, TRUE); 5413 if (key == KEY_format) 5414 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); 5415 *PL_tokenbuf = '&'; 5416 if (memchr(tmpbuf, ':', len) || key != KEY_sub 5417 || pad_findmy_pvn( 5418 PL_tokenbuf, len + 1, 0 5419 ) != NOT_IN_PAD) 5420 sv_setpvn(PL_subname, tmpbuf, len); 5421 else { 5422 sv_setsv(PL_subname,PL_curstname); 5423 sv_catpvs(PL_subname,"::"); 5424 sv_catpvn(PL_subname,tmpbuf,len); 5425 } 5426 if (SvUTF8(PL_linestr)) 5427 SvUTF8_on(PL_subname); 5428 have_name = TRUE; 5429 5430 s = skipspace(d); 5431 } 5432 else { 5433 if (key == KEY_my || key == KEY_our || key==KEY_state) { 5434 *d = '\0'; 5435 /* diag_listed_as: Missing name in "%s sub" */ 5436 Perl_croak(aTHX_ 5437 "Missing name in \"%s\"", PL_bufptr); 5438 } 5439 PL_expect = XATTRTERM; 5440 sv_setpvs(PL_subname,"?"); 5441 have_name = FALSE; 5442 } 5443 5444 if (key == KEY_format) { 5445 if (format_name) { 5446 NEXTVAL_NEXTTOKE.opval 5447 = newSVOP(OP_CONST,0, format_name); 5448 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; 5449 force_next(BAREWORD); 5450 } 5451 PREBLOCK(KW_FORMAT); 5452 } 5453 5454 /* Look for a prototype */ 5455 if (*s == '(' && !is_sigsub) { 5456 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 5457 if (!s) 5458 Perl_croak(aTHX_ "Prototype not terminated"); 5459 COPLINE_SET_FROM_MULTI_END; 5460 (void)validate_proto(PL_subname, PL_lex_stuff, 5461 ckWARN(WARN_ILLEGALPROTO), 0); 5462 have_proto = TRUE; 5463 5464 s = skipspace(s); 5465 } 5466 else 5467 have_proto = FALSE; 5468 5469 if ( !(*s == ':' && s[1] != ':') 5470 && (*s != '{' && *s != '(') && key != KEY_format) 5471 { 5472 assert(key == KEY_sub || key == KEY_method || 5473 key == KEY_AUTOLOAD || key == KEY_DESTROY || 5474 key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK || 5475 key == KEY_INIT || key == KEY_END || 5476 key == KEY_my || key == KEY_state || 5477 key == KEY_our); 5478 if (!have_name) 5479 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); 5480 else if (*s != ';' && *s != '}') 5481 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname)); 5482 } 5483 5484 if (have_proto) { 5485 NEXTVAL_NEXTTOKE.opval = 5486 newSVOP(OP_CONST, 0, PL_lex_stuff); 5487 PL_lex_stuff = NULL; 5488 force_next(THING); 5489 } 5490 5491 if (!have_name) { 5492 if (PL_curstash) 5493 sv_setpvs(PL_subname, "__ANON__"); 5494 else 5495 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 5496 if (is_method) 5497 TOKEN(KW_METHOD_anon); 5498 else if (is_sigsub) 5499 TOKEN(KW_SUB_anon_sig); 5500 else 5501 TOKEN(KW_SUB_anon); 5502 } 5503 force_ident_maybe_lex('&'); 5504 if (is_method) 5505 TOKEN(KW_METHOD_named); 5506 else if (is_sigsub) 5507 TOKEN(KW_SUB_named_sig); 5508 else 5509 TOKEN(KW_SUB_named); 5510} 5511 5512static int 5513yyl_interpcasemod(pTHX_ char *s) 5514{ 5515#ifdef DEBUGGING 5516 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') 5517 Perl_croak(aTHX_ 5518 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u", 5519 PL_bufptr, PL_bufend, *PL_bufptr); 5520#endif 5521 5522 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { 5523 /* if at a \E */ 5524 if (PL_lex_casemods) { 5525 const char oldmod = PL_lex_casestack[--PL_lex_casemods]; 5526 PL_lex_casestack[PL_lex_casemods] = '\0'; 5527 5528 if (PL_bufptr != PL_bufend 5529 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' 5530 || oldmod == 'F')) { 5531 PL_bufptr += 2; 5532 PL_lex_state = LEX_INTERPCONCAT; 5533 } 5534 PL_lex_allbrackets--; 5535 return REPORT(PERLY_PAREN_CLOSE); 5536 } 5537 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) { 5538 /* Got an unpaired \E */ 5539 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 5540 "Useless use of \\E"); 5541 } 5542 if (PL_bufptr != PL_bufend) 5543 PL_bufptr += 2; 5544 PL_lex_state = LEX_INTERPCONCAT; 5545 return yylex(); 5546 } 5547 else { 5548 DEBUG_T({ 5549 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n"); 5550 }); 5551 s = PL_bufptr + 1; 5552 if (s[1] == '\\' && s[2] == 'E') { 5553 PL_bufptr = s + 3; 5554 PL_lex_state = LEX_INTERPCONCAT; 5555 return yylex(); 5556 } 5557 else { 5558 I32 tmp; 5559 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u") 5560 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l")) 5561 { 5562 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ 5563 } 5564 if ((*s == 'L' || *s == 'U' || *s == 'F') 5565 && (strpbrk(PL_lex_casestack, "LUF"))) 5566 { 5567 PL_lex_casestack[--PL_lex_casemods] = '\0'; 5568 PL_lex_allbrackets--; 5569 return REPORT(PERLY_PAREN_CLOSE); 5570 } 5571 if (PL_lex_casemods > 10) 5572 Renew(PL_lex_casestack, PL_lex_casemods + 2, char); 5573 PL_lex_casestack[PL_lex_casemods++] = *s; 5574 PL_lex_casestack[PL_lex_casemods] = '\0'; 5575 PL_lex_state = LEX_INTERPCONCAT; 5576 NEXTVAL_NEXTTOKE.ival = 0; 5577 force_next((2<<24)|PERLY_PAREN_OPEN); 5578 if (*s == 'l') 5579 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; 5580 else if (*s == 'u') 5581 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST; 5582 else if (*s == 'L') 5583 NEXTVAL_NEXTTOKE.ival = OP_LC; 5584 else if (*s == 'U') 5585 NEXTVAL_NEXTTOKE.ival = OP_UC; 5586 else if (*s == 'Q') 5587 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; 5588 else if (*s == 'F') 5589 NEXTVAL_NEXTTOKE.ival = OP_FC; 5590 else 5591 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); 5592 PL_bufptr = s + 1; 5593 } 5594 force_next(FUNC); 5595 if (PL_lex_starts) { 5596 s = PL_bufptr; 5597 PL_lex_starts = 0; 5598 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 5599 if (PL_lex_casemods == 1 && PL_lex_inpat) 5600 TOKEN(PERLY_COMMA); 5601 else 5602 AopNOASSIGN(OP_CONCAT); 5603 } 5604 else 5605 return yylex(); 5606 } 5607} 5608 5609static int 5610yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword, 5611 GV **pgv, GV ***pgvp) 5612{ 5613 GV *ogv = NULL; /* override (winner) */ 5614 GV *hgv = NULL; /* hidden (loser) */ 5615 GV *gv = *pgv; 5616 5617 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) { 5618 CV *cv; 5619 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 5620 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL, 5621 SVt_PVCV)) 5622 && (cv = GvCVu(gv))) 5623 { 5624 if (GvIMPORTED_CV(gv)) 5625 ogv = gv; 5626 else if (! CvNOWARN_AMBIGUOUS(cv)) 5627 hgv = gv; 5628 } 5629 if (!ogv 5630 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE)) 5631 && (gv = **pgvp) 5632 && (isGV_with_GP(gv) 5633 ? GvCVu(gv) && GvIMPORTED_CV(gv) 5634 : SvPCS_IMPORTED(gv) 5635 && (gv_init(gv, PL_globalstash, PL_tokenbuf, 5636 len, 0), 1))) 5637 { 5638 ogv = gv; 5639 } 5640 } 5641 5642 *pgv = gv; 5643 5644 if (ogv) { 5645 *orig_keyword = key; 5646 return 0; /* overridden by import or by GLOBAL */ 5647 } 5648 else if (gv && !*pgvp 5649 && -key==KEY_lock /* XXX generalizable kludge */ 5650 && GvCVu(gv)) 5651 { 5652 return 0; /* any sub overrides "weak" keyword */ 5653 } 5654 else { /* no override */ 5655 key = -key; 5656 if (key == KEY_dump) { 5657 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30"); 5658 } 5659 *pgv = NULL; 5660 *pgvp = 0; 5661 if (hgv && key != KEY_x) /* never ambiguous */ 5662 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 5663 "Ambiguous call resolved as CORE::%s(), " 5664 "qualify as such or use &", 5665 GvENAME(hgv)); 5666 return key; 5667 } 5668} 5669 5670static int 5671yyl_qw(pTHX_ char *s, STRLEN len) 5672{ 5673 OP *words = NULL; 5674 5675 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 5676 if (!s) 5677 missingterm(NULL, 0); 5678 5679 COPLINE_SET_FROM_MULTI_END; 5680 PL_expect = XOPERATOR; 5681 if (SvCUR(PL_lex_stuff)) { 5682 int warned_comma = !ckWARN(WARN_QW); 5683 int warned_comment = warned_comma; 5684 char *d = SvPV_force(PL_lex_stuff, len); 5685 while (len) { 5686 for (; isSPACE(*d) && len; --len, ++d) 5687 /**/; 5688 if (len) { 5689 SV *sv; 5690 const char *b = d; 5691 if (!warned_comma || !warned_comment) { 5692 for (; !isSPACE(*d) && len; --len, ++d) { 5693 if (!warned_comma && *d == ',') { 5694 Perl_warner(aTHX_ packWARN(WARN_QW), 5695 "Possible attempt to separate words with commas"); 5696 ++warned_comma; 5697 } 5698 else if (!warned_comment && *d == '#') { 5699 Perl_warner(aTHX_ packWARN(WARN_QW), 5700 "Possible attempt to put comments in qw() list"); 5701 ++warned_comment; 5702 } 5703 } 5704 } 5705 else { 5706 for (; !isSPACE(*d) && len; --len, ++d) 5707 /**/; 5708 } 5709 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); 5710 words = op_append_elem(OP_LIST, words, 5711 newSVOP(OP_CONST, 0, tokeq(sv))); 5712 } 5713 } 5714 } 5715 if (!words) 5716 words = newNULLLIST(); 5717 SvREFCNT_dec_NN(PL_lex_stuff); 5718 PL_lex_stuff = NULL; 5719 PL_expect = XOPERATOR; 5720 pl_yylval.opval = sawparens(words); 5721 TOKEN(QWLIST); 5722} 5723 5724static int 5725yyl_hyphen(pTHX_ char *s) 5726{ 5727 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) { 5728 I32 ftst = 0; 5729 char tmp; 5730 5731 s++; 5732 PL_bufptr = s; 5733 tmp = *s++; 5734 5735 while (s < PL_bufend && SPACE_OR_TAB(*s)) 5736 s++; 5737 5738 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) { 5739 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE); 5740 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); 5741 OPERATOR(PERLY_MINUS); /* unary minus */ 5742 } 5743 switch (tmp) { 5744 case 'r': ftst = OP_FTEREAD; break; 5745 case 'w': ftst = OP_FTEWRITE; break; 5746 case 'x': ftst = OP_FTEEXEC; break; 5747 case 'o': ftst = OP_FTEOWNED; break; 5748 case 'R': ftst = OP_FTRREAD; break; 5749 case 'W': ftst = OP_FTRWRITE; break; 5750 case 'X': ftst = OP_FTREXEC; break; 5751 case 'O': ftst = OP_FTROWNED; break; 5752 case 'e': ftst = OP_FTIS; break; 5753 case 'z': ftst = OP_FTZERO; break; 5754 case 's': ftst = OP_FTSIZE; break; 5755 case 'f': ftst = OP_FTFILE; break; 5756 case 'd': ftst = OP_FTDIR; break; 5757 case 'l': ftst = OP_FTLINK; break; 5758 case 'p': ftst = OP_FTPIPE; break; 5759 case 'S': ftst = OP_FTSOCK; break; 5760 case 'u': ftst = OP_FTSUID; break; 5761 case 'g': ftst = OP_FTSGID; break; 5762 case 'k': ftst = OP_FTSVTX; break; 5763 case 'b': ftst = OP_FTBLK; break; 5764 case 'c': ftst = OP_FTCHR; break; 5765 case 't': ftst = OP_FTTTY; break; 5766 case 'T': ftst = OP_FTTEXT; break; 5767 case 'B': ftst = OP_FTBINARY; break; 5768 case 'M': case 'A': case 'C': 5769 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV); 5770 switch (tmp) { 5771 case 'M': ftst = OP_FTMTIME; break; 5772 case 'A': ftst = OP_FTATIME; break; 5773 case 'C': ftst = OP_FTCTIME; break; 5774 default: break; 5775 } 5776 break; 5777 default: 5778 break; 5779 } 5780 if (ftst) { 5781 PL_last_uni = PL_oldbufptr; 5782 PL_last_lop_op = (OPCODE)ftst; 5783 DEBUG_T( { 5784 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp); 5785 } ); 5786 FTST(ftst); 5787 } 5788 else { 5789 /* Assume it was a minus followed by a one-letter named 5790 * subroutine call (or a -bareword), then. */ 5791 DEBUG_T( { 5792 PerlIO_printf(Perl_debug_log, 5793 "### '-%c' looked like a file test but was not\n", 5794 (int) tmp); 5795 } ); 5796 s = --PL_bufptr; 5797 } 5798 } 5799 { 5800 const char tmp = *s++; 5801 if (*s == tmp) { 5802 s++; 5803 if (PL_expect == XOPERATOR) 5804 TERM(POSTDEC); 5805 else 5806 OPERATOR(PREDEC); 5807 } 5808 else if (*s == '>') { 5809 s++; 5810 s = skipspace(s); 5811 if (((*s == '$' || *s == '&') && s[1] == '*') 5812 ||(*s == '$' && s[1] == '#' && s[2] == '*') 5813 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1])) 5814 ||(*s == '*' && (s[1] == '*' || s[1] == '{')) 5815 ) 5816 { 5817 PL_expect = XPOSTDEREF; 5818 TOKEN(ARROW); 5819 } 5820 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5821 s = force_word(s,METHCALL0,FALSE,TRUE); 5822 TOKEN(ARROW); 5823 } 5824 else if (*s == '$') 5825 OPERATOR(ARROW); 5826 else 5827 TERM(ARROW); 5828 } 5829 if (PL_expect == XOPERATOR) { 5830 if (*s == '=' 5831 && !PL_lex_allbrackets 5832 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5833 { 5834 s--; 5835 TOKEN(0); 5836 } 5837 Aop(OP_SUBTRACT); 5838 } 5839 else { 5840 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5841 check_uni(); 5842 OPERATOR(PERLY_MINUS); /* unary minus */ 5843 } 5844 } 5845} 5846 5847static int 5848yyl_plus(pTHX_ char *s) 5849{ 5850 const char tmp = *s++; 5851 if (*s == tmp) { 5852 s++; 5853 if (PL_expect == XOPERATOR) 5854 TERM(POSTINC); 5855 else 5856 OPERATOR(PREINC); 5857 } 5858 if (PL_expect == XOPERATOR) { 5859 if (*s == '=' 5860 && !PL_lex_allbrackets 5861 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5862 { 5863 s--; 5864 TOKEN(0); 5865 } 5866 Aop(OP_ADD); 5867 } 5868 else { 5869 if (isSPACE(*s) || !isSPACE(*PL_bufptr)) 5870 check_uni(); 5871 OPERATOR(PERLY_PLUS); 5872 } 5873} 5874 5875static int 5876yyl_star(pTHX_ char *s) 5877{ 5878 if (PL_expect == XPOSTDEREF) 5879 POSTDEREF(PERLY_STAR); 5880 5881 if (PL_expect != XOPERATOR) { 5882 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); 5883 PL_expect = XOPERATOR; 5884 force_ident(PL_tokenbuf, PERLY_STAR); 5885 if (!*PL_tokenbuf) 5886 PREREF(PERLY_STAR); 5887 TERM(PERLY_STAR); 5888 } 5889 5890 s++; 5891 if (*s == '*') { 5892 s++; 5893 if (*s == '=' && !PL_lex_allbrackets 5894 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5895 { 5896 s -= 2; 5897 TOKEN(0); 5898 } 5899 PWop(OP_POW); 5900 } 5901 5902 if (*s == '=' 5903 && !PL_lex_allbrackets 5904 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5905 { 5906 s--; 5907 TOKEN(0); 5908 } 5909 5910 Mop(OP_MULTIPLY); 5911} 5912 5913static int 5914yyl_percent(pTHX_ char *s) 5915{ 5916 if (PL_expect == XOPERATOR) { 5917 if (s[1] == '=' 5918 && !PL_lex_allbrackets 5919 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 5920 { 5921 TOKEN(0); 5922 } 5923 ++s; 5924 Mop(OP_MODULO); 5925 } 5926 else if (PL_expect == XPOSTDEREF) 5927 POSTDEREF(PERLY_PERCENT_SIGN); 5928 5929 PL_tokenbuf[0] = '%'; 5930 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 5931 pl_yylval.ival = 0; 5932 if (!PL_tokenbuf[1]) { 5933 PREREF(PERLY_PERCENT_SIGN); 5934 } 5935 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 5936 && intuit_more(s, PL_bufend)) { 5937 if (*s == '[') 5938 PL_tokenbuf[0] = '@'; 5939 } 5940 PL_expect = XOPERATOR; 5941 force_ident_maybe_lex('%'); 5942 TERM(PERLY_PERCENT_SIGN); 5943} 5944 5945static int 5946yyl_caret(pTHX_ char *s) 5947{ 5948 char *d = s; 5949 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED); 5950 if (bof && s[1] == '.') 5951 s++; 5952 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 5953 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) 5954 { 5955 s = d; 5956 TOKEN(0); 5957 } 5958 s++; 5959 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR); 5960} 5961 5962static int 5963yyl_colon(pTHX_ char *s) 5964{ 5965 OP *attrs; 5966 5967 switch (PL_expect) { 5968 case XOPERATOR: 5969 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets)) 5970 break; 5971 PL_bufptr = s; /* update in case we back off */ 5972 if (*s == '=') { 5973 Perl_croak(aTHX_ 5974 "Use of := for an empty attribute list is not allowed"); 5975 } 5976 goto grabattrs; 5977 case XATTRBLOCK: 5978 PL_expect = XBLOCK; 5979 goto grabattrs; 5980 case XATTRTERM: 5981 PL_expect = XTERMBLOCK; 5982 grabattrs: 5983 /* NB: as well as parsing normal attributes, we also end up 5984 * here if there is something looking like attributes 5985 * following a signature (which is illegal, but used to be 5986 * legal in 5.20..5.26). If the latter, we still parse the 5987 * attributes so that error messages(s) are less confusing, 5988 * but ignore them (parser->sig_seen). 5989 */ 5990 s = skipspace(s); 5991 attrs = NULL; 5992 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 5993 I32 tmp; 5994 SV *sv; 5995 STRLEN len; 5996 char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE); 5997 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) { 5998 if (tmp < 0) tmp = -tmp; 5999 switch (tmp) { 6000 case KEY_or: 6001 case KEY_and: 6002 case KEY_for: 6003 case KEY_foreach: 6004 case KEY_unless: 6005 case KEY_if: 6006 case KEY_while: 6007 case KEY_until: 6008 goto got_attrs; 6009 default: 6010 break; 6011 } 6012 } 6013 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); 6014 if (*d == '(') { 6015 d = scan_str(d,TRUE,TRUE,FALSE,NULL); 6016 if (!d) { 6017 if (attrs) 6018 op_free(attrs); 6019 ASSUME(sv && SvREFCNT(sv) == 1); 6020 SvREFCNT_dec(sv); 6021 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list"); 6022 } 6023 COPLINE_SET_FROM_MULTI_END; 6024 } 6025 if (PL_lex_stuff) { 6026 sv_catsv(sv, PL_lex_stuff); 6027 attrs = op_append_elem(OP_LIST, attrs, 6028 newSVOP(OP_CONST, 0, sv)); 6029 SvREFCNT_dec_NN(PL_lex_stuff); 6030 PL_lex_stuff = NULL; 6031 } 6032 else { 6033 attrs = op_append_elem(OP_LIST, attrs, 6034 newSVOP(OP_CONST, 0, sv)); 6035 } 6036 s = skipspace(d); 6037 if (*s == ':' && s[1] != ':') 6038 s = skipspace(s+1); 6039 else if (s == d) 6040 break; /* require real whitespace or :'s */ 6041 /* XXX losing whitespace on sequential attributes here */ 6042 } 6043 6044 if (*s != ';' 6045 && *s != '}' 6046 && !(PL_expect == XOPERATOR 6047 /* if an operator is expected, permit =, //= and ||= or ) to end */ 6048 ? (*s == '=' || *s == ')' || *s == '/' || *s == '|') 6049 : (*s == '{' || *s == '('))) 6050 { 6051 const char q = ((*s == '\'') ? '"' : '\''); 6052 /* If here for an expression, and parsed no attrs, back off. */ 6053 if (PL_expect == XOPERATOR && !attrs) { 6054 s = PL_bufptr; 6055 break; 6056 } 6057 /* MUST advance bufptr here to avoid bogus "at end of line" 6058 context messages from yyerror(). 6059 */ 6060 PL_bufptr = s; 6061 yyerror( (const char *) 6062 (*s 6063 ? Perl_form(aTHX_ "Invalid separator character " 6064 "%c%c%c in attribute list", q, *s, q) 6065 : "Unterminated attribute list" ) ); 6066 if (attrs) 6067 op_free(attrs); 6068 OPERATOR(PERLY_COLON); 6069 } 6070 6071 got_attrs: 6072 if (PL_parser->sig_seen) { 6073 /* see comment about about sig_seen and parser error 6074 * handling */ 6075 if (attrs) 6076 op_free(attrs); 6077 Perl_croak(aTHX_ "Subroutine attributes must come " 6078 "before the signature"); 6079 } 6080 if (attrs) { 6081 NEXTVAL_NEXTTOKE.opval = attrs; 6082 force_next(THING); 6083 } 6084 TOKEN(COLONATTR); 6085 } 6086 6087 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) { 6088 s--; 6089 TOKEN(0); 6090 } 6091 6092 PL_lex_allbrackets--; 6093 OPERATOR(PERLY_COLON); 6094} 6095 6096static int 6097yyl_subproto(pTHX_ char *s, CV *cv) 6098{ 6099 STRLEN protolen = CvPROTOLEN(cv); 6100 const char *proto = CvPROTO(cv); 6101 bool optional; 6102 6103 proto = S_strip_spaces(aTHX_ proto, &protolen); 6104 if (!protolen) 6105 TERM(FUNC0SUB); 6106 if ((optional = *proto == ';')) { 6107 do { 6108 proto++; 6109 } while (*proto == ';'); 6110 } 6111 6112 if ( 6113 ( 6114 ( 6115 *proto == '$' || *proto == '_' 6116 || *proto == '*' || *proto == '+' 6117 ) 6118 && proto[1] == '\0' 6119 ) 6120 || ( 6121 *proto == '\\' && proto[1] && proto[2] == '\0' 6122 ) 6123 ) { 6124 UNIPROTO(UNIOPSUB,optional); 6125 } 6126 6127 if (*proto == '\\' && proto[1] == '[') { 6128 const char *p = proto + 2; 6129 while(*p && *p != ']') 6130 ++p; 6131 if(*p == ']' && !p[1]) 6132 UNIPROTO(UNIOPSUB,optional); 6133 } 6134 6135 if (*proto == '&' && *s == '{') { 6136 if (PL_curstash) 6137 sv_setpvs(PL_subname, "__ANON__"); 6138 else 6139 sv_setpvs(PL_subname, "__ANON__::__ANON__"); 6140 if (!PL_lex_allbrackets 6141 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 6142 { 6143 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 6144 } 6145 PREBLOCK(LSTOPSUB); 6146 } 6147 6148 return KEY_NULL; 6149} 6150 6151static int 6152yyl_leftcurly(pTHX_ char *s, const U8 formbrack) 6153{ 6154 char *d; 6155 if (PL_lex_brackets > 100) { 6156 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 6157 } 6158 6159 switch (PL_expect) { 6160 case XTERM: 6161 case XTERMORDORDOR: 6162 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6163 PL_lex_allbrackets++; 6164 OPERATOR(HASHBRACK); 6165 case XOPERATOR: 6166 while (s < PL_bufend && SPACE_OR_TAB(*s)) 6167 s++; 6168 d = s; 6169 PL_tokenbuf[0] = '\0'; 6170 if (d < PL_bufend && *d == '-') { 6171 PL_tokenbuf[0] = '-'; 6172 d++; 6173 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6174 d++; 6175 } 6176 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) { 6177 STRLEN len; 6178 d = scan_word6(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 6179 FALSE, &len, FALSE); 6180 while (d < PL_bufend && SPACE_OR_TAB(*d)) 6181 d++; 6182 if (*d == '}') { 6183 const char minus = (PL_tokenbuf[0] == '-'); 6184 s = force_word(s + minus, BAREWORD, FALSE, TRUE); 6185 if (minus) 6186 force_next(PERLY_MINUS); 6187 } 6188 } 6189 /* FALLTHROUGH */ 6190 case XATTRTERM: 6191 case XTERMBLOCK: 6192 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6193 PL_lex_allbrackets++; 6194 PL_expect = XSTATE; 6195 break; 6196 case XATTRBLOCK: 6197 case XBLOCK: 6198 PL_lex_brackstack[PL_lex_brackets++] = XSTATE; 6199 PL_lex_allbrackets++; 6200 PL_expect = XSTATE; 6201 break; 6202 case XBLOCKTERM: 6203 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6204 PL_lex_allbrackets++; 6205 PL_expect = XSTATE; 6206 break; 6207 default: { 6208 const char *t; 6209 if (PL_oldoldbufptr == PL_last_lop) 6210 PL_lex_brackstack[PL_lex_brackets++] = XTERM; 6211 else 6212 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; 6213 PL_lex_allbrackets++; 6214 s = skipspace(s); 6215 if (*s == '}') { 6216 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { 6217 PL_expect = XTERM; 6218 /* This hack is to get the ${} in the message. */ 6219 PL_bufptr = s+1; 6220 yyerror("syntax error"); 6221 yyquit(); 6222 break; 6223 } 6224 OPERATOR(HASHBRACK); 6225 } 6226 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { 6227 /* ${...} or @{...} etc., but not print {...} 6228 * Skip the disambiguation and treat this as a block. 6229 */ 6230 goto block_expectation; 6231 } 6232 /* This hack serves to disambiguate a pair of curlies 6233 * as being a block or an anon hash. Normally, expectation 6234 * determines that, but in cases where we're not in a 6235 * position to expect anything in particular (like inside 6236 * eval"") we have to resolve the ambiguity. This code 6237 * covers the case where the first term in the curlies is a 6238 * quoted string. Most other cases need to be explicitly 6239 * disambiguated by prepending a "+" before the opening 6240 * curly in order to force resolution as an anon hash. 6241 * 6242 * XXX should probably propagate the outer expectation 6243 * into eval"" to rely less on this hack, but that could 6244 * potentially break current behavior of eval"". 6245 * GSAR 97-07-21 6246 */ 6247 t = s; 6248 if (*s == '\'' || *s == '"' || *s == '`') { 6249 /* common case: get past first string, handling escapes */ 6250 for (t++; t < PL_bufend && *t != *s;) 6251 if (*t++ == '\\') 6252 t++; 6253 t++; 6254 } 6255 else if (*s == 'q') { 6256 if (++t < PL_bufend 6257 && (!isWORDCHAR(*t) 6258 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend 6259 && !isWORDCHAR(*t)))) 6260 { 6261 /* skip q//-like construct */ 6262 const char *tmps; 6263 char open, close, term; 6264 I32 brackets = 1; 6265 6266 while (t < PL_bufend && isSPACE(*t)) 6267 t++; 6268 /* check for q => */ 6269 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { 6270 OPERATOR(HASHBRACK); 6271 } 6272 term = *t; 6273 open = term; 6274 if (term && (tmps = memCHRs("([{< )]}> )]}>",term))) 6275 term = tmps[5]; 6276 close = term; 6277 if (open == close) 6278 for (t++; t < PL_bufend; t++) { 6279 if (*t == '\\' && t+1 < PL_bufend && open != '\\') 6280 t++; 6281 else if (*t == open) 6282 break; 6283 } 6284 else { 6285 for (t++; t < PL_bufend; t++) { 6286 if (*t == '\\' && t+1 < PL_bufend) 6287 t++; 6288 else if (*t == close && --brackets <= 0) 6289 break; 6290 else if (*t == open) 6291 brackets++; 6292 } 6293 } 6294 t++; 6295 } 6296 else 6297 /* skip plain q word */ 6298 while ( t < PL_bufend 6299 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 6300 { 6301 t += UTF ? UTF8SKIP(t) : 1; 6302 } 6303 } 6304 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) { 6305 t += UTF ? UTF8SKIP(t) : 1; 6306 while ( t < PL_bufend 6307 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) 6308 { 6309 t += UTF ? UTF8SKIP(t) : 1; 6310 } 6311 } 6312 while (t < PL_bufend && isSPACE(*t)) 6313 t++; 6314 /* if comma follows first term, call it an anon hash */ 6315 /* XXX it could be a comma expression with loop modifiers */ 6316 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) 6317 || (*t == '=' && t[1] == '>'))) 6318 OPERATOR(HASHBRACK); 6319 if (PL_expect == XREF) { 6320 block_expectation: 6321 /* If there is an opening brace or 'sub:', treat it 6322 as a term to make ${{...}}{k} and &{sub:attr...} 6323 dwim. Otherwise, treat it as a statement, so 6324 map {no strict; ...} works. 6325 */ 6326 s = skipspace(s); 6327 if (*s == '{') { 6328 PL_expect = XTERM; 6329 break; 6330 } 6331 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) { 6332 PL_bufptr = s; 6333 d = s + 3; 6334 d = skipspace(d); 6335 s = PL_bufptr; 6336 if (*d == ':') { 6337 PL_expect = XTERM; 6338 break; 6339 } 6340 } 6341 PL_expect = XSTATE; 6342 } 6343 else { 6344 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; 6345 PL_expect = XSTATE; 6346 } 6347 } 6348 break; 6349 } 6350 6351 pl_yylval.ival = CopLINE(PL_curcop); 6352 PL_copline = NOLINE; /* invalidate current command line number */ 6353 TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN); 6354} 6355 6356static int 6357yyl_rightcurly(pTHX_ char *s, const U8 formbrack) 6358{ 6359 assert(s != PL_bufend); 6360 s++; 6361 6362 if (PL_lex_brackets <= 0) 6363 /* diag_listed_as: Unmatched right %s bracket */ 6364 yyerror("Unmatched right curly bracket"); 6365 else 6366 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets]; 6367 6368 PL_lex_allbrackets--; 6369 6370 if (PL_lex_state == LEX_INTERPNORMAL) { 6371 if (PL_lex_brackets == 0) { 6372 if (PL_expect & XFAKEBRACK) { 6373 PL_expect &= XENUMMASK; 6374 PL_lex_state = LEX_INTERPEND; 6375 PL_bufptr = s; 6376 return yylex(); /* ignore fake brackets */ 6377 } 6378 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 6379 && SvEVALED(PL_lex_repl)) 6380 PL_lex_state = LEX_INTERPEND; 6381 else if (*s == '-' && s[1] == '>') 6382 PL_lex_state = LEX_INTERPENDMAYBE; 6383 else if (*s != '[' && *s != '{') 6384 PL_lex_state = LEX_INTERPEND; 6385 } 6386 } 6387 6388 if (PL_expect & XFAKEBRACK) { 6389 PL_expect &= XENUMMASK; 6390 PL_bufptr = s; 6391 return yylex(); /* ignore fake brackets */ 6392 } 6393 6394 force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE); 6395 if (formbrack) LEAVE_with_name("lex_format"); 6396 if (formbrack == 2) { /* means . where arguments were expected */ 6397 force_next(PERLY_SEMICOLON); 6398 TOKEN(FORMRBRACK); 6399 } 6400 6401 TOKEN(PERLY_SEMICOLON); 6402} 6403 6404static int 6405yyl_ampersand(pTHX_ char *s) 6406{ 6407 if (PL_expect == XPOSTDEREF) 6408 POSTDEREF(PERLY_AMPERSAND); 6409 6410 s++; 6411 if (*s++ == '&') { 6412 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6413 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6414 s -= 2; 6415 TOKEN(0); 6416 } 6417 AOPERATOR(ANDAND); 6418 } 6419 s--; 6420 6421 if (PL_expect == XOPERATOR) { 6422 char *d; 6423 bool bof; 6424 if ( PL_bufptr == PL_linestart 6425 && ckWARN(WARN_SEMICOLON) 6426 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) 6427 { 6428 CopLINE_dec(PL_curcop); 6429 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 6430 CopLINE_inc(PL_curcop); 6431 } 6432 d = s; 6433 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 6434 s++; 6435 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6436 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6437 s = d; 6438 s--; 6439 TOKEN(0); 6440 } 6441 if (d == s) 6442 BAop(bof ? OP_NBIT_AND : OP_BIT_AND); 6443 else 6444 BAop(OP_SBIT_AND); 6445 } 6446 6447 PL_tokenbuf[0] = '&'; 6448 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); 6449 pl_yylval.ival = (OPpENTERSUB_AMPER<<8); 6450 6451 if (PL_tokenbuf[1]) 6452 force_ident_maybe_lex('&'); 6453 else 6454 PREREF(PERLY_AMPERSAND); 6455 6456 TERM(PERLY_AMPERSAND); 6457} 6458 6459static int 6460yyl_verticalbar(pTHX_ char *s) 6461{ 6462 char *d; 6463 bool bof; 6464 6465 s++; 6466 if (*s++ == '|') { 6467 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6468 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) { 6469 s -= 2; 6470 TOKEN(0); 6471 } 6472 AOPERATOR(OROR); 6473 } 6474 6475 s--; 6476 d = s; 6477 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') 6478 s++; 6479 6480 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6481 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) { 6482 s = d - 1; 6483 TOKEN(0); 6484 } 6485 6486 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR); 6487} 6488 6489static int 6490yyl_bang(pTHX_ char *s) 6491{ 6492 const char tmp = *s++; 6493 if (tmp == '=') { 6494 /* was this !=~ where !~ was meant? 6495 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ 6496 6497 if (*s == '~' && ckWARN(WARN_SYNTAX)) { 6498 const char *t = s+1; 6499 6500 while (t < PL_bufend && isSPACE(*t)) 6501 ++t; 6502 6503 if (*t == '/' || *t == '?' 6504 || ((*t == 'm' || *t == 's' || *t == 'y') 6505 && !isWORDCHAR(t[1])) 6506 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2]))) 6507 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 6508 "!=~ should be !~"); 6509 } 6510 6511 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6512 s -= 2; 6513 TOKEN(0); 6514 } 6515 6516 ChEop(OP_NE); 6517 } 6518 6519 if (tmp == '~') 6520 PMop(OP_NOT); 6521 6522 s--; 6523 OPERATOR(PERLY_EXCLAMATION_MARK); 6524} 6525 6526static int 6527yyl_snail(pTHX_ char *s) 6528{ 6529 if (PL_expect == XPOSTDEREF) 6530 POSTDEREF(PERLY_SNAIL); 6531 PL_tokenbuf[0] = '@'; 6532 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); 6533 if (PL_expect == XOPERATOR) { 6534 char *d = s; 6535 if (PL_bufptr > s) { 6536 d = PL_bufptr-1; 6537 PL_bufptr = PL_oldbufptr; 6538 } 6539 no_op("Array", d); 6540 } 6541 pl_yylval.ival = 0; 6542 if (!PL_tokenbuf[1]) { 6543 PREREF(PERLY_SNAIL); 6544 } 6545 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) 6546 s = skipspace(s); 6547 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) 6548 && intuit_more(s, PL_bufend)) 6549 { 6550 if (*s == '{') 6551 PL_tokenbuf[0] = '%'; 6552 6553 /* Warn about @ where they meant $. */ 6554 if (*s == '[' || *s == '{') { 6555 if (ckWARN(WARN_SYNTAX)) { 6556 S_check_scalar_slice(aTHX_ s); 6557 } 6558 } 6559 } 6560 PL_expect = XOPERATOR; 6561 force_ident_maybe_lex('@'); 6562 TERM(PERLY_SNAIL); 6563} 6564 6565static int 6566yyl_slash(pTHX_ char *s) 6567{ 6568 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') { 6569 if (!PL_lex_allbrackets && PL_lex_fakeeof >= 6570 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) 6571 TOKEN(0); 6572 s += 2; 6573 AOPERATOR(DORDOR); 6574 } 6575 else if (PL_expect == XOPERATOR) { 6576 s++; 6577 if (*s == '=' && !PL_lex_allbrackets 6578 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 6579 { 6580 s--; 6581 TOKEN(0); 6582 } 6583 Mop(OP_DIVIDE); 6584 } 6585 else { 6586 /* Disable warning on "study /blah/" */ 6587 if ( PL_oldoldbufptr == PL_last_uni 6588 && ( *PL_last_uni != 's' || s - PL_last_uni < 5 6589 || memNE(PL_last_uni, "study", 5) 6590 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF) 6591 )) 6592 check_uni(); 6593 s = scan_pat(s,OP_MATCH); 6594 TERM(sublex_start()); 6595 } 6596} 6597 6598static int 6599yyl_leftsquare(pTHX_ char *s) 6600{ 6601 if (PL_lex_brackets > 100) 6602 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 6603 PL_lex_brackstack[PL_lex_brackets++] = 0; 6604 PL_lex_allbrackets++; 6605 s++; 6606 OPERATOR(PERLY_BRACKET_OPEN); 6607} 6608 6609static int 6610yyl_rightsquare(pTHX_ char *s) 6611{ 6612 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 6613 TOKEN(0); 6614 s++; 6615 if (PL_lex_brackets <= 0) 6616 /* diag_listed_as: Unmatched right %s bracket */ 6617 yyerror("Unmatched right square bracket"); 6618 else 6619 --PL_lex_brackets; 6620 PL_lex_allbrackets--; 6621 if (PL_lex_state == LEX_INTERPNORMAL) { 6622 if (PL_lex_brackets == 0) { 6623 if (*s == '-' && s[1] == '>') 6624 PL_lex_state = LEX_INTERPENDMAYBE; 6625 else if (*s != '[' && *s != '{') 6626 PL_lex_state = LEX_INTERPEND; 6627 } 6628 } 6629 TERM(PERLY_BRACKET_CLOSE); 6630} 6631 6632static int 6633yyl_tilde(pTHX_ char *s) 6634{ 6635 bool bof; 6636 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) { 6637 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 6638 TOKEN(0); 6639 s += 2; 6640 Perl_ck_warner_d(aTHX_ 6641 packWARN(WARN_DEPRECATED__SMARTMATCH), 6642 "Smartmatch is deprecated"); 6643 NCEop(OP_SMARTMATCH); 6644 } 6645 s++; 6646 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') { 6647 s++; 6648 BCop(OP_SCOMPLEMENT); 6649 } 6650 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT); 6651} 6652 6653static int 6654yyl_leftparen(pTHX_ char *s) 6655{ 6656 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr) 6657 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ 6658 else 6659 PL_expect = XTERM; 6660 s = skipspace(s); 6661 PL_lex_allbrackets++; 6662 TOKEN(PERLY_PAREN_OPEN); 6663} 6664 6665static int 6666yyl_rightparen(pTHX_ char *s) 6667{ 6668 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) 6669 TOKEN(0); 6670 s++; 6671 PL_lex_allbrackets--; 6672 s = skipspace(s); 6673 if (*s == '{') 6674 PREBLOCK(PERLY_PAREN_CLOSE); 6675 TERM(PERLY_PAREN_CLOSE); 6676} 6677 6678static int 6679yyl_leftpointy(pTHX_ char *s) 6680{ 6681 char tmp; 6682 6683 if (PL_expect != XOPERATOR) { 6684 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s)) 6685 check_uni(); 6686 if (s[1] == '<' && s[2] != '>') 6687 s = scan_heredoc(s); 6688 else 6689 s = scan_inputsymbol(s); 6690 PL_expect = XOPERATOR; 6691 TOKEN(sublex_start()); 6692 } 6693 6694 s++; 6695 6696 tmp = *s++; 6697 if (tmp == '<') { 6698 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6699 s -= 2; 6700 TOKEN(0); 6701 } 6702 SHop(OP_LEFT_SHIFT); 6703 } 6704 if (tmp == '=') { 6705 tmp = *s++; 6706 if (tmp == '>') { 6707 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6708 s -= 3; 6709 TOKEN(0); 6710 } 6711 NCEop(OP_NCMP); 6712 } 6713 s--; 6714 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6715 s -= 2; 6716 TOKEN(0); 6717 } 6718 ChRop(OP_LE); 6719 } 6720 6721 s--; 6722 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6723 s--; 6724 TOKEN(0); 6725 } 6726 6727 ChRop(OP_LT); 6728} 6729 6730static int 6731yyl_rightpointy(pTHX_ char *s) 6732{ 6733 const char tmp = *s++; 6734 6735 if (tmp == '>') { 6736 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 6737 s -= 2; 6738 TOKEN(0); 6739 } 6740 SHop(OP_RIGHT_SHIFT); 6741 } 6742 else if (tmp == '=') { 6743 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6744 s -= 2; 6745 TOKEN(0); 6746 } 6747 ChRop(OP_GE); 6748 } 6749 6750 s--; 6751 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { 6752 s--; 6753 TOKEN(0); 6754 } 6755 6756 ChRop(OP_GT); 6757} 6758 6759static int 6760yyl_sglquote(pTHX_ char *s) 6761{ 6762 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6763 if (!s) 6764 missingterm(NULL, 0); 6765 COPLINE_SET_FROM_MULTI_END; 6766 DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); 6767 if (PL_expect == XOPERATOR) { 6768 no_op("String",s); 6769 } 6770 pl_yylval.ival = OP_CONST; 6771 TERM(sublex_start()); 6772} 6773 6774static int 6775yyl_dblquote(pTHX_ char *s) 6776{ 6777 char *d; 6778 STRLEN len; 6779 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6780 DEBUG_T( { 6781 if (s) 6782 printbuf("### Saw string before %s\n", s); 6783 else 6784 PerlIO_printf(Perl_debug_log, 6785 "### Saw unterminated string\n"); 6786 } ); 6787 if (PL_expect == XOPERATOR) { 6788 no_op("String",s); 6789 } 6790 if (!s) 6791 missingterm(NULL, 0); 6792 pl_yylval.ival = OP_CONST; 6793 /* FIXME. I think that this can be const if char *d is replaced by 6794 more localised variables. */ 6795 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { 6796 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { 6797 pl_yylval.ival = OP_STRINGIFY; 6798 break; 6799 } 6800 } 6801 if (pl_yylval.ival == OP_CONST) 6802 COPLINE_SET_FROM_MULTI_END; 6803 TERM(sublex_start()); 6804} 6805 6806static int 6807yyl_backtick(pTHX_ char *s) 6808{ 6809 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 6810 DEBUG_T( { 6811 if (s) 6812 printbuf("### Saw backtick string before %s\n", s); 6813 else 6814 PerlIO_printf(Perl_debug_log, 6815 "### Saw unterminated backtick string\n"); 6816 } ); 6817 if (PL_expect == XOPERATOR) 6818 no_op("Backticks",s); 6819 if (!s) 6820 missingterm(NULL, 0); 6821 pl_yylval.ival = OP_BACKTICK; 6822 TERM(sublex_start()); 6823} 6824 6825static int 6826yyl_backslash(pTHX_ char *s) 6827{ 6828 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s)) 6829 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", 6830 *s, *s); 6831 if (PL_expect == XOPERATOR) 6832 no_op("Backslash",s); 6833 OPERATOR(REFGEN); 6834} 6835 6836static void 6837yyl_data_handle(pTHX) 6838{ 6839 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash 6840 ? PL_curstash 6841 : PL_defstash; 6842 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1); 6843 6844 if (!isGV(gv)) 6845 gv_init(gv,stash,"DATA",4,0); 6846 6847 GvMULTI_on(gv); 6848 if (!GvIO(gv)) 6849 GvIOp(gv) = newIO(); 6850 IoIFP(GvIOp(gv)) = PL_rsfp; 6851 6852 /* Mark this internal pseudo-handle as clean */ 6853 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; 6854 if ((PerlIO*)PL_rsfp == PerlIO_stdin()) 6855 IoTYPE(GvIOp(gv)) = IoTYPE_STD; 6856 else 6857 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; 6858 6859#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) 6860 /* if the script was opened in binmode, we need to revert 6861 * it to text mode for compatibility; but only iff it has CRs 6862 * XXX this is a questionable hack at best. */ 6863 if (PL_bufend-PL_bufptr > 2 6864 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') 6865 { 6866 Off_t loc = 0; 6867 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { 6868 loc = PerlIO_tell(PL_rsfp); 6869 (void)PerlIO_seek(PL_rsfp, 0L, 0); 6870 } 6871 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { 6872 if (loc > 0) 6873 PerlIO_seek(PL_rsfp, loc, 0); 6874 } 6875 } 6876#endif 6877 6878#ifdef PERLIO_LAYERS 6879 if (!IN_BYTES) { 6880 if (UTF) 6881 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); 6882 } 6883#endif 6884 6885 PL_rsfp = NULL; 6886} 6887 6888PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*) 6889 __attribute__noreturn__; 6890 6891PERL_STATIC_NO_RET void 6892yyl_croak_unrecognised(pTHX_ char *s) 6893{ 6894 SV *dsv = newSVpvs_flags("", SVs_TEMP); 6895 const char *c; 6896 char *d; 6897 STRLEN len; 6898 6899 if (UTF) { 6900 STRLEN skiplen = UTF8SKIP(s); 6901 STRLEN stravail = PL_bufend - s; 6902 c = sv_uni_display(dsv, newSVpvn_flags(s, 6903 skiplen > stravail ? stravail : skiplen, 6904 SVs_TEMP | SVf_UTF8), 6905 10, UNI_DISPLAY_ISPRINT); 6906 } 6907 else { 6908 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); 6909 } 6910 6911 if (s >= PL_linestart) { 6912 d = PL_linestart; 6913 } 6914 else { 6915 /* somehow (probably due to a parse failure), PL_linestart has advanced 6916 * pass PL_bufptr, get a reasonable beginning of line 6917 */ 6918 d = s; 6919 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n') 6920 --d; 6921 } 6922 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d); 6923 if (len > UNRECOGNIZED_PRECEDE_COUNT) { 6924 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT; 6925 } 6926 6927 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c, 6928 UTF8fARG(UTF, (s - d), d), 6929 (int) len + 1); 6930} 6931 6932static int 6933yyl_require(pTHX_ char *s, I32 orig_keyword) 6934{ 6935 s = skipspace(s); 6936 if (isDIGIT(*s)) { 6937 s = force_version(s, FALSE); 6938 } 6939 else if (*s != 'v' || !isDIGIT(s[1]) 6940 || (s = force_version(s, TRUE), *s == 'v')) 6941 { 6942 *PL_tokenbuf = '\0'; 6943 s = force_word(s,BAREWORD,TRUE,TRUE); 6944 if (isIDFIRST_lazy_if_safe(PL_tokenbuf, 6945 PL_tokenbuf + sizeof(PL_tokenbuf), 6946 UTF)) 6947 { 6948 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), 6949 GV_ADD | (UTF ? SVf_UTF8 : 0)); 6950 } 6951 else if (*s == '<') 6952 yyerror("<> at require-statement should be quotes"); 6953 } 6954 6955 if (orig_keyword == KEY_require) 6956 pl_yylval.ival = 1; 6957 else 6958 pl_yylval.ival = 0; 6959 6960 PL_expect = PL_nexttoke ? XOPERATOR : XTERM; 6961 PL_bufptr = s; 6962 PL_last_uni = PL_oldbufptr; 6963 PL_last_lop_op = OP_REQUIRE; 6964 s = skipspace(s); 6965 return REPORT( (int)KW_REQUIRE ); 6966} 6967 6968static int 6969yyl_foreach(pTHX_ char *s) 6970{ 6971 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 6972 return REPORT(0); 6973 pl_yylval.ival = CopLINE(PL_curcop); 6974 s = skipspace(s); 6975 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 6976 char *p = s; 6977 SSize_t s_off = s - SvPVX(PL_linestr); 6978 bool paren_is_valid = FALSE; 6979 bool maybe_package = FALSE; 6980 bool saw_core = FALSE; 6981 bool core_valid = FALSE; 6982 6983 if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) { 6984 saw_core = TRUE; 6985 p += 6; 6986 } 6987 if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) { 6988 core_valid = TRUE; 6989 paren_is_valid = TRUE; 6990 if (isSPACE(p[2])) { 6991 p = skipspace(p + 3); 6992 maybe_package = TRUE; 6993 } 6994 else { 6995 p += 2; 6996 } 6997 } 6998 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) { 6999 core_valid = TRUE; 7000 if (isSPACE(p[3])) { 7001 p = skipspace(p + 4); 7002 maybe_package = TRUE; 7003 } 7004 else { 7005 p += 3; 7006 } 7007 } 7008 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) { 7009 core_valid = TRUE; 7010 if (isSPACE(p[5])) { 7011 p = skipspace(p + 6); 7012 } 7013 else { 7014 p += 5; 7015 } 7016 } 7017 if (saw_core && !core_valid) { 7018 Perl_croak(aTHX_ "Missing $ on loop variable"); 7019 } 7020 7021 if (maybe_package && !saw_core) { 7022 /* skip optional package name, as in "for my abc $x (..)" */ 7023 if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) { 7024 STRLEN len; 7025 p = scan_word6(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE); 7026 p = skipspace(p); 7027 paren_is_valid = FALSE; 7028 } 7029 } 7030 7031 if (UNLIKELY(paren_is_valid && *p == '(')) { 7032 Perl_ck_warner_d(aTHX_ 7033 packWARN(WARN_EXPERIMENTAL__FOR_LIST), 7034 "for my (...) is experimental"); 7035 } 7036 else if (UNLIKELY(*p != '$' && *p != '\\')) { 7037 /* "for myfoo (" will end up here, but with p pointing at the 'f' */ 7038 Perl_croak(aTHX_ "Missing $ on loop variable"); 7039 } 7040 /* The buffer may have been reallocated, update s */ 7041 s = SvPVX(PL_linestr) + s_off; 7042 } 7043 OPERATOR(KW_FOR); 7044} 7045 7046static int 7047yyl_do(pTHX_ char *s, I32 orig_keyword) 7048{ 7049 s = skipspace(s); 7050 if (*s == '{') 7051 PRETERMBLOCK(KW_DO); 7052 if (*s != '\'') { 7053 char *d; 7054 STRLEN len; 7055 *PL_tokenbuf = '&'; 7056 d = scan_word6(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 7057 1, &len, TRUE); 7058 if (len && memNEs(PL_tokenbuf+1, len, "CORE") 7059 && !keyword(PL_tokenbuf + 1, len, 0)) { 7060 SSize_t off = s-SvPVX(PL_linestr); 7061 d = skipspace(d); 7062 s = SvPVX(PL_linestr)+off; 7063 if (*d == '(') { 7064 force_ident_maybe_lex('&'); 7065 s = d; 7066 } 7067 } 7068 } 7069 if (orig_keyword == KEY_do) 7070 pl_yylval.ival = 1; 7071 else 7072 pl_yylval.ival = 0; 7073 OPERATOR(KW_DO); 7074} 7075 7076static int 7077yyl_my(pTHX_ char *s, I32 my) 7078{ 7079 if (PL_in_my) { 7080 PL_bufptr = s; 7081 yyerror(Perl_form(aTHX_ 7082 "Can't redeclare \"%s\" in \"%s\"", 7083 my == KEY_my ? "my" : 7084 my == KEY_state ? "state" : "our", 7085 PL_in_my == KEY_my ? "my" : 7086 PL_in_my == KEY_state ? "state" : "our")); 7087 } 7088 PL_in_my = (U16)my; 7089 s = skipspace(s); 7090 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 7091 STRLEN len; 7092 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE); 7093 if (memEQs(PL_tokenbuf, len, "sub")) 7094 return yyl_sub(aTHX_ s, my); 7095 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); 7096 if (!PL_in_my_stash) { 7097 char tmpbuf[1024]; 7098 int i; 7099 PL_bufptr = s; 7100 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); 7101 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf)); 7102 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); 7103 } 7104 } 7105 else if (*s == '\\') { 7106 if (!FEATURE_MYREF_IS_ENABLED) 7107 Perl_croak(aTHX_ "The experimental declared_refs " 7108 "feature is not enabled"); 7109 Perl_ck_warner_d(aTHX_ 7110 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS), 7111 "Declaring references is experimental"); 7112 } 7113 OPERATOR(KW_MY); 7114} 7115 7116static int yyl_try(pTHX_ char*); 7117 7118static bool 7119yyl_eol_needs_semicolon(pTHX_ char **ps) 7120{ 7121 char *s = *ps; 7122 if (PL_lex_state != LEX_NORMAL 7123 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) 7124 { 7125 const bool in_comment = *s == '#'; 7126 char *d; 7127 if (*s == '#' && s == PL_linestart && PL_in_eval 7128 && !PL_rsfp && !PL_parser->filtered) { 7129 /* handle eval qq[#line 1 "foo"\n ...] */ 7130 CopLINE_dec(PL_curcop); 7131 incline(s, PL_bufend); 7132 } 7133 d = s; 7134 while (d < PL_bufend && *d != '\n') 7135 d++; 7136 if (d < PL_bufend) 7137 d++; 7138 s = d; 7139 if (in_comment && d == PL_bufend 7140 && PL_lex_state == LEX_INTERPNORMAL 7141 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr 7142 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; 7143 else 7144 incline(s, PL_bufend); 7145 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 7146 PL_lex_state = LEX_FORMLINE; 7147 force_next(FORMRBRACK); 7148 *ps = s; 7149 return TRUE; 7150 } 7151 } 7152 else { 7153 while (s < PL_bufend && *s != '\n') 7154 s++; 7155 if (s < PL_bufend) { 7156 s++; 7157 if (s < PL_bufend) 7158 incline(s, PL_bufend); 7159 } 7160 } 7161 *ps = s; 7162 return FALSE; 7163} 7164 7165static int 7166yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s) 7167{ 7168 char *d; 7169 7170 goto start; 7171 7172 do { 7173 fake_eof = 0; 7174 bof = cBOOL(PL_rsfp); 7175 start: 7176 7177 PL_bufptr = PL_bufend; 7178 COPLINE_INC_WITH_HERELINES; 7179 if (!lex_next_chunk(fake_eof)) { 7180 CopLINE_dec(PL_curcop); 7181 s = PL_bufptr; 7182 TOKEN(PERLY_SEMICOLON); /* not infinite loop because rsfp is NULL now */ 7183 } 7184 CopLINE_dec(PL_curcop); 7185 s = PL_bufptr; 7186 /* If it looks like the start of a BOM or raw UTF-16, 7187 * check if it in fact is. */ 7188 if (bof && PL_rsfp 7189 && ( *s == 0 7190 || *(U8*)s == BOM_UTF8_FIRST_BYTE 7191 || *(U8*)s >= 0xFE 7192 || s[1] == 0)) 7193 { 7194 Off_t offset = (IV)PerlIO_tell(PL_rsfp); 7195 bof = (offset == (Off_t)SvCUR(PL_linestr)); 7196#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) 7197 /* offset may include swallowed CR */ 7198 if (!bof) 7199 bof = (offset == (Off_t)SvCUR(PL_linestr)+1); 7200#endif 7201 if (bof) { 7202 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7203 s = swallow_bom((U8*)s); 7204 } 7205 } 7206 if (PL_parser->in_pod) { 7207 /* Incest with pod. */ 7208 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut") 7209 && !isALPHA(s[4])) 7210 { 7211 SvPVCLEAR(PL_linestr); 7212 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 7213 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7214 PL_last_lop = PL_last_uni = NULL; 7215 PL_parser->in_pod = 0; 7216 } 7217 } 7218 if (PL_rsfp || PL_parser->filtered) 7219 incline(s, PL_bufend); 7220 } while (PL_parser->in_pod); 7221 7222 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; 7223 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7224 PL_last_lop = PL_last_uni = NULL; 7225 if (CopLINE(PL_curcop) == 1) { 7226 while (s < PL_bufend && isSPACE(*s)) 7227 s++; 7228 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ 7229 s++; 7230 d = NULL; 7231 if (!PL_in_eval) { 7232 if (*s == '#' && *(s+1) == '!') 7233 d = s + 2; 7234#ifdef ALTERNATE_SHEBANG 7235 else { 7236 static char const as[] = ALTERNATE_SHEBANG; 7237 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) 7238 d = s + (sizeof(as) - 1); 7239 } 7240#endif /* ALTERNATE_SHEBANG */ 7241 } 7242 if (d) { 7243 char *ipath; 7244 char *ipathend; 7245 7246 while (isSPACE(*d)) 7247 d++; 7248 ipath = d; 7249 while (*d && !isSPACE(*d)) 7250 d++; 7251 ipathend = d; 7252 7253#ifdef ARG_ZERO_IS_SCRIPT 7254 if (ipathend > ipath) { 7255 /* 7256 * HP-UX (at least) sets argv[0] to the script name, 7257 * which makes $^X incorrect. And Digital UNIX and Linux, 7258 * at least, set argv[0] to the basename of the Perl 7259 * interpreter. So, having found "#!", we'll set it right. 7260 */ 7261 SV* copfilesv = CopFILESV(PL_curcop); 7262 if (copfilesv) { 7263 SV * const x = 7264 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, 7265 SVt_PV)); /* $^X */ 7266 assert(SvPOK(x) || SvGMAGICAL(x)); 7267 if (sv_eq(x, copfilesv)) { 7268 sv_setpvn(x, ipath, ipathend - ipath); 7269 SvSETMAGIC(x); 7270 } 7271 else { 7272 STRLEN blen; 7273 STRLEN llen; 7274 const char *bstart = SvPV_const(copfilesv, blen); 7275 const char * const lstart = SvPV_const(x, llen); 7276 if (llen < blen) { 7277 bstart += blen - llen; 7278 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { 7279 sv_setpvn(x, ipath, ipathend - ipath); 7280 SvSETMAGIC(x); 7281 } 7282 } 7283 } 7284 } 7285 else { 7286 /* Anything to do if no copfilesv? */ 7287 } 7288 TAINT_NOT; /* $^X is always tainted, but that's OK */ 7289 } 7290#endif /* ARG_ZERO_IS_SCRIPT */ 7291 7292 /* 7293 * Look for options. 7294 */ 7295 d = instr(s,"perl -"); 7296 if (!d) { 7297 d = instr(s,"perl"); 7298#if defined(DOSISH) 7299 /* avoid getting into infinite loops when shebang 7300 * line contains "Perl" rather than "perl" */ 7301 if (!d) { 7302 for (d = ipathend-4; d >= ipath; --d) { 7303 if (isALPHA_FOLD_EQ(*d, 'p') 7304 && !ibcmp(d, "perl", 4)) 7305 { 7306 break; 7307 } 7308 } 7309 if (d < ipath) 7310 d = NULL; 7311 } 7312#endif 7313 } 7314#ifdef ALTERNATE_SHEBANG 7315 /* 7316 * If the ALTERNATE_SHEBANG on this system starts with a 7317 * character that can be part of a Perl expression, then if 7318 * we see it but not "perl", we're probably looking at the 7319 * start of Perl code, not a request to hand off to some 7320 * other interpreter. Similarly, if "perl" is there, but 7321 * not in the first 'word' of the line, we assume the line 7322 * contains the start of the Perl program. 7323 */ 7324 if (d && *s != '#') { 7325 const char *c = ipath; 7326 while (*c && !memCHRs("; \t\r\n\f\v#", *c)) 7327 c++; 7328 if (c < d) 7329 d = NULL; /* "perl" not in first word; ignore */ 7330 else 7331 *s = '#'; /* Don't try to parse shebang line */ 7332 } 7333#endif /* ALTERNATE_SHEBANG */ 7334 if (!d 7335 && *s == '#' 7336 && ipathend > ipath 7337 && !PL_minus_c 7338 && !instr(s,"indir") 7339 && instr(PL_origargv[0],"perl")) 7340 { 7341 char **newargv; 7342 7343 *ipathend = '\0'; 7344 s = ipathend + 1; 7345 while (s < PL_bufend && isSPACE(*s)) 7346 s++; 7347 if (s < PL_bufend) { 7348 Newx(newargv,PL_origargc+3,char*); 7349 newargv[1] = s; 7350 while (s < PL_bufend && !isSPACE(*s)) 7351 s++; 7352 *s = '\0'; 7353 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); 7354 } 7355 else 7356 newargv = PL_origargv; 7357 newargv[0] = ipath; 7358 PERL_FPU_PRE_EXEC 7359 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); 7360 PERL_FPU_POST_EXEC 7361 Perl_croak(aTHX_ "Can't exec %s", ipath); 7362 } 7363 if (d) { 7364 while (*d && !isSPACE(*d)) 7365 d++; 7366 while (SPACE_OR_TAB(*d)) 7367 d++; 7368 7369 if (*d++ == '-') { 7370 const bool switches_done = PL_doswitches; 7371 const U32 oldpdb = PL_perldb; 7372 const bool oldn = PL_minus_n; 7373 const bool oldp = PL_minus_p; 7374 const char *d1 = d; 7375 7376 do { 7377 bool baduni = FALSE; 7378 if (*d1 == 'C') { 7379 const char *d2 = d1 + 1; 7380 if (parse_unicode_opts((const char **)&d2) 7381 != PL_unicode) 7382 baduni = TRUE; 7383 } 7384 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { 7385 const char * const m = d1; 7386 while (*d1 && !isSPACE(*d1)) 7387 d1++; 7388 Perl_croak(aTHX_ "Too late for \"-%.*s\" option", 7389 (int)(d1 - m), m); 7390 } 7391 d1 = moreswitches(d1); 7392 } while (d1); 7393 if (PL_doswitches && !switches_done) { 7394 int argc = PL_origargc; 7395 char **argv = PL_origargv; 7396 do { 7397 argc--,argv++; 7398 } while (argc && argv[0][0] == '-' && argv[0][1]); 7399 init_argv_symbols(argc,argv); 7400 } 7401 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb) 7402 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) 7403 /* if we have already added "LINE: while (<>) {", 7404 we must not do it again */ 7405 { 7406 SvPVCLEAR(PL_linestr); 7407 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 7408 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 7409 PL_last_lop = PL_last_uni = NULL; 7410 PL_preambled = FALSE; 7411 if (PERLDB_LINE_OR_SAVESRC) 7412 (void)gv_fetchfile(PL_origfilename); 7413 return YYL_RETRY; 7414 } 7415 } 7416 } 7417 } 7418 } 7419 7420 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { 7421 PL_lex_state = LEX_FORMLINE; 7422 force_next(FORMRBRACK); 7423 TOKEN(PERLY_SEMICOLON); 7424 } 7425 7426 PL_bufptr = s; 7427 return YYL_RETRY; 7428} 7429 7430static int 7431yyl_fatcomma(pTHX_ char *s, STRLEN len) 7432{ 7433 CLINE; 7434 pl_yylval.opval 7435 = newSVOP(OP_CONST, 0, 7436 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); 7437 pl_yylval.opval->op_private = OPpCONST_BARE; 7438 TERM(BAREWORD); 7439} 7440 7441static int 7442yyl_safe_bareword(pTHX_ char *s, const char lastchar) 7443{ 7444 if ((lastchar == '*' || lastchar == '%' || lastchar == '&') 7445 && PL_parser->saw_infix_sigil) 7446 { 7447 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7448 "Operator or semicolon missing before %c%" UTF8f, 7449 lastchar, 7450 UTF8fARG(UTF, strlen(PL_tokenbuf), 7451 PL_tokenbuf)); 7452 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), 7453 "Ambiguous use of %c resolved as operator %c", 7454 lastchar, lastchar); 7455 } 7456 TOKEN(BAREWORD); 7457} 7458 7459static int 7460yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off) 7461{ 7462 if (sv) { 7463 op_free(rv2cv_op); 7464 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); 7465 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); 7466 if (SvTYPE(sv) == SVt_PVAV) 7467 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, 7468 pl_yylval.opval); 7469 else { 7470 pl_yylval.opval->op_private = 0; 7471 pl_yylval.opval->op_folded = 1; 7472 pl_yylval.opval->op_flags |= OPf_SPECIAL; 7473 } 7474 TOKEN(BAREWORD); 7475 } 7476 7477 op_free(pl_yylval.opval); 7478 pl_yylval.opval = 7479 off ? newCVREF(0, rv2cv_op) : rv2cv_op; 7480 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; 7481 PL_last_lop = PL_oldbufptr; 7482 PL_last_lop_op = OP_ENTERSUB; 7483 7484 /* Is there a prototype? */ 7485 if (SvPOK(cv)) { 7486 int k = yyl_subproto(aTHX_ s, cv); 7487 if (k != KEY_NULL) 7488 return k; 7489 } 7490 7491 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; 7492 PL_expect = XTERM; 7493 force_next(off ? PRIVATEREF : BAREWORD); 7494 if (!PL_lex_allbrackets 7495 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7496 { 7497 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7498 } 7499 7500 TOKEN(NOAMP); 7501} 7502 7503/* Honour "reserved word" warnings, and enforce strict subs */ 7504static void 7505yyl_strictwarn_bareword(pTHX_ const char lastchar) 7506{ 7507 /* after "print" and similar functions (corresponding to 7508 * "F? L" in opcode.pl), whatever wasn't already parsed as 7509 * a filehandle should be subject to "strict subs". 7510 * Likewise for the optional indirect-object argument to system 7511 * or exec, which can't be a bareword */ 7512 if ((PL_last_lop_op == OP_PRINT 7513 || PL_last_lop_op == OP_PRTF 7514 || PL_last_lop_op == OP_SAY 7515 || PL_last_lop_op == OP_SYSTEM 7516 || PL_last_lop_op == OP_EXEC) 7517 && (PL_hints & HINT_STRICT_SUBS)) 7518 { 7519 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7520 } 7521 7522 if (lastchar != '-' && ckWARN(WARN_RESERVED)) { 7523 char *d = PL_tokenbuf; 7524 while (isLOWER(*d)) 7525 d++; 7526 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) { 7527 /* PL_warn_reserved is constant */ 7528 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 7529 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, 7530 PL_tokenbuf); 7531 GCC_DIAG_RESTORE_STMT; 7532 } 7533 } 7534} 7535 7536static int 7537yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c) 7538{ 7539 int pkgname = 0; 7540 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); 7541 bool safebw; 7542 bool no_op_error = FALSE; 7543 /* Use this var to track whether intuit_method has been 7544 called. intuit_method returns 0 or > 255. */ 7545 int key = 1; 7546 7547 if (PL_expect == XOPERATOR) { 7548 if (PL_bufptr == PL_linestart) { 7549 CopLINE_dec(PL_curcop); 7550 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); 7551 CopLINE_inc(PL_curcop); 7552 } 7553 else 7554 /* We want to call no_op with s pointing after the 7555 bareword, so defer it. But we want it to come 7556 before the Bad name croak. */ 7557 no_op_error = TRUE; 7558 } 7559 7560 /* Get the rest if it looks like a package qualifier */ 7561 7562 if (*s == '\'' || (*s == ':' && s[1] == ':')) { 7563 STRLEN morelen; 7564 s = scan_word6(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, 7565 TRUE, &morelen, TRUE); 7566 if (no_op_error) { 7567 no_op("Bareword",s); 7568 no_op_error = FALSE; 7569 } 7570 if (!morelen) 7571 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s", 7572 UTF8fARG(UTF, len, PL_tokenbuf), 7573 *s == '\'' ? "'" : "::"); 7574 len += morelen; 7575 pkgname = 1; 7576 } 7577 7578 if (no_op_error) 7579 no_op("Bareword",s); 7580 7581 /* See if the name is "Foo::", 7582 in which case Foo is a bareword 7583 (and a package name). */ 7584 7585 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') { 7586 if (ckWARN(WARN_BAREWORD) 7587 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) 7588 Perl_warner(aTHX_ packWARN(WARN_BAREWORD), 7589 "Bareword \"%" UTF8f 7590 "\" refers to nonexistent package", 7591 UTF8fARG(UTF, len, PL_tokenbuf)); 7592 len -= 2; 7593 PL_tokenbuf[len] = '\0'; 7594 c.gv = NULL; 7595 c.gvp = 0; 7596 safebw = TRUE; 7597 } 7598 else { 7599 safebw = FALSE; 7600 } 7601 7602 /* if we saw a global override before, get the right name */ 7603 7604 if (!c.sv) 7605 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len); 7606 if (c.gvp) { 7607 SV *sv = newSVpvs("CORE::GLOBAL::"); 7608 sv_catsv(sv, c.sv); 7609 SvREFCNT_dec(c.sv); 7610 c.sv = sv; 7611 } 7612 7613 /* Presume this is going to be a bareword of some sort. */ 7614 CLINE; 7615 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv); 7616 pl_yylval.opval->op_private = OPpCONST_BARE; 7617 7618 /* And if "Foo::", then that's what it certainly is. */ 7619 if (safebw) 7620 return yyl_safe_bareword(aTHX_ s, lastchar); 7621 7622 if (!c.off) { 7623 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv)); 7624 const_op->op_private = OPpCONST_BARE; 7625 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); 7626 c.cv = c.lex 7627 ? isGV(c.gv) 7628 ? GvCV(c.gv) 7629 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV 7630 ? (CV *)SvRV(c.gv) 7631 : ((CV *)c.gv) 7632 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB); 7633 } 7634 7635 /* See if it's the indirect object for a list operator. */ 7636 7637 if (PL_oldoldbufptr 7638 && PL_oldoldbufptr < PL_bufptr 7639 && (PL_oldoldbufptr == PL_last_lop 7640 || PL_oldoldbufptr == PL_last_uni) 7641 && /* NO SKIPSPACE BEFORE HERE! */ 7642 (PL_expect == XREF 7643 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) 7644 == OA_FILEREF)) 7645 { 7646 bool immediate_paren = *s == '('; 7647 SSize_t s_off; 7648 7649 /* (Now we can afford to cross potential line boundary.) */ 7650 s = skipspace(s); 7651 7652 /* intuit_method() can indirectly call lex_next_chunk(), 7653 * invalidating s 7654 */ 7655 s_off = s - SvPVX(PL_linestr); 7656 /* Two barewords in a row may indicate method call. */ 7657 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) 7658 || *s == '$') 7659 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv))) 7660 { 7661 /* the code at method: doesn't use s */ 7662 goto method; 7663 } 7664 s = SvPVX(PL_linestr) + s_off; 7665 7666 /* If not a declared subroutine, it's an indirect object. */ 7667 /* (But it's an indir obj regardless for sort.) */ 7668 /* Also, if "_" follows a filetest operator, it's a bareword */ 7669 7670 if ( 7671 ( !immediate_paren && (PL_last_lop_op == OP_SORT 7672 || (!c.cv 7673 && (PL_last_lop_op != OP_MAPSTART 7674 && PL_last_lop_op != OP_GREPSTART)))) 7675 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' 7676 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) 7677 == OA_FILESTATOP)) 7678 ) 7679 { 7680 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; 7681 yyl_strictwarn_bareword(aTHX_ lastchar); 7682 op_free(c.rv2cv_op); 7683 return yyl_safe_bareword(aTHX_ s, lastchar); 7684 } 7685 } 7686 7687 PL_expect = XOPERATOR; 7688 s = skipspace(s); 7689 7690 /* Is this a word before a => operator? */ 7691 if (*s == '=' && s[1] == '>' && !pkgname) { 7692 op_free(c.rv2cv_op); 7693 CLINE; 7694 if (c.gvp || (c.lex && !c.off)) { 7695 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv); 7696 /* This is our own scalar, created a few lines 7697 above, so this is safe. */ 7698 SvREADONLY_off(c.sv); 7699 sv_setpv(c.sv, PL_tokenbuf); 7700 if (UTF && !IN_BYTES 7701 && is_utf8_string((U8*)PL_tokenbuf, len)) 7702 SvUTF8_on(c.sv); 7703 SvREADONLY_on(c.sv); 7704 } 7705 TERM(BAREWORD); 7706 } 7707 7708 /* If followed by a paren, it's certainly a subroutine. */ 7709 if (*s == '(') { 7710 CLINE; 7711 if (c.cv) { 7712 char *d = s + 1; 7713 while (SPACE_OR_TAB(*d)) 7714 d++; 7715 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv))) 7716 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off); 7717 } 7718 NEXTVAL_NEXTTOKE.opval = 7719 c.off ? c.rv2cv_op : pl_yylval.opval; 7720 if (c.off) 7721 op_free(pl_yylval.opval), force_next(PRIVATEREF); 7722 else op_free(c.rv2cv_op), force_next(BAREWORD); 7723 pl_yylval.ival = 0; 7724 TOKEN(PERLY_AMPERSAND); 7725 } 7726 7727 /* If followed by var or block, call it a method (unless sub) */ 7728 7729 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) { 7730 op_free(c.rv2cv_op); 7731 PL_last_lop = PL_oldbufptr; 7732 PL_last_lop_op = OP_METHOD; 7733 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7734 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7735 PL_expect = XBLOCKTERM; 7736 PL_bufptr = s; 7737 return REPORT(METHCALL0); 7738 } 7739 7740 /* If followed by a bareword, see if it looks like indir obj. */ 7741 7742 if ( key == 1 7743 && !orig_keyword 7744 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$') 7745 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv))) 7746 { 7747 method: 7748 if (c.lex && !c.off) { 7749 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv); 7750 SvREADONLY_off(c.sv); 7751 sv_setpvn(c.sv, PL_tokenbuf, len); 7752 if (UTF && !IN_BYTES 7753 && is_utf8_string((U8*)PL_tokenbuf, len)) 7754 SvUTF8_on(c.sv); 7755 else SvUTF8_off(c.sv); 7756 } 7757 op_free(c.rv2cv_op); 7758 if (key == METHCALL0 && !PL_lex_allbrackets 7759 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 7760 { 7761 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 7762 } 7763 return REPORT(key); 7764 } 7765 7766 /* Not a method, so call it a subroutine (if defined) */ 7767 7768 if (c.cv) { 7769 /* Check for a constant sub */ 7770 c.sv = cv_const_sv_or_av(c.cv); 7771 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off); 7772 } 7773 7774 /* Call it a bare word */ 7775 7776 if (PL_hints & HINT_STRICT_SUBS) 7777 pl_yylval.opval->op_private |= OPpCONST_STRICT; 7778 else 7779 yyl_strictwarn_bareword(aTHX_ lastchar); 7780 7781 op_free(c.rv2cv_op); 7782 7783 return yyl_safe_bareword(aTHX_ s, lastchar); 7784} 7785 7786static int 7787yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c) 7788{ 7789 switch (key) { 7790 default: /* not a keyword */ 7791 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 7792 7793 case KEY___FILE__: 7794 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) ); 7795 7796 case KEY___LINE__: 7797 FUN0OP( 7798 newSVOP(OP_CONST, 0, 7799 Perl_newSVpvf(aTHX_ "%" LINE_Tf, CopLINE(PL_curcop))) 7800 ); 7801 7802 case KEY___PACKAGE__: 7803 FUN0OP( 7804 newSVOP(OP_CONST, 0, (PL_curstash 7805 ? newSVhek(HvNAME_HEK(PL_curstash)) 7806 : &PL_sv_undef)) 7807 ); 7808 7809 case KEY___DATA__: 7810 case KEY___END__: 7811 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) 7812 yyl_data_handle(aTHX); 7813 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s); 7814 7815 case KEY___SUB__: 7816 /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an 7817 * OP_CONST. We need to make it big enough to allow room for that if 7818 * so */ 7819 FUN0OP(CvCLONE(PL_compcv) 7820 ? newOP(OP_RUNCV, 0) 7821 : newSVOP(OP_RUNCV, 0, &PL_sv_undef)); 7822 7823 case KEY_AUTOLOAD: 7824 case KEY_DESTROY: 7825 case KEY_BEGIN: 7826 case KEY_UNITCHECK: 7827 case KEY_CHECK: 7828 case KEY_INIT: 7829 case KEY_END: 7830 if (PL_expect == XSTATE) 7831 return yyl_sub(aTHX_ PL_bufptr, key); 7832 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 7833 7834 case KEY_ADJUST: 7835 Perl_ck_warner_d(aTHX_ 7836 packWARN(WARN_EXPERIMENTAL__CLASS), "ADJUST is experimental"); 7837 7838 /* The way that KEY_CHECK et.al. are handled currently are nothing 7839 * short of crazy. We won't copy that model for new phasers, but use 7840 * this as an experiment to test if this will work 7841 */ 7842 PHASERBLOCK(KEY_ADJUST); 7843 7844 case KEY_abs: 7845 UNI(OP_ABS); 7846 7847 case KEY_alarm: 7848 UNI(OP_ALARM); 7849 7850 case KEY_accept: 7851 LOP(OP_ACCEPT,XTERM); 7852 7853 case KEY_and: 7854 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 7855 return REPORT(0); 7856 OPERATOR(ANDOP); 7857 7858 case KEY_atan2: 7859 LOP(OP_ATAN2,XTERM); 7860 7861 case KEY_bind: 7862 LOP(OP_BIND,XTERM); 7863 7864 case KEY_binmode: 7865 LOP(OP_BINMODE,XTERM); 7866 7867 case KEY_bless: 7868 LOP(OP_BLESS,XTERM); 7869 7870 case KEY_break: 7871 FUN0(OP_BREAK); 7872 7873 case KEY_catch: 7874 Perl_ck_warner_d(aTHX_ 7875 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental"); 7876 PREBLOCK(KW_CATCH); 7877 7878 case KEY_chop: 7879 UNI(OP_CHOP); 7880 7881 case KEY_class: 7882 Perl_ck_warner_d(aTHX_ 7883 packWARN(WARN_EXPERIMENTAL__CLASS), "class is experimental"); 7884 7885 s = force_word(s,BAREWORD,FALSE,TRUE); 7886 s = skipspace(s); 7887 s = force_strict_version(s); 7888 PL_expect = XATTRBLOCK; 7889 TOKEN(KW_CLASS); 7890 7891 case KEY_continue: 7892 /* We have to disambiguate the two senses of 7893 "continue". If the next token is a '{' then 7894 treat it as the start of a continue block; 7895 otherwise treat it as a control operator. 7896 */ 7897 s = skipspace(s); 7898 if (*s == '{') 7899 PREBLOCK(KW_CONTINUE); 7900 else 7901 FUN0(OP_CONTINUE); 7902 7903 case KEY_chdir: 7904 /* may use HOME */ 7905 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); 7906 UNI(OP_CHDIR); 7907 7908 case KEY_close: 7909 UNI(OP_CLOSE); 7910 7911 case KEY_closedir: 7912 UNI(OP_CLOSEDIR); 7913 7914 case KEY_cmp: 7915 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7916 return REPORT(0); 7917 NCEop(OP_SCMP); 7918 7919 case KEY_caller: 7920 UNI(OP_CALLER); 7921 7922 case KEY_crypt: 7923 7924 LOP(OP_CRYPT,XTERM); 7925 7926 case KEY_chmod: 7927 LOP(OP_CHMOD,XTERM); 7928 7929 case KEY_chown: 7930 LOP(OP_CHOWN,XTERM); 7931 7932 case KEY_connect: 7933 LOP(OP_CONNECT,XTERM); 7934 7935 case KEY_chr: 7936 UNI(OP_CHR); 7937 7938 case KEY_cos: 7939 UNI(OP_COS); 7940 7941 case KEY_chroot: 7942 UNI(OP_CHROOT); 7943 7944 case KEY_default: 7945 PREBLOCK(KW_DEFAULT); 7946 7947 case KEY_defer: 7948 Perl_ck_warner_d(aTHX_ 7949 packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental"); 7950 PREBLOCK(KW_DEFER); 7951 7952 case KEY_do: 7953 return yyl_do(aTHX_ s, orig_keyword); 7954 7955 case KEY_die: 7956 PL_hints |= HINT_BLOCK_SCOPE; 7957 LOP(OP_DIE,XTERM); 7958 7959 case KEY_defined: 7960 UNI(OP_DEFINED); 7961 7962 case KEY_delete: 7963 UNI(OP_DELETE); 7964 7965 case KEY_dbmopen: 7966 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), 7967 STR_WITH_LEN("NDBM_File::"), 7968 STR_WITH_LEN("DB_File::"), 7969 STR_WITH_LEN("GDBM_File::"), 7970 STR_WITH_LEN("SDBM_File::"), 7971 STR_WITH_LEN("ODBM_File::"), 7972 NULL); 7973 LOP(OP_DBMOPEN,XTERM); 7974 7975 case KEY_dbmclose: 7976 UNI(OP_DBMCLOSE); 7977 7978 case KEY_dump: 7979 LOOPX(OP_DUMP); 7980 7981 case KEY_else: 7982 PREBLOCK(KW_ELSE); 7983 7984 case KEY_elsif: 7985 pl_yylval.ival = CopLINE(PL_curcop); 7986 OPERATOR(KW_ELSIF); 7987 7988 case KEY_eq: 7989 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 7990 return REPORT(0); 7991 ChEop(OP_SEQ); 7992 7993 case KEY_exists: 7994 UNI(OP_EXISTS); 7995 7996 case KEY_exit: 7997 UNI(OP_EXIT); 7998 7999 case KEY_eval: 8000 s = skipspace(s); 8001 if (*s == '{') { /* block eval */ 8002 PL_expect = XTERMBLOCK; 8003 UNIBRACK(OP_ENTERTRY); 8004 } 8005 else { /* string eval */ 8006 PL_expect = XTERM; 8007 UNIBRACK(OP_ENTEREVAL); 8008 } 8009 8010 case KEY_evalbytes: 8011 PL_expect = XTERM; 8012 UNIBRACK(-OP_ENTEREVAL); 8013 8014 case KEY_eof: 8015 UNI(OP_EOF); 8016 8017 case KEY_exp: 8018 UNI(OP_EXP); 8019 8020 case KEY_each: 8021 UNI(OP_EACH); 8022 8023 case KEY_exec: 8024 LOP(OP_EXEC,XREF); 8025 8026 case KEY_endhostent: 8027 FUN0(OP_EHOSTENT); 8028 8029 case KEY_endnetent: 8030 FUN0(OP_ENETENT); 8031 8032 case KEY_endservent: 8033 FUN0(OP_ESERVENT); 8034 8035 case KEY_endprotoent: 8036 FUN0(OP_EPROTOENT); 8037 8038 case KEY_endpwent: 8039 FUN0(OP_EPWENT); 8040 8041 case KEY_endgrent: 8042 FUN0(OP_EGRENT); 8043 8044 case KEY_field: 8045 /* TODO: maybe this should use the same parser/grammar structures as 8046 * `my`, but it's also rather messy because of the `our` conflation 8047 */ 8048 Perl_ck_warner_d(aTHX_ 8049 packWARN(WARN_EXPERIMENTAL__CLASS), "field is experimental"); 8050 8051 croak_kw_unless_class("field"); 8052 8053 PL_parser->in_my = KEY_field; 8054 OPERATOR(KW_FIELD); 8055 8056 case KEY_finally: 8057 Perl_ck_warner_d(aTHX_ 8058 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental"); 8059 PREBLOCK(KW_FINALLY); 8060 8061 case KEY_for: 8062 case KEY_foreach: 8063 return yyl_foreach(aTHX_ s); 8064 8065 case KEY_formline: 8066 LOP(OP_FORMLINE,XTERM); 8067 8068 case KEY_fork: 8069 FUN0(OP_FORK); 8070 8071 case KEY_fc: 8072 UNI(OP_FC); 8073 8074 case KEY_fcntl: 8075 LOP(OP_FCNTL,XTERM); 8076 8077 case KEY_fileno: 8078 UNI(OP_FILENO); 8079 8080 case KEY_flock: 8081 LOP(OP_FLOCK,XTERM); 8082 8083 case KEY_gt: 8084 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8085 return REPORT(0); 8086 ChRop(OP_SGT); 8087 8088 case KEY_ge: 8089 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8090 return REPORT(0); 8091 ChRop(OP_SGE); 8092 8093 case KEY_grep: 8094 LOP(OP_GREPSTART, XREF); 8095 8096 case KEY_goto: 8097 LOOPX(OP_GOTO); 8098 8099 case KEY_gmtime: 8100 UNI(OP_GMTIME); 8101 8102 case KEY_getc: 8103 UNIDOR(OP_GETC); 8104 8105 case KEY_getppid: 8106 FUN0(OP_GETPPID); 8107 8108 case KEY_getpgrp: 8109 UNI(OP_GETPGRP); 8110 8111 case KEY_getpriority: 8112 LOP(OP_GETPRIORITY,XTERM); 8113 8114 case KEY_getprotobyname: 8115 UNI(OP_GPBYNAME); 8116 8117 case KEY_getprotobynumber: 8118 LOP(OP_GPBYNUMBER,XTERM); 8119 8120 case KEY_getprotoent: 8121 FUN0(OP_GPROTOENT); 8122 8123 case KEY_getpwent: 8124 FUN0(OP_GPWENT); 8125 8126 case KEY_getpwnam: 8127 UNI(OP_GPWNAM); 8128 8129 case KEY_getpwuid: 8130 UNI(OP_GPWUID); 8131 8132 case KEY_getpeername: 8133 UNI(OP_GETPEERNAME); 8134 8135 case KEY_gethostbyname: 8136 UNI(OP_GHBYNAME); 8137 8138 case KEY_gethostbyaddr: 8139 LOP(OP_GHBYADDR,XTERM); 8140 8141 case KEY_gethostent: 8142 FUN0(OP_GHOSTENT); 8143 8144 case KEY_getnetbyname: 8145 UNI(OP_GNBYNAME); 8146 8147 case KEY_getnetbyaddr: 8148 LOP(OP_GNBYADDR,XTERM); 8149 8150 case KEY_getnetent: 8151 FUN0(OP_GNETENT); 8152 8153 case KEY_getservbyname: 8154 LOP(OP_GSBYNAME,XTERM); 8155 8156 case KEY_getservbyport: 8157 LOP(OP_GSBYPORT,XTERM); 8158 8159 case KEY_getservent: 8160 FUN0(OP_GSERVENT); 8161 8162 case KEY_getsockname: 8163 UNI(OP_GETSOCKNAME); 8164 8165 case KEY_getsockopt: 8166 LOP(OP_GSOCKOPT,XTERM); 8167 8168 case KEY_getgrent: 8169 FUN0(OP_GGRENT); 8170 8171 case KEY_getgrnam: 8172 UNI(OP_GGRNAM); 8173 8174 case KEY_getgrgid: 8175 UNI(OP_GGRGID); 8176 8177 case KEY_getlogin: 8178 FUN0(OP_GETLOGIN); 8179 8180 case KEY_given: 8181 pl_yylval.ival = CopLINE(PL_curcop); 8182 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__SMARTMATCH), 8183 "given is deprecated"); 8184 OPERATOR(KW_GIVEN); 8185 8186 case KEY_glob: 8187 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM ); 8188 8189 case KEY_hex: 8190 UNI(OP_HEX); 8191 8192 case KEY_if: 8193 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8194 return REPORT(0); 8195 pl_yylval.ival = CopLINE(PL_curcop); 8196 OPERATOR(KW_IF); 8197 8198 case KEY_index: 8199 LOP(OP_INDEX,XTERM); 8200 8201 case KEY_int: 8202 UNI(OP_INT); 8203 8204 case KEY_ioctl: 8205 LOP(OP_IOCTL,XTERM); 8206 8207 case KEY_isa: 8208 NCRop(OP_ISA); 8209 8210 case KEY_join: 8211 LOP(OP_JOIN,XTERM); 8212 8213 case KEY_keys: 8214 UNI(OP_KEYS); 8215 8216 case KEY_kill: 8217 LOP(OP_KILL,XTERM); 8218 8219 case KEY_last: 8220 LOOPX(OP_LAST); 8221 8222 case KEY_lc: 8223 UNI(OP_LC); 8224 8225 case KEY_lcfirst: 8226 UNI(OP_LCFIRST); 8227 8228 case KEY_local: 8229 OPERATOR(KW_LOCAL); 8230 8231 case KEY_length: 8232 UNI(OP_LENGTH); 8233 8234 case KEY_lt: 8235 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8236 return REPORT(0); 8237 ChRop(OP_SLT); 8238 8239 case KEY_le: 8240 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8241 return REPORT(0); 8242 ChRop(OP_SLE); 8243 8244 case KEY_localtime: 8245 UNI(OP_LOCALTIME); 8246 8247 case KEY_log: 8248 UNI(OP_LOG); 8249 8250 case KEY_link: 8251 LOP(OP_LINK,XTERM); 8252 8253 case KEY_listen: 8254 LOP(OP_LISTEN,XTERM); 8255 8256 case KEY_lock: 8257 UNI(OP_LOCK); 8258 8259 case KEY_lstat: 8260 UNI(OP_LSTAT); 8261 8262 case KEY_m: 8263 s = scan_pat(s,OP_MATCH); 8264 TERM(sublex_start()); 8265 8266 case KEY_map: 8267 LOP(OP_MAPSTART, XREF); 8268 8269 case KEY_mkdir: 8270 LOP(OP_MKDIR,XTERM); 8271 8272 case KEY_msgctl: 8273 LOP(OP_MSGCTL,XTERM); 8274 8275 case KEY_msgget: 8276 LOP(OP_MSGGET,XTERM); 8277 8278 case KEY_msgrcv: 8279 LOP(OP_MSGRCV,XTERM); 8280 8281 case KEY_msgsnd: 8282 LOP(OP_MSGSND,XTERM); 8283 8284 case KEY_our: 8285 case KEY_my: 8286 case KEY_state: 8287 return yyl_my(aTHX_ s, key); 8288 8289 case KEY_next: 8290 LOOPX(OP_NEXT); 8291 8292 case KEY_ne: 8293 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 8294 return REPORT(0); 8295 ChEop(OP_SNE); 8296 8297 case KEY_no: 8298 s = tokenize_use(0, s); 8299 TOKEN(KW_USE_or_NO); 8300 8301 case KEY_not: 8302 if (*s == '(' || (s = skipspace(s), *s == '(')) 8303 FUN1(OP_NOT); 8304 else { 8305 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) 8306 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; 8307 OPERATOR(NOTOP); 8308 } 8309 8310 case KEY_open: 8311 s = skipspace(s); 8312 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 8313 const char *t; 8314 char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE); 8315 for (t=d; isSPACE(*t);) 8316 t++; 8317 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) 8318 /* [perl #16184] */ 8319 && !(t[0] == '=' && t[1] == '>') 8320 && !(t[0] == ':' && t[1] == ':') 8321 && !keyword(s, d-s, 0) 8322 ) { 8323 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), 8324 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")", 8325 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); 8326 } 8327 } 8328 LOP(OP_OPEN,XTERM); 8329 8330 case KEY_or: 8331 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8332 return REPORT(0); 8333 pl_yylval.ival = OP_OR; 8334 OPERATOR(OROP); 8335 8336 case KEY_ord: 8337 UNI(OP_ORD); 8338 8339 case KEY_oct: 8340 UNI(OP_OCT); 8341 8342 case KEY_opendir: 8343 LOP(OP_OPEN_DIR,XTERM); 8344 8345 case KEY_print: 8346 checkcomma(s,PL_tokenbuf,"filehandle"); 8347 LOP(OP_PRINT,XREF); 8348 8349 case KEY_printf: 8350 checkcomma(s,PL_tokenbuf,"filehandle"); 8351 LOP(OP_PRTF,XREF); 8352 8353 case KEY_prototype: 8354 UNI(OP_PROTOTYPE); 8355 8356 case KEY_push: 8357 LOP(OP_PUSH,XTERM); 8358 8359 case KEY_pop: 8360 UNIDOR(OP_POP); 8361 8362 case KEY_pos: 8363 UNIDOR(OP_POS); 8364 8365 case KEY_pack: 8366 LOP(OP_PACK,XTERM); 8367 8368 case KEY_package: 8369 s = force_word(s,BAREWORD,FALSE,TRUE); 8370 s = skipspace(s); 8371 s = force_strict_version(s); 8372 PREBLOCK(KW_PACKAGE); 8373 8374 case KEY_pipe: 8375 LOP(OP_PIPE_OP,XTERM); 8376 8377 case KEY_q: 8378 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8379 if (!s) 8380 missingterm(NULL, 0); 8381 COPLINE_SET_FROM_MULTI_END; 8382 pl_yylval.ival = OP_CONST; 8383 TERM(sublex_start()); 8384 8385 case KEY_quotemeta: 8386 UNI(OP_QUOTEMETA); 8387 8388 case KEY_qw: 8389 return yyl_qw(aTHX_ s, len); 8390 8391 case KEY_qq: 8392 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8393 if (!s) 8394 missingterm(NULL, 0); 8395 pl_yylval.ival = OP_STRINGIFY; 8396 if (SvIVX(PL_lex_stuff) == '\'') 8397 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ 8398 TERM(sublex_start()); 8399 8400 case KEY_qr: 8401 s = scan_pat(s,OP_QR); 8402 TERM(sublex_start()); 8403 8404 case KEY_qx: 8405 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 8406 if (!s) 8407 missingterm(NULL, 0); 8408 pl_yylval.ival = OP_BACKTICK; 8409 TERM(sublex_start()); 8410 8411 case KEY_return: 8412 OLDLOP(OP_RETURN); 8413 8414 case KEY_require: 8415 return yyl_require(aTHX_ s, orig_keyword); 8416 8417 case KEY_reset: 8418 UNI(OP_RESET); 8419 8420 case KEY_redo: 8421 LOOPX(OP_REDO); 8422 8423 case KEY_rename: 8424 LOP(OP_RENAME,XTERM); 8425 8426 case KEY_rand: 8427 UNI(OP_RAND); 8428 8429 case KEY_rmdir: 8430 UNI(OP_RMDIR); 8431 8432 case KEY_rindex: 8433 LOP(OP_RINDEX,XTERM); 8434 8435 case KEY_read: 8436 LOP(OP_READ,XTERM); 8437 8438 case KEY_readdir: 8439 UNI(OP_READDIR); 8440 8441 case KEY_readline: 8442 UNIDOR(OP_READLINE); 8443 8444 case KEY_readpipe: 8445 UNIDOR(OP_BACKTICK); 8446 8447 case KEY_rewinddir: 8448 UNI(OP_REWINDDIR); 8449 8450 case KEY_recv: 8451 LOP(OP_RECV,XTERM); 8452 8453 case KEY_reverse: 8454 LOP(OP_REVERSE,XTERM); 8455 8456 case KEY_readlink: 8457 UNIDOR(OP_READLINK); 8458 8459 case KEY_ref: 8460 UNI(OP_REF); 8461 8462 case KEY_s: 8463 s = scan_subst(s); 8464 if (pl_yylval.opval) 8465 TERM(sublex_start()); 8466 else 8467 TOKEN(1); /* force error */ 8468 8469 case KEY_say: 8470 checkcomma(s,PL_tokenbuf,"filehandle"); 8471 LOP(OP_SAY,XREF); 8472 8473 case KEY_chomp: 8474 UNI(OP_CHOMP); 8475 8476 case KEY_scalar: 8477 UNI(OP_SCALAR); 8478 8479 case KEY_select: 8480 LOP(OP_SELECT,XTERM); 8481 8482 case KEY_seek: 8483 LOP(OP_SEEK,XTERM); 8484 8485 case KEY_semctl: 8486 LOP(OP_SEMCTL,XTERM); 8487 8488 case KEY_semget: 8489 LOP(OP_SEMGET,XTERM); 8490 8491 case KEY_semop: 8492 LOP(OP_SEMOP,XTERM); 8493 8494 case KEY_send: 8495 LOP(OP_SEND,XTERM); 8496 8497 case KEY_setpgrp: 8498 LOP(OP_SETPGRP,XTERM); 8499 8500 case KEY_setpriority: 8501 LOP(OP_SETPRIORITY,XTERM); 8502 8503 case KEY_sethostent: 8504 UNI(OP_SHOSTENT); 8505 8506 case KEY_setnetent: 8507 UNI(OP_SNETENT); 8508 8509 case KEY_setservent: 8510 UNI(OP_SSERVENT); 8511 8512 case KEY_setprotoent: 8513 UNI(OP_SPROTOENT); 8514 8515 case KEY_setpwent: 8516 FUN0(OP_SPWENT); 8517 8518 case KEY_setgrent: 8519 FUN0(OP_SGRENT); 8520 8521 case KEY_seekdir: 8522 LOP(OP_SEEKDIR,XTERM); 8523 8524 case KEY_setsockopt: 8525 LOP(OP_SSOCKOPT,XTERM); 8526 8527 case KEY_shift: 8528 UNIDOR(OP_SHIFT); 8529 8530 case KEY_shmctl: 8531 LOP(OP_SHMCTL,XTERM); 8532 8533 case KEY_shmget: 8534 LOP(OP_SHMGET,XTERM); 8535 8536 case KEY_shmread: 8537 LOP(OP_SHMREAD,XTERM); 8538 8539 case KEY_shmwrite: 8540 LOP(OP_SHMWRITE,XTERM); 8541 8542 case KEY_shutdown: 8543 LOP(OP_SHUTDOWN,XTERM); 8544 8545 case KEY_sin: 8546 UNI(OP_SIN); 8547 8548 case KEY_sleep: 8549 UNI(OP_SLEEP); 8550 8551 case KEY_socket: 8552 LOP(OP_SOCKET,XTERM); 8553 8554 case KEY_socketpair: 8555 LOP(OP_SOCKPAIR,XTERM); 8556 8557 case KEY_sort: 8558 checkcomma(s,PL_tokenbuf,"subroutine name"); 8559 s = skipspace(s); 8560 PL_expect = XTERM; 8561 s = force_word(s,BAREWORD,TRUE,TRUE); 8562 LOP(OP_SORT,XREF); 8563 8564 case KEY_split: 8565 LOP(OP_SPLIT,XTERM); 8566 8567 case KEY_sprintf: 8568 LOP(OP_SPRINTF,XTERM); 8569 8570 case KEY_splice: 8571 LOP(OP_SPLICE,XTERM); 8572 8573 case KEY_sqrt: 8574 UNI(OP_SQRT); 8575 8576 case KEY_srand: 8577 UNI(OP_SRAND); 8578 8579 case KEY_stat: 8580 UNI(OP_STAT); 8581 8582 case KEY_study: 8583 UNI(OP_STUDY); 8584 8585 case KEY_substr: 8586 LOP(OP_SUBSTR,XTERM); 8587 8588 case KEY_method: 8589 /* For now we just treat 'method' identical to 'sub' plus a warning */ 8590 Perl_ck_warner_d(aTHX_ 8591 packWARN(WARN_EXPERIMENTAL__CLASS), "method is experimental"); 8592 return yyl_sub(aTHX_ s, KEY_method); 8593 8594 case KEY_format: 8595 case KEY_sub: 8596 return yyl_sub(aTHX_ s, key); 8597 8598 case KEY_system: 8599 LOP(OP_SYSTEM,XREF); 8600 8601 case KEY_symlink: 8602 LOP(OP_SYMLINK,XTERM); 8603 8604 case KEY_syscall: 8605 LOP(OP_SYSCALL,XTERM); 8606 8607 case KEY_sysopen: 8608 LOP(OP_SYSOPEN,XTERM); 8609 8610 case KEY_sysseek: 8611 LOP(OP_SYSSEEK,XTERM); 8612 8613 case KEY_sysread: 8614 LOP(OP_SYSREAD,XTERM); 8615 8616 case KEY_syswrite: 8617 LOP(OP_SYSWRITE,XTERM); 8618 8619 case KEY_tr: 8620 case KEY_y: 8621 s = scan_trans(s); 8622 TERM(sublex_start()); 8623 8624 case KEY_tell: 8625 UNI(OP_TELL); 8626 8627 case KEY_telldir: 8628 UNI(OP_TELLDIR); 8629 8630 case KEY_tie: 8631 LOP(OP_TIE,XTERM); 8632 8633 case KEY_tied: 8634 UNI(OP_TIED); 8635 8636 case KEY_time: 8637 FUN0(OP_TIME); 8638 8639 case KEY_times: 8640 FUN0(OP_TMS); 8641 8642 case KEY_truncate: 8643 LOP(OP_TRUNCATE,XTERM); 8644 8645 case KEY_try: 8646 pl_yylval.ival = CopLINE(PL_curcop); 8647 Perl_ck_warner_d(aTHX_ 8648 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental"); 8649 PREBLOCK(KW_TRY); 8650 8651 case KEY_uc: 8652 UNI(OP_UC); 8653 8654 case KEY_ucfirst: 8655 UNI(OP_UCFIRST); 8656 8657 case KEY_untie: 8658 UNI(OP_UNTIE); 8659 8660 case KEY_until: 8661 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8662 return REPORT(0); 8663 pl_yylval.ival = CopLINE(PL_curcop); 8664 OPERATOR(KW_UNTIL); 8665 8666 case KEY_unless: 8667 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8668 return REPORT(0); 8669 pl_yylval.ival = CopLINE(PL_curcop); 8670 OPERATOR(KW_UNLESS); 8671 8672 case KEY_unlink: 8673 LOP(OP_UNLINK,XTERM); 8674 8675 case KEY_undef: 8676 UNIDOR(OP_UNDEF); 8677 8678 case KEY_unpack: 8679 LOP(OP_UNPACK,XTERM); 8680 8681 case KEY_utime: 8682 LOP(OP_UTIME,XTERM); 8683 8684 case KEY_umask: 8685 UNIDOR(OP_UMASK); 8686 8687 case KEY_unshift: 8688 LOP(OP_UNSHIFT,XTERM); 8689 8690 case KEY_use: 8691 s = tokenize_use(1, s); 8692 TOKEN(KW_USE_or_NO); 8693 8694 case KEY_values: 8695 UNI(OP_VALUES); 8696 8697 case KEY_vec: 8698 LOP(OP_VEC,XTERM); 8699 8700 case KEY_when: 8701 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8702 return REPORT(0); 8703 pl_yylval.ival = CopLINE(PL_curcop); 8704 Perl_ck_warner_d(aTHX_ 8705 packWARN(WARN_DEPRECATED__SMARTMATCH), 8706 "when is deprecated"); 8707 OPERATOR(KW_WHEN); 8708 8709 case KEY_while: 8710 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 8711 return REPORT(0); 8712 pl_yylval.ival = CopLINE(PL_curcop); 8713 OPERATOR(KW_WHILE); 8714 8715 case KEY_warn: 8716 PL_hints |= HINT_BLOCK_SCOPE; 8717 LOP(OP_WARN,XTERM); 8718 8719 case KEY_wait: 8720 FUN0(OP_WAIT); 8721 8722 case KEY_waitpid: 8723 LOP(OP_WAITPID,XTERM); 8724 8725 case KEY_wantarray: 8726 FUN0(OP_WANTARRAY); 8727 8728 case KEY_write: 8729 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and 8730 * we use the same number on EBCDIC */ 8731 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); 8732 UNI(OP_ENTERWRITE); 8733 8734 case KEY_x: 8735 if (PL_expect == XOPERATOR) { 8736 if (*s == '=' && !PL_lex_allbrackets 8737 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 8738 { 8739 return REPORT(0); 8740 } 8741 Mop(OP_REPEAT); 8742 } 8743 check_uni(); 8744 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c); 8745 8746 case KEY_xor: 8747 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) 8748 return REPORT(0); 8749 pl_yylval.ival = OP_XOR; 8750 OPERATOR(OROP); 8751 } 8752} 8753 8754static int 8755yyl_key_core(pTHX_ char *s, STRLEN len, struct code c) 8756{ 8757 I32 key = 0; 8758 I32 orig_keyword = 0; 8759 STRLEN olen = len; 8760 char *d = s; 8761 s += 2; 8762 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE); 8763 if ((*s == ':' && s[1] == ':') 8764 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) 8765 { 8766 Copy(PL_bufptr, PL_tokenbuf, olen, char); 8767 return yyl_just_a_word(aTHX_ d, olen, 0, c); 8768 } 8769 if (!key) 8770 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword", 8771 UTF8fARG(UTF, len, PL_tokenbuf)); 8772 if (key < 0) 8773 key = -key; 8774 else if (key == KEY_require || key == KEY_do 8775 || key == KEY_glob) 8776 /* that's a way to remember we saw "CORE::" */ 8777 orig_keyword = key; 8778 8779 /* Known to be a reserved word at this point */ 8780 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c); 8781} 8782 8783struct Perl_custom_infix_result { 8784 struct Perl_custom_infix *def; 8785 SV *parsedata; 8786}; 8787 8788static enum yytokentype tokentype_for_plugop(struct Perl_custom_infix *def) 8789{ 8790 enum Perl_custom_infix_precedence prec = def->prec; 8791 if(prec <= INFIX_PREC_LOW) 8792 return PLUGIN_LOW_OP; 8793 if(prec <= INFIX_PREC_LOGICAL_OR_LOW) 8794 return PLUGIN_LOGICAL_OR_LOW_OP; 8795 if(prec <= INFIX_PREC_LOGICAL_AND_LOW) 8796 return PLUGIN_LOGICAL_AND_LOW_OP; 8797 if(prec <= INFIX_PREC_ASSIGN) 8798 return PLUGIN_ASSIGN_OP; 8799 if(prec <= INFIX_PREC_LOGICAL_OR) 8800 return PLUGIN_LOGICAL_OR_OP; 8801 if(prec <= INFIX_PREC_LOGICAL_AND) 8802 return PLUGIN_LOGICAL_AND_OP; 8803 if(prec <= INFIX_PREC_REL) 8804 return PLUGIN_REL_OP; 8805 if(prec <= INFIX_PREC_ADD) 8806 return PLUGIN_ADD_OP; 8807 if(prec <= INFIX_PREC_MUL) 8808 return PLUGIN_MUL_OP; 8809 if(prec <= INFIX_PREC_POW) 8810 return PLUGIN_POW_OP; 8811 return PLUGIN_HIGH_OP; 8812} 8813 8814OP * 8815Perl_build_infix_plugin(pTHX_ OP *lhs, OP *rhs, void *tokendata) 8816{ 8817 PERL_ARGS_ASSERT_BUILD_INFIX_PLUGIN; 8818 8819 struct Perl_custom_infix_result *result = (struct Perl_custom_infix_result *)tokendata; 8820 SAVEFREEPV(result); 8821 if(result->parsedata) 8822 SAVEFREESV(result->parsedata); 8823 8824 return (*result->def->build_op)(aTHX_ 8825 &result->parsedata, lhs, rhs, result->def); 8826} 8827 8828static int 8829yyl_keylookup(pTHX_ char *s, GV *gv) 8830{ 8831 STRLEN len; 8832 bool anydelim; 8833 I32 key; 8834 struct code c = no_code; 8835 I32 orig_keyword = 0; 8836 char *d; 8837 8838 c.gv = gv; 8839 8840 PL_bufptr = s; 8841 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE); 8842 8843 /* Some keywords can be followed by any delimiter, including ':' */ 8844 anydelim = word_takes_any_delimiter(PL_tokenbuf, len); 8845 8846 /* x::* is just a word, unless x is "CORE" */ 8847 if (!anydelim && *s == ':' && s[1] == ':') { 8848 if (memEQs(PL_tokenbuf, len, "CORE")) 8849 return yyl_key_core(aTHX_ s, len, c); 8850 return yyl_just_a_word(aTHX_ s, len, 0, c); 8851 } 8852 8853 d = s; 8854 while (d < PL_bufend && isSPACE(*d)) 8855 d++; /* no comments skipped here, or s### is misparsed */ 8856 8857 /* Is this a word before a => operator? */ 8858 if (*d == '=' && d[1] == '>') { 8859 return yyl_fatcomma(aTHX_ s, len); 8860 } 8861 8862 /* Check for plugged-in keyword */ 8863 { 8864 OP *o; 8865 int result; 8866 char *saved_bufptr = PL_bufptr; 8867 PL_bufptr = s; 8868 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); 8869 s = PL_bufptr; 8870 if (result == KEYWORD_PLUGIN_DECLINE) { 8871 /* not a plugged-in keyword */ 8872 PL_bufptr = saved_bufptr; 8873 } else if (result == KEYWORD_PLUGIN_STMT) { 8874 pl_yylval.opval = o; 8875 CLINE; 8876 if (!PL_nexttoke) PL_expect = XSTATE; 8877 return REPORT(PLUGSTMT); 8878 } else if (result == KEYWORD_PLUGIN_EXPR) { 8879 pl_yylval.opval = o; 8880 CLINE; 8881 if (!PL_nexttoke) PL_expect = XOPERATOR; 8882 return REPORT(PLUGEXPR); 8883 } else { 8884 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf); 8885 } 8886 } 8887 8888 /* Check for plugged-in named operator */ 8889 if(PLUGINFIX_IS_ENABLED) { 8890 struct Perl_custom_infix *def; 8891 STRLEN result; 8892 result = PL_infix_plugin(aTHX_ PL_tokenbuf, len, &def); 8893 if(result) { 8894 if(result != len) 8895 Perl_croak(aTHX_ "Bad infix plugin result (%zd) - did not consume entire identifier <%s>\n", 8896 result, PL_tokenbuf); 8897 PL_bufptr = s = d; 8898 struct Perl_custom_infix_result *result; 8899 Newx(result, 1, struct Perl_custom_infix_result); 8900 result->def = def; 8901 result->parsedata = NULL; 8902 if(def->parse) { 8903 (*def->parse)(aTHX_ &result->parsedata, def); 8904 s = PL_bufptr; /* restore local s variable */ 8905 } 8906 pl_yylval.pval = result; 8907 CLINE; 8908 OPERATOR(tokentype_for_plugop(def)); 8909 } 8910 } 8911 8912 /* Is this a label? */ 8913 if (!anydelim && PL_expect == XSTATE 8914 && d < PL_bufend && *d == ':' && *(d + 1) != ':') { 8915 s = d + 1; 8916 pl_yylval.opval = 8917 newSVOP(OP_CONST, 0, 8918 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0)); 8919 CLINE; 8920 TOKEN(LABEL); 8921 } 8922 8923 /* Check for lexical sub */ 8924 if (PL_expect != XOPERATOR) { 8925 char tmpbuf[sizeof PL_tokenbuf + 1]; 8926 *tmpbuf = '&'; 8927 Copy(PL_tokenbuf, tmpbuf+1, len, char); 8928 c.off = pad_findmy_pvn(tmpbuf, len+1, 0); 8929 if (c.off != NOT_IN_PAD) { 8930 assert(c.off); /* we assume this is boolean-true below */ 8931 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) { 8932 HV * const stash = PAD_COMPNAME_OURSTASH(c.off); 8933 HEK * const stashname = HvNAME_HEK(stash); 8934 c.sv = newSVhek(stashname); 8935 sv_catpvs(c.sv, "::"); 8936 sv_catpvn_flags(c.sv, PL_tokenbuf, len, 8937 (UTF ? SV_CATUTF8 : SV_CATBYTES)); 8938 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv), 8939 SVt_PVCV); 8940 c.off = 0; 8941 if (!c.gv) { 8942 ASSUME(c.sv && SvREFCNT(c.sv) == 1); 8943 SvREFCNT_dec(c.sv); 8944 c.sv = NULL; 8945 return yyl_just_a_word(aTHX_ s, len, 0, c); 8946 } 8947 } 8948 else { 8949 c.rv2cv_op = newOP(OP_PADANY, 0); 8950 c.rv2cv_op->op_targ = c.off; 8951 c.cv = find_lexical_cv(c.off); 8952 } 8953 c.lex = TRUE; 8954 return yyl_just_a_word(aTHX_ s, len, 0, c); 8955 } 8956 c.off = 0; 8957 } 8958 8959 /* Check for built-in keyword */ 8960 key = keyword(PL_tokenbuf, len, 0); 8961 8962 if (key < 0) 8963 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp); 8964 8965 if (key && key != KEY___DATA__ && key != KEY___END__ 8966 && (!anydelim || *s != '#')) { 8967 /* no override, and not s### either; skipspace is safe here 8968 * check for => on following line */ 8969 bool arrow; 8970 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); 8971 STRLEN soff = s - SvPVX(PL_linestr); 8972 s = peekspace(s); 8973 arrow = *s == '=' && s[1] == '>'; 8974 PL_bufptr = SvPVX(PL_linestr) + bufoff; 8975 s = SvPVX(PL_linestr) + soff; 8976 if (arrow) 8977 return yyl_fatcomma(aTHX_ s, len); 8978 } 8979 8980 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c); 8981} 8982 8983static int 8984yyl_try(pTHX_ char *s) 8985{ 8986 char *d; 8987 GV *gv = NULL; 8988 int tok; 8989 8990 retry: 8991 /* Check for plugged-in symbolic operator */ 8992 if(PLUGINFIX_IS_ENABLED && isPLUGINFIX_FIRST(*s)) { 8993 struct Perl_custom_infix *def; 8994 char *s_end = s, *d = PL_tokenbuf; 8995 STRLEN len; 8996 8997 /* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */ 8998 while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end)) 8999 *d++ = *s_end++; 9000 *d = '\0'; 9001 9002 if((len = (*PL_infix_plugin)(aTHX_ PL_tokenbuf, s_end - s, &def))) { 9003 s += len; 9004 struct Perl_custom_infix_result *result; 9005 Newx(result, 1, struct Perl_custom_infix_result); 9006 result->def = def; 9007 result->parsedata = NULL; 9008 if(def->parse) { 9009 PL_bufptr = s; 9010 (*def->parse)(aTHX_ &result->parsedata, def); 9011 s = PL_bufptr; /* restore local s variable */ 9012 } 9013 pl_yylval.pval = result; 9014 CLINE; 9015 OPERATOR(tokentype_for_plugop(def)); 9016 } 9017 } 9018 9019 switch (*s) { 9020 default: 9021 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) { 9022 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9023 return tok; 9024 goto retry_bufptr; 9025 } 9026 yyl_croak_unrecognised(aTHX_ s); 9027 9028 case 4: 9029 case 26: 9030 /* emulate EOF on ^D or ^Z */ 9031 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY) 9032 return tok; 9033 retry_bufptr: 9034 s = PL_bufptr; 9035 goto retry; 9036 9037 case 0: 9038 if ((!PL_rsfp || PL_lex_inwhat) 9039 && (!PL_parser->filtered || s+1 < PL_bufend)) { 9040 PL_last_uni = 0; 9041 PL_last_lop = 0; 9042 if (PL_lex_brackets 9043 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) 9044 { 9045 yyerror((const char *) 9046 (PL_lex_formbrack 9047 ? "Format not terminated" 9048 : "Missing right curly or square bracket")); 9049 } 9050 DEBUG_T({ 9051 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n"); 9052 }); 9053 TOKEN(0); 9054 } 9055 if (s++ < PL_bufend) 9056 goto retry; /* ignore stray nulls */ 9057 PL_last_uni = 0; 9058 PL_last_lop = 0; 9059 if (!PL_in_eval && !PL_preambled) { 9060 PL_preambled = TRUE; 9061 if (PL_perldb) { 9062 /* Generate a string of Perl code to load the debugger. 9063 * If PERL5DB is set, it will return the contents of that, 9064 * otherwise a compile-time require of perl5db.pl. */ 9065 9066 const char * const pdb = PerlEnv_getenv("PERL5DB"); 9067 9068 if (pdb) { 9069 sv_setpv(PL_linestr, pdb); 9070 sv_catpvs(PL_linestr,";"); 9071 } else { 9072 SETERRNO(0,SS_NORMAL); 9073 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); 9074 } 9075 PL_parser->preambling = CopLINE(PL_curcop); 9076 } else 9077 SvPVCLEAR(PL_linestr); 9078 if (PL_preambleav) { 9079 SV **svp = AvARRAY(PL_preambleav); 9080 SV **const end = svp + AvFILLp(PL_preambleav); 9081 while(svp <= end) { 9082 sv_catsv(PL_linestr, *svp); 9083 ++svp; 9084 sv_catpvs(PL_linestr, ";"); 9085 } 9086 SvREFCNT_dec(MUTABLE_SV(PL_preambleav)); 9087 PL_preambleav = NULL; 9088 } 9089 if (PL_minus_E) 9090 sv_catpvs(PL_linestr, 9091 "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';"); 9092 if (PL_minus_n || PL_minus_p) { 9093 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); 9094 if (PL_minus_l) 9095 sv_catpvs(PL_linestr,"chomp;"); 9096 if (PL_minus_a) { 9097 if (PL_minus_F) { 9098 if ( ( *PL_splitstr == '/' 9099 || *PL_splitstr == '\'' 9100 || *PL_splitstr == '"') 9101 && strchr(PL_splitstr + 1, *PL_splitstr)) 9102 { 9103 /* strchr is ok, because -F pattern can't contain 9104 * embedded NULs */ 9105 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); 9106 } 9107 else { 9108 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL 9109 bytes can be used as quoting characters. :-) */ 9110 const char *splits = PL_splitstr; 9111 sv_catpvs(PL_linestr, "our @F=split(q\0"); 9112 do { 9113 /* Need to \ \s */ 9114 if (*splits == '\\') 9115 sv_catpvn(PL_linestr, splits, 1); 9116 sv_catpvn(PL_linestr, splits, 1); 9117 } while (*splits++); 9118 /* This loop will embed the trailing NUL of 9119 PL_linestr as the last thing it does before 9120 terminating. */ 9121 sv_catpvs(PL_linestr, ");"); 9122 } 9123 } 9124 else 9125 sv_catpvs(PL_linestr,"our @F=split(' ');"); 9126 } 9127 } 9128 sv_catpvs(PL_linestr, "\n"); 9129 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); 9130 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 9131 PL_last_lop = PL_last_uni = NULL; 9132 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) 9133 update_debugger_info(PL_linestr, NULL, 0); 9134 goto retry; 9135 } 9136 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY) 9137 return tok; 9138 goto retry_bufptr; 9139 9140 case '\r': 9141#ifdef PERL_STRICT_CR 9142 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); 9143 Perl_croak(aTHX_ 9144 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); 9145#endif 9146 case ' ': case '\t': case '\f': case '\v': 9147 s++; 9148 goto retry; 9149 9150 case '#': 9151 case '\n': { 9152 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s); 9153 if (needs_semicolon) 9154 TOKEN(PERLY_SEMICOLON); 9155 else 9156 goto retry; 9157 } 9158 9159 case '-': 9160 return yyl_hyphen(aTHX_ s); 9161 9162 case '+': 9163 return yyl_plus(aTHX_ s); 9164 9165 case '*': 9166 return yyl_star(aTHX_ s); 9167 9168 case '%': 9169 return yyl_percent(aTHX_ s); 9170 9171 case '^': 9172 return yyl_caret(aTHX_ s); 9173 9174 case '[': 9175 return yyl_leftsquare(aTHX_ s); 9176 9177 case '~': 9178 return yyl_tilde(aTHX_ s); 9179 9180 case ',': 9181 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 9182 TOKEN(0); 9183 s++; 9184 OPERATOR(PERLY_COMMA); 9185 case ':': 9186 if (s[1] == ':') 9187 return yyl_just_a_word(aTHX_ s, 0, 0, no_code); 9188 return yyl_colon(aTHX_ s + 1); 9189 9190 case '(': 9191 return yyl_leftparen(aTHX_ s + 1); 9192 9193 case ';': 9194 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) 9195 TOKEN(0); 9196 CLINE; 9197 s++; 9198 PL_expect = XSTATE; 9199 TOKEN(PERLY_SEMICOLON); 9200 9201 case ')': 9202 return yyl_rightparen(aTHX_ s); 9203 9204 case ']': 9205 return yyl_rightsquare(aTHX_ s); 9206 9207 case '{': 9208 return yyl_leftcurly(aTHX_ s + 1, 0); 9209 9210 case '}': 9211 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) 9212 TOKEN(0); 9213 return yyl_rightcurly(aTHX_ s, 0); 9214 9215 case '&': 9216 return yyl_ampersand(aTHX_ s); 9217 9218 case '|': 9219 return yyl_verticalbar(aTHX_ s); 9220 9221 case '=': 9222 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n') 9223 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "=====")) 9224 { 9225 s = vcs_conflict_marker(s + 7); 9226 goto retry; 9227 } 9228 9229 s++; 9230 { 9231 const char tmp = *s++; 9232 if (tmp == '=') { 9233 if (!PL_lex_allbrackets 9234 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) 9235 { 9236 s -= 2; 9237 TOKEN(0); 9238 } 9239 ChEop(OP_EQ); 9240 } 9241 if (tmp == '>') { 9242 if (!PL_lex_allbrackets 9243 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) 9244 { 9245 s -= 2; 9246 TOKEN(0); 9247 } 9248 OPERATOR(PERLY_COMMA); 9249 } 9250 if (tmp == '~') 9251 PMop(OP_MATCH); 9252 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) 9253 && memCHRs("+-*/%.^&|<",tmp)) 9254 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 9255 "Reversed %c= operator",(int)tmp); 9256 s--; 9257 if (PL_expect == XSTATE 9258 && isALPHA(tmp) 9259 && (s == PL_linestart+1 || s[-2] == '\n') ) 9260 { 9261 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered) 9262 || PL_lex_state != LEX_NORMAL) 9263 { 9264 d = PL_bufend; 9265 while (s < d) { 9266 if (*s++ == '\n') { 9267 incline(s, PL_bufend); 9268 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut")) 9269 { 9270 s = (char *) memchr(s,'\n', d - s); 9271 if (s) 9272 s++; 9273 else 9274 s = d; 9275 incline(s, PL_bufend); 9276 goto retry; 9277 } 9278 } 9279 } 9280 goto retry; 9281 } 9282 s = PL_bufend; 9283 PL_parser->in_pod = 1; 9284 goto retry; 9285 } 9286 } 9287 if (PL_expect == XBLOCK) { 9288 const char *t = s; 9289#ifdef PERL_STRICT_CR 9290 while (SPACE_OR_TAB(*t)) 9291#else 9292 while (SPACE_OR_TAB(*t) || *t == '\r') 9293#endif 9294 t++; 9295 if (*t == '\n' || *t == '#') { 9296 ENTER_with_name("lex_format"); 9297 SAVEI8(PL_parser->form_lex_state); 9298 SAVEI32(PL_lex_formbrack); 9299 PL_parser->form_lex_state = PL_lex_state; 9300 PL_lex_formbrack = PL_lex_brackets + 1; 9301 PL_parser->sub_error_count = PL_error_count; 9302 return yyl_leftcurly(aTHX_ s, 1); 9303 } 9304 } 9305 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { 9306 s--; 9307 TOKEN(0); 9308 } 9309 pl_yylval.ival = 0; 9310 OPERATOR(ASSIGNOP); 9311 9312 case '!': 9313 return yyl_bang(aTHX_ s + 1); 9314 9315 case '<': 9316 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n') 9317 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<")) 9318 { 9319 s = vcs_conflict_marker(s + 7); 9320 goto retry; 9321 } 9322 return yyl_leftpointy(aTHX_ s); 9323 9324 case '>': 9325 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n') 9326 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>")) 9327 { 9328 s = vcs_conflict_marker(s + 7); 9329 goto retry; 9330 } 9331 return yyl_rightpointy(aTHX_ s + 1); 9332 9333 case '$': 9334 return yyl_dollar(aTHX_ s); 9335 9336 case '@': 9337 return yyl_snail(aTHX_ s); 9338 9339 case '/': /* may be division, defined-or, or pattern */ 9340 return yyl_slash(aTHX_ s); 9341 9342 case '?': /* conditional */ 9343 s++; 9344 if (!PL_lex_allbrackets 9345 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) 9346 { 9347 s--; 9348 TOKEN(0); 9349 } 9350 PL_lex_allbrackets++; 9351 OPERATOR(PERLY_QUESTION_MARK); 9352 9353 case '.': 9354 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack 9355#ifdef PERL_STRICT_CR 9356 && s[1] == '\n' 9357#else 9358 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) 9359#endif 9360 && (s == PL_linestart || s[-1] == '\n') ) 9361 { 9362 PL_expect = XSTATE; 9363 /* formbrack==2 means dot seen where arguments expected */ 9364 return yyl_rightcurly(aTHX_ s, 2); 9365 } 9366 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { 9367 s += 3; 9368 OPERATOR(YADAYADA); 9369 } 9370 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { 9371 char tmp = *s++; 9372 if (*s == tmp) { 9373 if (!PL_lex_allbrackets 9374 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) 9375 { 9376 s--; 9377 TOKEN(0); 9378 } 9379 s++; 9380 if (*s == tmp) { 9381 s++; 9382 pl_yylval.ival = OPf_SPECIAL; 9383 } 9384 else 9385 pl_yylval.ival = 0; 9386 OPERATOR(DOTDOT); 9387 } 9388 if (*s == '=' && !PL_lex_allbrackets 9389 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) 9390 { 9391 s--; 9392 TOKEN(0); 9393 } 9394 Aop(OP_CONCAT); 9395 } 9396 /* FALLTHROUGH */ 9397 case '0': case '1': case '2': case '3': case '4': 9398 case '5': case '6': case '7': case '8': case '9': 9399 s = scan_num(s, &pl_yylval); 9400 DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); 9401 if (PL_expect == XOPERATOR) 9402 no_op("Number",s); 9403 TERM(THING); 9404 9405 case '\'': 9406 return yyl_sglquote(aTHX_ s); 9407 9408 case '"': 9409 return yyl_dblquote(aTHX_ s); 9410 9411 case '`': 9412 return yyl_backtick(aTHX_ s); 9413 9414 case '\\': 9415 return yyl_backslash(aTHX_ s + 1); 9416 9417 case 'v': 9418 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { 9419 char *start = s + 2; 9420 while (isDIGIT(*start) || *start == '_') 9421 start++; 9422 if (*start == '.' && isDIGIT(start[1])) { 9423 s = scan_num(s, &pl_yylval); 9424 TERM(THING); 9425 } 9426 else if ((*start == ':' && start[1] == ':') 9427 || (PL_expect == XSTATE && *start == ':')) { 9428 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9429 return tok; 9430 goto retry_bufptr; 9431 } 9432 else if (PL_expect == XSTATE) { 9433 d = start; 9434 while (d < PL_bufend && isSPACE(*d)) d++; 9435 if (*d == ':') { 9436 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9437 return tok; 9438 goto retry_bufptr; 9439 } 9440 } 9441 /* avoid v123abc() or $h{v1}, allow C<print v10;> */ 9442 if (!isALPHA(*start) && (PL_expect == XTERM 9443 || PL_expect == XREF || PL_expect == XSTATE 9444 || PL_expect == XTERMORDORDOR)) { 9445 GV *const gv = gv_fetchpvn_flags(s, start - s, 9446 UTF ? SVf_UTF8 : 0, SVt_PVCV); 9447 if (!gv) { 9448 s = scan_num(s, &pl_yylval); 9449 TERM(THING); 9450 } 9451 } 9452 } 9453 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9454 return tok; 9455 goto retry_bufptr; 9456 9457 case 'x': 9458 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { 9459 s++; 9460 Mop(OP_REPEAT); 9461 } 9462 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9463 return tok; 9464 goto retry_bufptr; 9465 9466 case '_': 9467 case 'a': case 'A': 9468 case 'b': case 'B': 9469 case 'c': case 'C': 9470 case 'd': case 'D': 9471 case 'e': case 'E': 9472 case 'f': case 'F': 9473 case 'g': case 'G': 9474 case 'h': case 'H': 9475 case 'i': case 'I': 9476 case 'j': case 'J': 9477 case 'k': case 'K': 9478 case 'l': case 'L': 9479 case 'm': case 'M': 9480 case 'n': case 'N': 9481 case 'o': case 'O': 9482 case 'p': case 'P': 9483 case 'q': case 'Q': 9484 case 'r': case 'R': 9485 case 's': case 'S': 9486 case 't': case 'T': 9487 case 'u': case 'U': 9488 case 'V': 9489 case 'w': case 'W': 9490 case 'X': 9491 case 'y': case 'Y': 9492 case 'z': case 'Z': 9493 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) 9494 return tok; 9495 goto retry_bufptr; 9496 } 9497} 9498 9499 9500/* 9501 yylex 9502 9503 Works out what to call the token just pulled out of the input 9504 stream. The yacc parser takes care of taking the ops we return and 9505 stitching them into a tree. 9506 9507 Returns: 9508 The type of the next token 9509 9510 Structure: 9511 Check if we have already built the token; if so, use it. 9512 Switch based on the current state: 9513 - if we have a case modifier in a string, deal with that 9514 - handle other cases of interpolation inside a string 9515 - scan the next line if we are inside a format 9516 In the normal state, switch on the next character: 9517 - default: 9518 if alphabetic, go to key lookup 9519 unrecognized character - croak 9520 - 0/4/26: handle end-of-line or EOF 9521 - cases for whitespace 9522 - \n and #: handle comments and line numbers 9523 - various operators, brackets and sigils 9524 - numbers 9525 - quotes 9526 - 'v': vstrings (or go to key lookup) 9527 - 'x' repetition operator (or go to key lookup) 9528 - other ASCII alphanumerics (key lookup begins here): 9529 word before => ? 9530 keyword plugin 9531 scan built-in keyword (but do nothing with it yet) 9532 check for statement label 9533 check for lexical subs 9534 return yyl_just_a_word if there is one 9535 see whether built-in keyword is overridden 9536 switch on keyword number: 9537 - default: return yyl_just_a_word: 9538 not a built-in keyword; handle bareword lookup 9539 disambiguate between method and sub call 9540 fall back to bareword 9541 - cases for built-in keywords 9542*/ 9543 9544int 9545Perl_yylex(pTHX) 9546{ 9547 char *s = PL_bufptr; 9548 9549 if (UNLIKELY(PL_parser->recheck_utf8_validity)) { 9550 const U8* first_bad_char_loc; 9551 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr, 9552 PL_bufend - PL_bufptr, 9553 &first_bad_char_loc))) 9554 { 9555 _force_out_malformed_utf8_message(first_bad_char_loc, 9556 (U8 *) PL_bufend, 9557 0, 9558 1 /* 1 means die */ ); 9559 NOT_REACHED; /* NOTREACHED */ 9560 } 9561 PL_parser->recheck_utf8_validity = FALSE; 9562 } 9563 DEBUG_T( { 9564 SV* tmp = newSVpvs(""); 9565 PerlIO_printf(Perl_debug_log, "### %" LINE_Tf ":LEX_%s/X%s %s\n", 9566 CopLINE(PL_curcop), 9567 lex_state_names[PL_lex_state], 9568 exp_name[PL_expect], 9569 pv_display(tmp, s, strlen(s), 0, 60)); 9570 SvREFCNT_dec(tmp); 9571 } ); 9572 9573 /* when we've already built the next token, just pull it out of the queue */ 9574 if (PL_nexttoke) { 9575 PL_nexttoke--; 9576 pl_yylval = PL_nextval[PL_nexttoke]; 9577 { 9578 I32 next_type; 9579 next_type = PL_nexttype[PL_nexttoke]; 9580 if (next_type & (7<<24)) { 9581 if (next_type & (1<<24)) { 9582 if (PL_lex_brackets > 100) 9583 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 9584 PL_lex_brackstack[PL_lex_brackets++] = 9585 (char) ((U8) (next_type >> 16)); 9586 } 9587 if (next_type & (2<<24)) 9588 PL_lex_allbrackets++; 9589 if (next_type & (4<<24)) 9590 PL_lex_allbrackets--; 9591 next_type &= 0xffff; 9592 } 9593 return REPORT(next_type == 'p' ? pending_ident() : next_type); 9594 } 9595 } 9596 9597 switch (PL_lex_state) { 9598 case LEX_NORMAL: 9599 case LEX_INTERPNORMAL: 9600 break; 9601 9602 /* interpolated case modifiers like \L \U, including \Q and \E. 9603 when we get here, PL_bufptr is at the \ 9604 */ 9605 case LEX_INTERPCASEMOD: 9606 /* handle \E or end of string */ 9607 return yyl_interpcasemod(aTHX_ s); 9608 9609 case LEX_INTERPPUSH: 9610 return REPORT(sublex_push()); 9611 9612 case LEX_INTERPSTART: 9613 if (PL_bufptr == PL_bufend) 9614 return REPORT(sublex_done()); 9615 DEBUG_T({ 9616 if(*PL_bufptr != '(') 9617 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n"); 9618 }); 9619 PL_expect = XTERM; 9620 /* for /@a/, we leave the joining for the regex engine to do 9621 * (unless we're within \Q etc) */ 9622 PL_lex_dojoin = (*PL_bufptr == '@' 9623 && (!PL_lex_inpat || PL_lex_casemods)); 9624 PL_lex_state = LEX_INTERPNORMAL; 9625 if (PL_lex_dojoin) { 9626 NEXTVAL_NEXTTOKE.ival = 0; 9627 force_next(PERLY_COMMA); 9628 force_ident("\"", PERLY_DOLLAR); 9629 NEXTVAL_NEXTTOKE.ival = 0; 9630 force_next(PERLY_DOLLAR); 9631 NEXTVAL_NEXTTOKE.ival = 0; 9632 force_next((2<<24)|PERLY_PAREN_OPEN); 9633 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ 9634 force_next(FUNC); 9635 } 9636 /* Convert (?{...}) or (*{...}) and friends to 'do {...}' */ 9637 if (PL_lex_inpat && *PL_bufptr == '(') { 9638 PL_parser->lex_shared->re_eval_start = PL_bufptr; 9639 PL_bufptr += 2; 9640 if (*PL_bufptr != '{') 9641 PL_bufptr++; 9642 PL_expect = XTERMBLOCK; 9643 force_next(KW_DO); 9644 } 9645 9646 if (PL_lex_starts++) { 9647 s = PL_bufptr; 9648 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 9649 if (!PL_lex_casemods && PL_lex_inpat) 9650 TOKEN(PERLY_COMMA); 9651 else 9652 AopNOASSIGN(OP_CONCAT); 9653 } 9654 return yylex(); 9655 9656 case LEX_INTERPENDMAYBE: 9657 if (intuit_more(PL_bufptr, PL_bufend)) { 9658 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ 9659 break; 9660 } 9661 /* FALLTHROUGH */ 9662 9663 case LEX_INTERPEND: 9664 if (PL_lex_dojoin) { 9665 const U8 dojoin_was = PL_lex_dojoin; 9666 PL_lex_dojoin = FALSE; 9667 PL_lex_state = LEX_INTERPCONCAT; 9668 PL_lex_allbrackets--; 9669 return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN); 9670 } 9671 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl 9672 && SvEVALED(PL_lex_repl)) 9673 { 9674 if (PL_bufptr != PL_bufend) 9675 Perl_croak(aTHX_ "Bad evalled substitution pattern"); 9676 PL_lex_repl = NULL; 9677 } 9678 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets 9679 re_eval_str. If the here-doc body's length equals the previous 9680 value of re_eval_start, re_eval_start will now be null. So 9681 check re_eval_str as well. */ 9682 if (PL_parser->lex_shared->re_eval_start 9683 || PL_parser->lex_shared->re_eval_str) { 9684 SV *sv; 9685 if (*PL_bufptr != ')') 9686 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); 9687 PL_bufptr++; 9688 /* having compiled a (?{..}) expression, return the original 9689 * text too, as a const */ 9690 if (PL_parser->lex_shared->re_eval_str) { 9691 sv = PL_parser->lex_shared->re_eval_str; 9692 PL_parser->lex_shared->re_eval_str = NULL; 9693 SvCUR_set(sv, 9694 PL_bufptr - PL_parser->lex_shared->re_eval_start); 9695 SvPV_shrink_to_cur(sv); 9696 } 9697 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, 9698 PL_bufptr - PL_parser->lex_shared->re_eval_start); 9699 NEXTVAL_NEXTTOKE.opval = 9700 newSVOP(OP_CONST, 0, 9701 sv); 9702 force_next(THING); 9703 PL_parser->lex_shared->re_eval_start = NULL; 9704 PL_expect = XTERM; 9705 return REPORT(PERLY_COMMA); 9706 } 9707 9708 /* FALLTHROUGH */ 9709 case LEX_INTERPCONCAT: 9710#ifdef DEBUGGING 9711 if (PL_lex_brackets) 9712 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", 9713 (long) PL_lex_brackets); 9714#endif 9715 if (PL_bufptr == PL_bufend) 9716 return REPORT(sublex_done()); 9717 9718 /* m'foo' still needs to be parsed for possible (?{...}) */ 9719 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { 9720 SV *sv = newSVsv(PL_linestr); 9721 sv = tokeq(sv); 9722 pl_yylval.opval = newSVOP(OP_CONST, 0, sv); 9723 s = PL_bufend; 9724 } 9725 else { 9726 int save_error_count = PL_error_count; 9727 9728 s = scan_const(PL_bufptr); 9729 9730 /* Set flag if this was a pattern and there were errors. op.c will 9731 * refuse to compile a pattern with this flag set. Otherwise, we 9732 * could get segfaults, etc. */ 9733 if (PL_lex_inpat && PL_error_count > save_error_count) { 9734 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR; 9735 } 9736 if (*s == '\\') 9737 PL_lex_state = LEX_INTERPCASEMOD; 9738 else 9739 PL_lex_state = LEX_INTERPSTART; 9740 } 9741 9742 if (s != PL_bufptr) { 9743 NEXTVAL_NEXTTOKE = pl_yylval; 9744 PL_expect = XTERM; 9745 force_next(THING); 9746 if (PL_lex_starts++) { 9747 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ 9748 if (!PL_lex_casemods && PL_lex_inpat) 9749 TOKEN(PERLY_COMMA); 9750 else 9751 AopNOASSIGN(OP_CONCAT); 9752 } 9753 else { 9754 PL_bufptr = s; 9755 return yylex(); 9756 } 9757 } 9758 9759 return yylex(); 9760 case LEX_FORMLINE: 9761 if (PL_parser->sub_error_count != PL_error_count) { 9762 /* There was an error parsing a formline, which tends to 9763 mess up the parser. 9764 Unlike interpolated sub-parsing, we can't treat any of 9765 these as recoverable, so no need to check sub_no_recover. 9766 */ 9767 yyquit(); 9768 } 9769 assert(PL_lex_formbrack); 9770 s = scan_formline(PL_bufptr); 9771 if (!PL_lex_formbrack) 9772 return yyl_rightcurly(aTHX_ s, 1); 9773 PL_bufptr = s; 9774 return yylex(); 9775 } 9776 9777 /* We really do *not* want PL_linestr ever becoming a COW. */ 9778 assert (!SvIsCOW(PL_linestr)); 9779 s = PL_bufptr; 9780 PL_oldoldbufptr = PL_oldbufptr; 9781 PL_oldbufptr = s; 9782 9783 if (PL_in_my == KEY_sigvar) { 9784 PL_parser->saw_infix_sigil = 0; 9785 return yyl_sigvar(aTHX_ s); 9786 } 9787 9788 { 9789 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil. 9790 On its return, we then need to set it to indicate whether the token 9791 we just encountered was an infix operator that (if we hadn't been 9792 expecting an operator) have been a sigil. 9793 */ 9794 bool expected_operator = (PL_expect == XOPERATOR); 9795 int ret = yyl_try(aTHX_ s); 9796 switch (pl_yylval.ival) { 9797 case OP_BIT_AND: 9798 case OP_MODULO: 9799 case OP_MULTIPLY: 9800 case OP_NBIT_AND: 9801 if (expected_operator) { 9802 PL_parser->saw_infix_sigil = 1; 9803 break; 9804 } 9805 /* FALLTHROUGH */ 9806 default: 9807 PL_parser->saw_infix_sigil = 0; 9808 } 9809 return ret; 9810 } 9811} 9812 9813 9814/* 9815 S_pending_ident 9816 9817 Looks up an identifier in the pad or in a package 9818 9819 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable 9820 rather than a plain pad var. 9821 9822 Returns: 9823 PRIVATEREF if this is a lexical name. 9824 BAREWORD if this belongs to a package. 9825 9826 Structure: 9827 if we're in a my declaration 9828 croak if they tried to say my($foo::bar) 9829 build the ops for a my() declaration 9830 if it's an access to a my() variable 9831 build ops for access to a my() variable 9832 if in a dq string, and they've said @foo and we can't find @foo 9833 warn 9834 build ops for a bareword 9835*/ 9836 9837static int 9838S_pending_ident(pTHX) 9839{ 9840 PADOFFSET tmp = 0; 9841 const char pit = (char)pl_yylval.ival; 9842 const STRLEN tokenbuf_len = strlen(PL_tokenbuf); 9843 /* All routes through this function want to know if there is a colon. */ 9844 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); 9845 9846 DEBUG_T({ PerlIO_printf(Perl_debug_log, 9847 "### Pending identifier '%s'\n", PL_tokenbuf); }); 9848 assert(tokenbuf_len >= 2); 9849 9850 /* if we're in a my(), we can't allow dynamics here. 9851 $foo'bar has already been turned into $foo::bar, so 9852 just check for colons. 9853 9854 if it's a legal name, the OP is a PADANY. 9855 */ 9856 if (PL_in_my) { 9857 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ 9858 if (has_colon) 9859 /* diag_listed_as: No package name allowed for variable %s 9860 in "our" */ 9861 yyerror_pv(Perl_form(aTHX_ "No package name allowed for " 9862 "%s %s in \"our\"", 9863 *PL_tokenbuf=='&' ? "subroutine" : "variable", 9864 PL_tokenbuf), UTF ? SVf_UTF8 : 0); 9865 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); 9866 } 9867 else { 9868 OP *o; 9869 if (has_colon) { 9870 /* "my" variable %s can't be in a package */ 9871 /* PL_no_myglob is constant */ 9872 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); 9873 yyerror_pv(Perl_form(aTHX_ PL_no_myglob, 9874 PL_in_my == KEY_my ? "my" : 9875 PL_in_my == KEY_field ? "field" : "state", 9876 *PL_tokenbuf == '&' ? "subroutine" : "variable", 9877 PL_tokenbuf), 9878 UTF ? SVf_UTF8 : 0); 9879 GCC_DIAG_RESTORE_STMT; 9880 } 9881 9882 if (PL_in_my == KEY_sigvar) { 9883 /* A signature 'padop' needs in addition, an op_first to 9884 * point to a child sigdefelem, and an extra field to hold 9885 * the signature index. We can achieve both by using an 9886 * UNOP_AUX and (ab)using the op_aux field to hold the 9887 * index. If we ever need more fields, use a real malloced 9888 * aux strut instead. 9889 */ 9890 o = newUNOP_AUX(OP_ARGELEM, 0, NULL, 9891 INT2PTR(UNOP_AUX_item *, 9892 (PL_parser->sig_elems))); 9893 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV 9894 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV 9895 : OPpARGELEM_HV); 9896 } 9897 else 9898 o = newOP(OP_PADANY, 0); 9899 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 9900 UTF ? SVf_UTF8 : 0); 9901 if (PL_in_my == KEY_sigvar) 9902 PL_in_my = 0; 9903 9904 pl_yylval.opval = o; 9905 return PRIVATEREF; 9906 } 9907 } 9908 9909 /* 9910 build the ops for accesses to a my() variable. 9911 */ 9912 9913 if (!has_colon) { 9914 if (!PL_in_my) 9915 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, 9916 0); 9917 if (tmp != NOT_IN_PAD) { 9918 /* might be an "our" variable" */ 9919 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 9920 /* build ops for a bareword */ 9921 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 9922 HEK * const stashname = HvNAME_HEK(stash); 9923 SV * const sym = newSVhek(stashname); 9924 sv_catpvs(sym, "::"); 9925 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES )); 9926 pl_yylval.opval = newSVOP(OP_CONST, 0, sym); 9927 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9928 if (pit != '&') 9929 gv_fetchsv(sym, 9930 GV_ADDMULTI, 9931 ((PL_tokenbuf[0] == '$') ? SVt_PV 9932 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9933 : SVt_PVHV)); 9934 return BAREWORD; 9935 } 9936 9937 pl_yylval.opval = newOP(OP_PADANY, 0); 9938 pl_yylval.opval->op_targ = tmp; 9939 return PRIVATEREF; 9940 } 9941 } 9942 9943 /* 9944 Whine if they've said @foo or @foo{key} in a doublequoted string, 9945 and @foo (or %foo) isn't a variable we can find in the symbol 9946 table. 9947 */ 9948 if (ckWARN(WARN_AMBIGUOUS) 9949 && pit == '@' 9950 && PL_lex_state != LEX_NORMAL 9951 && !PL_lex_brackets) 9952 { 9953 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 9954 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG, 9955 SVt_PVAV); 9956 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) 9957 ) 9958 { 9959 /* Downgraded from fatal to warning 20000522 mjd */ 9960 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 9961 "Possible unintended interpolation of %" UTF8f 9962 " in string", 9963 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); 9964 } 9965 } 9966 9967 /* build ops for a bareword */ 9968 pl_yylval.opval = newSVOP(OP_CONST, 0, 9969 newSVpvn_flags(PL_tokenbuf + 1, 9970 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 9971 UTF ? SVf_UTF8 : 0 )); 9972 pl_yylval.opval->op_private = OPpCONST_ENTERED; 9973 if (pit != '&') 9974 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, 9975 (PL_in_eval ? GV_ADDMULTI : GV_ADD) 9976 | ( UTF ? SVf_UTF8 : 0 ), 9977 ((PL_tokenbuf[0] == '$') ? SVt_PV 9978 : (PL_tokenbuf[0] == '@') ? SVt_PVAV 9979 : SVt_PVHV)); 9980 return BAREWORD; 9981} 9982 9983STATIC void 9984S_checkcomma(pTHX_ const char *s, const char *name, const char *what) 9985{ 9986 PERL_ARGS_ASSERT_CHECKCOMMA; 9987 9988 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ 9989 if (ckWARN(WARN_SYNTAX)) { 9990 int level = 1; 9991 const char *w; 9992 for (w = s+2; *w && level; w++) { 9993 if (*w == '(') 9994 ++level; 9995 else if (*w == ')') 9996 --level; 9997 } 9998 while (isSPACE(*w)) 9999 ++w; 10000 /* the list of chars below is for end of statements or 10001 * block / parens, boolean operators (&&, ||, //) and branch 10002 * constructs (or, and, if, until, unless, while, err, for). 10003 * Not a very solid hack... */ 10004 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w)) 10005 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 10006 "%s (...) interpreted as function",name); 10007 } 10008 } 10009 while (s < PL_bufend && isSPACE(*s)) 10010 s++; 10011 if (*s == '(') 10012 s++; 10013 while (s < PL_bufend && isSPACE(*s)) 10014 s++; 10015 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { 10016 const char * const w = s; 10017 s += UTF ? UTF8SKIP(s) : 1; 10018 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) 10019 s += UTF ? UTF8SKIP(s) : 1; 10020 while (s < PL_bufend && isSPACE(*s)) 10021 s++; 10022 if (*s == ',') { 10023 GV* gv; 10024 if (keyword(w, s - w, 0)) 10025 return; 10026 10027 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); 10028 if (gv && GvCVu(gv)) 10029 return; 10030 if (s - w <= 254) { 10031 PADOFFSET off; 10032 char tmpbuf[256]; 10033 Copy(w, tmpbuf+1, s - w, char); 10034 *tmpbuf = '&'; 10035 off = pad_findmy_pvn(tmpbuf, s-w+1, 0); 10036 if (off != NOT_IN_PAD) return; 10037 } 10038 Perl_croak(aTHX_ "No comma allowed after %s", what); 10039 } 10040 } 10041} 10042 10043/* S_new_constant(): do any overload::constant lookup. 10044 10045 Either returns sv, or mortalizes/frees sv and returns a new SV*. 10046 Best used as sv=new_constant(..., sv, ...). 10047 If s, pv are NULL, calls subroutine with one argument, 10048 and <type> is used with error messages only. 10049 <type> is assumed to be well formed UTF-8. 10050 10051 If error_msg is not NULL, *error_msg will be set to any error encountered. 10052 Otherwise yyerror() will be used to output it */ 10053 10054STATIC SV * 10055S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, 10056 SV *sv, SV *pv, const char *type, STRLEN typelen, 10057 const char ** error_msg) 10058{ 10059 dSP; 10060 HV * table = GvHV(PL_hintgv); /* ^H */ 10061 SV *res; 10062 SV *errsv = NULL; 10063 SV **cvp; 10064 SV *cv, *typesv; 10065 const char *why1 = "", *why2 = "", *why3 = ""; 10066 const char * optional_colon = ":"; /* Only some messages have a colon */ 10067 char *msg; 10068 10069 PERL_ARGS_ASSERT_NEW_CONSTANT; 10070 /* We assume that this is true: */ 10071 assert(type || s); 10072 10073 sv_2mortal(sv); /* Parent created it permanently */ 10074 10075 if ( ! table 10076 || ! (PL_hints & HINT_LOCALIZE_HH)) 10077 { 10078 why1 = "unknown"; 10079 optional_colon = ""; 10080 goto report; 10081 } 10082 10083 cvp = hv_fetch(table, key, keylen, FALSE); 10084 if (!cvp || !SvOK(*cvp)) { 10085 why1 = "$^H{"; 10086 why2 = key; 10087 why3 = "} is not defined"; 10088 goto report; 10089 } 10090 10091 cv = *cvp; 10092 if (!pv && s) 10093 pv = newSVpvn_flags(s, len, SVs_TEMP); 10094 if (type && pv) 10095 typesv = newSVpvn_flags(type, typelen, SVs_TEMP); 10096 else 10097 typesv = &PL_sv_undef; 10098 10099 PUSHSTACKi(PERLSI_OVERLOAD); 10100 ENTER ; 10101 SAVETMPS; 10102 10103 PUSHMARK(SP) ; 10104 EXTEND(sp, 3); 10105 if (pv) 10106 PUSHs(pv); 10107 PUSHs(sv); 10108 if (pv) 10109 PUSHs(typesv); 10110 PUTBACK; 10111 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); 10112 10113 SPAGAIN ; 10114 10115 /* Check the eval first */ 10116 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) { 10117 STRLEN errlen; 10118 const char * errstr; 10119 sv_catpvs(errsv, "Propagated"); 10120 errstr = SvPV_const(errsv, errlen); 10121 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ 10122 (void)POPs; 10123 res = SvREFCNT_inc_simple_NN(sv); 10124 } 10125 else { 10126 res = POPs; 10127 SvREFCNT_inc_simple_void_NN(res); 10128 } 10129 10130 PUTBACK ; 10131 FREETMPS ; 10132 LEAVE ; 10133 POPSTACK; 10134 10135 if (SvOK(res)) { 10136 return res; 10137 } 10138 10139 sv = res; 10140 (void)sv_2mortal(sv); 10141 10142 why1 = "Call to &{$^H{"; 10143 why2 = key; 10144 why3 = "}} did not return a defined value"; 10145 10146 report: 10147 10148 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s", 10149 (int)(type ? typelen : len), 10150 (type ? type: s), 10151 optional_colon, 10152 why1, why2, why3); 10153 if (error_msg) { 10154 *error_msg = msg; 10155 } 10156 else { 10157 yyerror_pv(msg, UTF ? SVf_UTF8 : 0); 10158 } 10159 return SvREFCNT_inc_simple_NN(sv); 10160} 10161 10162PERL_STATIC_INLINE void 10163S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, 10164 bool is_utf8, bool check_dollar, bool tick_warn) 10165{ 10166 int saw_tick = 0; 10167 const char *olds = *s; 10168 PERL_ARGS_ASSERT_PARSE_IDENT; 10169 10170 while (*s < PL_bufend) { 10171 if (*d >= e) 10172 Perl_croak(aTHX_ "%s", ident_too_long); 10173 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) { 10174 /* The UTF-8 case must come first, otherwise things 10175 * like c\N{COMBINING TILDE} would start failing, as the 10176 * isWORDCHAR_A case below would gobble the 'c' up. 10177 */ 10178 10179 char *t = *s + UTF8SKIP(*s); 10180 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) { 10181 t += UTF8SKIP(t); 10182 } 10183 if (*d + (t - *s) > e) 10184 Perl_croak(aTHX_ "%s", ident_too_long); 10185 Copy(*s, *d, t - *s, char); 10186 *d += t - *s; 10187 *s = t; 10188 } 10189 else if ( isWORDCHAR_A(**s) ) { 10190 do { 10191 *(*d)++ = *(*s)++; 10192 } while (isWORDCHAR_A(**s) && *d < e); 10193 } 10194 else if ( allow_package 10195 && **s == '\'' 10196 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8)) 10197 { 10198 *(*d)++ = ':'; 10199 *(*d)++ = ':'; 10200 (*s)++; 10201 saw_tick++; 10202 } 10203 else if (allow_package && **s == ':' && (*s)[1] == ':' 10204 /* Disallow things like Foo::$bar. For the curious, this is 10205 * the code path that triggers the "Bad name after" warning 10206 * when looking for barewords. 10207 */ 10208 && !(check_dollar && (*s)[2] == '$')) { 10209 *(*d)++ = *(*s)++; 10210 *(*d)++ = *(*s)++; 10211 } 10212 else 10213 break; 10214 } 10215 if (UNLIKELY(saw_tick && tick_warn && ckWARN2_d(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR))) { 10216 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 10217 char *this_d; 10218 char *d2; 10219 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */ 10220 d2 = this_d; 10221 SAVEFREEPV(this_d); 10222 10223 Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR), 10224 "Old package separator used in string"); 10225 if (olds[-1] == '#') 10226 *d2++ = olds[-2]; 10227 *d2++ = olds[-1]; 10228 while (olds < *s) { 10229 if (*olds == '\'') { 10230 *d2++ = '\\'; 10231 *d2++ = *olds++; 10232 } 10233 else 10234 *d2++ = *olds++; 10235 } 10236 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), 10237 "\t(Did you mean \"%" UTF8f "\" instead?)\n", 10238 UTF8fARG(is_utf8, d2-this_d, this_d)); 10239 } 10240 else { 10241 Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR), 10242 "Old package separator \"'\" deprecated"); 10243 } 10244 } 10245 return; 10246} 10247 10248/* Returns a NUL terminated string, with the length of the string written to 10249 *slp 10250 10251 scan_word6() may be removed once ' in names is removed. 10252 */ 10253char * 10254Perl_scan_word6(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp, bool warn_tick) 10255{ 10256 char *d = dest; 10257 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 10258 bool is_utf8 = cBOOL(UTF); 10259 10260 PERL_ARGS_ASSERT_SCAN_WORD6; 10261 10262 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, warn_tick); 10263 *d = '\0'; 10264 *slp = d - dest; 10265 return s; 10266} 10267 10268char * 10269Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) 10270{ 10271 PERL_ARGS_ASSERT_SCAN_WORD; 10272 return scan_word6(s, dest, destlen, allow_package, slp, FALSE); 10273} 10274 10275/* scan s and extract an identifier ($var) from it if possible 10276 * into dest. 10277 * XXX: This function has subtle implications on parsing, and 10278 * changing how it behaves can cause a variable to change from 10279 * being a run time rv2sv call or a compile time binding to a 10280 * specific variable name. 10281 */ 10282STATIC char * 10283S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) 10284{ 10285 I32 herelines = PL_parser->herelines; 10286 SSize_t bracket = -1; 10287 char funny = *s++; 10288 char *d = dest; 10289 char * const e = d + destlen - 3; /* two-character token, ending NUL */ 10290 bool is_utf8 = cBOOL(UTF); 10291 line_t orig_copline = 0, tmp_copline = 0; 10292 10293 PERL_ARGS_ASSERT_SCAN_IDENT; 10294 10295 if (isSPACE(*s) || !*s) 10296 s = skipspace(s); 10297 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */ 10298 bool is_zero= *s == '0' ? TRUE : FALSE; 10299 char *digit_start= d; 10300 *d++ = *s++; 10301 while (s < PL_bufend && isDIGIT(*s)) { 10302 if (d >= e) 10303 Perl_croak(aTHX_ "%s", ident_too_long); 10304 *d++ = *s++; 10305 } 10306 if (is_zero && d - digit_start > 1) 10307 Perl_croak(aTHX_ ident_var_zero_multi_digit); 10308 } 10309 else { /* See if it is a "normal" identifier */ 10310 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE); 10311 } 10312 *d = '\0'; 10313 d = dest; 10314 if (*d) { 10315 /* Either a digit variable, or parse_ident() found an identifier 10316 (anything valid as a bareword), so job done and return. */ 10317 if (PL_lex_state != LEX_NORMAL) 10318 PL_lex_state = LEX_INTERPENDMAYBE; 10319 return s; 10320 } 10321 10322 /* Here, it is not a run-of-the-mill identifier name */ 10323 10324 if (*s == '$' && s[1] 10325 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8) 10326 || isDIGIT_A((U8)s[1]) 10327 || s[1] == '$' 10328 || s[1] == '{' 10329 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) ) 10330 { 10331 /* Dereferencing a value in a scalar variable. 10332 The alternatives are different syntaxes for a scalar variable. 10333 Using ' as a leading package separator isn't allowed. :: is. */ 10334 return s; 10335 } 10336 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ 10337 if (*s == '{') { 10338 bracket = s - SvPVX(PL_linestr); 10339 s++; 10340 orig_copline = CopLINE(PL_curcop); 10341 if (s < PL_bufend && isSPACE(*s)) { 10342 s = skipspace(s); 10343 } 10344 } 10345 10346 10347 /* Extract the first character of the variable name from 's' and 10348 * copy it, null terminated into 'd'. Note that this does not 10349 * involve checking for just IDFIRST characters, as it allows the 10350 * '^' for ${^FOO} type variable names, and it allows all the 10351 * characters that are legal in a single character variable name. 10352 * 10353 * The legal ones are any of: 10354 * a) all ASCII characters except: 10355 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE; 10356 * 2) '{' 10357 * The final case currently doesn't get this far in the program, so we 10358 * don't test for it. If that were to change, it would be ok to allow it. 10359 * b) When not under Unicode rules, any upper Latin1 character 10360 * c) Otherwise, when unicode rules are used, all XIDS characters. 10361 * 10362 * Because all ASCII characters have the same representation whether 10363 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and 10364 * '{' without knowing if is UTF-8 or not. */ 10365 10366 if ((s <= PL_bufend - ((is_utf8) 10367 ? UTF8SKIP(s) 10368 : 1)) 10369 && ( 10370 isGRAPH_A(*s) 10371 || 10372 ( is_utf8 10373 ? isIDFIRST_utf8_safe(s, PL_bufend) 10374 : (isGRAPH_L1(*s) 10375 && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD)) 10376 ) 10377 ) 10378 ) 10379 ){ 10380 if (is_utf8) { 10381 const STRLEN skip = UTF8SKIP(s); 10382 STRLEN i; 10383 d[skip] = '\0'; 10384 for ( i = 0; i < skip; i++ ) 10385 d[i] = *s++; 10386 } 10387 else { 10388 *d = *s++; 10389 d[1] = '\0'; 10390 } 10391 } 10392 10393 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */ 10394 if (isDIGIT(*d)) { 10395 bool is_zero= *d == '0' ? TRUE : FALSE; 10396 char *digit_start= d; 10397 while (s < PL_bufend && isDIGIT(*s)) { 10398 d++; 10399 if (d >= e) 10400 Perl_croak(aTHX_ "%s", ident_too_long); 10401 *d= *s++; 10402 } 10403 if (is_zero && d - digit_start >= 1) /* d points at the last digit */ 10404 Perl_croak(aTHX_ ident_var_zero_multi_digit); 10405 d[1] = '\0'; 10406 } 10407 10408 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ 10409 else if (*d == '^' && *s && isCONTROLVAR(*s)) { 10410 *d = toCTRL(*s); 10411 s++; 10412 } 10413 /* Warn about ambiguous code after unary operators if {...} notation isn't 10414 used. There's no difference in ambiguity; it's merely a heuristic 10415 about when not to warn. */ 10416 else if (ck_uni && bracket == -1) 10417 check_uni(); 10418 10419 if (bracket != -1) { 10420 bool skip; 10421 char *s2; 10422 /* If we were processing {...} notation then... */ 10423 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) 10424 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ 10425 && isWORDCHAR(*s)) 10426 ) { 10427 /* note we have to check for a normal identifier first, 10428 * as it handles utf8 symbols, and only after that has 10429 * been ruled out can we look at the caret words */ 10430 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) { 10431 /* if it starts as a valid identifier, assume that it is one. 10432 (the later check for } being at the expected point will trap 10433 cases where this doesn't pan out.) */ 10434 d += is_utf8 ? UTF8SKIP(d) : 1; 10435 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE); 10436 *d = '\0'; 10437 } 10438 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */ 10439 d++; 10440 while (isWORDCHAR(*s) && d < e) { 10441 *d++ = *s++; 10442 } 10443 if (d >= e) 10444 Perl_croak(aTHX_ "%s", ident_too_long); 10445 *d = '\0'; 10446 } 10447 tmp_copline = CopLINE(PL_curcop); 10448 if (s < PL_bufend && isSPACE(*s)) { 10449 s = skipspace(s); 10450 } 10451 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { 10452 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */ 10453 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { 10454 const char * const brack = 10455 (const char *) 10456 ((*s == '[') ? "[...]" : "{...}"); 10457 orig_copline = CopLINE(PL_curcop); 10458 CopLINE_set(PL_curcop, tmp_copline); 10459 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */ 10460 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 10461 "Ambiguous use of %c{%s%s} resolved to %c%s%s", 10462 funny, dest, brack, funny, dest, brack); 10463 CopLINE_set(PL_curcop, orig_copline); 10464 } 10465 bracket++; 10466 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); 10467 PL_lex_allbrackets++; 10468 return s; 10469 } 10470 } 10471 10472 if ( !tmp_copline ) 10473 tmp_copline = CopLINE(PL_curcop); 10474 if ((skip = s < PL_bufend && isSPACE(*s))) { 10475 /* Avoid incrementing line numbers or resetting PL_linestart, 10476 in case we have to back up. */ 10477 STRLEN s_off = s - SvPVX(PL_linestr); 10478 s2 = peekspace(s); 10479 s = SvPVX(PL_linestr) + s_off; 10480 } 10481 else 10482 s2 = s; 10483 10484 /* Expect to find a closing } after consuming any trailing whitespace. 10485 */ 10486 if (*s2 == '}') { 10487 /* Now increment line numbers if applicable. */ 10488 if (skip) 10489 s = skipspace(s); 10490 s++; 10491 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { 10492 PL_lex_state = LEX_INTERPEND; 10493 PL_expect = XREF; 10494 } 10495 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) { 10496 if (ckWARN(WARN_AMBIGUOUS) 10497 && (keyword(dest, d - dest, 0) 10498 || get_cvn_flags(dest, d - dest, is_utf8 10499 ? SVf_UTF8 10500 : 0))) 10501 { 10502 SV *tmp = newSVpvn_flags( dest, d - dest, 10503 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); 10504 if (funny == '#') 10505 funny = '@'; 10506 orig_copline = CopLINE(PL_curcop); 10507 CopLINE_set(PL_curcop, tmp_copline); 10508 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), 10509 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf, 10510 funny, SVfARG(tmp), funny, SVfARG(tmp)); 10511 CopLINE_set(PL_curcop, orig_copline); 10512 } 10513 } 10514 } 10515 else { 10516 /* Didn't find the closing } at the point we expected, so restore 10517 state such that the next thing to process is the opening { and */ 10518 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ 10519 CopLINE_set(PL_curcop, orig_copline); 10520 PL_parser->herelines = herelines; 10521 *dest = '\0'; 10522 PL_parser->sub_no_recover = TRUE; 10523 } 10524 } 10525 else if ( PL_lex_state == LEX_INTERPNORMAL 10526 && !PL_lex_brackets 10527 && !intuit_more(s, PL_bufend)) 10528 PL_lex_state = LEX_INTERPEND; 10529 return s; 10530} 10531 10532static bool 10533S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) { 10534 10535 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag 10536 * found in the parse starting at 's', based on the subset that are valid 10537 * in this context input to this routine in 'valid_flags'. Advances s. 10538 * Returns TRUE if the input should be treated as a valid flag, so the next 10539 * char may be as well; otherwise FALSE. 'charset' should point to a NUL 10540 * upon first call on the current regex. This routine will set it to any 10541 * charset modifier found. The caller shouldn't change it. This way, 10542 * another charset modifier encountered in the parse can be detected as an 10543 * error, as we have decided to allow only one */ 10544 10545 const char c = **s; 10546 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; 10547 10548 if ( charlen != 1 || ! strchr(valid_flags, c) ) { 10549 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) { 10550 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s), 10551 UTF ? SVf_UTF8 : 0); 10552 (*s) += charlen; 10553 /* Pretend that it worked, so will continue processing before 10554 * dieing */ 10555 return TRUE; 10556 } 10557 return FALSE; 10558 } 10559 10560 switch (c) { 10561 10562 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count); 10563 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; 10564 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; 10565 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; 10566 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; 10567 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; 10568 case LOCALE_PAT_MOD: 10569 if (*charset) { 10570 goto multiple_charsets; 10571 } 10572 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); 10573 *charset = c; 10574 break; 10575 case UNICODE_PAT_MOD: 10576 if (*charset) { 10577 goto multiple_charsets; 10578 } 10579 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); 10580 *charset = c; 10581 break; 10582 case ASCII_RESTRICT_PAT_MOD: 10583 if (! *charset) { 10584 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); 10585 } 10586 else { 10587 10588 /* Error if previous modifier wasn't an 'a', but if it was, see 10589 * if, and accept, a second occurrence (only) */ 10590 if (*charset != 'a' 10591 || get_regex_charset(*pmfl) 10592 != REGEX_ASCII_RESTRICTED_CHARSET) 10593 { 10594 goto multiple_charsets; 10595 } 10596 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); 10597 } 10598 *charset = c; 10599 break; 10600 case DEPENDS_PAT_MOD: 10601 if (*charset) { 10602 goto multiple_charsets; 10603 } 10604 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); 10605 *charset = c; 10606 break; 10607 } 10608 10609 (*s)++; 10610 return TRUE; 10611 10612 multiple_charsets: 10613 if (*charset != c) { 10614 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); 10615 } 10616 else if (c == 'a') { 10617 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */ 10618 yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); 10619 } 10620 else { 10621 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); 10622 } 10623 10624 /* Pretend that it worked, so will continue processing before dieing */ 10625 (*s)++; 10626 return TRUE; 10627} 10628 10629STATIC char * 10630S_scan_pat(pTHX_ char *start, I32 type) 10631{ 10632 PMOP *pm; 10633 char *s; 10634 const char * const valid_flags = 10635 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); 10636 char charset = '\0'; /* character set modifier */ 10637 unsigned int x_mod_count = 0; 10638 10639 PERL_ARGS_ASSERT_SCAN_PAT; 10640 10641 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL); 10642 if (!s) 10643 Perl_croak(aTHX_ "Search pattern not terminated"); 10644 10645 pm = (PMOP*)newPMOP(type, 0); 10646 if (PL_multi_open == '?') { 10647 /* This is the only point in the code that sets PMf_ONCE: */ 10648 pm->op_pmflags |= PMf_ONCE; 10649 10650 /* Hence it's safe to do this bit of PMOP book-keeping here, which 10651 allows us to restrict the list needed by reset to just the ?? 10652 matches. */ 10653 assert(type != OP_TRANS); 10654 if (PL_curstash) { 10655 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); 10656 U32 elements; 10657 if (!mg) { 10658 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 10659 0); 10660 } 10661 elements = mg->mg_len / sizeof(PMOP**); 10662 Renewc(mg->mg_ptr, elements + 1, PMOP*, char); 10663 ((PMOP**)mg->mg_ptr) [elements++] = pm; 10664 mg->mg_len = elements * sizeof(PMOP**); 10665 PmopSTASH_set(pm,PL_curstash); 10666 } 10667 } 10668 10669 /* if qr/...(?{..}).../, then need to parse the pattern within a new 10670 * anon CV. False positives like qr/[(?{]/ are harmless */ 10671 10672 if (type == OP_QR) { 10673 STRLEN len; 10674 char *e, *p = SvPV(PL_lex_stuff, len); 10675 e = p + len; 10676 for (; p < e; p++) { 10677 if (p[0] == '(' && ( 10678 (p[1] == '?' && (p[2] == '{' || 10679 (p[2] == '?' && p[3] == '{'))) || 10680 (p[1] == '*' && (p[2] == '{' || 10681 (p[2] == '*' && p[3] == '{'))) 10682 )){ 10683 pm->op_pmflags |= PMf_HAS_CV; 10684 break; 10685 } 10686 } 10687 pm->op_pmflags |= PMf_IS_QR; 10688 } 10689 10690 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), 10691 &s, &charset, &x_mod_count)) 10692 {}; 10693 /* issue a warning if /c is specified,but /g is not */ 10694 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) 10695 { 10696 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), 10697 "Use of /c modifier is meaningless without /g" ); 10698 } 10699 10700 PL_lex_op = (OP*)pm; 10701 pl_yylval.ival = OP_MATCH; 10702 return s; 10703} 10704 10705STATIC char * 10706S_scan_subst(pTHX_ char *start) 10707{ 10708 char *s; 10709 PMOP *pm; 10710 I32 first_start; 10711 line_t first_line; 10712 line_t linediff = 0; 10713 I32 es = 0; 10714 char charset = '\0'; /* character set modifier */ 10715 unsigned int x_mod_count = 0; 10716 char *t; 10717 10718 PERL_ARGS_ASSERT_SCAN_SUBST; 10719 10720 pl_yylval.ival = OP_NULL; 10721 10722 s = scan_str(start, TRUE, FALSE, FALSE, &t); 10723 10724 if (!s) 10725 Perl_croak(aTHX_ "Substitution pattern not terminated"); 10726 10727 s = t; 10728 10729 first_start = PL_multi_start; 10730 first_line = CopLINE(PL_curcop); 10731 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 10732 if (!s) { 10733 SvREFCNT_dec_NN(PL_lex_stuff); 10734 PL_lex_stuff = NULL; 10735 Perl_croak(aTHX_ "Substitution replacement not terminated"); 10736 } 10737 PL_multi_start = first_start; /* so whole substitution is taken together */ 10738 10739 pm = (PMOP*)newPMOP(OP_SUBST, 0); 10740 10741 10742 while (*s) { 10743 if (*s == EXEC_PAT_MOD) { 10744 s++; 10745 es++; 10746 } 10747 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), 10748 &s, &charset, &x_mod_count)) 10749 { 10750 break; 10751 } 10752 } 10753 10754 if ((pm->op_pmflags & PMf_CONTINUE)) { 10755 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); 10756 } 10757 10758 if (es) { 10759 SV * const repl = newSVpvs(""); 10760 10761 PL_multi_end = 0; 10762 pm->op_pmflags |= PMf_EVAL; 10763 for (; es > 1; es--) { 10764 sv_catpvs(repl, "eval "); 10765 } 10766 sv_catpvs(repl, "do {"); 10767 sv_catsv(repl, PL_parser->lex_sub_repl); 10768 sv_catpvs(repl, "}"); 10769 SvREFCNT_dec(PL_parser->lex_sub_repl); 10770 PL_parser->lex_sub_repl = repl; 10771 } 10772 10773 10774 linediff = CopLINE(PL_curcop) - first_line; 10775 if (linediff) 10776 CopLINE_set(PL_curcop, first_line); 10777 10778 if (linediff || es) { 10779 /* the IVX field indicates that the replacement string is a s///e; 10780 * the NVX field indicates how many src code lines the replacement 10781 * spreads over */ 10782 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV); 10783 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff; 10784 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = 10785 cBOOL(es); 10786 } 10787 10788 PL_lex_op = (OP*)pm; 10789 pl_yylval.ival = OP_SUBST; 10790 return s; 10791} 10792 10793STATIC char * 10794S_scan_trans(pTHX_ char *start) 10795{ 10796 char* s; 10797 OP *o; 10798 U8 squash; 10799 U8 del; 10800 U8 complement; 10801 bool nondestruct = 0; 10802 char *t; 10803 10804 PERL_ARGS_ASSERT_SCAN_TRANS; 10805 10806 pl_yylval.ival = OP_NULL; 10807 10808 s = scan_str(start,FALSE,FALSE,FALSE,&t); 10809 if (!s) 10810 Perl_croak(aTHX_ "Transliteration pattern not terminated"); 10811 10812 s = t; 10813 10814 s = scan_str(s,FALSE,FALSE,FALSE,NULL); 10815 if (!s) { 10816 SvREFCNT_dec_NN(PL_lex_stuff); 10817 PL_lex_stuff = NULL; 10818 Perl_croak(aTHX_ "Transliteration replacement not terminated"); 10819 } 10820 10821 complement = del = squash = 0; 10822 while (1) { 10823 switch (*s) { 10824 case 'c': 10825 complement = OPpTRANS_COMPLEMENT; 10826 break; 10827 case 'd': 10828 del = OPpTRANS_DELETE; 10829 break; 10830 case 's': 10831 squash = OPpTRANS_SQUASH; 10832 break; 10833 case 'r': 10834 nondestruct = 1; 10835 break; 10836 default: 10837 goto no_more; 10838 } 10839 s++; 10840 } 10841 no_more: 10842 10843 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL); 10844 o->op_private &= ~OPpTRANS_ALL; 10845 o->op_private |= del|squash|complement; 10846 10847 PL_lex_op = o; 10848 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS; 10849 10850 10851 return s; 10852} 10853 10854/* scan_heredoc 10855 Takes a pointer to the first < in <<FOO. 10856 Returns a pointer to the byte following <<FOO. 10857 10858 This function scans a heredoc, which involves different methods 10859 depending on whether we are in a string eval, quoted construct, etc. 10860 This is because PL_linestr could containing a single line of input, or 10861 a whole string being evalled, or the contents of the current quote- 10862 like operator. 10863 10864 The two basic methods are: 10865 - Steal lines from the input stream 10866 - Scan the heredoc in PL_linestr and remove it therefrom 10867 10868 In a file scope or filtered eval, the first method is used; in a 10869 string eval, the second. 10870 10871 In a quote-like operator, we have to choose between the two, 10872 depending on where we can find a newline. We peek into outer lex- 10873 ing scopes until we find one with a newline in it. If we reach the 10874 outermost lexing scope and it is a file, we use the stream method. 10875 Otherwise it is treated as an eval. 10876*/ 10877 10878STATIC char * 10879S_scan_heredoc(pTHX_ char *s) 10880{ 10881 I32 op_type = OP_SCALAR; 10882 I32 len; 10883 SV *tmpstr; 10884 char term; 10885 char *d; 10886 char *e; 10887 char *peek; 10888 char *indent = 0; 10889 I32 indent_len = 0; 10890 bool indented = FALSE; 10891 const bool infile = PL_rsfp || PL_parser->filtered; 10892 const line_t origline = CopLINE(PL_curcop); 10893 LEXSHARED *shared = PL_parser->lex_shared; 10894 10895 PERL_ARGS_ASSERT_SCAN_HEREDOC; 10896 10897 s += 2; 10898 d = PL_tokenbuf + 1; 10899 e = PL_tokenbuf + sizeof PL_tokenbuf - 1; 10900 *PL_tokenbuf = '\n'; 10901 peek = s; 10902 10903 if (*peek == '~') { 10904 indented = TRUE; 10905 peek++; s++; 10906 } 10907 10908 while (SPACE_OR_TAB(*peek)) 10909 peek++; 10910 10911 if (*peek == '`' || *peek == '\'' || *peek =='"') { 10912 s = peek; 10913 term = *s++; 10914 s = delimcpy(d, e, s, PL_bufend, term, &len); 10915 if (s == PL_bufend) 10916 Perl_croak(aTHX_ "Unterminated delimiter for here document"); 10917 d += len; 10918 s++; 10919 } 10920 else { 10921 if (*s == '\\') 10922 /* <<\FOO is equivalent to <<'FOO' */ 10923 s++, term = '\''; 10924 else 10925 term = '"'; 10926 10927 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) 10928 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden"); 10929 10930 peek = s; 10931 10932 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) { 10933 peek += UTF ? UTF8SKIP(peek) : 1; 10934 } 10935 10936 len = (peek - s >= e - d) ? (e - d) : (peek - s); 10937 Copy(s, d, len, char); 10938 s += len; 10939 d += len; 10940 } 10941 10942 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) 10943 Perl_croak(aTHX_ "Delimiter for here document is too long"); 10944 10945 *d++ = '\n'; 10946 *d = '\0'; 10947 len = d - PL_tokenbuf; 10948 10949#ifndef PERL_STRICT_CR 10950 d = (char *) memchr(s, '\r', PL_bufend - s); 10951 if (d) { 10952 char * const olds = s; 10953 s = d; 10954 while (s < PL_bufend) { 10955 if (*s == '\r') { 10956 *d++ = '\n'; 10957 if (*++s == '\n') 10958 s++; 10959 } 10960 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ 10961 *d++ = *s++; 10962 s++; 10963 } 10964 else 10965 *d++ = *s++; 10966 } 10967 *d = '\0'; 10968 PL_bufend = d; 10969 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 10970 s = olds; 10971 } 10972#endif 10973 10974 tmpstr = newSV_type(SVt_PVIV); 10975 if (term == '\'') { 10976 op_type = OP_CONST; 10977 SvIV_set(tmpstr, -1); 10978 } 10979 else if (term == '`') { 10980 op_type = OP_BACKTICK; 10981 SvIV_set(tmpstr, '\\'); 10982 } 10983 10984 PL_multi_start = origline + 1 + PL_parser->herelines; 10985 PL_multi_open = PL_multi_close = '<'; 10986 10987 /* inside a string eval or quote-like operator */ 10988 if (!infile || PL_lex_inwhat) { 10989 SV *linestr; 10990 char *bufend; 10991 char * const olds = s; 10992 PERL_CONTEXT * const cx = CX_CUR(); 10993 /* These two fields are not set until an inner lexing scope is 10994 entered. But we need them set here. */ 10995 shared->ls_bufptr = s; 10996 shared->ls_linestr = PL_linestr; 10997 10998 if (PL_lex_inwhat) { 10999 /* Look for a newline. If the current buffer does not have one, 11000 peek into the line buffer of the parent lexing scope, going 11001 up as many levels as necessary to find one with a newline 11002 after bufptr. 11003 */ 11004 while (!(s = (char *)memchr( 11005 (void *)shared->ls_bufptr, '\n', 11006 SvEND(shared->ls_linestr)-shared->ls_bufptr 11007 ))) 11008 { 11009 shared = shared->ls_prev; 11010 /* shared is only null if we have gone beyond the outermost 11011 lexing scope. In a file, we will have broken out of the 11012 loop in the previous iteration. In an eval, the string buf- 11013 fer ends with "\n;", so the while condition above will have 11014 evaluated to false. So shared can never be null. Or so you 11015 might think. Odd syntax errors like s;@{<<; can gobble up 11016 the implicit semicolon at the end of a flie, causing the 11017 file handle to be closed even when we are not in a string 11018 eval. So shared may be null in that case. 11019 (Closing '>>}' here to balance the earlier open brace for 11020 editors that look for matched pairs.) */ 11021 if (UNLIKELY(!shared)) 11022 goto interminable; 11023 /* A LEXSHARED struct with a null ls_prev pointer is the outer- 11024 most lexing scope. In a file, shared->ls_linestr at that 11025 level is just one line, so there is no body to steal. */ 11026 if (infile && !shared->ls_prev) { 11027 s = olds; 11028 goto streaming; 11029 } 11030 } 11031 } 11032 else { /* eval or we've already hit EOF */ 11033 s = (char*)memchr((void*)s, '\n', PL_bufend - s); 11034 if (!s) 11035 goto interminable; 11036 } 11037 11038 linestr = shared->ls_linestr; 11039 bufend = SvEND(linestr); 11040 d = s; 11041 if (indented) { 11042 char *myolds = s; 11043 11044 while (s < bufend - len + 1) { 11045 if (*s++ == '\n') 11046 ++PL_parser->herelines; 11047 11048 if (memEQ(s, PL_tokenbuf + 1, len - 1)) { 11049 char *backup = s; 11050 indent_len = 0; 11051 11052 /* Only valid if it's preceded by whitespace only */ 11053 while (backup != myolds && --backup >= myolds) { 11054 if (! SPACE_OR_TAB(*backup)) { 11055 break; 11056 } 11057 indent_len++; 11058 } 11059 11060 /* No whitespace or all! */ 11061 if (backup == s || *backup == '\n') { 11062 Newx(indent, indent_len + 1, char); 11063 memcpy(indent, backup + 1, indent_len); 11064 indent[indent_len] = 0; 11065 s--; /* before our delimiter */ 11066 PL_parser->herelines--; /* this line doesn't count */ 11067 break; 11068 } 11069 } 11070 } 11071 } 11072 else { 11073 while (s < bufend - len + 1 11074 && memNE(s,PL_tokenbuf,len) ) 11075 { 11076 if (*s++ == '\n') 11077 ++PL_parser->herelines; 11078 } 11079 } 11080 11081 if (s >= bufend - len + 1) { 11082 goto interminable; 11083 } 11084 11085 sv_setpvn_fresh(tmpstr,d+1,s-d); 11086 s += len - 1; 11087 /* the preceding stmt passes a newline */ 11088 PL_parser->herelines++; 11089 11090 /* s now points to the newline after the heredoc terminator. 11091 d points to the newline before the body of the heredoc. 11092 */ 11093 11094 /* We are going to modify linestr in place here, so set 11095 aside copies of the string if necessary for re-evals or 11096 (caller $n)[6]. */ 11097 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we 11098 check shared->re_eval_str. */ 11099 if (shared->re_eval_start || shared->re_eval_str) { 11100 /* Set aside the rest of the regexp */ 11101 if (!shared->re_eval_str) 11102 shared->re_eval_str = 11103 newSVpvn(shared->re_eval_start, 11104 bufend - shared->re_eval_start); 11105 shared->re_eval_start -= s-d; 11106 } 11107 11108 if (cxstack_ix >= 0 11109 && CxTYPE(cx) == CXt_EVAL 11110 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL 11111 && cx->blk_eval.cur_text == linestr) 11112 { 11113 cx->blk_eval.cur_text = newSVsv(linestr); 11114 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */ 11115 } 11116 11117 /* Copy everything from s onwards back to d. */ 11118 Move(s,d,bufend-s + 1,char); 11119 SvCUR_set(linestr, SvCUR(linestr) - (s-d)); 11120 /* Setting PL_bufend only applies when we have not dug deeper 11121 into other scopes, because sublex_done sets PL_bufend to 11122 SvEND(PL_linestr). */ 11123 if (shared == PL_parser->lex_shared) 11124 PL_bufend = SvEND(linestr); 11125 s = olds; 11126 } 11127 else { 11128 SV *linestr_save; 11129 char *oldbufptr_save; 11130 char *oldoldbufptr_save; 11131 streaming: 11132 sv_grow_fresh(tmpstr, 80); 11133 SvPVCLEAR_FRESH(tmpstr); /* avoid "uninitialized" warning */ 11134 term = PL_tokenbuf[1]; 11135 len--; 11136 linestr_save = PL_linestr; /* must restore this afterwards */ 11137 d = s; /* and this */ 11138 oldbufptr_save = PL_oldbufptr; 11139 oldoldbufptr_save = PL_oldoldbufptr; 11140 PL_linestr = newSVpvs(""); 11141 PL_bufend = SvPVX(PL_linestr); 11142 11143 while (1) { 11144 PL_bufptr = PL_bufend; 11145 CopLINE_set(PL_curcop, 11146 origline + 1 + PL_parser->herelines); 11147 11148 if ( !lex_next_chunk(LEX_NO_TERM) 11149 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) 11150 { 11151 /* Simply freeing linestr_save might seem simpler here, as it 11152 does not matter what PL_linestr points to, since we are 11153 about to croak; but in a quote-like op, linestr_save 11154 will have been prospectively freed already, via 11155 SAVEFREESV(PL_linestr) in sublex_push, so it's easier to 11156 restore PL_linestr. */ 11157 SvREFCNT_dec_NN(PL_linestr); 11158 PL_linestr = linestr_save; 11159 PL_oldbufptr = oldbufptr_save; 11160 PL_oldoldbufptr = oldoldbufptr_save; 11161 goto interminable; 11162 } 11163 11164 CopLINE_set(PL_curcop, origline); 11165 11166 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { 11167 s = lex_grow_linestr(SvLEN(PL_linestr) + 3); 11168 /* ^That should be enough to avoid this needing to grow: */ 11169 sv_catpvs(PL_linestr, "\n\0"); 11170 assert(s == SvPVX(PL_linestr)); 11171 PL_bufend = SvEND(PL_linestr); 11172 } 11173 11174 s = PL_bufptr; 11175 PL_parser->herelines++; 11176 PL_last_lop = PL_last_uni = NULL; 11177 11178#ifndef PERL_STRICT_CR 11179 if (PL_bufend - PL_linestart >= 2) { 11180 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') 11181 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r')) 11182 { 11183 PL_bufend[-2] = '\n'; 11184 PL_bufend--; 11185 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); 11186 } 11187 else if (PL_bufend[-1] == '\r') 11188 PL_bufend[-1] = '\n'; 11189 } 11190 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') 11191 PL_bufend[-1] = '\n'; 11192#endif 11193 11194 if (indented && (PL_bufend-s) >= len) { 11195 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len)); 11196 11197 if (found) { 11198 char *backup = found; 11199 indent_len = 0; 11200 11201 /* Only valid if it's preceded by whitespace only */ 11202 while (backup != s && --backup >= s) { 11203 if (! SPACE_OR_TAB(*backup)) { 11204 break; 11205 } 11206 indent_len++; 11207 } 11208 11209 /* All whitespace or none! */ 11210 if (backup == found || SPACE_OR_TAB(*backup)) { 11211 Newx(indent, indent_len + 1, char); 11212 memcpy(indent, backup, indent_len); 11213 indent[indent_len] = 0; 11214 SvREFCNT_dec(PL_linestr); 11215 PL_linestr = linestr_save; 11216 PL_linestart = SvPVX(linestr_save); 11217 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 11218 PL_oldbufptr = oldbufptr_save; 11219 PL_oldoldbufptr = oldoldbufptr_save; 11220 s = d; 11221 break; 11222 } 11223 } 11224 11225 /* Didn't find it */ 11226 sv_catsv(tmpstr,PL_linestr); 11227 } 11228 else { 11229 if (*s == term && PL_bufend-s >= len 11230 && memEQ(s,PL_tokenbuf + 1,len)) 11231 { 11232 SvREFCNT_dec(PL_linestr); 11233 PL_linestr = linestr_save; 11234 PL_linestart = SvPVX(linestr_save); 11235 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); 11236 PL_oldbufptr = oldbufptr_save; 11237 PL_oldoldbufptr = oldoldbufptr_save; 11238 s = d; 11239 break; 11240 } 11241 else { 11242 sv_catsv(tmpstr,PL_linestr); 11243 } 11244 } 11245 } /* while (1) */ 11246 } 11247 11248 PL_multi_end = origline + PL_parser->herelines; 11249 11250 if (indented && indent) { 11251 STRLEN linecount = 1; 11252 STRLEN herelen = SvCUR(tmpstr); 11253 char *ss = SvPVX(tmpstr); 11254 char *se = ss + herelen; 11255 SV *newstr = newSV(herelen+1); 11256 SvPOK_on(newstr); 11257 11258 /* Trim leading whitespace */ 11259 while (ss < se) { 11260 /* newline only? Copy and move on */ 11261 if (*ss == '\n') { 11262 sv_catpvs(newstr,"\n"); 11263 ss++; 11264 linecount++; 11265 11266 /* Found our indentation? Strip it */ 11267 } 11268 else if (se - ss >= indent_len 11269 && memEQ(ss, indent, indent_len)) 11270 { 11271 STRLEN le = 0; 11272 ss += indent_len; 11273 11274 while ((ss + le) < se && *(ss + le) != '\n') 11275 le++; 11276 11277 sv_catpvn(newstr, ss, le); 11278 ss += le; 11279 11280 /* Line doesn't begin with our indentation? Croak */ 11281 } 11282 else { 11283 Safefree(indent); 11284 Perl_croak(aTHX_ 11285 "Indentation on line %d of here-doc doesn't match delimiter", 11286 (int)linecount 11287 ); 11288 } 11289 } /* while */ 11290 11291 /* avoid sv_setsv() as we don't want to COW here */ 11292 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr)); 11293 Safefree(indent); 11294 SvREFCNT_dec_NN(newstr); 11295 } 11296 11297 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { 11298 SvPV_shrink_to_cur(tmpstr); 11299 } 11300 11301 if (!IN_BYTES) { 11302 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) 11303 SvUTF8_on(tmpstr); 11304 } 11305 11306 PL_lex_stuff = tmpstr; 11307 pl_yylval.ival = op_type; 11308 return s; 11309 11310 interminable: 11311 if (indent) 11312 Safefree(indent); 11313 SvREFCNT_dec(tmpstr); 11314 CopLINE_set(PL_curcop, origline); 11315 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1); 11316} 11317 11318 11319/* scan_inputsymbol 11320 takes: position of first '<' in input buffer 11321 returns: position of first char following the matching '>' in 11322 input buffer 11323 side-effects: pl_yylval and lex_op are set. 11324 11325 This code handles: 11326 11327 <> read from ARGV 11328 <<>> read from ARGV without magic open 11329 <FH> read from filehandle 11330 <pkg::FH> read from package qualified filehandle 11331 <pkg'FH> read from package qualified filehandle 11332 <$fh> read from filehandle in $fh 11333 <*.h> filename glob 11334 11335*/ 11336 11337STATIC char * 11338S_scan_inputsymbol(pTHX_ char *start) 11339{ 11340 char *s = start; /* current position in buffer */ 11341 char *end; 11342 I32 len; 11343 bool nomagicopen = FALSE; 11344 char *d = PL_tokenbuf; /* start of temp holding space */ 11345 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ 11346 11347 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; 11348 11349 end = (char *) memchr(s, '\n', PL_bufend - s); 11350 if (!end) 11351 end = PL_bufend; 11352 if (s[1] == '<' && s[2] == '>' && s[3] == '>') { 11353 nomagicopen = TRUE; 11354 *d = '\0'; 11355 len = 0; 11356 s += 3; 11357 } 11358 else 11359 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ 11360 11361 /* die if we didn't have space for the contents of the <>, 11362 or if it didn't end, or if we see a newline 11363 */ 11364 11365 if (len >= (I32)sizeof PL_tokenbuf) 11366 Perl_croak(aTHX_ "Excessively long <> operator"); 11367 if (s >= end) 11368 Perl_croak(aTHX_ "Unterminated <> operator"); 11369 11370 s++; 11371 11372 /* check for <$fh> 11373 Remember, only scalar variables are interpreted as filehandles by 11374 this code. Anything more complex (e.g., <$fh{$num}>) will be 11375 treated as a glob() call. 11376 This code makes use of the fact that except for the $ at the front, 11377 a scalar variable and a filehandle look the same. 11378 */ 11379 if (*d == '$' && d[1]) d++; 11380 11381 /* allow <Pkg'VALUE> or <Pkg::VALUE> */ 11382 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') { 11383 d += UTF ? UTF8SKIP(d) : 1; 11384 } 11385 11386 /* If we've tried to read what we allow filehandles to look like, and 11387 there's still text left, then it must be a glob() and not a getline. 11388 Use scan_str to pull out the stuff between the <> and treat it 11389 as nothing more than a string. 11390 */ 11391 11392 if (d - PL_tokenbuf != len) { 11393 pl_yylval.ival = OP_GLOB; 11394 s = scan_str(start,FALSE,FALSE,FALSE,NULL); 11395 if (!s) 11396 Perl_croak(aTHX_ "Glob not terminated"); 11397 return s; 11398 } 11399 else { 11400 bool readline_overridden = FALSE; 11401 GV *gv_readline; 11402 /* we're in a filehandle read situation */ 11403 d = PL_tokenbuf; 11404 11405 /* turn <> into <ARGV> */ 11406 if (!len) 11407 Copy("ARGV",d,5,char); 11408 11409 /* Check whether readline() is overridden */ 11410 if ((gv_readline = gv_override("readline",8))) 11411 readline_overridden = TRUE; 11412 11413 /* if <$fh>, create the ops to turn the variable into a 11414 filehandle 11415 */ 11416 if (*d == '$') { 11417 /* try to find it in the pad for this block, otherwise find 11418 add symbol table ops 11419 */ 11420 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); 11421 if (tmp != NOT_IN_PAD) { 11422 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { 11423 HV * const stash = PAD_COMPNAME_OURSTASH(tmp); 11424 HEK * const stashname = HvNAME_HEK(stash); 11425 SV * const sym = newSVhek_mortal(stashname); 11426 sv_catpvs(sym, "::"); 11427 sv_catpv(sym, d+1); 11428 d = SvPVX(sym); 11429 goto intro_sym; 11430 } 11431 else { 11432 OP * const o = newPADxVOP(OP_PADSV, 0, tmp); 11433 PL_lex_op = readline_overridden 11434 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11435 op_append_elem(OP_LIST, o, 11436 newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) 11437 : newUNOP(OP_READLINE, 0, o); 11438 } 11439 } 11440 else { 11441 GV *gv; 11442 ++d; 11443 intro_sym: 11444 gv = gv_fetchpv(d, 11445 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), 11446 SVt_PV); 11447 PL_lex_op = readline_overridden 11448 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11449 op_append_elem(OP_LIST, 11450 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), 11451 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 11452 : newUNOP(OP_READLINE, 0, 11453 newUNOP(OP_RV2SV, 0, 11454 newGVOP(OP_GV, 0, gv))); 11455 } 11456 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ 11457 pl_yylval.ival = OP_NULL; 11458 } 11459 11460 /* If it's none of the above, it must be a literal filehandle 11461 (<Foo::BAR> or <FOO>) so build a simple readline OP */ 11462 else { 11463 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); 11464 PL_lex_op = readline_overridden 11465 ? newUNOP(OP_ENTERSUB, OPf_STACKED, 11466 op_append_elem(OP_LIST, 11467 newGVOP(OP_GV, 0, gv), 11468 newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) 11469 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv)); 11470 pl_yylval.ival = OP_NULL; 11471 11472 /* leave the token generation above to avoid confusing the parser */ 11473 if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) { 11474 no_bareword_filehandle(d); 11475 } 11476 } 11477 } 11478 11479 return s; 11480} 11481 11482 11483/* scan_str 11484 takes: 11485 start position in buffer 11486 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but 11487 only if they are of the open/close form 11488 keep_delims preserve the delimiters around the string 11489 re_reparse compiling a run-time /(?{})/: 11490 collapse // to /, and skip encoding src 11491 delimp if non-null, this is set to the position of 11492 the closing delimiter, or just after it if 11493 the closing and opening delimiters differ 11494 (i.e., the opening delimiter of a substitu- 11495 tion replacement) 11496 returns: position to continue reading from buffer 11497 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and 11498 updates the read buffer. 11499 11500 This subroutine pulls a string out of the input. It is called for: 11501 q single quotes q(literal text) 11502 ' single quotes 'literal text' 11503 qq double quotes qq(interpolate $here please) 11504 " double quotes "interpolate $here please" 11505 qx backticks qx(/bin/ls -l) 11506 ` backticks `/bin/ls -l` 11507 qw quote words @EXPORT_OK = qw( func() $spam ) 11508 m// regexp match m/this/ 11509 s/// regexp substitute s/this/that/ 11510 tr/// string transliterate tr/this/that/ 11511 y/// string transliterate y/this/that/ 11512 ($*@) sub prototypes sub foo ($) 11513 (stuff) sub attr parameters sub foo : attr(stuff) 11514 <> readline or globs <FOO>, <>, <$fh>, or <*.c> 11515 11516 In most of these cases (all but <>, patterns and transliterate) 11517 yylex() calls scan_str(). m// makes yylex() call scan_pat() which 11518 calls scan_str(). s/// makes yylex() call scan_subst() which calls 11519 scan_str(). tr/// and y/// make yylex() call scan_trans() which 11520 calls scan_str(). 11521 11522 It skips whitespace before the string starts, and treats the first 11523 character as the delimiter. If the delimiter is one of ([{< then 11524 the corresponding "close" character )]}> is used as the closing 11525 delimiter. It allows quoting of delimiters, and if the string has 11526 balanced delimiters ([{<>}]) it allows nesting. 11527 11528 On success, the SV with the resulting string is put into lex_stuff or, 11529 if that is already non-NULL, into lex_repl. The second case occurs only 11530 when parsing the RHS of the special constructs s/// and tr/// (y///). 11531 For convenience, the terminating delimiter character is stuffed into 11532 SvIVX of the SV. 11533*/ 11534 11535char * 11536Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse, 11537 char **delimp 11538 ) 11539{ 11540 SV *sv; /* scalar value: string */ 11541 char *s = start; /* current position in the buffer */ 11542 char *to; /* current position in the sv's data */ 11543 int brackets = 1; /* bracket nesting level */ 11544 bool d_is_utf8 = FALSE; /* is there any utf8 content? */ 11545 UV open_delim_code; /* code point */ 11546 char open_delim_str[UTF8_MAXBYTES+1]; 11547 STRLEN delim_byte_len; /* each delimiter currently is the same number 11548 of bytes */ 11549 line_t herelines; 11550 11551 /* The only non-UTF character that isn't a stand alone grapheme is 11552 * white-space, hence can't be a delimiter. */ 11553 const char * non_grapheme_msg = "Use of unassigned code point or" 11554 " non-standalone grapheme for a delimiter" 11555 " is not allowed"; 11556 PERL_ARGS_ASSERT_SCAN_STR; 11557 11558 /* skip space before the delimiter */ 11559 if (isSPACE(*s)) { /* skipspace can change the buffer 's' is in, so 11560 'start' also has to change */ 11561 s = start = skipspace(s); 11562 } 11563 11564 /* mark where we are, in case we need to report errors */ 11565 CLINE; 11566 11567 /* after skipping whitespace, the next character is the delimiter */ 11568 if (! UTF || UTF8_IS_INVARIANT(*s)) { 11569 open_delim_code = (U8) *s; 11570 open_delim_str[0] = *s; 11571 delim_byte_len = 1; 11572 } 11573 else { 11574 open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, 11575 &delim_byte_len); 11576 if (UNLIKELY(! is_grapheme((U8 *) start, 11577 (U8 *) s, 11578 (U8 *) PL_bufend, 11579 open_delim_code))) 11580 { 11581 yyerror(non_grapheme_msg); 11582 } 11583 11584 Copy(s, open_delim_str, delim_byte_len, char); 11585 } 11586 open_delim_str[delim_byte_len] = '\0'; /* Only for safety */ 11587 11588 11589 /* mark where we are */ 11590 PL_multi_start = CopLINE(PL_curcop); 11591 PL_multi_open = open_delim_code; 11592 herelines = PL_parser->herelines; 11593 11594 const char * legal_paired_opening_delims; 11595 const char * legal_paired_closing_delims; 11596 const char * deprecated_opening_delims; 11597 if (FEATURE_MORE_DELIMS_IS_ENABLED) { 11598 if (UTF) { 11599 legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS; 11600 legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS; 11601 11602 /* We are deprecating using a closing delimiter as the opening, in 11603 * case we want in the future to accept them reversed. The string 11604 * may include ones that are legal, but the code below won't look 11605 * at this string unless it didn't find a legal opening one */ 11606 deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS; 11607 } 11608 else { 11609 legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS; 11610 legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS; 11611 deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS; 11612 } 11613 } 11614 else { 11615 legal_paired_opening_delims = "([{<"; 11616 legal_paired_closing_delims = ")]}>"; 11617 deprecated_opening_delims = (UTF) 11618 ? DEPRECATED_OPENING_UTF8_BRACKETS 11619 : DEPRECATED_OPENING_NON_UTF8_BRACKETS; 11620 } 11621 11622 const char * legal_paired_opening_delims_end = legal_paired_opening_delims 11623 + strlen(legal_paired_opening_delims); 11624 const char * deprecated_delims_end = deprecated_opening_delims 11625 + strlen(deprecated_opening_delims); 11626 11627 const char * close_delim_str = open_delim_str; 11628 UV close_delim_code = open_delim_code; 11629 11630 /* If the delimiter has a mirror-image closing one, get it */ 11631 const char *tmps = ninstr(legal_paired_opening_delims, 11632 legal_paired_opening_delims_end, 11633 open_delim_str, open_delim_str + delim_byte_len); 11634 if (tmps) { 11635 /* Here, there is a paired delimiter, and tmps points to its position 11636 in the string of the accepted opening paired delimiters. The 11637 corresponding position in the string of closing ones is the 11638 beginning of the paired mate. Both contain the same number of 11639 bytes. */ 11640 close_delim_str = legal_paired_closing_delims 11641 + (tmps - legal_paired_opening_delims); 11642 11643 /* The list of paired delimiters contains all the ASCII ones that have 11644 * always been legal, and no other ASCIIs. Don't raise a message if 11645 * using one of these */ 11646 if (! isASCII(open_delim_code)) { 11647 Perl_ck_warner_d(aTHX_ 11648 packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS), 11649 "Use of '%" UTF8f "' is experimental as a string delimiter", 11650 UTF8fARG(UTF, delim_byte_len, open_delim_str)); 11651 } 11652 11653 close_delim_code = (UTF) 11654 ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL) 11655 : * (U8 *) close_delim_str; 11656 } 11657 else { /* Here, the delimiter isn't paired, hence the close is the same as 11658 the open; and has already been set up. But make sure it isn't 11659 deprecated to use this particular delimiter, as we plan 11660 eventually to make it paired. */ 11661 if (ninstr(deprecated_opening_delims, deprecated_delims_end, 11662 open_delim_str, open_delim_str + delim_byte_len)) 11663 { 11664 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__DELIMITER_WILL_BE_PAIRED), 11665 "Use of '%" UTF8f "' is deprecated as a string delimiter", 11666 UTF8fARG(UTF, delim_byte_len, open_delim_str)); 11667 } 11668 11669 /* Note that a NUL may be used as a delimiter, and this happens when 11670 * delimiting an empty string, and no special handling for it is 11671 * needed, as ninstr() calls are used */ 11672 } 11673 11674 PL_multi_close = close_delim_code; 11675 11676 if (PL_multi_open == PL_multi_close) { 11677 keep_bracketed_quoted = FALSE; 11678 } 11679 11680 /* create a new SV to hold the contents. 79 is the SV's initial length. 11681 What a random number. */ 11682 sv = newSV_type(SVt_PVIV); 11683 sv_grow_fresh(sv, 79); 11684 SvIV_set(sv, close_delim_code); 11685 (void)SvPOK_only(sv); /* validate pointer */ 11686 11687 /* move past delimiter and try to read a complete string */ 11688 if (keep_delims) 11689 sv_catpvn(sv, s, delim_byte_len); 11690 s += delim_byte_len; 11691 for (;;) { 11692 /* extend sv if need be */ 11693 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); 11694 /* set 'to' to the next character in the sv's string */ 11695 to = SvPVX(sv)+SvCUR(sv); 11696 11697 /* read until we run out of string, or we find the closing delimiter */ 11698 while (s < PL_bufend) { 11699 /* embedded newlines increment the line count */ 11700 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) 11701 COPLINE_INC_WITH_HERELINES; 11702 11703 /* backslashes can escape the closing delimiter */ 11704 if ( *s == '\\' && s < PL_bufend - delim_byte_len 11705 11706 /* ... but not if the delimiter itself is a backslash */ 11707 && close_delim_code != '\\') 11708 { 11709 /* Here, we have an escaping backslash. If we're supposed to 11710 * discard those that escape the closing delimiter, just 11711 * discard this one */ 11712 if ( ! keep_bracketed_quoted 11713 && ( memEQ(s + 1, open_delim_str, delim_byte_len) 11714 || ( PL_multi_open == PL_multi_close 11715 && re_reparse && s[1] == '\\') 11716 || memEQ(s + 1, close_delim_str, delim_byte_len))) 11717 { 11718 s++; 11719 } 11720 else /* any other escapes are simply copied straight through */ 11721 *to++ = *s++; 11722 } 11723 else if ( s < PL_bufend - (delim_byte_len - 1) 11724 && memEQ(s, close_delim_str, delim_byte_len) 11725 && --brackets <= 0) 11726 { 11727 /* Found unescaped closing delimiter, unnested if we care about 11728 * that; so are done. 11729 * 11730 * In the case of the opening and closing delimiters being 11731 * different, we have to deal with nesting; the conditional 11732 * above makes sure we don't get here until the nesting level, 11733 * 'brackets', is back down to zero. In the other case, 11734 * nesting isn't an issue, and 'brackets' never can get 11735 * incremented above 0, so will come here at the first closing 11736 * delimiter. 11737 * 11738 * Only grapheme delimiters are legal. */ 11739 if ( UTF /* All Non-UTF-8's are graphemes */ 11740 && UNLIKELY(! is_grapheme((U8 *) start, 11741 (U8 *) s, 11742 (U8 *) PL_bufend, 11743 close_delim_code))) 11744 { 11745 yyerror(non_grapheme_msg); 11746 } 11747 11748 break; 11749 } 11750 /* No nesting if open eq close */ 11751 else if ( PL_multi_open != PL_multi_close 11752 && s < PL_bufend - (delim_byte_len - 1) 11753 && memEQ(s, open_delim_str, delim_byte_len)) 11754 { 11755 brackets++; 11756 } 11757 11758 /* Here, still in the middle of the string; copy this character */ 11759 if (! UTF || UTF8_IS_INVARIANT((U8) *s)) { 11760 *to++ = *s++; 11761 } 11762 else { 11763 size_t this_char_len = UTF8SKIP(s); 11764 Copy(s, to, this_char_len, char); 11765 s += this_char_len; 11766 to += this_char_len; 11767 11768 d_is_utf8 = TRUE; 11769 } 11770 } /* End of loop through buffer */ 11771 11772 /* Here, found end of the string, OR ran out of buffer: terminate the 11773 * copied string and update the sv's end-of-string */ 11774 *to = '\0'; 11775 SvCUR_set(sv, to - SvPVX_const(sv)); 11776 11777 /* 11778 * this next chunk reads more into the buffer if we're not done yet 11779 */ 11780 11781 if (s < PL_bufend) 11782 break; /* handle case where we are done yet :-) */ 11783 11784#ifndef PERL_STRICT_CR 11785 if (to - SvPVX_const(sv) >= 2) { 11786 if ( (to[-2] == '\r' && to[-1] == '\n') 11787 || (to[-2] == '\n' && to[-1] == '\r')) 11788 { 11789 to[-2] = '\n'; 11790 to--; 11791 SvCUR_set(sv, to - SvPVX_const(sv)); 11792 } 11793 else if (to[-1] == '\r') 11794 to[-1] = '\n'; 11795 } 11796 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') 11797 to[-1] = '\n'; 11798#endif 11799 11800 /* if we're out of file, or a read fails, bail and reset the current 11801 line marker so we can report where the unterminated string began 11802 */ 11803 COPLINE_INC_WITH_HERELINES; 11804 PL_bufptr = PL_bufend; 11805 if (!lex_next_chunk(0)) { 11806 ASSUME(sv); 11807 SvREFCNT_dec(sv); 11808 CopLINE_set(PL_curcop, (line_t)PL_multi_start); 11809 return NULL; 11810 } 11811 s = start = PL_bufptr; 11812 } /* End of infinite loop */ 11813 11814 /* at this point, we have successfully read the delimited string */ 11815 11816 if (keep_delims) 11817 sv_catpvn(sv, s, delim_byte_len); 11818 s += delim_byte_len; 11819 11820 if (d_is_utf8) 11821 SvUTF8_on(sv); 11822 11823 PL_multi_end = CopLINE(PL_curcop); 11824 CopLINE_set(PL_curcop, PL_multi_start); 11825 PL_parser->herelines = herelines; 11826 11827 /* if we allocated too much space, give some back */ 11828 if (SvCUR(sv) + 5 < SvLEN(sv)) { 11829 SvLEN_set(sv, SvCUR(sv) + 1); 11830 SvPV_shrink_to_cur(sv); 11831 } 11832 11833 /* decide whether this is the first or second quoted string we've read 11834 for this op 11835 */ 11836 11837 if (PL_lex_stuff) 11838 PL_parser->lex_sub_repl = sv; 11839 else 11840 PL_lex_stuff = sv; 11841 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s; 11842 return s; 11843} 11844 11845/* 11846 scan_num 11847 takes: pointer to position in buffer 11848 returns: pointer to new position in buffer 11849 side-effects: builds ops for the constant in pl_yylval.op 11850 11851 Read a number in any of the formats that Perl accepts: 11852 11853 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. 11854 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 11855 0b[01](_?[01])* binary integers 11856 0o?[0-7](_?[0-7])* octal integers 11857 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers 11858 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats 11859 11860 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the 11861 thing it reads. 11862 11863 If it reads a number without a decimal point or an exponent, it will 11864 try converting the number to an integer and see if it can do so 11865 without loss of precision. 11866*/ 11867 11868char * 11869Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) 11870{ 11871 const char *s = start; /* current position in buffer */ 11872 char *d; /* destination in temp buffer */ 11873 char *e; /* end of temp buffer */ 11874 NV nv; /* number read, as a double */ 11875 SV *sv = NULL; /* place to put the converted number */ 11876 bool floatit; /* boolean: int or float? */ 11877 const char *lastub = NULL; /* position of last underbar */ 11878 static const char* const number_too_long = "Number too long"; 11879 bool warned_about_underscore = 0; 11880 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */ 11881#define WARN_ABOUT_UNDERSCORE() \ 11882 do { \ 11883 if (!warned_about_underscore) { \ 11884 warned_about_underscore = 1; \ 11885 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \ 11886 "Misplaced _ in number"); \ 11887 } \ 11888 } while(0) 11889 /* Hexadecimal floating point. 11890 * 11891 * In many places (where we have quads and NV is IEEE 754 double) 11892 * we can fit the mantissa bits of a NV into an unsigned quad. 11893 * (Note that UVs might not be quads even when we have quads.) 11894 * This will not work everywhere, though (either no quads, or 11895 * using long doubles), in which case we have to resort to NV, 11896 * which will probably mean horrible loss of precision due to 11897 * multiple fp operations. */ 11898 bool hexfp = FALSE; 11899 int total_bits = 0; 11900 int significant_bits = 0; 11901#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t) 11902# define HEXFP_UQUAD 11903 Uquad_t hexfp_uquad = 0; 11904 int hexfp_frac_bits = 0; 11905#else 11906# define HEXFP_NV 11907 NV hexfp_nv = 0.0; 11908#endif 11909 NV hexfp_mult = 1.0; 11910 UV high_non_zero = 0; /* highest digit */ 11911 int non_zero_integer_digits = 0; 11912 bool new_octal = FALSE; /* octal with "0o" prefix */ 11913 11914 PERL_ARGS_ASSERT_SCAN_NUM; 11915 11916 /* We use the first character to decide what type of number this is */ 11917 11918 switch (*s) { 11919 default: 11920 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); 11921 11922 /* if it starts with a 0, it could be an octal number, a decimal in 11923 0.13 disguise, or a hexadecimal number, or a binary number. */ 11924 case '0': 11925 { 11926 /* variables: 11927 u holds the "number so far" 11928 overflowed was the number more than we can hold? 11929 11930 Shift is used when we add a digit. It also serves as an "are 11931 we in octal/hex/binary?" indicator to disallow hex characters 11932 when in octal mode. 11933 */ 11934 NV n = 0.0; 11935 UV u = 0; 11936 bool overflowed = FALSE; 11937 bool just_zero = TRUE; /* just plain 0 or binary number? */ 11938 bool has_digs = FALSE; 11939 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; 11940 static const char* const bases[5] = 11941 { "", "binary", "", "octal", "hexadecimal" }; 11942 static const char* const Bases[5] = 11943 { "", "Binary", "", "Octal", "Hexadecimal" }; 11944 static const char* const maxima[5] = 11945 { "", 11946 "0b11111111111111111111111111111111", 11947 "", 11948 "037777777777", 11949 "0xffffffff" }; 11950 11951 /* check for hex */ 11952 if (isALPHA_FOLD_EQ(s[1], 'x')) { 11953 shift = 4; 11954 s += 2; 11955 just_zero = FALSE; 11956 } else if (isALPHA_FOLD_EQ(s[1], 'b')) { 11957 shift = 1; 11958 s += 2; 11959 just_zero = FALSE; 11960 } 11961 /* check for a decimal in disguise */ 11962 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) 11963 goto decimal; 11964 /* so it must be octal */ 11965 else { 11966 shift = 3; 11967 s++; 11968 if (isALPHA_FOLD_EQ(*s, 'o')) { 11969 s++; 11970 just_zero = FALSE; 11971 new_octal = TRUE; 11972 } 11973 } 11974 11975 if (*s == '_') { 11976 WARN_ABOUT_UNDERSCORE(); 11977 lastub = s++; 11978 } 11979 11980 /* read the rest of the number */ 11981 for (;;) { 11982 /* x is used in the overflow test, 11983 b is the digit we're adding on. */ 11984 UV x, b; 11985 11986 switch (*s) { 11987 11988 /* if we don't mention it, we're done */ 11989 default: 11990 goto out; 11991 11992 /* _ are ignored -- but warned about if consecutive */ 11993 case '_': 11994 if (lastub && s == lastub + 1) 11995 WARN_ABOUT_UNDERSCORE(); 11996 lastub = s++; 11997 break; 11998 11999 /* 8 and 9 are not octal */ 12000 case '8': case '9': 12001 if (shift == 3) 12002 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); 12003 /* FALLTHROUGH */ 12004 12005 /* octal digits */ 12006 case '2': case '3': case '4': 12007 case '5': case '6': case '7': 12008 if (shift == 1) 12009 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); 12010 /* FALLTHROUGH */ 12011 12012 case '0': case '1': 12013 b = *s++ & 15; /* ASCII digit -> value of digit */ 12014 goto digit; 12015 12016 /* hex digits */ 12017 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 12018 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 12019 /* make sure they said 0x */ 12020 if (shift != 4) 12021 goto out; 12022 b = (*s++ & 7) + 9; 12023 12024 /* Prepare to put the digit we have onto the end 12025 of the number so far. We check for overflows. 12026 */ 12027 12028 digit: 12029 just_zero = FALSE; 12030 has_digs = TRUE; 12031 if (!overflowed) { 12032 assert(shift >= 0); 12033 x = u << shift; /* make room for the digit */ 12034 12035 total_bits += shift; 12036 12037 if ((x >> shift) != u 12038 && !(PL_hints & HINT_NEW_BINARY)) { 12039 overflowed = TRUE; 12040 n = (NV) u; 12041 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 12042 "Integer overflow in %s number", 12043 bases[shift]); 12044 } else 12045 u = x | b; /* add the digit to the end */ 12046 } 12047 if (overflowed) { 12048 n *= nvshift[shift]; 12049 /* If an NV has not enough bits in its 12050 * mantissa to represent an UV this summing of 12051 * small low-order numbers is a waste of time 12052 * (because the NV cannot preserve the 12053 * low-order bits anyway): we could just 12054 * remember when did we overflow and in the 12055 * end just multiply n by the right 12056 * amount. */ 12057 n += (NV) b; 12058 } 12059 12060 if (high_non_zero == 0 && b > 0) 12061 high_non_zero = b; 12062 12063 if (high_non_zero) 12064 non_zero_integer_digits++; 12065 12066 /* this could be hexfp, but peek ahead 12067 * to avoid matching ".." */ 12068 if (UNLIKELY(HEXFP_PEEK(s))) { 12069 goto out; 12070 } 12071 12072 break; 12073 } 12074 } 12075 12076 /* if we get here, we had success: make a scalar value from 12077 the number. 12078 */ 12079 out: 12080 12081 /* final misplaced underbar check */ 12082 if (s[-1] == '_') 12083 WARN_ABOUT_UNDERSCORE(); 12084 12085 if (UNLIKELY(HEXFP_PEEK(s))) { 12086 /* Do sloppy (on the underbars) but quick detection 12087 * (and value construction) for hexfp, the decimal 12088 * detection will shortly be more thorough with the 12089 * underbar checks. */ 12090 const char* h = s; 12091 significant_bits = non_zero_integer_digits * shift; 12092#ifdef HEXFP_UQUAD 12093 hexfp_uquad = u; 12094#else /* HEXFP_NV */ 12095 hexfp_nv = u; 12096#endif 12097 /* Ignore the leading zero bits of 12098 * the high (first) non-zero digit. */ 12099 if (high_non_zero) { 12100 if (high_non_zero < 0x8) 12101 significant_bits--; 12102 if (high_non_zero < 0x4) 12103 significant_bits--; 12104 if (high_non_zero < 0x2) 12105 significant_bits--; 12106 } 12107 12108 if (*h == '.') { 12109#ifdef HEXFP_NV 12110 NV nv_mult = 1.0; 12111#endif 12112 bool accumulate = TRUE; 12113 U8 b = 0; /* silence compiler warning */ 12114 int lim = 1 << shift; 12115 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) || 12116 *h == '_'); h++) { 12117 if (isXDIGIT(*h)) { 12118 significant_bits += shift; 12119#ifdef HEXFP_UQUAD 12120 if (accumulate) { 12121 if (significant_bits < NV_MANT_DIG) { 12122 /* We are in the long "run" of xdigits, 12123 * accumulate the full four bits. */ 12124 assert(shift >= 0); 12125 hexfp_uquad <<= shift; 12126 hexfp_uquad |= b; 12127 hexfp_frac_bits += shift; 12128 } else if (significant_bits - shift < NV_MANT_DIG) { 12129 /* We are at a hexdigit either at, 12130 * or straddling, the edge of mantissa. 12131 * We will try grabbing as many as 12132 * possible bits. */ 12133 int tail = 12134 significant_bits - NV_MANT_DIG; 12135 if (tail <= 0) 12136 tail += shift; 12137 assert(tail >= 0); 12138 hexfp_uquad <<= tail; 12139 assert((shift - tail) >= 0); 12140 hexfp_uquad |= b >> (shift - tail); 12141 hexfp_frac_bits += tail; 12142 12143 /* Ignore the trailing zero bits 12144 * of the last non-zero xdigit. 12145 * 12146 * The assumption here is that if 12147 * one has input of e.g. the xdigit 12148 * eight (0x8), there is only one 12149 * bit being input, not the full 12150 * four bits. Conversely, if one 12151 * specifies a zero xdigit, the 12152 * assumption is that one really 12153 * wants all those bits to be zero. */ 12154 if (b) { 12155 if ((b & 0x1) == 0x0) { 12156 significant_bits--; 12157 if ((b & 0x2) == 0x0) { 12158 significant_bits--; 12159 if ((b & 0x4) == 0x0) { 12160 significant_bits--; 12161 } 12162 } 12163 } 12164 } 12165 12166 accumulate = FALSE; 12167 } 12168 } else { 12169 /* Keep skipping the xdigits, and 12170 * accumulating the significant bits, 12171 * but do not shift the uquad 12172 * (which would catastrophically drop 12173 * high-order bits) or accumulate the 12174 * xdigits anymore. */ 12175 } 12176#else /* HEXFP_NV */ 12177 if (accumulate) { 12178 nv_mult /= nvshift[shift]; 12179 if (nv_mult > 0.0) 12180 hexfp_nv += b * nv_mult; 12181 else 12182 accumulate = FALSE; 12183 } 12184#endif 12185 } 12186 if (significant_bits >= NV_MANT_DIG) 12187 accumulate = FALSE; 12188 } 12189 } 12190 12191 if ((total_bits > 0 || significant_bits > 0) && 12192 isALPHA_FOLD_EQ(*h, 'p')) { 12193 bool negexp = FALSE; 12194 h++; 12195 if (*h == '+') 12196 h++; 12197 else if (*h == '-') { 12198 negexp = TRUE; 12199 h++; 12200 } 12201 if (isDIGIT(*h)) { 12202 I32 hexfp_exp = 0; 12203 while (isDIGIT(*h) || *h == '_') { 12204 if (isDIGIT(*h)) { 12205 hexfp_exp *= 10; 12206 hexfp_exp += *h - '0'; 12207#ifdef NV_MIN_EXP 12208 if (negexp 12209 && -hexfp_exp < NV_MIN_EXP - 1) { 12210 /* NOTE: this means that the exponent 12211 * underflow warning happens for 12212 * the IEEE 754 subnormals (denormals), 12213 * because DBL_MIN_EXP etc are the lowest 12214 * possible binary (or, rather, DBL_RADIX-base) 12215 * exponent for normals, not subnormals. 12216 * 12217 * This may or may not be a good thing. */ 12218 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 12219 "Hexadecimal float: exponent underflow"); 12220 break; 12221 } 12222#endif 12223#ifdef NV_MAX_EXP 12224 if (!negexp 12225 && hexfp_exp > NV_MAX_EXP - 1) { 12226 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 12227 "Hexadecimal float: exponent overflow"); 12228 break; 12229 } 12230#endif 12231 } 12232 h++; 12233 } 12234 if (negexp) 12235 hexfp_exp = -hexfp_exp; 12236#ifdef HEXFP_UQUAD 12237 hexfp_exp -= hexfp_frac_bits; 12238#endif 12239 hexfp_mult = Perl_pow(2.0, hexfp_exp); 12240 hexfp = TRUE; 12241 goto decimal; 12242 } 12243 } 12244 } 12245 12246 if (!just_zero && !has_digs) { 12247 /* 0x, 0o or 0b with no digits, treat it as an error. 12248 Originally this backed up the parse before the b or 12249 x, but that has the potential for silent changes in 12250 behaviour, like for: "0x.3" and "0x+$foo". 12251 */ 12252 const char *d = s; 12253 char *oldbp = PL_bufptr; 12254 if (*d) ++d; /* so the user sees the bad non-digit */ 12255 PL_bufptr = (char *)d; /* so yyerror reports the context */ 12256 yyerror(Perl_form(aTHX_ "No digits found for %s literal", 12257 bases[shift])); 12258 PL_bufptr = oldbp; 12259 } 12260 12261 if (overflowed) { 12262 if (n > 4294967295.0) 12263 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 12264 "%s number > %s non-portable", 12265 Bases[shift], 12266 new_octal ? "0o37777777777" : maxima[shift]); 12267 sv = newSVnv(n); 12268 } 12269 else { 12270#if UVSIZE > 4 12271 if (u > 0xffffffff) 12272 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 12273 "%s number > %s non-portable", 12274 Bases[shift], 12275 new_octal ? "0o37777777777" : maxima[shift]); 12276#endif 12277 sv = newSVuv(u); 12278 } 12279 if (just_zero && (PL_hints & HINT_NEW_INTEGER)) 12280 sv = new_constant(start, s - start, "integer", 12281 sv, NULL, NULL, 0, NULL); 12282 else if (PL_hints & HINT_NEW_BINARY) 12283 sv = new_constant(start, s - start, "binary", 12284 sv, NULL, NULL, 0, NULL); 12285 } 12286 break; 12287 12288 /* 12289 handle decimal numbers. 12290 we're also sent here when we read a 0 as the first digit 12291 */ 12292 case '1': case '2': case '3': case '4': case '5': 12293 case '6': case '7': case '8': case '9': case '.': 12294 decimal: 12295 d = PL_tokenbuf; 12296 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ 12297 floatit = FALSE; 12298 if (hexfp) { 12299 floatit = TRUE; 12300 *d++ = '0'; 12301 switch (shift) { 12302 case 4: 12303 *d++ = 'x'; 12304 s = start + 2; 12305 break; 12306 case 3: 12307 if (new_octal) { 12308 *d++ = 'o'; 12309 s = start + 2; 12310 break; 12311 } 12312 s = start + 1; 12313 break; 12314 case 1: 12315 *d++ = 'b'; 12316 s = start + 2; 12317 break; 12318 default: 12319 NOT_REACHED; /* NOTREACHED */ 12320 } 12321 } 12322 12323 /* read next group of digits and _ and copy into d */ 12324 while (isDIGIT(*s) 12325 || *s == '_' 12326 || UNLIKELY(hexfp && isXDIGIT(*s))) 12327 { 12328 /* skip underscores, checking for misplaced ones 12329 if -w is on 12330 */ 12331 if (*s == '_') { 12332 if (lastub && s == lastub + 1) 12333 WARN_ABOUT_UNDERSCORE(); 12334 lastub = s++; 12335 } 12336 else { 12337 /* check for end of fixed-length buffer */ 12338 if (d >= e) 12339 Perl_croak(aTHX_ "%s", number_too_long); 12340 /* if we're ok, copy the character */ 12341 *d++ = *s++; 12342 } 12343 } 12344 12345 /* final misplaced underbar check */ 12346 if (lastub && s == lastub + 1) 12347 WARN_ABOUT_UNDERSCORE(); 12348 12349 /* read a decimal portion if there is one. avoid 12350 3..5 being interpreted as the number 3. followed 12351 by .5 12352 */ 12353 if (*s == '.' && s[1] != '.') { 12354 floatit = TRUE; 12355 *d++ = *s++; 12356 12357 if (*s == '_') { 12358 WARN_ABOUT_UNDERSCORE(); 12359 lastub = s; 12360 } 12361 12362 /* copy, ignoring underbars, until we run out of digits. 12363 */ 12364 for (; isDIGIT(*s) 12365 || *s == '_' 12366 || UNLIKELY(hexfp && isXDIGIT(*s)); 12367 s++) 12368 { 12369 /* fixed length buffer check */ 12370 if (d >= e) 12371 Perl_croak(aTHX_ "%s", number_too_long); 12372 if (*s == '_') { 12373 if (lastub && s == lastub + 1) 12374 WARN_ABOUT_UNDERSCORE(); 12375 lastub = s; 12376 } 12377 else 12378 *d++ = *s; 12379 } 12380 /* fractional part ending in underbar? */ 12381 if (s[-1] == '_') 12382 WARN_ABOUT_UNDERSCORE(); 12383 if (*s == '.' && isDIGIT(s[1])) { 12384 /* oops, it's really a v-string, but without the "v" */ 12385 s = start; 12386 goto vstring; 12387 } 12388 } 12389 12390 /* read exponent part, if present */ 12391 if ((isALPHA_FOLD_EQ(*s, 'e') 12392 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) 12393 && memCHRs("+-0123456789_", s[1])) 12394 { 12395 int exp_digits = 0; 12396 const char *save_s = s; 12397 char * save_d = d; 12398 12399 /* regardless of whether user said 3E5 or 3e5, use lower 'e', 12400 ditto for p (hexfloats) */ 12401 if ((isALPHA_FOLD_EQ(*s, 'e'))) { 12402 /* At least some Mach atof()s don't grok 'E' */ 12403 *d++ = 'e'; 12404 } 12405 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) { 12406 *d++ = 'p'; 12407 } 12408 12409 s++; 12410 12411 12412 /* stray preinitial _ */ 12413 if (*s == '_') { 12414 WARN_ABOUT_UNDERSCORE(); 12415 lastub = s++; 12416 } 12417 12418 /* allow positive or negative exponent */ 12419 if (*s == '+' || *s == '-') 12420 *d++ = *s++; 12421 12422 /* stray initial _ */ 12423 if (*s == '_') { 12424 WARN_ABOUT_UNDERSCORE(); 12425 lastub = s++; 12426 } 12427 12428 /* read digits of exponent */ 12429 while (isDIGIT(*s) || *s == '_') { 12430 if (isDIGIT(*s)) { 12431 ++exp_digits; 12432 if (d >= e) 12433 Perl_croak(aTHX_ "%s", number_too_long); 12434 *d++ = *s++; 12435 } 12436 else { 12437 if (((lastub && s == lastub + 1) 12438 || (!isDIGIT(s[1]) && s[1] != '_'))) 12439 WARN_ABOUT_UNDERSCORE(); 12440 lastub = s++; 12441 } 12442 } 12443 12444 if (!exp_digits) { 12445 /* no exponent digits, the [eEpP] could be for something else, 12446 * though in practice we don't get here for p since that's preparsed 12447 * earlier, and results in only the 0xX being consumed, so behave similarly 12448 * for decimal floats and consume only the D.DD, leaving the [eE] to the 12449 * next token. 12450 */ 12451 s = save_s; 12452 d = save_d; 12453 } 12454 else { 12455 floatit = TRUE; 12456 } 12457 } 12458 12459 12460 /* 12461 We try to do an integer conversion first if no characters 12462 indicating "float" have been found. 12463 */ 12464 12465 if (!floatit) { 12466 UV uv; 12467 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); 12468 12469 if (flags == IS_NUMBER_IN_UV) { 12470 if (uv <= IV_MAX) 12471 sv = newSViv(uv); /* Prefer IVs over UVs. */ 12472 else 12473 sv = newSVuv(uv); 12474 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { 12475 if (uv <= (UV) IV_MIN) 12476 sv = newSViv(-(IV)uv); 12477 else 12478 floatit = TRUE; 12479 } else 12480 floatit = TRUE; 12481 } 12482 if (floatit) { 12483 /* terminate the string */ 12484 *d = '\0'; 12485 if (UNLIKELY(hexfp)) { 12486# ifdef NV_MANT_DIG 12487 if (significant_bits > NV_MANT_DIG) 12488 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), 12489 "Hexadecimal float: mantissa overflow"); 12490# endif 12491#ifdef HEXFP_UQUAD 12492 nv = hexfp_uquad * hexfp_mult; 12493#else /* HEXFP_NV */ 12494 nv = hexfp_nv * hexfp_mult; 12495#endif 12496 } else { 12497 nv = Atof(PL_tokenbuf); 12498 } 12499 sv = newSVnv(nv); 12500 } 12501 12502 if ( floatit 12503 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { 12504 const char *const key = floatit ? "float" : "integer"; 12505 const STRLEN keylen = floatit ? 5 : 7; 12506 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, 12507 key, keylen, sv, NULL, NULL, 0, NULL); 12508 } 12509 break; 12510 12511 /* if it starts with a v, it could be a v-string */ 12512 case 'v': 12513 vstring: 12514 sv = newSV(5); /* preallocate storage space */ 12515 ENTER_with_name("scan_vstring"); 12516 SAVEFREESV(sv); 12517 s = scan_vstring(s, PL_bufend, sv); 12518 SvREFCNT_inc_simple_void_NN(sv); 12519 LEAVE_with_name("scan_vstring"); 12520 break; 12521 } 12522 12523 /* make the op for the constant and return */ 12524 12525 if (sv) 12526 lvalp->opval = newSVOP(OP_CONST, 0, sv); 12527 else 12528 lvalp->opval = NULL; 12529 12530 return (char *)s; 12531} 12532 12533STATIC char * 12534S_scan_formline(pTHX_ char *s) 12535{ 12536 SV * const stuff = newSVpvs(""); 12537 bool needargs = FALSE; 12538 bool eofmt = FALSE; 12539 12540 PERL_ARGS_ASSERT_SCAN_FORMLINE; 12541 12542 while (!needargs) { 12543 char *eol; 12544 if (*s == '.') { 12545 char *t = s+1; 12546#ifdef PERL_STRICT_CR 12547 while (SPACE_OR_TAB(*t)) 12548 t++; 12549#else 12550 while (SPACE_OR_TAB(*t) || *t == '\r') 12551 t++; 12552#endif 12553 if (*t == '\n' || t == PL_bufend) { 12554 eofmt = TRUE; 12555 break; 12556 } 12557 } 12558 eol = (char *) memchr(s,'\n',PL_bufend-s); 12559 if (! eol) { 12560 eol = PL_bufend; 12561 } 12562 else { 12563 eol++; 12564 } 12565 if (*s != '#') { 12566 char *t; 12567 for (t = s; t < eol; t++) { 12568 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { 12569 needargs = FALSE; 12570 goto enough; /* ~~ must be first line in formline */ 12571 } 12572 if (*t == '@' || *t == '^') 12573 needargs = TRUE; 12574 } 12575 if (eol > s) { 12576 sv_catpvn(stuff, s, eol-s); 12577#ifndef PERL_STRICT_CR 12578 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { 12579 char *end = SvPVX(stuff) + SvCUR(stuff); 12580 end[-2] = '\n'; 12581 end[-1] = '\0'; 12582 SvCUR_set(stuff, SvCUR(stuff) - 1); 12583 } 12584#endif 12585 } 12586 else 12587 break; 12588 } 12589 s = (char*)eol; 12590 if ((PL_rsfp || PL_parser->filtered) 12591 && PL_parser->form_lex_state == LEX_NORMAL) { 12592 bool got_some; 12593 PL_bufptr = PL_bufend; 12594 COPLINE_INC_WITH_HERELINES; 12595 got_some = lex_next_chunk(0); 12596 CopLINE_dec(PL_curcop); 12597 s = PL_bufptr; 12598 if (!got_some) 12599 break; 12600 } 12601 incline(s, PL_bufend); 12602 } 12603 enough: 12604 if (!SvCUR(stuff) || needargs) 12605 PL_lex_state = PL_parser->form_lex_state; 12606 if (SvCUR(stuff)) { 12607 PL_expect = XSTATE; 12608 if (needargs) { 12609 const char *s2 = s; 12610 while (isSPACE(*s2) && *s2 != '\n') 12611 s2++; 12612 if (*s2 == '{') { 12613 PL_expect = XTERMBLOCK; 12614 NEXTVAL_NEXTTOKE.ival = 0; 12615 force_next(KW_DO); 12616 } 12617 NEXTVAL_NEXTTOKE.ival = 0; 12618 force_next(FORMLBRACK); 12619 } 12620 if (!IN_BYTES) { 12621 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) 12622 SvUTF8_on(stuff); 12623 } 12624 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff); 12625 force_next(THING); 12626 } 12627 else { 12628 SvREFCNT_dec(stuff); 12629 if (eofmt) 12630 PL_lex_formbrack = 0; 12631 } 12632 return s; 12633} 12634 12635/* 12636=for apidoc start_subparse 12637 12638Set things up for parsing a subroutine. 12639 12640If C<is_format> is non-zero, the input is to be considered a format sub 12641(a specialised sub used to implement perl's C<format> feature); else a 12642normal C<sub>. 12643 12644C<flags> are added to the flags for C<PL_compcv>. C<flags> may include the 12645C<CVf_IsMETHOD> bit, which causes the new subroutine to be a method. 12646 12647This returns the value of C<PL_savestack_ix> that was in effect upon entry to 12648the function; 12649 12650=cut 12651*/ 12652 12653I32 12654Perl_start_subparse(pTHX_ I32 is_format, U32 flags) 12655{ 12656 const I32 oldsavestack_ix = PL_savestack_ix; 12657 CV* const outsidecv = PL_compcv; 12658 bool is_method = flags & CVf_IsMETHOD; 12659 12660 if (is_method) 12661 croak_kw_unless_class("method"); 12662 12663 SAVEI32(PL_subline); 12664 save_item(PL_subname); 12665 SAVESPTR(PL_compcv); 12666 12667 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); 12668 CvFLAGS(PL_compcv) |= flags; 12669 12670 PL_subline = CopLINE(PL_curcop); 12671 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); 12672 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); 12673 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; 12674 if (outsidecv && CvPADLIST(outsidecv)) 12675 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; 12676 if (is_method) 12677 class_prepare_method_parse(PL_compcv); 12678 12679 return oldsavestack_ix; 12680} 12681 12682/* If o represents a builtin attribute, apply it to cv and returns true. 12683 * Otherwise does nothing and returns false 12684 */ 12685 12686STATIC bool 12687S_apply_builtin_cv_attribute(pTHX_ CV *cv, OP *o) 12688{ 12689 assert(o->op_type == OP_CONST); 12690 SV *sv = cSVOPo_sv; 12691 STRLEN len = SvCUR(sv); 12692 12693 /* NOTE: any CV attrs applied here need to be part of 12694 the CVf_BUILTIN_ATTRS define in cv.h! */ 12695 12696 if(memEQs(SvPVX(sv), len, "lvalue")) 12697 CvLVALUE_on(cv); 12698 else if(memEQs(SvPVX(sv), len, "method")) 12699 CvNOWARN_AMBIGUOUS_on(cv); 12700 else if(memEQs(SvPVX(sv), len, "const")) { 12701 Perl_ck_warner_d(aTHX_ 12702 packWARN(WARN_EXPERIMENTAL__CONST_ATTR), 12703 ":const is experimental" 12704 ); 12705 CvANONCONST_on(cv); 12706 if (!CvANON(cv)) 12707 yyerror(":const is not permitted on named subroutines"); 12708 } 12709 else 12710 return false; 12711 12712 return true; 12713} 12714 12715/* 12716=for apidoc apply_builtin_cv_attributes 12717 12718Given an OP_LIST containing attribute definitions, filter it for known builtin 12719attributes to apply to the cv, returning a possibly-smaller list containing 12720just the remaining ones. 12721 12722=cut 12723*/ 12724 12725OP * 12726Perl_apply_builtin_cv_attributes(pTHX_ CV *cv, OP *attrlist) 12727{ 12728 PERL_ARGS_ASSERT_APPLY_BUILTIN_CV_ATTRIBUTES; 12729 12730 if(!attrlist) 12731 return attrlist; 12732 12733 if(attrlist->op_type != OP_LIST) { 12734 /* Not in fact a list but just a single attribute */ 12735 if(S_apply_builtin_cv_attribute(aTHX_ cv, attrlist)) { 12736 op_free(attrlist); 12737 return NULL; 12738 } 12739 12740 return attrlist; 12741 } 12742 12743 OP *prev = cLISTOPx(attrlist)->op_first; 12744 assert(prev->op_type == OP_PUSHMARK); 12745 OP *o = OpSIBLING(prev); 12746 12747 OP *next; 12748 for(; o; o = next) { 12749 next = OpSIBLING(o); 12750 12751 if(S_apply_builtin_cv_attribute(aTHX_ cv, o)) { 12752 op_sibling_splice(attrlist, prev, 1, NULL); 12753 op_free(o); 12754 } 12755 else { 12756 prev = o; 12757 } 12758 } 12759 12760 if(OpHAS_SIBLING(cLISTOPx(attrlist)->op_first)) 12761 return attrlist; 12762 12763 /* The list is now entirely empty, we might as well discard it */ 12764 op_free(attrlist); 12765 return NULL; 12766} 12767 12768 12769/* Do extra initialisation of a CV (typically one just created by 12770 * start_subparse()) if that CV is for a named sub 12771 */ 12772 12773void 12774Perl_init_named_cv(pTHX_ CV *cv, OP *nameop) 12775{ 12776 PERL_ARGS_ASSERT_INIT_NAMED_CV; 12777 12778 if (nameop->op_type == OP_CONST) { 12779 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv); 12780 if ( strEQ(name, "BEGIN") 12781 || strEQ(name, "END") 12782 || strEQ(name, "INIT") 12783 || strEQ(name, "CHECK") 12784 || strEQ(name, "UNITCHECK") 12785 ) 12786 CvSPECIAL_on(cv); 12787 } 12788 else 12789 /* State subs inside anonymous subs need to be 12790 clonable themselves. */ 12791 if ( CvANON(CvOUTSIDE(cv)) 12792 || CvCLONE(CvOUTSIDE(cv)) 12793 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST( 12794 CvOUTSIDE(cv) 12795 ))[nameop->op_targ]) 12796 ) 12797 CvCLONE_on(cv); 12798} 12799 12800 12801static int 12802S_yywarn(pTHX_ const char *const s, U32 flags) 12803{ 12804 PERL_ARGS_ASSERT_YYWARN; 12805 12806 PL_in_eval |= EVAL_WARNONLY; 12807 yyerror_pv(s, flags); 12808 return 0; 12809} 12810 12811void 12812Perl_abort_execution(pTHX_ SV* msg_sv, const char * const name) 12813{ 12814 PERL_ARGS_ASSERT_ABORT_EXECUTION; 12815 12816 if (msg_sv) { 12817 if (PL_minus_c) 12818 Perl_croak(aTHX_ "%" SVf "%s had compilation errors.\n", SVfARG(msg_sv), name); 12819 else { 12820 Perl_croak(aTHX_ 12821 "%" SVf "Execution of %s aborted due to compilation errors.\n", SVfARG(msg_sv), name); 12822 } 12823 } else { 12824 if (PL_minus_c) 12825 Perl_croak(aTHX_ "%s had compilation errors.\n", name); 12826 else { 12827 Perl_croak(aTHX_ 12828 "Execution of %s aborted due to compilation errors.\n", name); 12829 } 12830 } 12831 12832 NOT_REACHED; /* NOTREACHED */ 12833} 12834 12835void 12836Perl_yyquit(pTHX) 12837{ 12838 /* Called, after at least one error has been found, to abort the parse now, 12839 * instead of trying to forge ahead */ 12840 12841 yyerror_pvn(NULL, 0, 0); 12842} 12843 12844int 12845Perl_yyerror(pTHX_ const char *const s) 12846{ 12847 PERL_ARGS_ASSERT_YYERROR; 12848 int r = yyerror_pvn(s, strlen(s), 0); 12849 return r; 12850} 12851 12852int 12853Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) 12854{ 12855 PERL_ARGS_ASSERT_YYERROR_PV; 12856 int r = yyerror_pvn(s, strlen(s), flags); 12857 return r; 12858} 12859 12860int 12861Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) 12862{ 12863 const char *context = NULL; 12864 int contlen = -1; 12865 SV *msg; 12866 SV * const where_sv = newSVpvs_flags("", SVs_TEMP); 12867 int yychar = PL_parser->yychar; 12868 12869 /* Output error message 's' with length 'len'. 'flags' are SV flags that 12870 * apply. If the number of errors found is large enough, it abandons 12871 * parsing. If 's' is NULL, there is no message, and it abandons 12872 * processing unconditionally */ 12873 12874 if (s != NULL) { 12875 if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp)) 12876 sv_catpvs(where_sv, "at EOF"); 12877 else if ( PL_oldoldbufptr 12878 && PL_bufptr > PL_oldoldbufptr 12879 && PL_bufptr - PL_oldoldbufptr < 200 12880 && PL_oldoldbufptr != PL_oldbufptr 12881 && PL_oldbufptr != PL_bufptr) 12882 { 12883 while (isSPACE(*PL_oldoldbufptr)) 12884 PL_oldoldbufptr++; 12885 context = PL_oldoldbufptr; 12886 contlen = PL_bufptr - PL_oldoldbufptr; 12887 } 12888 else if ( PL_oldbufptr 12889 && PL_bufptr > PL_oldbufptr 12890 && PL_bufptr - PL_oldbufptr < 200 12891 && PL_oldbufptr != PL_bufptr) 12892 { 12893 while (isSPACE(*PL_oldbufptr)) 12894 PL_oldbufptr++; 12895 context = PL_oldbufptr; 12896 contlen = PL_bufptr - PL_oldbufptr; 12897 } 12898 else if (yychar > 255) 12899 sv_catpvs(where_sv, "next token ???"); 12900 else if (yychar == YYEMPTY) { 12901 if (PL_lex_state == LEX_NORMAL) 12902 sv_catpvs(where_sv, "at end of line"); 12903 else if (PL_lex_inpat) 12904 sv_catpvs(where_sv, "within pattern"); 12905 else 12906 sv_catpvs(where_sv, "within string"); 12907 } 12908 else { 12909 sv_catpvs(where_sv, "next char "); 12910 if (yychar < 32) 12911 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); 12912 else if (isPRINT_LC(yychar)) { 12913 const char string = yychar; 12914 sv_catpvn(where_sv, &string, 1); 12915 } 12916 else 12917 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); 12918 } 12919 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); 12920 Perl_sv_catpvf(aTHX_ msg, " at %s line %" LINE_Tf ", ", 12921 OutCopFILE(PL_curcop), 12922 (PL_parser->preambling == NOLINE 12923 ? CopLINE(PL_curcop) 12924 : PL_parser->preambling)); 12925 if (context) 12926 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n", 12927 UTF8fARG(UTF, contlen, context)); 12928 else 12929 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv)); 12930 if ( PL_multi_start < PL_multi_end 12931 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) 12932 { 12933 Perl_sv_catpvf(aTHX_ msg, 12934 " (Might be a runaway multi-line %c%c string starting on" 12935 " line %" LINE_Tf ")\n", 12936 (int)PL_multi_open,(int)PL_multi_close,(line_t)PL_multi_start); 12937 PL_multi_end = 0; 12938 } 12939 if (PL_in_eval & EVAL_WARNONLY) { 12940 PL_in_eval &= ~EVAL_WARNONLY; 12941 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg)); 12942 } 12943 else { 12944 qerror(msg); 12945 } 12946 } 12947 /* if there was no message then this is a yyquit(), which is actualy handled 12948 * by qerror() with a NULL argument */ 12949 if (s == NULL) 12950 qerror(NULL); 12951 12952 PL_in_my = 0; 12953 PL_in_my_stash = NULL; 12954 return 0; 12955} 12956 12957STATIC char* 12958S_swallow_bom(pTHX_ U8 *s) 12959{ 12960 const STRLEN slen = SvCUR(PL_linestr); 12961 12962 PERL_ARGS_ASSERT_SWALLOW_BOM; 12963 12964 switch (s[0]) { 12965 case 0xFF: 12966 if (s[1] == 0xFE) { 12967 /* UTF-16 little-endian? (or UTF-32LE?) */ 12968 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ 12969 /* diag_listed_as: Unsupported script encoding %s */ 12970 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); 12971#ifndef PERL_NO_UTF16_FILTER 12972#ifdef DEBUGGING 12973 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); 12974#endif 12975 s += 2; 12976 if (PL_bufend > (char*)s) { 12977 s = add_utf16_textfilter(s, TRUE); 12978 } 12979#else 12980 /* diag_listed_as: Unsupported script encoding %s */ 12981 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 12982#endif 12983 } 12984 break; 12985 case 0xFE: 12986 if (s[1] == 0xFF) { /* UTF-16 big-endian? */ 12987#ifndef PERL_NO_UTF16_FILTER 12988#ifdef DEBUGGING 12989 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); 12990#endif 12991 s += 2; 12992 if (PL_bufend > (char *)s) { 12993 s = add_utf16_textfilter(s, FALSE); 12994 } 12995#else 12996 /* diag_listed_as: Unsupported script encoding %s */ 12997 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 12998#endif 12999 } 13000 break; 13001 case BOM_UTF8_FIRST_BYTE: { 13002 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) { 13003#ifdef DEBUGGING 13004 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); 13005#endif 13006 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */ 13007 } 13008 break; 13009 } 13010 case 0: 13011 if (slen > 3) { 13012 if (s[1] == 0) { 13013 if (s[2] == 0xFE && s[3] == 0xFF) { 13014 /* UTF-32 big-endian */ 13015 /* diag_listed_as: Unsupported script encoding %s */ 13016 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); 13017 } 13018 } 13019 else if (s[2] == 0 && s[3] != 0) { 13020 /* Leading bytes 13021 * 00 xx 00 xx 13022 * are a good indicator of UTF-16BE. */ 13023#ifndef PERL_NO_UTF16_FILTER 13024#ifdef DEBUGGING 13025 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); 13026#endif 13027 s = add_utf16_textfilter(s, FALSE); 13028#else 13029 /* diag_listed_as: Unsupported script encoding %s */ 13030 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); 13031#endif 13032 } 13033 } 13034 break; 13035 13036 default: 13037 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { 13038 /* Leading bytes 13039 * xx 00 xx 00 13040 * are a good indicator of UTF-16LE. */ 13041#ifndef PERL_NO_UTF16_FILTER 13042#ifdef DEBUGGING 13043 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); 13044#endif 13045 s = add_utf16_textfilter(s, TRUE); 13046#else 13047 /* diag_listed_as: Unsupported script encoding %s */ 13048 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); 13049#endif 13050 } 13051 } 13052 return (char*)s; 13053} 13054 13055 13056#ifndef PERL_NO_UTF16_FILTER 13057static I32 13058S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) 13059{ 13060 SV *const filter = FILTER_DATA(idx); 13061 /* We re-use this each time round, throwing the contents away before we 13062 return. */ 13063 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); 13064 SV *const utf8_buffer = filter; 13065 IV status = IoPAGE(filter); 13066 const bool reverse = cBOOL(IoLINES(filter)); 13067 I32 retval; 13068 13069 PERL_ARGS_ASSERT_UTF16_TEXTFILTER; 13070 13071 /* As we're automatically added, at the lowest level, and hence only called 13072 from this file, we can be sure that we're not called in block mode. Hence 13073 don't bother writing code to deal with block mode. */ 13074 if (maxlen) { 13075 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); 13076 } 13077 if (status < 0) { 13078 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status); 13079 } 13080 DEBUG_P(PerlIO_printf(Perl_debug_log, 13081 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", 13082 FPTR2DPTR(void *, S_utf16_textfilter), 13083 reverse ? 'l' : 'b', idx, maxlen, status, 13084 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 13085 13086 while (1) { 13087 STRLEN chars; 13088 STRLEN have; 13089 Size_t newlen; 13090 U8 *end; 13091 /* First, look in our buffer of existing UTF-8 data: */ 13092 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); 13093 13094 if (nl) { 13095 ++nl; 13096 } else if (status == 0) { 13097 /* EOF */ 13098 IoPAGE(filter) = 0; 13099 nl = SvEND(utf8_buffer); 13100 } 13101 if (nl) { 13102 STRLEN got = nl - SvPVX(utf8_buffer); 13103 /* Did we have anything to append? */ 13104 retval = got != 0; 13105 sv_catpvn(sv, SvPVX(utf8_buffer), got); 13106 /* Everything else in this code works just fine if SVp_POK isn't 13107 set. This, however, needs it, and we need it to work, else 13108 we loop infinitely because the buffer is never consumed. */ 13109 sv_chop(utf8_buffer, nl); 13110 break; 13111 } 13112 13113 /* OK, not a complete line there, so need to read some more UTF-16. 13114 Read an extra octect if the buffer currently has an odd number. */ 13115 while (1) { 13116 if (status <= 0) 13117 break; 13118 if (SvCUR(utf16_buffer) >= 2) { 13119 /* Location of the high octet of the last complete code point. 13120 Gosh, UTF-16 is a pain. All the benefits of variable length, 13121 *coupled* with all the benefits of partial reads and 13122 endianness. */ 13123 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) 13124 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); 13125 13126 if (*last_hi < 0xd8 || *last_hi > 0xdb) { 13127 break; 13128 } 13129 13130 /* We have the first half of a surrogate. Read more. */ 13131 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); 13132 } 13133 13134 status = FILTER_READ(idx + 1, utf16_buffer, 13135 160 + (SvCUR(utf16_buffer) & 1)); 13136 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer))); 13137 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); 13138 if (status < 0) { 13139 /* Error */ 13140 IoPAGE(filter) = status; 13141 return status; 13142 } 13143 } 13144 13145 /* 'chars' isn't quite the right name, as code points above 0xFFFF 13146 * require 4 bytes per char */ 13147 chars = SvCUR(utf16_buffer) >> 1; 13148 have = SvCUR(utf8_buffer); 13149 13150 /* Assume the worst case size as noted by the functions: twice the 13151 * number of input bytes */ 13152 SvGROW(utf8_buffer, have + chars * 4 + 1); 13153 13154 if (reverse) { 13155 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), 13156 (U8*)SvPVX_const(utf8_buffer) + have, 13157 chars * 2, &newlen); 13158 } else { 13159 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), 13160 (U8*)SvPVX_const(utf8_buffer) + have, 13161 chars * 2, &newlen); 13162 } 13163 SvCUR_set(utf8_buffer, have + newlen); 13164 *end = '\0'; 13165 13166 /* No need to keep this SV "well-formed" with a '\0' after the end, as 13167 it's private to us, and utf16_to_utf8{,reversed} take a 13168 (pointer,length) pair, rather than a NUL-terminated string. */ 13169 if(SvCUR(utf16_buffer) & 1) { 13170 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; 13171 SvCUR_set(utf16_buffer, 1); 13172 } else { 13173 SvCUR_set(utf16_buffer, 0); 13174 } 13175 } 13176 DEBUG_P(PerlIO_printf(Perl_debug_log, 13177 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", 13178 status, 13179 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); 13180 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); 13181 return retval; 13182} 13183 13184static U8 * 13185S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) 13186{ 13187 SV *filter = filter_add(S_utf16_textfilter, NULL); 13188 13189 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER; 13190 13191 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); 13192 SvPVCLEAR(filter); 13193 IoLINES(filter) = reversed; 13194 IoPAGE(filter) = 1; /* Not EOF */ 13195 13196 /* Sadly, we have to return a valid pointer, come what may, so we have to 13197 ignore any error return from this. */ 13198 SvCUR_set(PL_linestr, 0); 13199 if (FILTER_READ(0, PL_linestr, 0)) { 13200 SvUTF8_on(PL_linestr); 13201 } else { 13202 SvUTF8_on(PL_linestr); 13203 } 13204 PL_bufend = SvEND(PL_linestr); 13205 return (U8*)SvPVX(PL_linestr); 13206} 13207#endif 13208 13209/* 13210=for apidoc scan_vstring 13211 13212Returns a pointer to the next character after the parsed 13213vstring, as well as updating the passed in sv. 13214 13215Function must be called like 13216 13217 sv = sv_2mortal(newSV(5)); 13218 s = scan_vstring(s,e,sv); 13219 13220where s and e are the start and end of the string. 13221The sv should already be large enough to store the vstring 13222passed in, for performance reasons. 13223 13224This function may croak if fatal warnings are enabled in the 13225calling scope, hence the sv_2mortal in the example (to prevent 13226a leak). Make sure to do SvREFCNT_inc afterwards if you use 13227sv_2mortal. 13228 13229=cut 13230*/ 13231 13232char * 13233Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) 13234{ 13235 const char *pos = s; 13236 const char *start = s; 13237 13238 PERL_ARGS_ASSERT_SCAN_VSTRING; 13239 13240 if (*pos == 'v') pos++; /* get past 'v' */ 13241 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 13242 pos++; 13243 if ( *pos != '.') { 13244 /* this may not be a v-string if followed by => */ 13245 const char *next = pos; 13246 while (next < e && isSPACE(*next)) 13247 ++next; 13248 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { 13249 /* return string not v-string */ 13250 sv_setpvn(sv,(char *)s,pos-s); 13251 return (char *)pos; 13252 } 13253 } 13254 13255 if (!isALPHA(*pos)) { 13256 U8 tmpbuf[UTF8_MAXBYTES+1]; 13257 13258 if (*s == 'v') 13259 s++; /* get past 'v' */ 13260 13261 SvPVCLEAR(sv); 13262 13263 for (;;) { 13264 /* this is atoi() that tolerates underscores */ 13265 U8 *tmpend; 13266 UV rev = 0; 13267 const char *end = pos; 13268 UV mult = 1; 13269 while (--end >= s) { 13270 if (*end != '_') { 13271 const UV orev = rev; 13272 rev += (*end - '0') * mult; 13273 mult *= 10; 13274 if (orev > rev) 13275 /* diag_listed_as: Integer overflow in %s number */ 13276 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 13277 "Integer overflow in decimal number"); 13278 } 13279 } 13280 13281 /* Append native character for the rev point */ 13282 tmpend = uvchr_to_utf8(tmpbuf, rev); 13283 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); 13284 if (!UVCHR_IS_INVARIANT(rev)) 13285 SvUTF8_on(sv); 13286 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) 13287 s = ++pos; 13288 else { 13289 s = pos; 13290 break; 13291 } 13292 while (pos < e && (isDIGIT(*pos) || *pos == '_')) 13293 pos++; 13294 } 13295 SvPOK_on(sv); 13296 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); 13297 SvRMAGICAL_on(sv); 13298 } 13299 return (char *)s; 13300} 13301 13302int 13303Perl_keyword_plugin_standard(pTHX_ 13304 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 13305{ 13306 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; 13307 PERL_UNUSED_CONTEXT; 13308 PERL_UNUSED_ARG(keyword_ptr); 13309 PERL_UNUSED_ARG(keyword_len); 13310 PERL_UNUSED_ARG(op_ptr); 13311 return KEYWORD_PLUGIN_DECLINE; 13312} 13313 13314STRLEN 13315Perl_infix_plugin_standard(pTHX_ 13316 char *operator_ptr, STRLEN operator_len, struct Perl_custom_infix **def) 13317{ 13318 PERL_ARGS_ASSERT_INFIX_PLUGIN_STANDARD; 13319 PERL_UNUSED_CONTEXT; 13320 PERL_UNUSED_ARG(operator_ptr); 13321 PERL_UNUSED_ARG(operator_len); 13322 PERL_UNUSED_ARG(def); 13323 return 0; 13324} 13325 13326/* 13327=for apidoc_section $lexer 13328=for apidoc wrap_keyword_plugin 13329 13330Puts a C function into the chain of keyword plugins. This is the 13331preferred way to manipulate the L</PL_keyword_plugin> variable. 13332C<new_plugin> is a pointer to the C function that is to be added to the 13333keyword plugin chain, and C<old_plugin_p> points to the storage location 13334where a pointer to the next function in the chain will be stored. The 13335value of C<new_plugin> is written into the L</PL_keyword_plugin> variable, 13336while the value previously stored there is written to C<*old_plugin_p>. 13337 13338L</PL_keyword_plugin> is global to an entire process, and a module wishing 13339to hook keyword parsing may find itself invoked more than once per 13340process, typically in different threads. To handle that situation, this 13341function is idempotent. The location C<*old_plugin_p> must initially 13342(once per process) contain a null pointer. A C variable of static 13343duration (declared at file scope, typically also marked C<static> to give 13344it internal linkage) will be implicitly initialised appropriately, if it 13345does not have an explicit initialiser. This function will only actually 13346modify the plugin chain if it finds C<*old_plugin_p> to be null. This 13347function is also thread safe on the small scale. It uses appropriate 13348locking to avoid race conditions in accessing L</PL_keyword_plugin>. 13349 13350When this function is called, the function referenced by C<new_plugin> 13351must be ready to be called, except for C<*old_plugin_p> being unfilled. 13352In a threading situation, C<new_plugin> may be called immediately, even 13353before this function has returned. C<*old_plugin_p> will always be 13354appropriately set before C<new_plugin> is called. If C<new_plugin> 13355decides not to do anything special with the identifier that it is given 13356(which is the usual case for most calls to a keyword plugin), it must 13357chain the plugin function referenced by C<*old_plugin_p>. 13358 13359Taken all together, XS code to install a keyword plugin should typically 13360look something like this: 13361 13362 static Perl_keyword_plugin_t next_keyword_plugin; 13363 static OP *my_keyword_plugin(pTHX_ 13364 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) 13365 { 13366 if (memEQs(keyword_ptr, keyword_len, 13367 "my_new_keyword")) { 13368 ... 13369 } else { 13370 return next_keyword_plugin(aTHX_ 13371 keyword_ptr, keyword_len, op_ptr); 13372 } 13373 } 13374 BOOT: 13375 wrap_keyword_plugin(my_keyword_plugin, 13376 &next_keyword_plugin); 13377 13378Direct access to L</PL_keyword_plugin> should be avoided. 13379 13380=cut 13381*/ 13382 13383void 13384Perl_wrap_keyword_plugin(pTHX_ 13385 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p) 13386{ 13387 13388 PERL_UNUSED_CONTEXT; 13389 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN; 13390 if (*old_plugin_p) return; 13391 KEYWORD_PLUGIN_MUTEX_LOCK; 13392 if (!*old_plugin_p) { 13393 *old_plugin_p = PL_keyword_plugin; 13394 PL_keyword_plugin = new_plugin; 13395 } 13396 KEYWORD_PLUGIN_MUTEX_UNLOCK; 13397} 13398 13399/* 13400=for apidoc wrap_infix_plugin 13401 13402B<NOTE:> This API exists entirely for the purpose of making the CPAN module 13403C<XS::Parse::Infix> work. It is not expected that additional modules will make 13404use of it; rather, that they should use C<XS::Parse::Infix> to provide parsing 13405of new infix operators. 13406 13407Puts a C function into the chain of infix plugins. This is the preferred 13408way to manipulate the L</PL_infix_plugin> variable. C<new_plugin> is a 13409pointer to the C function that is to be added to the infix plugin chain, and 13410C<old_plugin_p> points to a storage location where a pointer to the next 13411function in the chain will be stored. The value of C<new_plugin> is written 13412into the L</PL_infix_plugin> variable, while the value previously stored there 13413is written to C<*old_plugin_p>. 13414 13415Direct access to L</PL_infix_plugin> should be avoided. 13416 13417=cut 13418*/ 13419 13420void 13421Perl_wrap_infix_plugin(pTHX_ 13422 Perl_infix_plugin_t new_plugin, Perl_infix_plugin_t *old_plugin_p) 13423{ 13424 13425 PERL_UNUSED_CONTEXT; 13426 PERL_ARGS_ASSERT_WRAP_INFIX_PLUGIN; 13427 if (*old_plugin_p) return; 13428 /* We use the same mutex as for PL_keyword_plugin as it's so rare either 13429 * of them is actually updated; no need for a dedicated one each */ 13430 KEYWORD_PLUGIN_MUTEX_LOCK; 13431 if (!*old_plugin_p) { 13432 *old_plugin_p = PL_infix_plugin; 13433 PL_infix_plugin = new_plugin; 13434 } 13435 KEYWORD_PLUGIN_MUTEX_UNLOCK; 13436} 13437 13438#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) 13439static void 13440S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof) 13441{ 13442 SAVEI32(PL_lex_brackets); 13443 if (PL_lex_brackets > 100) 13444 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); 13445 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; 13446 SAVEI32(PL_lex_allbrackets); 13447 PL_lex_allbrackets = 0; 13448 SAVEI8(PL_lex_fakeeof); 13449 PL_lex_fakeeof = (U8)fakeeof; 13450 if(yyparse(gramtype) && !PL_parser->error_count) 13451 qerror(Perl_mess(aTHX_ "Parse error")); 13452} 13453 13454#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p) 13455static OP * 13456S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof) 13457{ 13458 OP *o; 13459 ENTER; 13460 SAVEVPTR(PL_eval_root); 13461 PL_eval_root = NULL; 13462 parse_recdescent(gramtype, fakeeof); 13463 o = PL_eval_root; 13464 LEAVE; 13465 return o; 13466} 13467 13468#define parse_expr(p,f) S_parse_expr(aTHX_ p,f) 13469static OP * 13470S_parse_expr(pTHX_ I32 fakeeof, U32 flags) 13471{ 13472 OP *exprop; 13473 if (flags & ~PARSE_OPTIONAL) 13474 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); 13475 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof); 13476 if (!exprop && !(flags & PARSE_OPTIONAL)) { 13477 if (!PL_parser->error_count) 13478 qerror(Perl_mess(aTHX_ "Parse error")); 13479 exprop = newOP(OP_NULL, 0); 13480 } 13481 return exprop; 13482} 13483 13484/* 13485=for apidoc parse_arithexpr 13486 13487Parse a Perl arithmetic expression. This may contain operators of precedence 13488down to the bit shift operators. The expression must be followed (and thus 13489terminated) either by a comparison or lower-precedence operator or by 13490something that would normally terminate an expression such as semicolon. 13491If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 13492otherwise it is mandatory. It is up to the caller to ensure that the 13493dynamic parser state (L</PL_parser> et al) is correctly set to reflect 13494the source of the code to be parsed and the lexical context for the 13495expression. 13496 13497The op tree representing the expression is returned. If an optional 13498expression is absent, a null pointer is returned, otherwise the pointer 13499will be non-null. 13500 13501If an error occurs in parsing or compilation, in most cases a valid op 13502tree is returned anyway. The error is reflected in the parser state, 13503normally resulting in a single exception at the top level of parsing 13504which covers all the compilation errors that occurred. Some compilation 13505errors, however, will throw an exception immediately. 13506 13507=for apidoc Amnh||PARSE_OPTIONAL 13508 13509=cut 13510 13511*/ 13512 13513OP * 13514Perl_parse_arithexpr(pTHX_ U32 flags) 13515{ 13516 return parse_expr(LEX_FAKEEOF_COMPARE, flags); 13517} 13518 13519/* 13520=for apidoc parse_termexpr 13521 13522Parse a Perl term expression. This may contain operators of precedence 13523down to the assignment operators. The expression must be followed (and thus 13524terminated) either by a comma or lower-precedence operator or by 13525something that would normally terminate an expression such as semicolon. 13526If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 13527otherwise it is mandatory. It is up to the caller to ensure that the 13528dynamic parser state (L</PL_parser> et al) is correctly set to reflect 13529the source of the code to be parsed and the lexical context for the 13530expression. 13531 13532The op tree representing the expression is returned. If an optional 13533expression is absent, a null pointer is returned, otherwise the pointer 13534will be non-null. 13535 13536If an error occurs in parsing or compilation, in most cases a valid op 13537tree is returned anyway. The error is reflected in the parser state, 13538normally resulting in a single exception at the top level of parsing 13539which covers all the compilation errors that occurred. Some compilation 13540errors, however, will throw an exception immediately. 13541 13542=cut 13543*/ 13544 13545OP * 13546Perl_parse_termexpr(pTHX_ U32 flags) 13547{ 13548 return parse_expr(LEX_FAKEEOF_COMMA, flags); 13549} 13550 13551/* 13552=for apidoc parse_listexpr 13553 13554Parse a Perl list expression. This may contain operators of precedence 13555down to the comma operator. The expression must be followed (and thus 13556terminated) either by a low-precedence logic operator such as C<or> or by 13557something that would normally terminate an expression such as semicolon. 13558If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional, 13559otherwise it is mandatory. It is up to the caller to ensure that the 13560dynamic parser state (L</PL_parser> et al) is correctly set to reflect 13561the source of the code to be parsed and the lexical context for the 13562expression. 13563 13564The op tree representing the expression is returned. If an optional 13565expression is absent, a null pointer is returned, otherwise the pointer 13566will be non-null. 13567 13568If an error occurs in parsing or compilation, in most cases a valid op 13569tree is returned anyway. The error is reflected in the parser state, 13570normally resulting in a single exception at the top level of parsing 13571which covers all the compilation errors that occurred. Some compilation 13572errors, however, will throw an exception immediately. 13573 13574=cut 13575*/ 13576 13577OP * 13578Perl_parse_listexpr(pTHX_ U32 flags) 13579{ 13580 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags); 13581} 13582 13583/* 13584=for apidoc parse_fullexpr 13585 13586Parse a single complete Perl expression. This allows the full 13587expression grammar, including the lowest-precedence operators such 13588as C<or>. The expression must be followed (and thus terminated) by a 13589token that an expression would normally be terminated by: end-of-file, 13590closing bracketing punctuation, semicolon, or one of the keywords that 13591signals a postfix expression-statement modifier. If C<flags> has the 13592C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is 13593mandatory. It is up to the caller to ensure that the dynamic parser 13594state (L</PL_parser> et al) is correctly set to reflect the source of 13595the code to be parsed and the lexical context for the expression. 13596 13597The op tree representing the expression is returned. If an optional 13598expression is absent, a null pointer is returned, otherwise the pointer 13599will be non-null. 13600 13601If an error occurs in parsing or compilation, in most cases a valid op 13602tree is returned anyway. The error is reflected in the parser state, 13603normally resulting in a single exception at the top level of parsing 13604which covers all the compilation errors that occurred. Some compilation 13605errors, however, will throw an exception immediately. 13606 13607=cut 13608*/ 13609 13610OP * 13611Perl_parse_fullexpr(pTHX_ U32 flags) 13612{ 13613 return parse_expr(LEX_FAKEEOF_NONEXPR, flags); 13614} 13615 13616/* 13617=for apidoc parse_block 13618 13619Parse a single complete Perl code block. This consists of an opening 13620brace, a sequence of statements, and a closing brace. The block 13621constitutes a lexical scope, so C<my> variables and various compile-time 13622effects can be contained within it. It is up to the caller to ensure 13623that the dynamic parser state (L</PL_parser> et al) is correctly set to 13624reflect the source of the code to be parsed and the lexical context for 13625the statement. 13626 13627The op tree representing the code block is returned. This is always a 13628real op, never a null pointer. It will normally be a C<lineseq> list, 13629including C<nextstate> or equivalent ops. No ops to construct any kind 13630of runtime scope are included by virtue of it being a block. 13631 13632If an error occurs in parsing or compilation, in most cases a valid op 13633tree (most likely null) is returned anyway. The error is reflected in 13634the parser state, normally resulting in a single exception at the top 13635level of parsing which covers all the compilation errors that occurred. 13636Some compilation errors, however, will throw an exception immediately. 13637 13638The C<flags> parameter is reserved for future use, and must always 13639be zero. 13640 13641=cut 13642*/ 13643 13644OP * 13645Perl_parse_block(pTHX_ U32 flags) 13646{ 13647 if (flags) 13648 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); 13649 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER); 13650} 13651 13652/* 13653=for apidoc parse_barestmt 13654 13655Parse a single unadorned Perl statement. This may be a normal imperative 13656statement or a declaration that has compile-time effect. It does not 13657include any label or other affixture. It is up to the caller to ensure 13658that the dynamic parser state (L</PL_parser> et al) is correctly set to 13659reflect the source of the code to be parsed and the lexical context for 13660the statement. 13661 13662The op tree representing the statement is returned. This may be a 13663null pointer if the statement is null, for example if it was actually 13664a subroutine definition (which has compile-time side effects). If not 13665null, it will be ops directly implementing the statement, suitable to 13666pass to L</newSTATEOP>. It will not normally include a C<nextstate> or 13667equivalent op (except for those embedded in a scope contained entirely 13668within the statement). 13669 13670If an error occurs in parsing or compilation, in most cases a valid op 13671tree (most likely null) is returned anyway. The error is reflected in 13672the parser state, normally resulting in a single exception at the top 13673level of parsing which covers all the compilation errors that occurred. 13674Some compilation errors, however, will throw an exception immediately. 13675 13676The C<flags> parameter is reserved for future use, and must always 13677be zero. 13678 13679=cut 13680*/ 13681 13682OP * 13683Perl_parse_barestmt(pTHX_ U32 flags) 13684{ 13685 if (flags) 13686 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); 13687 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER); 13688} 13689 13690/* 13691=for apidoc parse_label 13692 13693Parse a single label, possibly optional, of the type that may prefix a 13694Perl statement. It is up to the caller to ensure that the dynamic parser 13695state (L</PL_parser> et al) is correctly set to reflect the source of 13696the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the 13697label is optional, otherwise it is mandatory. 13698 13699The name of the label is returned in the form of a fresh scalar. If an 13700optional label is absent, a null pointer is returned. 13701 13702If an error occurs in parsing, which can only occur if the label is 13703mandatory, a valid label is returned anyway. The error is reflected in 13704the parser state, normally resulting in a single exception at the top 13705level of parsing which covers all the compilation errors that occurred. 13706 13707=cut 13708*/ 13709 13710SV * 13711Perl_parse_label(pTHX_ U32 flags) 13712{ 13713 if (flags & ~PARSE_OPTIONAL) 13714 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); 13715 if (PL_nexttoke) { 13716 PL_parser->yychar = yylex(); 13717 if (PL_parser->yychar == LABEL) { 13718 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv; 13719 PL_parser->yychar = YYEMPTY; 13720 cSVOPx(pl_yylval.opval)->op_sv = NULL; 13721 op_free(pl_yylval.opval); 13722 return labelsv; 13723 } else { 13724 yyunlex(); 13725 goto no_label; 13726 } 13727 } else { 13728 char *s, *t; 13729 STRLEN wlen, bufptr_pos; 13730 lex_read_space(0); 13731 t = s = PL_bufptr; 13732 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) 13733 goto no_label; 13734 t = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen, FALSE); 13735 if (word_takes_any_delimiter(s, wlen)) 13736 goto no_label; 13737 bufptr_pos = s - SvPVX(PL_linestr); 13738 PL_bufptr = t; 13739 lex_read_space(LEX_KEEP_PREVIOUS); 13740 t = PL_bufptr; 13741 s = SvPVX(PL_linestr) + bufptr_pos; 13742 if (t[0] == ':' && t[1] != ':') { 13743 PL_oldoldbufptr = PL_oldbufptr; 13744 PL_oldbufptr = s; 13745 PL_bufptr = t+1; 13746 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); 13747 } else { 13748 PL_bufptr = s; 13749 no_label: 13750 if (flags & PARSE_OPTIONAL) { 13751 return NULL; 13752 } else { 13753 qerror(Perl_mess(aTHX_ "Parse error")); 13754 return newSVpvs("x"); 13755 } 13756 } 13757 } 13758} 13759 13760/* 13761=for apidoc parse_fullstmt 13762 13763Parse a single complete Perl statement. This may be a normal imperative 13764statement or a declaration that has compile-time effect, and may include 13765optional labels. It is up to the caller to ensure that the dynamic 13766parser state (L</PL_parser> et al) is correctly set to reflect the source 13767of the code to be parsed and the lexical context for the statement. 13768 13769The op tree representing the statement is returned. This may be a 13770null pointer if the statement is null, for example if it was actually 13771a subroutine definition (which has compile-time side effects). If not 13772null, it will be the result of a L</newSTATEOP> call, normally including 13773a C<nextstate> or equivalent op. 13774 13775If an error occurs in parsing or compilation, in most cases a valid op 13776tree (most likely null) is returned anyway. The error is reflected in 13777the parser state, normally resulting in a single exception at the top 13778level of parsing which covers all the compilation errors that occurred. 13779Some compilation errors, however, will throw an exception immediately. 13780 13781The C<flags> parameter is reserved for future use, and must always 13782be zero. 13783 13784=cut 13785*/ 13786 13787OP * 13788Perl_parse_fullstmt(pTHX_ U32 flags) 13789{ 13790 if (flags) 13791 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); 13792 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER); 13793} 13794 13795/* 13796=for apidoc parse_stmtseq 13797 13798Parse a sequence of zero or more Perl statements. These may be normal 13799imperative statements, including optional labels, or declarations 13800that have compile-time effect, or any mixture thereof. The statement 13801sequence ends when a closing brace or end-of-file is encountered in a 13802place where a new statement could have validly started. It is up to 13803the caller to ensure that the dynamic parser state (L</PL_parser> et al) 13804is correctly set to reflect the source of the code to be parsed and the 13805lexical context for the statements. 13806 13807The op tree representing the statement sequence is returned. This may 13808be a null pointer if the statements were all null, for example if there 13809were no statements or if there were only subroutine definitions (which 13810have compile-time side effects). If not null, it will be a C<lineseq> 13811list, normally including C<nextstate> or equivalent ops. 13812 13813If an error occurs in parsing or compilation, in most cases a valid op 13814tree is returned anyway. The error is reflected in the parser state, 13815normally resulting in a single exception at the top level of parsing 13816which covers all the compilation errors that occurred. Some compilation 13817errors, however, will throw an exception immediately. 13818 13819The C<flags> parameter is reserved for future use, and must always 13820be zero. 13821 13822=cut 13823*/ 13824 13825OP * 13826Perl_parse_stmtseq(pTHX_ U32 flags) 13827{ 13828 OP *stmtseqop; 13829 I32 c; 13830 if (flags) 13831 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); 13832 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING); 13833 c = lex_peek_unichar(0); 13834 if (c != -1 && c != /*{*/'}') 13835 qerror(Perl_mess(aTHX_ "Parse error")); 13836 return stmtseqop; 13837} 13838 13839/* 13840=for apidoc parse_subsignature 13841 13842Parse a subroutine signature declaration. This is the contents of the 13843parentheses following a named or anonymous subroutine declaration when the 13844C<signatures> feature is enabled. Note that this function neither expects 13845nor consumes the opening and closing parentheses around the signature; it 13846is the caller's job to handle these. 13847 13848This function must only be called during parsing of a subroutine; after 13849L</start_subparse> has been called. It might allocate lexical variables on 13850the pad for the current subroutine. 13851 13852The op tree to unpack the arguments from the stack at runtime is returned. 13853This op tree should appear at the beginning of the compiled function. The 13854caller may wish to use L</op_append_list> to build their function body 13855after it, or splice it together with the body before calling L</newATTRSUB>. 13856 13857The C<flags> parameter is reserved for future use, and must always 13858be zero. 13859 13860=cut 13861*/ 13862 13863OP * 13864Perl_parse_subsignature(pTHX_ U32 flags) 13865{ 13866 if (flags) 13867 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature"); 13868 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR); 13869} 13870 13871/* 13872 * ex: set ts=8 sts=4 sw=4 et: 13873 */ 13874