1/* peep.c 2 * 3 * Copyright (C) 1991-2022 by Larry Wall and others 4 * 5 * You may distribute under the terms of either the GNU General Public 6 * License or the Artistic License, as specified in the README file. 7 * 8 */ 9 10/* 11 * Aragorn sped on up the hill. Every now and again he bent to the ground. 12 * Hobbits go light, and their footprints are not easy even for a Ranger to 13 * read, but not far from the top a spring crossed the path, and in the wet 14 * earth he saw what he was seeking. 15 * 'I read the signs aright,' he said to himself. 'Frodo ran to the hill-top. 16 * I wonder what he saw there? But he returned by the same way, and went down 17 * the hill again.' 18 */ 19 20/* This file contains functions for optimizing and finalizing the OP 21 * structures that hold a compiled perl program 22 */ 23 24#include "EXTERN.h" 25#define PERL_IN_PEEP_C 26#include "perl.h" 27 28 29#define CALL_RPEEP(o) PL_rpeepp(aTHX_ o) 30 31 32static void 33S_scalar_slice_warning(pTHX_ const OP *o) 34{ 35 OP *kid; 36 const bool is_hash = o->op_type == OP_HSLICE 37 || (o->op_type == OP_NULL && o->op_targ == OP_HSLICE); 38 SV *name; 39 40 if (!(o->op_private & OPpSLICEWARNING)) 41 return; 42 if (PL_parser && PL_parser->error_count) 43 /* This warning can be nonsensical when there is a syntax error. */ 44 return; 45 46 kid = cLISTOPo->op_first; 47 kid = OpSIBLING(kid); /* get past pushmark */ 48 /* weed out false positives: any ops that can return lists */ 49 switch (kid->op_type) { 50 case OP_BACKTICK: 51 case OP_GLOB: 52 case OP_READLINE: 53 case OP_MATCH: 54 case OP_RV2AV: 55 case OP_EACH: 56 case OP_VALUES: 57 case OP_KEYS: 58 case OP_SPLIT: 59 case OP_LIST: 60 case OP_SORT: 61 case OP_REVERSE: 62 case OP_ENTERSUB: 63 case OP_CALLER: 64 case OP_LSTAT: 65 case OP_STAT: 66 case OP_READDIR: 67 case OP_SYSTEM: 68 case OP_TMS: 69 case OP_LOCALTIME: 70 case OP_GMTIME: 71 case OP_ENTEREVAL: 72 return; 73 } 74 75 /* Don't warn if we have a nulled list either. */ 76 if (kid->op_type == OP_NULL && kid->op_targ == OP_LIST) 77 return; 78 79 assert(OpSIBLING(kid)); 80 name = op_varname(OpSIBLING(kid)); 81 if (!name) /* XS module fiddling with the op tree */ 82 return; 83 warn_elem_scalar_context(kid, name, is_hash, true); 84} 85 86 87/* info returned by S_sprintf_is_multiconcatable() */ 88 89struct sprintf_ismc_info { 90 SSize_t nargs; /* num of args to sprintf (not including the format) */ 91 char *start; /* start of raw format string */ 92 char *end; /* bytes after end of raw format string */ 93 STRLEN total_len; /* total length (in bytes) of format string, not 94 including '%s' and half of '%%' */ 95 STRLEN variant; /* number of bytes by which total_len_p would grow 96 if upgraded to utf8 */ 97 bool utf8; /* whether the format is utf8 */ 98}; 99 100/* is the OP_SPRINTF o suitable for converting into a multiconcat op? 101 * i.e. its format argument is a const string with only '%s' and '%%' 102 * formats, and the number of args is known, e.g. 103 * sprintf "a=%s f=%s", $a[0], scalar(f()); 104 * but not 105 * sprintf "i=%d a=%s f=%s", $i, @a, f(); 106 * 107 * If successful, the sprintf_ismc_info struct pointed to by info will be 108 * populated. 109 */ 110 111STATIC bool 112S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info) 113{ 114 OP *pm, *constop, *kid; 115 SV *sv; 116 char *s, *e, *p; 117 SSize_t nargs, nformats; 118 STRLEN cur, total_len, variant; 119 bool utf8; 120 121 /* if sprintf's behaviour changes, die here so that someone 122 * can decide whether to enhance this function or skip optimising 123 * under those new circumstances */ 124 assert(!(o->op_flags & OPf_STACKED)); 125 assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX)); 126 assert(!(o->op_private & ~OPpARG4_MASK)); 127 128 pm = cUNOPo->op_first; 129 if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */ 130 return FALSE; 131 constop = OpSIBLING(pm); 132 if (!constop || constop->op_type != OP_CONST) 133 return FALSE; 134 sv = cSVOPx_sv(constop); 135 if (SvMAGICAL(sv) || !SvPOK(sv)) 136 return FALSE; 137 138 s = SvPV(sv, cur); 139 e = s + cur; 140 141 /* Scan format for %% and %s and work out how many %s there are. 142 * Abandon if other format types are found. 143 */ 144 145 nformats = 0; 146 total_len = 0; 147 variant = 0; 148 149 for (p = s; p < e; p++) { 150 if (*p != '%') { 151 total_len++; 152 if (!UTF8_IS_INVARIANT(*p)) 153 variant++; 154 continue; 155 } 156 p++; 157 if (p >= e) 158 return FALSE; /* lone % at end gives "Invalid conversion" */ 159 if (*p == '%') 160 total_len++; 161 else if (*p == 's') 162 nformats++; 163 else 164 return FALSE; 165 } 166 167 if (!nformats || nformats > PERL_MULTICONCAT_MAXARG) 168 return FALSE; 169 170 utf8 = cBOOL(SvUTF8(sv)); 171 if (utf8) 172 variant = 0; 173 174 /* scan args; they must all be in scalar cxt */ 175 176 nargs = 0; 177 kid = OpSIBLING(constop); 178 179 while (kid) { 180 if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR) 181 return FALSE; 182 nargs++; 183 kid = OpSIBLING(kid); 184 } 185 186 if (nargs != nformats) 187 return FALSE; /* e.g. sprintf("%s%s", $a); */ 188 189 190 info->nargs = nargs; 191 info->start = s; 192 info->end = e; 193 info->total_len = total_len; 194 info->variant = variant; 195 info->utf8 = utf8; 196 197 return TRUE; 198} 199 200/* S_maybe_multiconcat(): 201 * 202 * given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly 203 * convert it (and its children) into an OP_MULTICONCAT. See the code 204 * comments just before pp_multiconcat() for the full details of what 205 * OP_MULTICONCAT supports. 206 * 207 * Basically we're looking for an optree with a chain of OP_CONCATS down 208 * the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or 209 * OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g. 210 * 211 * $x = "$a$b-$c" 212 * 213 * looks like 214 * 215 * SASSIGN 216 * | 217 * STRINGIFY -- PADSV[$x] 218 * | 219 * | 220 * ex-PUSHMARK -- CONCAT/S 221 * | 222 * CONCAT/S -- PADSV[$d] 223 * | 224 * CONCAT -- CONST["-"] 225 * | 226 * PADSV[$a] -- PADSV[$b] 227 * 228 * Note that at this stage the OP_SASSIGN may have already been optimised 229 * away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT. 230 */ 231 232STATIC void 233S_maybe_multiconcat(pTHX_ OP *o) 234{ 235 OP *lastkidop; /* the right-most of any kids unshifted onto o */ 236 OP *topop; /* the top-most op in the concat tree (often equals o, 237 unless there are assign/stringify ops above it */ 238 OP *parentop; /* the parent op of topop (or itself if no parent) */ 239 OP *targmyop; /* the op (if any) with the OPpTARGET_MY flag */ 240 OP *targetop; /* the op corresponding to target=... or target.=... */ 241 OP *stringop; /* the OP_STRINGIFY op, if any */ 242 OP *nextop; /* used for recreating the op_next chain without consts */ 243 OP *kid; /* general-purpose op pointer */ 244 UNOP_AUX_item *aux; 245 UNOP_AUX_item *lenp; 246 char *const_str, *p; 247 struct sprintf_ismc_info sprintf_info; 248 249 /* store info about each arg in args[]; 250 * toparg is the highest used slot; argp is a general 251 * pointer to args[] slots */ 252 struct { 253 void *p; /* initially points to const sv (or null for op); 254 later, set to SvPV(constsv), with ... */ 255 STRLEN len; /* ... len set to SvPV(..., len) */ 256 } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1]; 257 258 SSize_t nargs = 0; 259 SSize_t nconst = 0; 260 SSize_t nadjconst = 0; /* adjacent consts - may be demoted to args */ 261 STRLEN variant; 262 bool utf8 = FALSE; 263 bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op; 264 the last-processed arg will the LHS of one, 265 as args are processed in reverse order */ 266 U8 stacked_last = 0; /* whether the last seen concat op was STACKED */ 267 STRLEN total_len = 0; /* sum of the lengths of the const segments */ 268 U8 flags = 0; /* what will become the op_flags and ... */ 269 U8 private_flags = 0; /* ... op_private of the multiconcat op */ 270 bool is_sprintf = FALSE; /* we're optimising an sprintf */ 271 bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */ 272 bool prev_was_const = FALSE; /* previous arg was a const */ 273 274 /* ----------------------------------------------------------------- 275 * Phase 1: 276 * 277 * Examine the optree non-destructively to determine whether it's 278 * suitable to be converted into an OP_MULTICONCAT. Accumulate 279 * information about the optree in args[]. 280 */ 281 282 argp = args; 283 targmyop = NULL; 284 targetop = NULL; 285 stringop = NULL; 286 topop = o; 287 parentop = o; 288 289 assert( o->op_type == OP_SASSIGN 290 || o->op_type == OP_CONCAT 291 || o->op_type == OP_SPRINTF 292 || o->op_type == OP_STRINGIFY); 293 294 Zero(&sprintf_info, 1, struct sprintf_ismc_info); 295 296 /* first see if, at the top of the tree, there is an assign, 297 * append and/or stringify */ 298 299 if (topop->op_type == OP_SASSIGN) { 300 /* expr = ..... */ 301 if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN]) 302 return; 303 if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV)) 304 return; 305 assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */ 306 307 parentop = topop; 308 topop = cBINOPo->op_first; 309 targetop = OpSIBLING(topop); 310 if (!targetop) /* probably some sort of syntax error */ 311 return; 312 313 /* don't optimise away assign in 'local $foo = ....' */ 314 if ( (targetop->op_private & OPpLVAL_INTRO) 315 /* these are the common ops which do 'local', but 316 * not all */ 317 && ( targetop->op_type == OP_GVSV 318 || targetop->op_type == OP_RV2SV 319 || targetop->op_type == OP_AELEM 320 || targetop->op_type == OP_HELEM 321 ) 322 ) 323 return; 324 } 325 else if ( topop->op_type == OP_CONCAT 326 && (topop->op_flags & OPf_STACKED) 327 && (!(topop->op_private & OPpCONCAT_NESTED)) 328 ) 329 { 330 /* expr .= ..... */ 331 332 /* OPpTARGET_MY shouldn't be able to be set here. If it is, 333 * decide what to do about it */ 334 assert(!(o->op_private & OPpTARGET_MY)); 335 336 /* barf on unknown flags */ 337 assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY))); 338 private_flags |= OPpMULTICONCAT_APPEND; 339 targetop = cBINOPo->op_first; 340 parentop = topop; 341 topop = OpSIBLING(targetop); 342 343 /* $x .= <FOO> gets optimised to rcatline instead */ 344 if (topop->op_type == OP_READLINE) 345 return; 346 } 347 348 if (targetop) { 349 /* Can targetop (the LHS) if it's a padsv, be optimised 350 * away and use OPpTARGET_MY instead? 351 */ 352 if ( (targetop->op_type == OP_PADSV) 353 && !(targetop->op_private & OPpDEREF) 354 && !(targetop->op_private & OPpPAD_STATE) 355 /* we don't support 'my $x .= ...' */ 356 && ( o->op_type == OP_SASSIGN 357 || !(targetop->op_private & OPpLVAL_INTRO)) 358 ) 359 is_targable = TRUE; 360 } 361 362 if (topop->op_type == OP_STRINGIFY) { 363 if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY]) 364 return; 365 stringop = topop; 366 367 /* barf on unknown flags */ 368 assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY))); 369 370 if ((topop->op_private & OPpTARGET_MY)) { 371 if (o->op_type == OP_SASSIGN) 372 return; /* can't have two assigns */ 373 targmyop = topop; 374 } 375 376 private_flags |= OPpMULTICONCAT_STRINGIFY; 377 parentop = topop; 378 topop = cBINOPx(topop)->op_first; 379 assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK)); 380 topop = OpSIBLING(topop); 381 } 382 383 if (topop->op_type == OP_SPRINTF) { 384 if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF]) 385 return; 386 if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) { 387 nargs = sprintf_info.nargs; 388 total_len = sprintf_info.total_len; 389 variant = sprintf_info.variant; 390 utf8 = sprintf_info.utf8; 391 is_sprintf = TRUE; 392 private_flags |= OPpMULTICONCAT_FAKE; 393 toparg = argp; 394 /* we have an sprintf op rather than a concat optree. 395 * Skip most of the code below which is associated with 396 * processing that optree. We also skip phase 2, determining 397 * whether its cost effective to optimise, since for sprintf, 398 * multiconcat is *always* faster */ 399 goto create_aux; 400 } 401 /* note that even if the sprintf itself isn't multiconcatable, 402 * the expression as a whole may be, e.g. in 403 * $x .= sprintf("%d",...) 404 * the sprintf op will be left as-is, but the concat/S op may 405 * be upgraded to multiconcat 406 */ 407 } 408 else if (topop->op_type == OP_CONCAT) { 409 if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT]) 410 return; 411 412 if ((topop->op_private & OPpTARGET_MY)) { 413 if (o->op_type == OP_SASSIGN || targmyop) 414 return; /* can't have two assigns */ 415 targmyop = topop; 416 } 417 } 418 419 /* Is it safe to convert a sassign/stringify/concat op into 420 * a multiconcat? */ 421 assert((PL_opargs[OP_SASSIGN] & OA_CLASS_MASK) == OA_BINOP); 422 assert((PL_opargs[OP_CONCAT] & OA_CLASS_MASK) == OA_BINOP); 423 assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP); 424 assert((PL_opargs[OP_SPRINTF] & OA_CLASS_MASK) == OA_LISTOP); 425 STATIC_ASSERT_STMT( STRUCT_OFFSET(BINOP, op_last) 426 == STRUCT_OFFSET(UNOP_AUX, op_aux)); 427 STATIC_ASSERT_STMT( STRUCT_OFFSET(LISTOP, op_last) 428 == STRUCT_OFFSET(UNOP_AUX, op_aux)); 429 430 /* Now scan the down the tree looking for a series of 431 * CONCAT/OPf_STACKED ops on the LHS (with the last one not 432 * stacked). For example this tree: 433 * 434 * | 435 * CONCAT/STACKED 436 * | 437 * CONCAT/STACKED -- EXPR5 438 * | 439 * CONCAT/STACKED -- EXPR4 440 * | 441 * CONCAT -- EXPR3 442 * | 443 * EXPR1 -- EXPR2 444 * 445 * corresponds to an expression like 446 * 447 * (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5) 448 * 449 * Record info about each EXPR in args[]: in particular, whether it is 450 * a stringifiable OP_CONST and if so what the const sv is. 451 * 452 * The reason why the last concat can't be STACKED is the difference 453 * between 454 * 455 * ((($a .= $a) .= $a) .= $a) .= $a 456 * 457 * and 458 * $a . $a . $a . $a . $a 459 * 460 * The main difference between the optrees for those two constructs 461 * is the presence of the last STACKED. As well as modifying $a, 462 * the former sees the changed $a between each concat, so if $s is 463 * initially 'a', the first returns 'a' x 16, while the latter returns 464 * 'a' x 5. And pp_multiconcat can't handle that kind of thing. 465 */ 466 467 kid = topop; 468 469 for (;;) { 470 OP *argop; 471 SV *sv; 472 bool last = FALSE; 473 474 if ( kid->op_type == OP_CONCAT 475 && !kid_is_last 476 ) { 477 OP *k1, *k2; 478 k1 = cUNOPx(kid)->op_first; 479 k2 = OpSIBLING(k1); 480 /* shouldn't happen except maybe after compile err? */ 481 if (!k2) 482 return; 483 484 /* avoid turning (A . B . ($lex = C) ...) into (A . B . C ...) */ 485 if (kid->op_private & OPpTARGET_MY) 486 kid_is_last = TRUE; 487 488 stacked_last = (kid->op_flags & OPf_STACKED); 489 if (!stacked_last) 490 kid_is_last = TRUE; 491 492 kid = k1; 493 argop = k2; 494 } 495 else { 496 argop = kid; 497 last = TRUE; 498 } 499 500 if ( nargs + nadjconst > PERL_MULTICONCAT_MAXARG - 2 501 || (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2) 502 { 503 /* At least two spare slots are needed to decompose both 504 * concat args. If there are no slots left, continue to 505 * examine the rest of the optree, but don't push new values 506 * on args[]. If the optree as a whole is legal for conversion 507 * (in particular that the last concat isn't STACKED), then 508 * the first PERL_MULTICONCAT_MAXARG elements of the optree 509 * can be converted into an OP_MULTICONCAT now, with the first 510 * child of that op being the remainder of the optree - 511 * which may itself later be converted to a multiconcat op 512 * too. 513 */ 514 if (last) { 515 /* the last arg is the rest of the optree */ 516 argp++->p = NULL; 517 nargs++; 518 } 519 } 520 else if ( argop->op_type == OP_CONST 521 && ((sv = cSVOPx_sv(argop))) 522 /* defer stringification until runtime of 'constant' 523 * things that might stringify variantly, e.g. the radix 524 * point of NVs, or overloaded RVs */ 525 && (SvPOK(sv) || SvIOK(sv)) 526 && (!SvGMAGICAL(sv)) 527 ) { 528 if (argop->op_private & OPpCONST_STRICT) 529 no_bareword_allowed(argop); 530 argp++->p = sv; 531 utf8 |= cBOOL(SvUTF8(sv)); 532 nconst++; 533 if (prev_was_const) 534 /* this const may be demoted back to a plain arg later; 535 * make sure we have enough arg slots left */ 536 nadjconst++; 537 prev_was_const = !prev_was_const; 538 } 539 else { 540 argp++->p = NULL; 541 nargs++; 542 prev_was_const = FALSE; 543 } 544 545 if (last) 546 break; 547 } 548 549 toparg = argp - 1; 550 551 if (stacked_last) 552 return; /* we don't support ((A.=B).=C)...) */ 553 554 /* look for two adjacent consts and don't fold them together: 555 * $o . "a" . "b" 556 * should do 557 * $o->concat("a")->concat("b") 558 * rather than 559 * $o->concat("ab") 560 * (but $o .= "a" . "b" should still fold) 561 */ 562 { 563 bool seen_nonconst = FALSE; 564 for (argp = toparg; argp >= args; argp--) { 565 if (argp->p == NULL) { 566 seen_nonconst = TRUE; 567 continue; 568 } 569 if (!seen_nonconst) 570 continue; 571 if (argp[1].p) { 572 /* both previous and current arg were constants; 573 * leave the current OP_CONST as-is */ 574 argp->p = NULL; 575 nconst--; 576 nargs++; 577 } 578 } 579 } 580 581 /* ----------------------------------------------------------------- 582 * Phase 2: 583 * 584 * At this point we have determined that the optree *can* be converted 585 * into a multiconcat. Having gathered all the evidence, we now decide 586 * whether it *should*. 587 */ 588 589 590 /* we need at least one concat action, e.g.: 591 * 592 * Y . Z 593 * X = Y . Z 594 * X .= Y 595 * 596 * otherwise we could be doing something like $x = "foo", which 597 * if treated as a concat, would fail to COW. 598 */ 599 if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2) 600 return; 601 602 /* Benchmarking seems to indicate that we gain if: 603 * * we optimise at least two actions into a single multiconcat 604 * (e.g concat+concat, sassign+concat); 605 * * or if we can eliminate at least 1 OP_CONST; 606 * * or if we can eliminate a padsv via OPpTARGET_MY 607 */ 608 609 if ( 610 /* eliminated at least one OP_CONST */ 611 nconst >= 1 612 /* eliminated an OP_SASSIGN */ 613 || o->op_type == OP_SASSIGN 614 /* eliminated an OP_PADSV */ 615 || (!targmyop && is_targable) 616 ) 617 /* definitely a net gain to optimise */ 618 goto optimise; 619 620 /* ... if not, what else? */ 621 622 /* special-case '$lex1 = expr . $lex1' (where expr isn't lex1): 623 * multiconcat is faster (due to not creating a temporary copy of 624 * $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is 625 * faster. 626 */ 627 if ( nconst == 0 628 && nargs == 2 629 && targmyop 630 && topop->op_type == OP_CONCAT 631 ) { 632 PADOFFSET t = targmyop->op_targ; 633 OP *k1 = cBINOPx(topop)->op_first; 634 OP *k2 = cBINOPx(topop)->op_last; 635 if ( k2->op_type == OP_PADSV 636 && k2->op_targ == t 637 && ( k1->op_type != OP_PADSV 638 || k1->op_targ != t) 639 ) 640 goto optimise; 641 } 642 643 /* need at least two concats */ 644 if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3) 645 return; 646 647 648 649 /* ----------------------------------------------------------------- 650 * Phase 3: 651 * 652 * At this point the optree has been verified as ok to be optimised 653 * into an OP_MULTICONCAT. Now start changing things. 654 */ 655 656 optimise: 657 658 /* stringify all const args and determine utf8ness */ 659 660 variant = 0; 661 for (argp = args; argp <= toparg; argp++) { 662 SV *sv = (SV*)argp->p; 663 if (!sv) 664 continue; /* not a const op */ 665 if (utf8 && !SvUTF8(sv)) 666 sv_utf8_upgrade_nomg(sv); 667 argp->p = SvPV_nomg(sv, argp->len); 668 total_len += argp->len; 669 670 /* see if any strings would grow if converted to utf8 */ 671 if (!utf8) { 672 variant += variant_under_utf8_count((U8 *) argp->p, 673 (U8 *) argp->p + argp->len); 674 } 675 } 676 677 /* create and populate aux struct */ 678 679 create_aux: 680 681 aux = (UNOP_AUX_item*)PerlMemShared_malloc( 682 sizeof(UNOP_AUX_item) 683 * ( 684 PERL_MULTICONCAT_HEADER_SIZE 685 + ((nargs + 1) * (variant ? 2 : 1)) 686 ) 687 ); 688 const_str = (char *)PerlMemShared_malloc(total_len ? total_len : 1); 689 690 /* Extract all the non-const expressions from the concat tree then 691 * dispose of the old tree, e.g. convert the tree from this: 692 * 693 * o => SASSIGN 694 * | 695 * STRINGIFY -- TARGET 696 * | 697 * ex-PUSHMARK -- CONCAT 698 * | 699 * CONCAT -- EXPR5 700 * | 701 * CONCAT -- EXPR4 702 * | 703 * CONCAT -- EXPR3 704 * | 705 * EXPR1 -- EXPR2 706 * 707 * 708 * to: 709 * 710 * o => MULTICONCAT 711 * | 712 * ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET 713 * 714 * except that if EXPRi is an OP_CONST, it's discarded. 715 * 716 * During the conversion process, EXPR ops are stripped from the tree 717 * and unshifted onto o. Finally, any of o's remaining original 718 * children are discarded and o is converted into an OP_MULTICONCAT. 719 * 720 * In this middle of this, o may contain both: unshifted args on the 721 * left, and some remaining original args on the right. lastkidop 722 * is set to point to the right-most unshifted arg to delineate 723 * between the two sets. 724 */ 725 726 727 if (is_sprintf) { 728 /* create a copy of the format with the %'s removed, and record 729 * the sizes of the const string segments in the aux struct */ 730 char *q, *oldq; 731 lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; 732 733 p = sprintf_info.start; 734 q = const_str; 735 oldq = q; 736 for (; p < sprintf_info.end; p++) { 737 if (*p == '%') { 738 p++; 739 if (*p != '%') { 740 (lenp++)->ssize = q - oldq; 741 oldq = q; 742 continue; 743 } 744 } 745 *q++ = *p; 746 } 747 lenp->ssize = q - oldq; 748 assert((STRLEN)(q - const_str) == total_len); 749 750 /* Attach all the args (i.e. the kids of the sprintf) to o (which 751 * may or may not be topop) The pushmark and const ops need to be 752 * kept in case they're an op_next entry point. 753 */ 754 lastkidop = cLISTOPx(topop)->op_last; 755 kid = cUNOPx(topop)->op_first; /* pushmark */ 756 op_null(kid); 757 op_null(OpSIBLING(kid)); /* const */ 758 if (o != topop) { 759 kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */ 760 op_sibling_splice(o, NULL, 0, kid); /* and attach to o */ 761 lastkidop->op_next = o; 762 } 763 } 764 else { 765 p = const_str; 766 lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; 767 768 lenp->ssize = -1; 769 770 /* Concatenate all const strings into const_str. 771 * Note that args[] contains the RHS args in reverse order, so 772 * we scan args[] from top to bottom to get constant strings 773 * in L-R order 774 */ 775 for (argp = toparg; argp >= args; argp--) { 776 if (!argp->p) 777 /* not a const op */ 778 (++lenp)->ssize = -1; 779 else { 780 STRLEN l = argp->len; 781 Copy(argp->p, p, l, char); 782 p += l; 783 if (lenp->ssize == -1) 784 lenp->ssize = l; 785 else 786 lenp->ssize += l; 787 } 788 } 789 790 kid = topop; 791 nextop = o; 792 lastkidop = NULL; 793 794 for (argp = args; argp <= toparg; argp++) { 795 /* only keep non-const args, except keep the first-in-next-chain 796 * arg no matter what it is (but nulled if OP_CONST), because it 797 * may be the entry point to this subtree from the previous 798 * op_next. 799 */ 800 bool last = (argp == toparg); 801 OP *prev; 802 803 /* set prev to the sibling *before* the arg to be cut out, 804 * e.g. when cutting EXPR: 805 * 806 * | 807 * kid= CONCAT 808 * | 809 * prev= CONCAT -- EXPR 810 * | 811 */ 812 if (argp == args && kid->op_type != OP_CONCAT) { 813 /* in e.g. '$x .= f(1)' there's no RHS concat tree 814 * so the expression to be cut isn't kid->op_last but 815 * kid itself */ 816 OP *o1, *o2; 817 /* find the op before kid */ 818 o1 = NULL; 819 o2 = cUNOPx(parentop)->op_first; 820 while (o2 && o2 != kid) { 821 o1 = o2; 822 o2 = OpSIBLING(o2); 823 } 824 assert(o2 == kid); 825 prev = o1; 826 kid = parentop; 827 } 828 else if (kid == o && lastkidop) 829 prev = last ? lastkidop : OpSIBLING(lastkidop); 830 else 831 prev = last ? NULL : cUNOPx(kid)->op_first; 832 833 if (!argp->p || last) { 834 /* cut RH op */ 835 OP *aop = op_sibling_splice(kid, prev, 1, NULL); 836 /* and unshift to front of o */ 837 op_sibling_splice(o, NULL, 0, aop); 838 /* record the right-most op added to o: later we will 839 * free anything to the right of it */ 840 if (!lastkidop) 841 lastkidop = aop; 842 aop->op_next = nextop; 843 if (last) { 844 if (argp->p) 845 /* null the const at start of op_next chain */ 846 op_null(aop); 847 } 848 else if (prev) 849 nextop = prev->op_next; 850 } 851 852 /* the last two arguments are both attached to the same concat op */ 853 if (argp < toparg - 1) 854 kid = prev; 855 } 856 } 857 858 /* Populate the aux struct */ 859 860 aux[PERL_MULTICONCAT_IX_NARGS].ssize = nargs; 861 aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv = utf8 ? NULL : const_str; 862 aux[PERL_MULTICONCAT_IX_PLAIN_LEN].ssize = utf8 ? 0 : total_len; 863 aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = const_str; 864 aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = total_len; 865 866 /* if variant > 0, calculate a variant const string and lengths where 867 * the utf8 version of the string will take 'variant' more bytes than 868 * the plain one. */ 869 870 if (variant) { 871 char *p = const_str; 872 STRLEN ulen = total_len + variant; 873 UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS; 874 UNOP_AUX_item *ulens = lens + (nargs + 1); 875 char *up = (char*)PerlMemShared_malloc(ulen); 876 SSize_t n; 877 878 aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = up; 879 aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = ulen; 880 881 for (n = 0; n < (nargs + 1); n++) { 882 SSize_t i; 883 char * orig_up = up; 884 for (i = (lens++)->ssize; i > 0; i--) { 885 U8 c = *p++; 886 append_utf8_from_native_byte(c, (U8**)&up); 887 } 888 (ulens++)->ssize = (i < 0) ? i : up - orig_up; 889 } 890 } 891 892 if (stringop) { 893 /* if there was a top(ish)-level OP_STRINGIFY, we need to keep 894 * that op's first child - an ex-PUSHMARK - because the op_next of 895 * the previous op may point to it (i.e. it's the entry point for 896 * the o optree) 897 */ 898 OP *pmop = 899 (stringop == o) 900 ? op_sibling_splice(o, lastkidop, 1, NULL) 901 : op_sibling_splice(stringop, NULL, 1, NULL); 902 assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK)); 903 op_sibling_splice(o, NULL, 0, pmop); 904 if (!lastkidop) 905 lastkidop = pmop; 906 } 907 908 /* Optimise 909 * target = A.B.C... 910 * target .= A.B.C... 911 */ 912 913 if (targetop) { 914 assert(!targmyop); 915 916 if (o->op_type == OP_SASSIGN) { 917 /* Move the target subtree from being the last of o's children 918 * to being the last of o's preserved children. 919 * Note the difference between 'target = ...' and 'target .= ...': 920 * for the former, target is executed last; for the latter, 921 * first. 922 */ 923 kid = OpSIBLING(lastkidop); 924 op_sibling_splice(o, kid, 1, NULL); /* cut target op */ 925 op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */ 926 lastkidop->op_next = kid->op_next; 927 lastkidop = targetop; 928 } 929 else { 930 /* Move the target subtree from being the first of o's 931 * original children to being the first of *all* o's children. 932 */ 933 if (lastkidop) { 934 op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */ 935 op_sibling_splice(o, NULL, 0, targetop); /* and paste*/ 936 } 937 else { 938 /* if the RHS of .= doesn't contain a concat (e.g. 939 * $x .= "foo"), it gets missed by the "strip ops from the 940 * tree and add to o" loop earlier */ 941 assert(topop->op_type != OP_CONCAT); 942 if (stringop) { 943 /* in e.g. $x .= "$y", move the $y expression 944 * from being a child of OP_STRINGIFY to being the 945 * second child of the OP_CONCAT 946 */ 947 assert(cUNOPx(stringop)->op_first == topop); 948 op_sibling_splice(stringop, NULL, 1, NULL); 949 op_sibling_splice(o, cUNOPo->op_first, 0, topop); 950 } 951 assert(topop == OpSIBLING(cBINOPo->op_first)); 952 if (toparg->p) 953 op_null(topop); 954 lastkidop = topop; 955 } 956 } 957 958 if (is_targable) { 959 /* optimise 960 * my $lex = A.B.C... 961 * $lex = A.B.C... 962 * $lex .= A.B.C... 963 * The original padsv op is kept but nulled in case it's the 964 * entry point for the optree (which it will be for 965 * '$lex .= ... ' 966 */ 967 private_flags |= OPpTARGET_MY; 968 private_flags |= (targetop->op_private & OPpLVAL_INTRO); 969 o->op_targ = targetop->op_targ; 970 targetop->op_targ = 0; 971 op_null(targetop); 972 } 973 else 974 flags |= OPf_STACKED; 975 } 976 else if (targmyop) { 977 private_flags |= OPpTARGET_MY; 978 if (o != targmyop) { 979 o->op_targ = targmyop->op_targ; 980 targmyop->op_targ = 0; 981 } 982 } 983 984 /* detach the emaciated husk of the sprintf/concat optree and free it */ 985 for (;;) { 986 kid = op_sibling_splice(o, lastkidop, 1, NULL); 987 if (!kid) 988 break; 989 op_free(kid); 990 } 991 992 /* and convert o into a multiconcat */ 993 994 o->op_flags = (flags|OPf_KIDS|stacked_last 995 |(o->op_flags & (OPf_WANT|OPf_PARENS))); 996 o->op_private = private_flags; 997 o->op_type = OP_MULTICONCAT; 998 o->op_ppaddr = PL_ppaddr[OP_MULTICONCAT]; 999 cUNOP_AUXo->op_aux = aux; 1000} 1001 1002 1003/* 1004=for apidoc_section $optree_manipulation 1005 1006=for apidoc optimize_optree 1007 1008This function applies some optimisations to the optree in top-down order. 1009It is called before the peephole optimizer, which processes ops in 1010execution order. Note that finalize_optree() also does a top-down scan, 1011but is called *after* the peephole optimizer. 1012 1013=cut 1014*/ 1015 1016void 1017Perl_optimize_optree(pTHX_ OP* o) 1018{ 1019 PERL_ARGS_ASSERT_OPTIMIZE_OPTREE; 1020 1021 ENTER; 1022 SAVEVPTR(PL_curcop); 1023 1024 optimize_op(o); 1025 1026 LEAVE; 1027} 1028 1029 1030#define warn_implicit_snail_cvsig(o) S_warn_implicit_snail_cvsig(aTHX_ o) 1031static void 1032S_warn_implicit_snail_cvsig(pTHX_ OP *o) 1033{ 1034 CV *cv = PL_compcv; 1035 while(cv && CvEVAL(cv)) 1036 cv = CvOUTSIDE(cv); 1037 1038 if(cv && CvSIGNATURE(cv)) 1039 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__ARGS_ARRAY_WITH_SIGNATURES), 1040 "Implicit use of @_ in %s with signatured subroutine is experimental", OP_DESC(o)); 1041} 1042 1043 1044#define OP_ZOOM(o) (OP_TYPE_IS(o, OP_NULL) ? cUNOPx(o)->op_first : (o)) 1045 1046/* helper for optimize_optree() which optimises one op then recurses 1047 * to optimise any children. 1048 */ 1049 1050STATIC void 1051S_optimize_op(pTHX_ OP* o) 1052{ 1053 OP *top_op = o; 1054 1055 PERL_ARGS_ASSERT_OPTIMIZE_OP; 1056 1057 while (1) { 1058 OP * next_kid = NULL; 1059 1060 assert(o->op_type != OP_FREED); 1061 1062 switch (o->op_type) { 1063 case OP_NEXTSTATE: 1064 case OP_DBSTATE: 1065 PL_curcop = ((COP*)o); /* for warnings */ 1066 break; 1067 1068 1069 case OP_CONCAT: 1070 case OP_SASSIGN: 1071 case OP_STRINGIFY: 1072 case OP_SPRINTF: 1073 S_maybe_multiconcat(aTHX_ o); 1074 break; 1075 1076 case OP_SUBST: 1077 if (cPMOPo->op_pmreplrootu.op_pmreplroot) { 1078 /* we can't assume that op_pmreplroot->op_sibparent == o 1079 * and that it is thus possible to walk back up the tree 1080 * past op_pmreplroot. So, although we try to avoid 1081 * recursing through op trees, do it here. After all, 1082 * there are unlikely to be many nested s///e's within 1083 * the replacement part of a s///e. 1084 */ 1085 optimize_op(cPMOPo->op_pmreplrootu.op_pmreplroot); 1086 } 1087 break; 1088 1089 case OP_RV2AV: 1090 { 1091 OP *first = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL; 1092 CV *cv = PL_compcv; 1093 while(cv && CvEVAL(cv)) 1094 cv = CvOUTSIDE(cv); 1095 1096 if(cv && CvSIGNATURE(cv) && 1097 OP_TYPE_IS(first, OP_GV) && cGVOPx_gv(first) == PL_defgv) { 1098 OP *parent = op_parent(o); 1099 while(OP_TYPE_IS(parent, OP_NULL)) 1100 parent = op_parent(parent); 1101 1102 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__ARGS_ARRAY_WITH_SIGNATURES), 1103 "Use of @_ in %s with signatured subroutine is experimental", OP_DESC(parent)); 1104 } 1105 break; 1106 } 1107 1108 case OP_SHIFT: 1109 case OP_POP: 1110 if(!CvUNIQUE(PL_compcv) && !(o->op_flags & OPf_KIDS)) 1111 warn_implicit_snail_cvsig(o); 1112 break; 1113 1114 case OP_ENTERSUB: 1115 if(!(o->op_flags & OPf_STACKED)) 1116 warn_implicit_snail_cvsig(o); 1117 break; 1118 1119 case OP_GOTO: 1120 { 1121 OP *first = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL; 1122 OP *ffirst; 1123 if(OP_TYPE_IS(first, OP_SREFGEN) && 1124 (ffirst = OP_ZOOM(cUNOPx(first)->op_first)) && 1125 OP_TYPE_IS(ffirst, OP_RV2CV)) 1126 warn_implicit_snail_cvsig(o); 1127 break; 1128 } 1129 1130 default: 1131 break; 1132 } 1133 1134 if (o->op_flags & OPf_KIDS) 1135 next_kid = cUNOPo->op_first; 1136 1137 /* if a kid hasn't been nominated to process, continue with the 1138 * next sibling, or if no siblings left, go back to the parent's 1139 * siblings and so on 1140 */ 1141 while (!next_kid) { 1142 if (o == top_op) 1143 return; /* at top; no parents/siblings to try */ 1144 if (OpHAS_SIBLING(o)) 1145 next_kid = o->op_sibparent; 1146 else 1147 o = o->op_sibparent; /*try parent's next sibling */ 1148 } 1149 1150 /* this label not yet used. Goto here if any code above sets 1151 * next-kid 1152 get_next_op: 1153 */ 1154 o = next_kid; 1155 } 1156} 1157 1158/* 1159=for apidoc finalize_optree 1160 1161This function finalizes the optree. Should be called directly after 1162the complete optree is built. It does some additional 1163checking which can't be done in the normal C<ck_>xxx functions and makes 1164the tree thread-safe. 1165 1166=cut 1167*/ 1168 1169void 1170Perl_finalize_optree(pTHX_ OP* o) 1171{ 1172 PERL_ARGS_ASSERT_FINALIZE_OPTREE; 1173 1174 ENTER; 1175 SAVEVPTR(PL_curcop); 1176 1177 finalize_op(o); 1178 1179 LEAVE; 1180} 1181 1182 1183/* 1184=for apidoc traverse_op_tree 1185 1186Return the next op in a depth-first traversal of the op tree, 1187returning NULL when the traversal is complete. 1188 1189The initial call must supply the root of the tree as both top and o. 1190 1191For now it's static, but it may be exposed to the API in the future. 1192 1193=cut 1194*/ 1195 1196STATIC OP* 1197S_traverse_op_tree(pTHX_ OP *top, OP *o) { 1198 OP *sib; 1199 1200 PERL_ARGS_ASSERT_TRAVERSE_OP_TREE; 1201 1202 if ((o->op_flags & OPf_KIDS) && cUNOPo->op_first) { 1203 return cUNOPo->op_first; 1204 } 1205 else if ((sib = OpSIBLING(o))) { 1206 return sib; 1207 } 1208 else { 1209 OP *parent = o->op_sibparent; 1210 assert(!(o->op_moresib)); 1211 while (parent && parent != top) { 1212 OP *sib = OpSIBLING(parent); 1213 if (sib) 1214 return sib; 1215 parent = parent->op_sibparent; 1216 } 1217 1218 return NULL; 1219 } 1220} 1221 1222STATIC void 1223S_finalize_op(pTHX_ OP* o) 1224{ 1225 OP * const top = o; 1226 PERL_ARGS_ASSERT_FINALIZE_OP; 1227 1228 do { 1229 assert(o->op_type != OP_FREED); 1230 1231 switch (o->op_type) { 1232 case OP_NEXTSTATE: 1233 case OP_DBSTATE: 1234 PL_curcop = ((COP*)o); /* for warnings */ 1235 break; 1236 case OP_EXEC: 1237 if (OpHAS_SIBLING(o)) { 1238 OP *sib = OpSIBLING(o); 1239 if (( sib->op_type == OP_NEXTSTATE || sib->op_type == OP_DBSTATE) 1240 && ckWARN(WARN_EXEC) 1241 && OpHAS_SIBLING(sib)) 1242 { 1243 const OPCODE type = OpSIBLING(sib)->op_type; 1244 if (type != OP_EXIT && type != OP_WARN && type != OP_DIE) { 1245 const line_t oldline = CopLINE(PL_curcop); 1246 CopLINE_set(PL_curcop, CopLINE((COP*)sib)); 1247 Perl_warner(aTHX_ packWARN(WARN_EXEC), 1248 "Statement unlikely to be reached"); 1249 Perl_warner(aTHX_ packWARN(WARN_EXEC), 1250 "\t(Maybe you meant system() when you said exec()?)\n"); 1251 CopLINE_set(PL_curcop, oldline); 1252 } 1253 } 1254 } 1255 break; 1256 1257 case OP_GV: 1258 if ((o->op_private & OPpEARLY_CV) && ckWARN(WARN_PROTOTYPE)) { 1259 GV * const gv = cGVOPo_gv; 1260 if (SvTYPE(gv) == SVt_PVGV && GvCV(gv) && SvPVX_const(GvCV(gv))) { 1261 /* XXX could check prototype here instead of just carping */ 1262 SV * const sv = sv_newmortal(); 1263 gv_efullname3(sv, gv, NULL); 1264 Perl_warner(aTHX_ packWARN(WARN_PROTOTYPE), 1265 "%" SVf "() called too early to check prototype", 1266 SVfARG(sv)); 1267 } 1268 } 1269 break; 1270 1271 case OP_CONST: 1272 if (cSVOPo->op_private & OPpCONST_STRICT) 1273 no_bareword_allowed(o); 1274#ifdef USE_ITHREADS 1275 /* FALLTHROUGH */ 1276 case OP_HINTSEVAL: 1277 op_relocate_sv(&cSVOPo->op_sv, &o->op_targ); 1278#endif 1279 break; 1280 1281#ifdef USE_ITHREADS 1282 /* Relocate all the METHOP's SVs to the pad for thread safety. */ 1283 case OP_METHOD_NAMED: 1284 case OP_METHOD_SUPER: 1285 case OP_METHOD_REDIR: 1286 case OP_METHOD_REDIR_SUPER: 1287 op_relocate_sv(&cMETHOPo->op_u.op_meth_sv, &o->op_targ); 1288 break; 1289#endif 1290 1291 case OP_HELEM: { 1292 UNOP *rop; 1293 SVOP *key_op; 1294 OP *kid; 1295 1296 if ((key_op = cSVOPx(cBINOPo->op_last))->op_type != OP_CONST) 1297 break; 1298 1299 rop = cUNOPx(cBINOPo->op_first); 1300 1301 goto check_keys; 1302 1303 case OP_HSLICE: 1304 S_scalar_slice_warning(aTHX_ o); 1305 /* FALLTHROUGH */ 1306 1307 case OP_KVHSLICE: 1308 kid = OpSIBLING(cLISTOPo->op_first); 1309 if (/* I bet there's always a pushmark... */ 1310 OP_TYPE_ISNT_AND_WASNT_NN(kid, OP_LIST) 1311 && OP_TYPE_ISNT_NN(kid, OP_CONST)) 1312 { 1313 break; 1314 } 1315 1316 key_op = cSVOPx(kid->op_type == OP_CONST 1317 ? kid 1318 : OpSIBLING(kLISTOP->op_first)); 1319 1320 rop = cUNOPx(cLISTOPo->op_last); 1321 1322 check_keys: 1323 if (o->op_private & OPpLVAL_INTRO || rop->op_type != OP_RV2HV) 1324 rop = NULL; 1325 check_hash_fields_and_hekify(rop, key_op, 1); 1326 break; 1327 } 1328 case OP_NULL: 1329 if (o->op_targ != OP_HSLICE && o->op_targ != OP_ASLICE) 1330 break; 1331 /* FALLTHROUGH */ 1332 case OP_ASLICE: 1333 S_scalar_slice_warning(aTHX_ o); 1334 break; 1335 1336 case OP_SUBST: { 1337 if (cPMOPo->op_pmreplrootu.op_pmreplroot) 1338 finalize_op(cPMOPo->op_pmreplrootu.op_pmreplroot); 1339 break; 1340 } 1341 default: 1342 break; 1343 } 1344 1345#ifdef DEBUGGING 1346 if (o->op_flags & OPf_KIDS) { 1347 OP *kid; 1348 1349 /* check that op_last points to the last sibling, and that 1350 * the last op_sibling/op_sibparent field points back to the 1351 * parent, and that the only ops with KIDS are those which are 1352 * entitled to them */ 1353 U32 type = o->op_type; 1354 U32 family; 1355 bool has_last; 1356 1357 if (type == OP_NULL) { 1358 type = o->op_targ; 1359 /* ck_glob creates a null UNOP with ex-type GLOB 1360 * (which is a list op. So pretend it wasn't a listop */ 1361 if (type == OP_GLOB) 1362 type = OP_NULL; 1363 } 1364 family = PL_opargs[type] & OA_CLASS_MASK; 1365 1366 has_last = ( family == OA_BINOP 1367 || family == OA_LISTOP 1368 || family == OA_PMOP 1369 || family == OA_LOOP 1370 ); 1371 assert( has_last /* has op_first and op_last, or ... 1372 ... has (or may have) op_first: */ 1373 || family == OA_UNOP 1374 || family == OA_UNOP_AUX 1375 || family == OA_LOGOP 1376 || family == OA_BASEOP_OR_UNOP 1377 || family == OA_FILESTATOP 1378 || family == OA_LOOPEXOP 1379 || family == OA_METHOP 1380 || type == OP_CUSTOM 1381 || type == OP_NULL /* new_logop does this */ 1382 ); 1383 1384 for (kid = cUNOPo->op_first; kid; kid = OpSIBLING(kid)) { 1385 if (!OpHAS_SIBLING(kid)) { 1386 if (has_last) 1387 assert(kid == cLISTOPo->op_last); 1388 assert(kid->op_sibparent == o); 1389 } 1390 } 1391 } 1392#endif 1393 } while (( o = traverse_op_tree(top, o)) != NULL); 1394} 1395 1396 1397/* 1398 --------------------------------------------------------- 1399 1400 Common vars in list assignment 1401 1402 There now follows some enums and static functions for detecting 1403 common variables in list assignments. Here is a little essay I wrote 1404 for myself when trying to get my head around this. DAPM. 1405 1406 ---- 1407 1408 First some random observations: 1409 1410 * If a lexical var is an alias of something else, e.g. 1411 for my $x ($lex, $pkg, $a[0]) {...} 1412 then the act of aliasing will increase the reference count of the SV 1413 1414 * If a package var is an alias of something else, it may still have a 1415 reference count of 1, depending on how the alias was created, e.g. 1416 in *a = *b, $a may have a refcount of 1 since the GP is shared 1417 with a single GvSV pointer to the SV. So If it's an alias of another 1418 package var, then RC may be 1; if it's an alias of another scalar, e.g. 1419 a lexical var or an array element, then it will have RC > 1. 1420 1421 * There are many ways to create a package alias; ultimately, XS code 1422 may quite legally do GvSV(gv) = SvREFCNT_inc(sv) for example, so 1423 run-time tracing mechanisms are unlikely to be able to catch all cases. 1424 1425 * When the LHS is all my declarations, the same vars can't appear directly 1426 on the RHS, but they can indirectly via closures, aliasing and lvalue 1427 subs. But those techniques all involve an increase in the lexical 1428 scalar's ref count. 1429 1430 * When the LHS is all lexical vars (but not necessarily my declarations), 1431 it is possible for the same lexicals to appear directly on the RHS, and 1432 without an increased ref count, since the stack isn't refcounted. 1433 This case can be detected at compile time by scanning for common lex 1434 vars with PL_generation. 1435 1436 * lvalue subs defeat common var detection, but they do at least 1437 return vars with a temporary ref count increment. Also, you can't 1438 tell at compile time whether a sub call is lvalue. 1439 1440 1441 So... 1442 1443 A: There are a few circumstances where there definitely can't be any 1444 commonality: 1445 1446 LHS empty: () = (...); 1447 RHS empty: (....) = (); 1448 RHS contains only constants or other 'can't possibly be shared' 1449 elements (e.g. ops that return PADTMPs): (...) = (1,2, length) 1450 i.e. they only contain ops not marked as dangerous, whose children 1451 are also not dangerous; 1452 LHS ditto; 1453 LHS contains a single scalar element: e.g. ($x) = (....); because 1454 after $x has been modified, it won't be used again on the RHS; 1455 RHS contains a single element with no aggregate on LHS: e.g. 1456 ($a,$b,$c) = ($x); again, once $a has been modified, its value 1457 won't be used again. 1458 1459 B: If LHS are all 'my' lexical var declarations (or safe ops, which 1460 we can ignore): 1461 1462 my ($a, $b, @c) = ...; 1463 1464 Due to closure and goto tricks, these vars may already have content. 1465 For the same reason, an element on the RHS may be a lexical or package 1466 alias of one of the vars on the left, or share common elements, for 1467 example: 1468 1469 my ($x,$y) = f(); # $x and $y on both sides 1470 sub f : lvalue { ($x,$y) = (1,2); $y, $x } 1471 1472 and 1473 1474 my $ra = f(); 1475 my @a = @$ra; # elements of @a on both sides 1476 sub f { @a = 1..4; \@a } 1477 1478 1479 First, just consider scalar vars on LHS: 1480 1481 RHS is safe only if (A), or in addition, 1482 * contains only lexical *scalar* vars, where neither side's 1483 lexicals have been flagged as aliases 1484 1485 If RHS is not safe, then it's always legal to check LHS vars for 1486 RC==1, since the only RHS aliases will always be associated 1487 with an RC bump. 1488 1489 Note that in particular, RHS is not safe if: 1490 1491 * it contains package scalar vars; e.g.: 1492 1493 f(); 1494 my ($x, $y) = (2, $x_alias); 1495 sub f { $x = 1; *x_alias = \$x; } 1496 1497 * It contains other general elements, such as flattened or 1498 * spliced or single array or hash elements, e.g. 1499 1500 f(); 1501 my ($x,$y) = @a; # or $a[0] or @a{@b} etc 1502 1503 sub f { 1504 ($x, $y) = (1,2); 1505 use feature 'refaliasing'; 1506 \($a[0], $a[1]) = \($y,$x); 1507 } 1508 1509 It doesn't matter if the array/hash is lexical or package. 1510 1511 * it contains a function call that happens to be an lvalue 1512 sub which returns one or more of the above, e.g. 1513 1514 f(); 1515 my ($x,$y) = f(); 1516 1517 sub f : lvalue { 1518 ($x, $y) = (1,2); 1519 *x1 = \$x; 1520 $y, $x1; 1521 } 1522 1523 (so a sub call on the RHS should be treated the same 1524 as having a package var on the RHS). 1525 1526 * any other "dangerous" thing, such an op or built-in that 1527 returns one of the above, e.g. pp_preinc 1528 1529 1530 If RHS is not safe, what we can do however is at compile time flag 1531 that the LHS are all my declarations, and at run time check whether 1532 all the LHS have RC == 1, and if so skip the full scan. 1533 1534 Now consider array and hash vars on LHS: e.g. my (...,@a) = ...; 1535 1536 Here the issue is whether there can be elements of @a on the RHS 1537 which will get prematurely freed when @a is cleared prior to 1538 assignment. This is only a problem if the aliasing mechanism 1539 is one which doesn't increase the refcount - only if RC == 1 1540 will the RHS element be prematurely freed. 1541 1542 Because the array/hash is being INTROed, it or its elements 1543 can't directly appear on the RHS: 1544 1545 my (@a) = ($a[0], @a, etc) # NOT POSSIBLE 1546 1547 but can indirectly, e.g.: 1548 1549 my $r = f(); 1550 my (@a) = @$r; 1551 sub f { @a = 1..3; \@a } 1552 1553 So if the RHS isn't safe as defined by (A), we must always 1554 mortalise and bump the ref count of any remaining RHS elements 1555 when assigning to a non-empty LHS aggregate. 1556 1557 Lexical scalars on the RHS aren't safe if they've been involved in 1558 aliasing, e.g. 1559 1560 use feature 'refaliasing'; 1561 1562 f(); 1563 \(my $lex) = \$pkg; 1564 my @a = ($lex,3); # equivalent to ($a[0],3) 1565 1566 sub f { 1567 @a = (1,2); 1568 \$pkg = \$a[0]; 1569 } 1570 1571 Similarly with lexical arrays and hashes on the RHS: 1572 1573 f(); 1574 my @b; 1575 my @a = (@b); 1576 1577 sub f { 1578 @a = (1,2); 1579 \$b[0] = \$a[1]; 1580 \$b[1] = \$a[0]; 1581 } 1582 1583 1584 1585 C: As (B), but in addition the LHS may contain non-intro lexicals, e.g. 1586 my $a; ($a, my $b) = (....); 1587 1588 The difference between (B) and (C) is that it is now physically 1589 possible for the LHS vars to appear on the RHS too, where they 1590 are not reference counted; but in this case, the compile-time 1591 PL_generation sweep will detect such common vars. 1592 1593 So the rules for (C) differ from (B) in that if common vars are 1594 detected, the runtime "test RC==1" optimisation can no longer be used, 1595 and a full mark and sweep is required 1596 1597 D: As (C), but in addition the LHS may contain package vars. 1598 1599 Since package vars can be aliased without a corresponding refcount 1600 increase, all bets are off. It's only safe if (A). E.g. 1601 1602 my ($x, $y) = (1,2); 1603 1604 for $x_alias ($x) { 1605 ($x_alias, $y) = (3, $x); # whoops 1606 } 1607 1608 Ditto for LHS aggregate package vars. 1609 1610 E: Any other dangerous ops on LHS, e.g. 1611 (f(), $a[0], @$r) = (...); 1612 1613 this is similar to (E) in that all bets are off. In addition, it's 1614 impossible to determine at compile time whether the LHS 1615 contains a scalar or an aggregate, e.g. 1616 1617 sub f : lvalue { @a } 1618 (f()) = 1..3; 1619 1620* --------------------------------------------------------- 1621*/ 1622 1623/* A set of bit flags returned by S_aassign_scan(). Each flag indicates 1624 * that at least one of the things flagged was seen. 1625 */ 1626 1627enum { 1628 AAS_MY_SCALAR = 0x001, /* my $scalar */ 1629 AAS_MY_AGG = 0x002, /* aggregate: my @array or my %hash */ 1630 AAS_LEX_SCALAR = 0x004, /* $lexical */ 1631 AAS_LEX_AGG = 0x008, /* @lexical or %lexical aggregate */ 1632 AAS_LEX_SCALAR_COMM = 0x010, /* $lexical seen on both sides */ 1633 AAS_PKG_SCALAR = 0x020, /* $scalar (where $scalar is pkg var) */ 1634 AAS_PKG_AGG = 0x040, /* package @array or %hash aggregate */ 1635 AAS_DANGEROUS = 0x080, /* an op (other than the above) 1636 that's flagged OA_DANGEROUS */ 1637 AAS_SAFE_SCALAR = 0x100, /* produces at least one scalar SV that's 1638 not in any of the categories above */ 1639 AAS_DEFAV = 0x200 /* contains just a single '@_' on RHS */ 1640}; 1641 1642/* helper function for S_aassign_scan(). 1643 * check a PAD-related op for commonality and/or set its generation number. 1644 * Returns a boolean indicating whether its shared */ 1645 1646static bool 1647S_aassign_padcheck(pTHX_ OP* o, bool rhs) 1648{ 1649 if (PAD_COMPNAME_GEN(o->op_targ) == PERL_INT_MAX) 1650 /* lexical used in aliasing */ 1651 return TRUE; 1652 1653 if (rhs) 1654 return cBOOL(PAD_COMPNAME_GEN(o->op_targ) == (STRLEN)PL_generation); 1655 else 1656 PAD_COMPNAME_GEN_set(o->op_targ, PL_generation); 1657 1658 return FALSE; 1659} 1660 1661/* 1662 Helper function for OPpASSIGN_COMMON* detection in rpeep(). 1663 It scans the left or right hand subtree of the aassign op, and returns a 1664 set of flags indicating what sorts of things it found there. 1665 'rhs' indicates whether we're scanning the LHS or RHS. If the former, we 1666 set PL_generation on lexical vars; if the latter, we see if 1667 PL_generation matches. 1668 'scalars_p' is a pointer to a counter of the number of scalar SVs seen. 1669 This fn will increment it by the number seen. It's not intended to 1670 be an accurate count (especially as many ops can push a variable 1671 number of SVs onto the stack); rather it's used as to test whether there 1672 can be at most 1 SV pushed; so it's only meanings are "0, 1, many". 1673*/ 1674 1675static int 1676S_aassign_scan(pTHX_ OP* o, bool rhs, int *scalars_p) 1677{ 1678 OP *top_op = o; 1679 OP *effective_top_op = o; 1680 int all_flags = 0; 1681 1682 while (1) { 1683 bool top = o == effective_top_op; 1684 int flags = 0; 1685 OP* next_kid = NULL; 1686 1687 /* first, look for a solitary @_ on the RHS */ 1688 if ( rhs 1689 && top 1690 && (o->op_flags & OPf_KIDS) 1691 && OP_TYPE_IS_OR_WAS(o, OP_LIST) 1692 ) { 1693 OP *kid = cUNOPo->op_first; 1694 if ( ( kid->op_type == OP_PUSHMARK 1695 || kid->op_type == OP_PADRANGE) /* ex-pushmark */ 1696 && ((kid = OpSIBLING(kid))) 1697 && !OpHAS_SIBLING(kid) 1698 && kid->op_type == OP_RV2AV 1699 && !(kid->op_flags & OPf_REF) 1700 && !(kid->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB)) 1701 && ((kid->op_flags & OPf_WANT) == OPf_WANT_LIST) 1702 && ((kid = cUNOPx(kid)->op_first)) 1703 && kid->op_type == OP_GV 1704 && cGVOPx_gv(kid) == PL_defgv 1705 ) 1706 flags = AAS_DEFAV; 1707 } 1708 1709 switch (o->op_type) { 1710 case OP_GVSV: 1711 (*scalars_p)++; 1712 all_flags |= AAS_PKG_SCALAR; 1713 goto do_next; 1714 1715 case OP_PADAV: 1716 case OP_PADHV: 1717 (*scalars_p) += 2; 1718 /* if !top, could be e.g. @a[0,1] */ 1719 all_flags |= (top && (o->op_flags & OPf_REF)) 1720 ? ((o->op_private & OPpLVAL_INTRO) 1721 ? AAS_MY_AGG : AAS_LEX_AGG) 1722 : AAS_DANGEROUS; 1723 goto do_next; 1724 1725 case OP_PADSV: 1726 { 1727 int comm = S_aassign_padcheck(aTHX_ o, rhs) 1728 ? AAS_LEX_SCALAR_COMM : 0; 1729 (*scalars_p)++; 1730 all_flags |= (o->op_private & OPpLVAL_INTRO) 1731 ? (AAS_MY_SCALAR|comm) : (AAS_LEX_SCALAR|comm); 1732 goto do_next; 1733 1734 } 1735 1736 case OP_RV2AV: 1737 case OP_RV2HV: 1738 (*scalars_p) += 2; 1739 if (cUNOPx(o)->op_first->op_type != OP_GV) 1740 all_flags |= AAS_DANGEROUS; /* @{expr}, %{expr} */ 1741 /* @pkg, %pkg */ 1742 /* if !top, could be e.g. @a[0,1] */ 1743 else if (top && (o->op_flags & OPf_REF)) 1744 all_flags |= AAS_PKG_AGG; 1745 else 1746 all_flags |= AAS_DANGEROUS; 1747 goto do_next; 1748 1749 case OP_RV2SV: 1750 (*scalars_p)++; 1751 if (cUNOPx(o)->op_first->op_type != OP_GV) { 1752 (*scalars_p) += 2; 1753 all_flags |= AAS_DANGEROUS; /* ${expr} */ 1754 } 1755 else 1756 all_flags |= AAS_PKG_SCALAR; /* $pkg */ 1757 goto do_next; 1758 1759 case OP_SPLIT: 1760 if (o->op_private & OPpSPLIT_ASSIGN) { 1761 /* the assign in @a = split() has been optimised away 1762 * and the @a attached directly to the split op 1763 * Treat the array as appearing on the RHS, i.e. 1764 * ... = (@a = split) 1765 * is treated like 1766 * ... = @a; 1767 */ 1768 1769 if (o->op_flags & OPf_STACKED) { 1770 /* @{expr} = split() - the array expression is tacked 1771 * on as an extra child to split - process kid */ 1772 next_kid = cLISTOPo->op_last; 1773 goto do_next; 1774 } 1775 1776 /* ... else array is directly attached to split op */ 1777 (*scalars_p) += 2; 1778 all_flags |= (PL_op->op_private & OPpSPLIT_LEX) 1779 ? ((o->op_private & OPpLVAL_INTRO) 1780 ? AAS_MY_AGG : AAS_LEX_AGG) 1781 : AAS_PKG_AGG; 1782 goto do_next; 1783 } 1784 (*scalars_p)++; 1785 /* other args of split can't be returned */ 1786 all_flags |= AAS_SAFE_SCALAR; 1787 goto do_next; 1788 1789 case OP_UNDEF: 1790 /* undef on LHS following a var is significant, e.g. 1791 * my $x = 1; 1792 * @a = (($x, undef) = (2 => $x)); 1793 * # @a shoul be (2,1) not (2,2) 1794 * 1795 * undef on RHS counts as a scalar: 1796 * ($x, $y) = (undef, $x); # 2 scalars on RHS: unsafe 1797 */ 1798 if ((!rhs && *scalars_p) || rhs) 1799 (*scalars_p)++; 1800 flags = AAS_SAFE_SCALAR; 1801 break; 1802 1803 case OP_PUSHMARK: 1804 case OP_STUB: 1805 /* these are all no-ops; they don't push a potentially common SV 1806 * onto the stack, so they are neither AAS_DANGEROUS nor 1807 * AAS_SAFE_SCALAR */ 1808 goto do_next; 1809 1810 case OP_PADRANGE: /* Ignore padrange; checking its siblings is enough */ 1811 break; 1812 1813 case OP_NULL: 1814 case OP_LIST: 1815 /* these do nothing, but may have children */ 1816 break; 1817 1818 default: 1819 if (PL_opargs[o->op_type] & OA_DANGEROUS) { 1820 (*scalars_p) += 2; 1821 flags = AAS_DANGEROUS; 1822 break; 1823 } 1824 1825 if ( (PL_opargs[o->op_type] & OA_TARGLEX) 1826 && (o->op_private & OPpTARGET_MY)) 1827 { 1828 (*scalars_p)++; 1829 all_flags |= S_aassign_padcheck(aTHX_ o, rhs) 1830 ? AAS_LEX_SCALAR_COMM : AAS_LEX_SCALAR; 1831 goto do_next; 1832 } 1833 1834 /* if its an unrecognised, non-dangerous op, assume that it 1835 * is the cause of at least one safe scalar */ 1836 (*scalars_p)++; 1837 flags = AAS_SAFE_SCALAR; 1838 break; 1839 } 1840 1841 all_flags |= flags; 1842 1843 /* by default, process all kids next 1844 * XXX this assumes that all other ops are "transparent" - i.e. that 1845 * they can return some of their children. While this true for e.g. 1846 * sort and grep, it's not true for e.g. map. We really need a 1847 * 'transparent' flag added to regen/opcodes 1848 */ 1849 if (o->op_flags & OPf_KIDS) { 1850 next_kid = cUNOPo->op_first; 1851 /* these ops do nothing but may have children; but their 1852 * children should also be treated as top-level */ 1853 if ( o == effective_top_op 1854 && (o->op_type == OP_NULL || o->op_type == OP_LIST) 1855 ) 1856 effective_top_op = next_kid; 1857 } 1858 1859 1860 /* If next_kid is set, someone in the code above wanted us to process 1861 * that kid and all its remaining siblings. Otherwise, work our way 1862 * back up the tree */ 1863 do_next: 1864 while (!next_kid) { 1865 if (o == top_op) 1866 return all_flags; /* at top; no parents/siblings to try */ 1867 if (OpHAS_SIBLING(o)) { 1868 next_kid = o->op_sibparent; 1869 if (o == effective_top_op) 1870 effective_top_op = next_kid; 1871 } 1872 else if (o == effective_top_op) 1873 effective_top_op = o->op_sibparent; 1874 o = o->op_sibparent; /* try parent's next sibling */ 1875 } 1876 o = next_kid; 1877 } /* while */ 1878} 1879 1880/* S_maybe_multideref(): given an op_next chain of ops beginning at 'start' 1881 * that potentially represent a series of one or more aggregate derefs 1882 * (such as $a->[1]{$key}), examine the chain, and if appropriate, convert 1883 * the whole chain to a single OP_MULTIDEREF op (maybe with a few 1884 * additional ops left in too). 1885 * 1886 * The caller will have already verified that the first few ops in the 1887 * chain following 'start' indicate a multideref candidate, and will have 1888 * set 'orig_o' to the point further on in the chain where the first index 1889 * expression (if any) begins. 'orig_action' specifies what type of 1890 * beginning has already been determined by the ops between start..orig_o 1891 * (e.g. $lex_ary[], $pkg_ary->{}, expr->[], etc). 1892 * 1893 * 'hints' contains any hints flags that need adding (currently just 1894 * OPpHINT_STRICT_REFS) as found in any rv2av/hv skipped by the caller. 1895 */ 1896 1897STATIC void 1898S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints) 1899{ 1900 int pass; 1901 UNOP_AUX_item *arg_buf = NULL; 1902 bool reset_start_targ = FALSE; /* start->op_targ needs zeroing */ 1903 int index_skip = -1; /* don't output index arg on this action */ 1904 1905 /* similar to regex compiling, do two passes; the first pass 1906 * determines whether the op chain is convertible and calculates the 1907 * buffer size; the second pass populates the buffer and makes any 1908 * changes necessary to ops (such as moving consts to the pad on 1909 * threaded builds). 1910 * 1911 * NB: for things like Coverity, note that both passes take the same 1912 * path through the logic tree (except for 'if (pass)' bits), since 1913 * both passes are following the same op_next chain; and in 1914 * particular, if it would return early on the second pass, it would 1915 * already have returned early on the first pass. 1916 */ 1917 for (pass = 0; pass < 2; pass++) { 1918 OP *o = orig_o; 1919 UV action = orig_action; 1920 OP *first_elem_op = NULL; /* first seen aelem/helem */ 1921 OP *top_op = NULL; /* highest [ah]elem/exists/del/rv2[ah]v */ 1922 int action_count = 0; /* number of actions seen so far */ 1923 int action_ix = 0; /* action_count % (actions per IV) */ 1924 bool next_is_hash = FALSE; /* is the next lookup to be a hash? */ 1925 bool is_last = FALSE; /* no more derefs to follow */ 1926 bool maybe_aelemfast = FALSE; /* we can replace with aelemfast? */ 1927 UV action_word = 0; /* all actions so far */ 1928 size_t argi = 0; 1929 UNOP_AUX_item *action_ptr = arg_buf; 1930 1931 argi++; /* reserve slot for first action word */ 1932 1933 switch (action) { 1934 case MDEREF_HV_gvsv_vivify_rv2hv_helem: 1935 case MDEREF_HV_gvhv_helem: 1936 next_is_hash = TRUE; 1937 /* FALLTHROUGH */ 1938 case MDEREF_AV_gvsv_vivify_rv2av_aelem: 1939 case MDEREF_AV_gvav_aelem: 1940 if (pass) { 1941#ifdef USE_ITHREADS 1942 arg_buf[argi].pad_offset = cPADOPx(start)->op_padix; 1943 /* stop it being swiped when nulled */ 1944 cPADOPx(start)->op_padix = 0; 1945#else 1946 arg_buf[argi].sv = cSVOPx(start)->op_sv; 1947 cSVOPx(start)->op_sv = NULL; 1948#endif 1949 } 1950 argi++; 1951 break; 1952 1953 case MDEREF_HV_padhv_helem: 1954 case MDEREF_HV_padsv_vivify_rv2hv_helem: 1955 next_is_hash = TRUE; 1956 /* FALLTHROUGH */ 1957 case MDEREF_AV_padav_aelem: 1958 case MDEREF_AV_padsv_vivify_rv2av_aelem: 1959 if (pass) { 1960 arg_buf[argi].pad_offset = start->op_targ; 1961 /* we skip setting op_targ = 0 for now, since the intact 1962 * OP_PADXV is needed by check_hash_fields_and_hekify */ 1963 reset_start_targ = TRUE; 1964 } 1965 argi++; 1966 break; 1967 1968 case MDEREF_HV_pop_rv2hv_helem: 1969 next_is_hash = TRUE; 1970 /* FALLTHROUGH */ 1971 case MDEREF_AV_pop_rv2av_aelem: 1972 break; 1973 1974 default: 1975 NOT_REACHED; /* NOTREACHED */ 1976 return; 1977 } 1978 1979 while (!is_last) { 1980 /* look for another (rv2av/hv; get index; 1981 * aelem/helem/exists/delele) sequence */ 1982 1983 OP *kid; 1984 bool is_deref; 1985 bool ok; 1986 UV index_type = MDEREF_INDEX_none; 1987 1988 if (action_count) { 1989 /* if this is not the first lookup, consume the rv2av/hv */ 1990 1991 /* for N levels of aggregate lookup, we normally expect 1992 * that the first N-1 [ah]elem ops will be flagged as 1993 * /DEREF (so they autovivify if necessary), and the last 1994 * lookup op not to be. 1995 * For other things (like @{$h{k1}{k2}}) extra scope or 1996 * leave ops can appear, so abandon the effort in that 1997 * case */ 1998 if (o->op_type != OP_RV2AV && o->op_type != OP_RV2HV) 1999 return; 2000 2001 /* rv2av or rv2hv sKR/1 */ 2002 2003 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS 2004 |OPf_REF|OPf_MOD|OPf_SPECIAL))); 2005 if (o->op_flags != (OPf_WANT_SCALAR|OPf_KIDS|OPf_REF)) 2006 return; 2007 2008 /* at this point, we wouldn't expect any of these 2009 * possible private flags: 2010 * OPpMAYBE_LVSUB, OPpOUR_INTRO, OPpLVAL_INTRO 2011 * OPpTRUEBOOL, OPpMAYBE_TRUEBOOL (rv2hv only) 2012 */ 2013 ASSUME(!(o->op_private & 2014 ~(OPpHINT_STRICT_REFS|OPpARG1_MASK|OPpSLICEWARNING))); 2015 2016 hints = (o->op_private & OPpHINT_STRICT_REFS); 2017 2018 /* make sure the type of the previous /DEREF matches the 2019 * type of the next lookup */ 2020 ASSUME(o->op_type == (next_is_hash ? OP_RV2HV : OP_RV2AV)); 2021 top_op = o; 2022 2023 action = next_is_hash 2024 ? MDEREF_HV_vivify_rv2hv_helem 2025 : MDEREF_AV_vivify_rv2av_aelem; 2026 o = o->op_next; 2027 } 2028 2029 /* if this is the second pass, and we're at the depth where 2030 * previously we encountered a non-simple index expression, 2031 * stop processing the index at this point */ 2032 if (action_count != index_skip) { 2033 2034 /* look for one or more simple ops that return an array 2035 * index or hash key */ 2036 2037 switch (o->op_type) { 2038 case OP_PADSV: 2039 /* it may be a lexical var index */ 2040 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_PARENS 2041 |OPf_REF|OPf_MOD|OPf_SPECIAL))); 2042 ASSUME(!(o->op_private & 2043 ~(OPpPAD_STATE|OPpDEREF|OPpLVAL_INTRO))); 2044 2045 if ( OP_GIMME(o,0) == G_SCALAR 2046 && !(o->op_flags & (OPf_REF|OPf_MOD)) 2047 && o->op_private == 0) 2048 { 2049 if (pass) 2050 arg_buf[argi].pad_offset = o->op_targ; 2051 argi++; 2052 index_type = MDEREF_INDEX_padsv; 2053 o = o->op_next; 2054 } 2055 break; 2056 2057 case OP_CONST: 2058 if (next_is_hash) { 2059 /* it's a constant hash index */ 2060 if (!(SvFLAGS(cSVOPo_sv) & (SVf_IOK|SVf_NOK|SVf_POK))) 2061 /* "use constant foo => FOO; $h{+foo}" for 2062 * some weird FOO, can leave you with constants 2063 * that aren't simple strings. It's not worth 2064 * the extra hassle for those edge cases */ 2065 break; 2066 2067 { 2068 UNOP *rop = NULL; 2069 OP * helem_op = o->op_next; 2070 2071 ASSUME( helem_op->op_type == OP_HELEM 2072 || helem_op->op_type == OP_NULL 2073 || pass == 0); 2074 if (helem_op->op_type == OP_HELEM) { 2075 rop = cUNOPx(cBINOPx(helem_op)->op_first); 2076 if ( helem_op->op_private & OPpLVAL_INTRO 2077 || rop->op_type != OP_RV2HV 2078 ) 2079 rop = NULL; 2080 } 2081 /* on first pass just check; on second pass 2082 * hekify */ 2083 check_hash_fields_and_hekify(rop, cSVOPo, pass); 2084 } 2085 2086 if (pass) { 2087#ifdef USE_ITHREADS 2088 /* Relocate sv to the pad for thread safety */ 2089 op_relocate_sv(&cSVOPo->op_sv, &o->op_targ); 2090 arg_buf[argi].pad_offset = o->op_targ; 2091 o->op_targ = 0; 2092#else 2093 arg_buf[argi].sv = cSVOPx_sv(o); 2094#endif 2095 } 2096 } 2097 else { 2098 /* it's a constant array index */ 2099 IV iv; 2100 SV *ix_sv = cSVOPo->op_sv; 2101 if (!SvIOK(ix_sv)) 2102 break; 2103 iv = SvIV(ix_sv); 2104 2105 if ( action_count == 0 2106 && iv >= -128 2107 && iv <= 127 2108 && ( action == MDEREF_AV_padav_aelem 2109 || action == MDEREF_AV_gvav_aelem) 2110 ) 2111 maybe_aelemfast = TRUE; 2112 2113 if (pass) { 2114 arg_buf[argi].iv = iv; 2115 SvREFCNT_dec_NN(cSVOPo->op_sv); 2116 } 2117 } 2118 if (pass) 2119 /* we've taken ownership of the SV */ 2120 cSVOPo->op_sv = NULL; 2121 argi++; 2122 index_type = MDEREF_INDEX_const; 2123 o = o->op_next; 2124 break; 2125 2126 case OP_GV: 2127 /* it may be a package var index */ 2128 2129 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_PARENS|OPf_SPECIAL))); 2130 ASSUME(!(o->op_private & ~(OPpEARLY_CV))); 2131 if ( (o->op_flags & ~(OPf_PARENS|OPf_SPECIAL)) != OPf_WANT_SCALAR 2132 || o->op_private != 0 2133 ) 2134 break; 2135 2136 kid = o->op_next; 2137 if (kid->op_type != OP_RV2SV) 2138 break; 2139 2140 ASSUME(!(kid->op_flags & 2141 ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_REF 2142 |OPf_SPECIAL|OPf_PARENS))); 2143 ASSUME(!(kid->op_private & 2144 ~(OPpARG1_MASK 2145 |OPpHINT_STRICT_REFS|OPpOUR_INTRO 2146 |OPpDEREF|OPpLVAL_INTRO))); 2147 if( (kid->op_flags &~ OPf_PARENS) 2148 != (OPf_WANT_SCALAR|OPf_KIDS) 2149 || (kid->op_private & ~(OPpARG1_MASK|HINT_STRICT_REFS)) 2150 ) 2151 break; 2152 2153 if (pass) { 2154#ifdef USE_ITHREADS 2155 arg_buf[argi].pad_offset = cPADOPx(o)->op_padix; 2156 /* stop it being swiped when nulled */ 2157 cPADOPx(o)->op_padix = 0; 2158#else 2159 arg_buf[argi].sv = cSVOPx(o)->op_sv; 2160 cSVOPo->op_sv = NULL; 2161#endif 2162 } 2163 argi++; 2164 index_type = MDEREF_INDEX_gvsv; 2165 o = kid->op_next; 2166 break; 2167 2168 } /* switch */ 2169 } /* action_count != index_skip */ 2170 2171 action |= index_type; 2172 2173 2174 /* at this point we have either: 2175 * * detected what looks like a simple index expression, 2176 * and expect the next op to be an [ah]elem, or 2177 * an nulled [ah]elem followed by a delete or exists; 2178 * * found a more complex expression, so something other 2179 * than the above follows. 2180 */ 2181 2182 /* possibly an optimised away [ah]elem (where op_next is 2183 * exists or delete) */ 2184 if (o->op_type == OP_NULL) 2185 o = o->op_next; 2186 2187 /* at this point we're looking for an OP_AELEM, OP_HELEM, 2188 * OP_EXISTS or OP_DELETE */ 2189 2190 /* if a custom array/hash access checker is in scope, 2191 * abandon optimisation attempt */ 2192 if ( (o->op_type == OP_AELEM || o->op_type == OP_HELEM) 2193 && PL_check[o->op_type] != Perl_ck_null) 2194 return; 2195 /* similarly for customised exists and delete */ 2196 if ( (o->op_type == OP_EXISTS) 2197 && PL_check[o->op_type] != Perl_ck_exists) 2198 return; 2199 if ( (o->op_type == OP_DELETE) 2200 && PL_check[o->op_type] != Perl_ck_delete) 2201 return; 2202 2203 if ( o->op_type != OP_AELEM 2204 || (o->op_private & 2205 (OPpLVAL_INTRO|OPpLVAL_DEFER|OPpDEREF|OPpMAYBE_LVSUB)) 2206 ) 2207 maybe_aelemfast = FALSE; 2208 2209 /* look for aelem/helem/exists/delete. If it's not the last elem 2210 * lookup, it *must* have OPpDEREF_AV/HV, but not many other 2211 * flags; if it's the last, then it mustn't have 2212 * OPpDEREF_AV/HV, but may have lots of other flags, like 2213 * OPpLVAL_INTRO etc 2214 */ 2215 2216 if ( index_type == MDEREF_INDEX_none 2217 || ( o->op_type != OP_AELEM && o->op_type != OP_HELEM 2218 && o->op_type != OP_EXISTS && o->op_type != OP_DELETE) 2219 ) 2220 ok = FALSE; 2221 else { 2222 /* we have aelem/helem/exists/delete with valid simple index */ 2223 2224 is_deref = (o->op_type == OP_AELEM || o->op_type == OP_HELEM) 2225 && ( (o->op_private & OPpDEREF) == OPpDEREF_AV 2226 || (o->op_private & OPpDEREF) == OPpDEREF_HV); 2227 2228 /* This doesn't make much sense but is legal: 2229 * @{ local $x[0][0] } = 1 2230 * Since scope exit will undo the autovivification, 2231 * don't bother in the first place. The OP_LEAVE 2232 * assertion is in case there are other cases of both 2233 * OPpLVAL_INTRO and OPpDEREF which don't include a scope 2234 * exit that would undo the local - in which case this 2235 * block of code would need rethinking. 2236 */ 2237 if (is_deref && (o->op_private & OPpLVAL_INTRO)) { 2238#ifdef DEBUGGING 2239 OP *n = o->op_next; 2240 while (n && ( n->op_type == OP_NULL 2241 || n->op_type == OP_LIST 2242 || n->op_type == OP_SCALAR)) 2243 n = n->op_next; 2244 assert(n && n->op_type == OP_LEAVE); 2245#endif 2246 o->op_private &= ~OPpDEREF; 2247 is_deref = FALSE; 2248 } 2249 2250 if (is_deref) { 2251 ASSUME(!(o->op_flags & 2252 ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_PARENS))); 2253 ASSUME(!(o->op_private & ~(OPpARG2_MASK|OPpDEREF))); 2254 2255 ok = (o->op_flags &~ OPf_PARENS) 2256 == (OPf_WANT_SCALAR|OPf_KIDS|OPf_MOD) 2257 && !(o->op_private & ~(OPpDEREF|OPpARG2_MASK)); 2258 } 2259 else if (o->op_type == OP_EXISTS) { 2260 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS 2261 |OPf_REF|OPf_MOD|OPf_SPECIAL))); 2262 ASSUME(!(o->op_private & ~(OPpARG1_MASK|OPpEXISTS_SUB))); 2263 ok = !(o->op_private & ~OPpARG1_MASK); 2264 } 2265 else if (o->op_type == OP_DELETE) { 2266 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS 2267 |OPf_REF|OPf_MOD|OPf_SPECIAL))); 2268 ASSUME(!(o->op_private & 2269 ~(OPpARG1_MASK|OPpSLICE|OPpLVAL_INTRO))); 2270 /* don't handle slices or 'local delete'; the latter 2271 * is fairly rare, and has a complex runtime */ 2272 ok = !(o->op_private & ~OPpARG1_MASK); 2273 if (OP_TYPE_IS_OR_WAS(cUNOPo->op_first, OP_AELEM)) 2274 /* skip handling run-tome error */ 2275 ok = (ok && cBOOL(o->op_flags & OPf_SPECIAL)); 2276 } 2277 else { 2278 ASSUME(o->op_type == OP_AELEM || o->op_type == OP_HELEM); 2279 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_MOD 2280 |OPf_PARENS|OPf_REF|OPf_SPECIAL))); 2281 ASSUME(!(o->op_private & ~(OPpARG2_MASK|OPpMAYBE_LVSUB 2282 |OPpLVAL_DEFER|OPpDEREF|OPpLVAL_INTRO))); 2283 ok = (o->op_private & OPpDEREF) != OPpDEREF_SV; 2284 } 2285 } 2286 2287 if (ok) { 2288 if (!first_elem_op) 2289 first_elem_op = o; 2290 top_op = o; 2291 if (is_deref) { 2292 next_is_hash = cBOOL((o->op_private & OPpDEREF) == OPpDEREF_HV); 2293 o = o->op_next; 2294 } 2295 else { 2296 is_last = TRUE; 2297 action |= MDEREF_FLAG_last; 2298 } 2299 } 2300 else { 2301 /* at this point we have something that started 2302 * promisingly enough (with rv2av or whatever), but failed 2303 * to find a simple index followed by an 2304 * aelem/helem/exists/delete. If this is the first action, 2305 * give up; but if we've already seen at least one 2306 * aelem/helem, then keep them and add a new action with 2307 * MDEREF_INDEX_none, which causes it to do the vivify 2308 * from the end of the previous lookup, and do the deref, 2309 * but stop at that point. So $a[0][expr] will do one 2310 * av_fetch, vivify and deref, then continue executing at 2311 * expr */ 2312 if (!action_count) 2313 return; 2314 is_last = TRUE; 2315 index_skip = action_count; 2316 action |= MDEREF_FLAG_last; 2317 if (index_type != MDEREF_INDEX_none) 2318 argi--; 2319 } 2320 2321 action_word |= (action << (action_ix * MDEREF_SHIFT)); 2322 action_ix++; 2323 action_count++; 2324 /* if there's no space for the next action, reserve a new slot 2325 * for it *before* we start adding args for that action */ 2326 if ((action_ix + 1) * MDEREF_SHIFT > UVSIZE*8) { 2327 if (pass) { 2328 action_ptr->uv = action_word; 2329 action_ptr = arg_buf + argi; 2330 } 2331 action_word = 0; 2332 argi++; 2333 action_ix = 0; 2334 } 2335 } /* while !is_last */ 2336 2337 /* success! */ 2338 2339 if (!action_ix) 2340 /* slot reserved for next action word not now needed */ 2341 argi--; 2342 else if (pass) 2343 action_ptr->uv = action_word; 2344 2345 if (pass) { 2346 OP *mderef; 2347 OP *p, *q; 2348 2349 mderef = newUNOP_AUX(OP_MULTIDEREF, 0, NULL, arg_buf); 2350 if (index_skip == -1) { 2351 mderef->op_flags = o->op_flags 2352 & (OPf_WANT|OPf_MOD|(next_is_hash ? OPf_SPECIAL : 0)); 2353 if (o->op_type == OP_EXISTS) 2354 mderef->op_private = OPpMULTIDEREF_EXISTS; 2355 else if (o->op_type == OP_DELETE) 2356 mderef->op_private = OPpMULTIDEREF_DELETE; 2357 else 2358 mderef->op_private = o->op_private 2359 & (OPpMAYBE_LVSUB|OPpLVAL_DEFER|OPpLVAL_INTRO); 2360 } 2361 /* accumulate strictness from every level (although I don't think 2362 * they can actually vary) */ 2363 mderef->op_private |= hints; 2364 2365 /* integrate the new multideref op into the optree and the 2366 * op_next chain. 2367 * 2368 * In general an op like aelem or helem has two child 2369 * sub-trees: the aggregate expression (a_expr) and the 2370 * index expression (i_expr): 2371 * 2372 * aelem 2373 * | 2374 * a_expr - i_expr 2375 * 2376 * The a_expr returns an AV or HV, while the i-expr returns an 2377 * index. In general a multideref replaces most or all of a 2378 * multi-level tree, e.g. 2379 * 2380 * exists 2381 * | 2382 * ex-aelem 2383 * | 2384 * rv2av - i_expr1 2385 * | 2386 * helem 2387 * | 2388 * rv2hv - i_expr2 2389 * | 2390 * aelem 2391 * | 2392 * a_expr - i_expr3 2393 * 2394 * With multideref, all the i_exprs will be simple vars or 2395 * constants, except that i_expr1 may be arbitrary in the case 2396 * of MDEREF_INDEX_none. 2397 * 2398 * The bottom-most a_expr will be either: 2399 * 1) a simple var (so padXv or gv+rv2Xv); 2400 * 2) a simple scalar var dereferenced (e.g. $r->[0]): 2401 * so a simple var with an extra rv2Xv; 2402 * 3) or an arbitrary expression. 2403 * 2404 * 'start', the first op in the execution chain, will point to 2405 * 1),2): the padXv or gv op; 2406 * 3): the rv2Xv which forms the last op in the a_expr 2407 * execution chain, and the top-most op in the a_expr 2408 * subtree. 2409 * 2410 * For all cases, the 'start' node is no longer required, 2411 * but we can't free it since one or more external nodes 2412 * may point to it. E.g. consider 2413 * $h{foo} = $a ? $b : $c 2414 * Here, both the op_next and op_other branches of the 2415 * cond_expr point to the gv[*h] of the hash expression, so 2416 * we can't free the 'start' op. 2417 * 2418 * For expr->[...], we need to save the subtree containing the 2419 * expression; for the other cases, we just need to save the 2420 * start node. 2421 * So in all cases, we null the start op and keep it around by 2422 * making it the child of the multideref op; for the expr-> 2423 * case, the expr will be a subtree of the start node. 2424 * 2425 * So in the simple 1,2 case the optree above changes to 2426 * 2427 * ex-exists 2428 * | 2429 * multideref 2430 * | 2431 * ex-gv (or ex-padxv) 2432 * 2433 * with the op_next chain being 2434 * 2435 * -> ex-gv -> multideref -> op-following-ex-exists -> 2436 * 2437 * In the 3 case, we have 2438 * 2439 * ex-exists 2440 * | 2441 * multideref 2442 * | 2443 * ex-rv2xv 2444 * | 2445 * rest-of-a_expr 2446 * subtree 2447 * 2448 * and 2449 * 2450 * -> rest-of-a_expr subtree -> 2451 * ex-rv2xv -> multideref -> op-following-ex-exists -> 2452 * 2453 * 2454 * Where the last i_expr is non-simple (i.e. MDEREF_INDEX_none, 2455 * e.g. $a[0]{foo}[$x+1], the next rv2xv is nulled and the 2456 * multideref attached as the child, e.g. 2457 * 2458 * exists 2459 * | 2460 * ex-aelem 2461 * | 2462 * ex-rv2av - i_expr1 2463 * | 2464 * multideref 2465 * | 2466 * ex-whatever 2467 * 2468 */ 2469 2470 /* if we free this op, don't free the pad entry */ 2471 if (reset_start_targ) 2472 start->op_targ = 0; 2473 2474 2475 /* Cut the bit we need to save out of the tree and attach to 2476 * the multideref op, then free the rest of the tree */ 2477 2478 /* find parent of node to be detached (for use by splice) */ 2479 p = first_elem_op; 2480 if ( orig_action == MDEREF_AV_pop_rv2av_aelem 2481 || orig_action == MDEREF_HV_pop_rv2hv_helem) 2482 { 2483 /* there is an arbitrary expression preceding us, e.g. 2484 * expr->[..]? so we need to save the 'expr' subtree */ 2485 if (p->op_type == OP_EXISTS || p->op_type == OP_DELETE) 2486 p = cUNOPx(p)->op_first; 2487 ASSUME( start->op_type == OP_RV2AV 2488 || start->op_type == OP_RV2HV); 2489 } 2490 else { 2491 /* either a padXv or rv2Xv+gv, maybe with an ex-Xelem 2492 * above for exists/delete. */ 2493 while ( (p->op_flags & OPf_KIDS) 2494 && cUNOPx(p)->op_first != start 2495 ) 2496 p = cUNOPx(p)->op_first; 2497 } 2498 ASSUME(cUNOPx(p)->op_first == start); 2499 2500 /* detach from main tree, and re-attach under the multideref */ 2501 op_sibling_splice(mderef, NULL, 0, 2502 op_sibling_splice(p, NULL, 1, NULL)); 2503 op_null(start); 2504 2505 start->op_next = mderef; 2506 2507 mderef->op_next = index_skip == -1 ? o->op_next : o; 2508 2509 /* excise and free the original tree, and replace with 2510 * the multideref op */ 2511 p = op_sibling_splice(top_op, NULL, -1, mderef); 2512 while (p) { 2513 q = OpSIBLING(p); 2514 op_free(p); 2515 p = q; 2516 } 2517 op_null(top_op); 2518 } 2519 else { 2520 Size_t size = argi; 2521 2522 if (maybe_aelemfast && action_count == 1) 2523 return; 2524 2525 arg_buf = (UNOP_AUX_item*)PerlMemShared_malloc( 2526 sizeof(UNOP_AUX_item) * (size + 1)); 2527 /* for dumping etc: store the length in a hidden first slot; 2528 * we set the op_aux pointer to the second slot */ 2529 arg_buf->uv = size; 2530 arg_buf++; 2531 } 2532 } /* for (pass = ...) */ 2533} 2534 2535/* See if the ops following o are such that o will always be executed in 2536 * boolean context: that is, the SV which o pushes onto the stack will 2537 * only ever be consumed by later ops via SvTRUE(sv) or similar. 2538 * If so, set a suitable private flag on o. Normally this will be 2539 * bool_flag; but see below why maybe_flag is needed too. 2540 * 2541 * Typically the two flags you pass will be the generic OPpTRUEBOOL and 2542 * OPpMAYBE_TRUEBOOL, buts it's possible that for some ops those bits may 2543 * already be taken, so you'll have to give that op two different flags. 2544 * 2545 * More explanation of 'maybe_flag' and 'safe_and' parameters. 2546 * The binary logical ops &&, ||, // (plus 'if' and 'unless' which use 2547 * those underlying ops) short-circuit, which means that rather than 2548 * necessarily returning a truth value, they may return the LH argument, 2549 * which may not be boolean. For example in $x = (keys %h || -1), keys 2550 * should return a key count rather than a boolean, even though its 2551 * sort-of being used in boolean context. 2552 * 2553 * So we only consider such logical ops to provide boolean context to 2554 * their LH argument if they themselves are in void or boolean context. 2555 * However, sometimes the context isn't known until run-time. In this 2556 * case the op is marked with the maybe_flag flag it. 2557 * 2558 * Consider the following. 2559 * 2560 * sub f { ....; if (%h) { .... } } 2561 * 2562 * This is actually compiled as 2563 * 2564 * sub f { ....; %h && do { .... } } 2565 * 2566 * Here we won't know until runtime whether the final statement (and hence 2567 * the &&) is in void context and so is safe to return a boolean value. 2568 * So mark o with maybe_flag rather than the bool_flag. 2569 * Note that there is cost associated with determining context at runtime 2570 * (e.g. a call to block_gimme()), so it may not be worth setting (at 2571 * compile time) and testing (at runtime) maybe_flag if the scalar verses 2572 * boolean costs savings are marginal. 2573 * 2574 * However, we can do slightly better with && (compared to || and //): 2575 * this op only returns its LH argument when that argument is false. In 2576 * this case, as long as the op promises to return a false value which is 2577 * valid in both boolean and scalar contexts, we can mark an op consumed 2578 * by && with bool_flag rather than maybe_flag. 2579 * For example as long as pp_padhv and pp_rv2hv return &PL_sv_zero rather 2580 * than &PL_sv_no for a false result in boolean context, then it's safe. An 2581 * op which promises to handle this case is indicated by setting safe_and 2582 * to true. 2583 */ 2584 2585static void 2586S_check_for_bool_cxt(OP*o, bool safe_and, U8 bool_flag, U8 maybe_flag) 2587{ 2588 OP *lop; 2589 U8 flag = 0; 2590 2591 assert((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR); 2592 2593 /* OPpTARGET_MY and boolean context probably don't mix well. 2594 * If someone finds a valid use case, maybe add an extra flag to this 2595 * function which indicates its safe to do so for this op? */ 2596 assert(!( (PL_opargs[o->op_type] & OA_TARGLEX) 2597 && (o->op_private & OPpTARGET_MY))); 2598 2599 lop = o->op_next; 2600 2601 while (lop) { 2602 switch (lop->op_type) { 2603 case OP_NULL: 2604 case OP_SCALAR: 2605 break; 2606 2607 /* these two consume the stack argument in the scalar case, 2608 * and treat it as a boolean in the non linenumber case */ 2609 case OP_FLIP: 2610 case OP_FLOP: 2611 if ( ((lop->op_flags & OPf_WANT) == OPf_WANT_LIST) 2612 || (lop->op_private & OPpFLIP_LINENUM)) 2613 { 2614 lop = NULL; 2615 break; 2616 } 2617 /* FALLTHROUGH */ 2618 /* these never leave the original value on the stack */ 2619 case OP_NOT: 2620 case OP_XOR: 2621 case OP_COND_EXPR: 2622 case OP_GREPWHILE: 2623 flag = bool_flag; 2624 lop = NULL; 2625 break; 2626 2627 /* OR DOR and AND evaluate their arg as a boolean, but then may 2628 * leave the original scalar value on the stack when following the 2629 * op_next route. If not in void context, we need to ensure 2630 * that whatever follows consumes the arg only in boolean context 2631 * too. 2632 */ 2633 case OP_AND: 2634 if (safe_and) { 2635 flag = bool_flag; 2636 lop = NULL; 2637 break; 2638 } 2639 /* FALLTHROUGH */ 2640 case OP_OR: 2641 case OP_DOR: 2642 if ((lop->op_flags & OPf_WANT) == OPf_WANT_VOID) { 2643 flag = bool_flag; 2644 lop = NULL; 2645 } 2646 else if (!(lop->op_flags & OPf_WANT)) { 2647 /* unknown context - decide at runtime */ 2648 flag = maybe_flag; 2649 lop = NULL; 2650 } 2651 break; 2652 2653 default: 2654 lop = NULL; 2655 break; 2656 } 2657 2658 if (lop) 2659 lop = lop->op_next; 2660 } 2661 2662 o->op_private |= flag; 2663} 2664 2665/* mechanism for deferring recursion in rpeep() */ 2666 2667#define MAX_DEFERRED 4 2668 2669#define DEFER(o) \ 2670 STMT_START { \ 2671 if (defer_ix == (MAX_DEFERRED-1)) { \ 2672 OP **defer = defer_queue[defer_base]; \ 2673 CALL_RPEEP(*defer); \ 2674 op_prune_chain_head(defer); \ 2675 defer_base = (defer_base + 1) % MAX_DEFERRED; \ 2676 defer_ix--; \ 2677 } \ 2678 defer_queue[(defer_base + ++defer_ix) % MAX_DEFERRED] = &(o); \ 2679 } STMT_END 2680 2681#define IS_AND_OP(o) (o->op_type == OP_AND) 2682#define IS_OR_OP(o) (o->op_type == OP_OR) 2683 2684/* A peephole optimizer. We visit the ops in the order they're to execute. 2685 * See the comments at the top of this file for more details about when 2686 * peep() is called */ 2687 2688void 2689Perl_rpeep(pTHX_ OP *o) 2690{ 2691 OP* oldop = NULL; 2692 OP* oldoldop = NULL; 2693 OP** defer_queue[MAX_DEFERRED] = { NULL }; /* small queue of deferred branches */ 2694 int defer_base = 0; 2695 int defer_ix = -1; 2696 2697 if (!o || o->op_opt) 2698 return; 2699 2700 assert(o->op_type != OP_FREED); 2701 2702 ENTER; 2703 SAVEOP(); 2704 SAVEVPTR(PL_curcop); 2705 for (;; o = o->op_next) { 2706 if (o && o->op_opt) 2707 o = NULL; 2708 if (!o) { 2709 while (defer_ix >= 0) { 2710 OP **defer = 2711 defer_queue[(defer_base + defer_ix--) % MAX_DEFERRED]; 2712 CALL_RPEEP(*defer); 2713 op_prune_chain_head(defer); 2714 } 2715 break; 2716 } 2717 2718 redo: 2719 2720 /* oldoldop -> oldop -> o should be a chain of 3 adjacent ops */ 2721 assert(!oldoldop || oldoldop->op_next == oldop); 2722 assert(!oldop || oldop->op_next == o); 2723 2724 /* By default, this op has now been optimised. A couple of cases below 2725 clear this again. */ 2726 o->op_opt = 1; 2727 PL_op = o; 2728 2729 /* look for a series of 1 or more aggregate derefs, e.g. 2730 * $a[1]{foo}[$i]{$k} 2731 * and replace with a single OP_MULTIDEREF op. 2732 * Each index must be either a const, or a simple variable, 2733 * 2734 * First, look for likely combinations of starting ops, 2735 * corresponding to (global and lexical variants of) 2736 * $a[...] $h{...} 2737 * $r->[...] $r->{...} 2738 * (preceding expression)->[...] 2739 * (preceding expression)->{...} 2740 * and if so, call maybe_multideref() to do a full inspection 2741 * of the op chain and if appropriate, replace with an 2742 * OP_MULTIDEREF 2743 */ 2744 { 2745 UV action; 2746 OP *o2 = o; 2747 U8 hints = 0; 2748 2749 switch (o2->op_type) { 2750 case OP_GV: 2751 /* $pkg[..] : gv[*pkg] 2752 * $pkg->[...]: gv[*pkg]; rv2sv sKM/DREFAV */ 2753 2754 /* Fail if there are new op flag combinations that we're 2755 * not aware of, rather than: 2756 * * silently failing to optimise, or 2757 * * silently optimising the flag away. 2758 * If this ASSUME starts failing, examine what new flag 2759 * has been added to the op, and decide whether the 2760 * optimisation should still occur with that flag, then 2761 * update the code accordingly. This applies to all the 2762 * other ASSUMEs in the block of code too. 2763 */ 2764 ASSUME(!(o2->op_flags & 2765 ~(OPf_WANT|OPf_MOD|OPf_PARENS|OPf_SPECIAL))); 2766 ASSUME(!(o2->op_private & ~OPpEARLY_CV)); 2767 2768 o2 = o2->op_next; 2769 2770 if (o2->op_type == OP_RV2AV) { 2771 action = MDEREF_AV_gvav_aelem; 2772 goto do_deref; 2773 } 2774 2775 if (o2->op_type == OP_RV2HV) { 2776 action = MDEREF_HV_gvhv_helem; 2777 goto do_deref; 2778 } 2779 2780 if (o2->op_type != OP_RV2SV) 2781 break; 2782 2783 /* at this point we've seen gv,rv2sv, so the only valid 2784 * construct left is $pkg->[] or $pkg->{} */ 2785 2786 ASSUME(!(o2->op_flags & OPf_STACKED)); 2787 if ((o2->op_flags & (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL)) 2788 != (OPf_WANT_SCALAR|OPf_MOD)) 2789 break; 2790 2791 ASSUME(!(o2->op_private & ~(OPpARG1_MASK|HINT_STRICT_REFS 2792 |OPpOUR_INTRO|OPpDEREF|OPpLVAL_INTRO))); 2793 if (o2->op_private & (OPpOUR_INTRO|OPpLVAL_INTRO)) 2794 break; 2795 if ( (o2->op_private & OPpDEREF) != OPpDEREF_AV 2796 && (o2->op_private & OPpDEREF) != OPpDEREF_HV) 2797 break; 2798 2799 o2 = o2->op_next; 2800 if (o2->op_type == OP_RV2AV) { 2801 action = MDEREF_AV_gvsv_vivify_rv2av_aelem; 2802 goto do_deref; 2803 } 2804 if (o2->op_type == OP_RV2HV) { 2805 action = MDEREF_HV_gvsv_vivify_rv2hv_helem; 2806 goto do_deref; 2807 } 2808 break; 2809 2810 case OP_PADSV: 2811 /* $lex->[...]: padsv[$lex] sM/DREFAV */ 2812 2813 ASSUME(!(o2->op_flags & 2814 ~(OPf_WANT|OPf_PARENS|OPf_REF|OPf_MOD|OPf_SPECIAL))); 2815 if ((o2->op_flags & 2816 (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL)) 2817 != (OPf_WANT_SCALAR|OPf_MOD)) 2818 break; 2819 2820 ASSUME(!(o2->op_private & 2821 ~(OPpPAD_STATE|OPpDEREF|OPpLVAL_INTRO))); 2822 /* skip if state or intro, or not a deref */ 2823 if ( o2->op_private != OPpDEREF_AV 2824 && o2->op_private != OPpDEREF_HV) 2825 break; 2826 2827 o2 = o2->op_next; 2828 if (o2->op_type == OP_RV2AV) { 2829 action = MDEREF_AV_padsv_vivify_rv2av_aelem; 2830 goto do_deref; 2831 } 2832 if (o2->op_type == OP_RV2HV) { 2833 action = MDEREF_HV_padsv_vivify_rv2hv_helem; 2834 goto do_deref; 2835 } 2836 break; 2837 2838 case OP_PADAV: 2839 case OP_PADHV: 2840 /* $lex[..]: padav[@lex:1,2] sR * 2841 * or $lex{..}: padhv[%lex:1,2] sR */ 2842 ASSUME(!(o2->op_flags & ~(OPf_WANT|OPf_MOD|OPf_PARENS| 2843 OPf_REF|OPf_SPECIAL))); 2844 if ((o2->op_flags & 2845 (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL)) 2846 != (OPf_WANT_SCALAR|OPf_REF)) 2847 break; 2848 if (o2->op_flags != (OPf_WANT_SCALAR|OPf_REF)) 2849 break; 2850 /* OPf_PARENS isn't currently used in this case; 2851 * if that changes, let us know! */ 2852 ASSUME(!(o2->op_flags & OPf_PARENS)); 2853 2854 /* at this point, we wouldn't expect any of the remaining 2855 * possible private flags: 2856 * OPpPAD_STATE, OPpLVAL_INTRO, OPpTRUEBOOL, 2857 * OPpMAYBE_TRUEBOOL, OPpMAYBE_LVSUB 2858 * 2859 * OPpSLICEWARNING shouldn't affect runtime 2860 */ 2861 ASSUME(!(o2->op_private & ~(OPpSLICEWARNING))); 2862 2863 action = o2->op_type == OP_PADAV 2864 ? MDEREF_AV_padav_aelem 2865 : MDEREF_HV_padhv_helem; 2866 o2 = o2->op_next; 2867 S_maybe_multideref(aTHX_ o, o2, action, 0); 2868 break; 2869 2870 2871 case OP_RV2AV: 2872 case OP_RV2HV: 2873 action = o2->op_type == OP_RV2AV 2874 ? MDEREF_AV_pop_rv2av_aelem 2875 : MDEREF_HV_pop_rv2hv_helem; 2876 /* FALLTHROUGH */ 2877 do_deref: 2878 /* (expr)->[...]: rv2av sKR/1; 2879 * (expr)->{...}: rv2hv sKR/1; */ 2880 2881 ASSUME(o2->op_type == OP_RV2AV || o2->op_type == OP_RV2HV); 2882 2883 ASSUME(!(o2->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS 2884 |OPf_REF|OPf_MOD|OPf_STACKED|OPf_SPECIAL))); 2885 if (o2->op_flags != (OPf_WANT_SCALAR|OPf_KIDS|OPf_REF)) 2886 break; 2887 2888 /* at this point, we wouldn't expect any of these 2889 * possible private flags: 2890 * OPpMAYBE_LVSUB, OPpLVAL_INTRO 2891 * OPpTRUEBOOL, OPpMAYBE_TRUEBOOL, (rv2hv only) 2892 */ 2893 ASSUME(!(o2->op_private & 2894 ~(OPpHINT_STRICT_REFS|OPpARG1_MASK|OPpSLICEWARNING 2895 |OPpOUR_INTRO))); 2896 hints |= (o2->op_private & OPpHINT_STRICT_REFS); 2897 2898 o2 = o2->op_next; 2899 2900 S_maybe_multideref(aTHX_ o, o2, action, hints); 2901 break; 2902 2903 default: 2904 break; 2905 } 2906 } 2907 2908 2909 switch (o->op_type) { 2910 case OP_DBSTATE: 2911 PL_curcop = ((COP*)o); /* for warnings */ 2912 break; 2913 case OP_NEXTSTATE: 2914 PL_curcop = ((COP*)o); /* for warnings */ 2915 2916 /* Optimise a "return ..." at the end of a sub to just be "...". 2917 * This saves 2 ops. Before: 2918 * 1 <;> nextstate(main 1 -e:1) v ->2 2919 * 4 <@> return K ->5 2920 * 2 <0> pushmark s ->3 2921 * - <1> ex-rv2sv sK/1 ->4 2922 * 3 <#> gvsv[*cat] s ->4 2923 * 2924 * After: 2925 * - <@> return K ->- 2926 * - <0> pushmark s ->2 2927 * - <1> ex-rv2sv sK/1 ->- 2928 * 2 <$> gvsv(*cat) s ->3 2929 */ 2930 { 2931 OP *next = o->op_next; 2932 OP *sibling = OpSIBLING(o); 2933 if ( OP_TYPE_IS(next, OP_PUSHMARK) 2934 && OP_TYPE_IS(sibling, OP_RETURN) 2935 && OP_TYPE_IS(sibling->op_next, OP_LINESEQ) 2936 && ( OP_TYPE_IS(sibling->op_next->op_next, OP_LEAVESUB) 2937 ||OP_TYPE_IS(sibling->op_next->op_next, 2938 OP_LEAVESUBLV)) 2939 && cUNOPx(sibling)->op_first == next 2940 && OpHAS_SIBLING(next) && OpSIBLING(next)->op_next 2941 && next->op_next 2942 ) { 2943 /* Look through the PUSHMARK's siblings for one that 2944 * points to the RETURN */ 2945 OP *top = OpSIBLING(next); 2946 while (top && top->op_next) { 2947 if (top->op_next == sibling) { 2948 top->op_next = sibling->op_next; 2949 o->op_next = next->op_next; 2950 break; 2951 } 2952 top = OpSIBLING(top); 2953 } 2954 } 2955 } 2956 2957 /* Optimise 'my $x; my $y;' into 'my ($x, $y);' 2958 * 2959 * This latter form is then suitable for conversion into padrange 2960 * later on. Convert: 2961 * 2962 * nextstate1 -> padop1 -> nextstate2 -> padop2 -> nextstate3 2963 * 2964 * into: 2965 * 2966 * nextstate1 -> listop -> nextstate3 2967 * / \ 2968 * pushmark -> padop1 -> padop2 2969 */ 2970 if (o->op_next && ( 2971 o->op_next->op_type == OP_PADSV 2972 || o->op_next->op_type == OP_PADAV 2973 || o->op_next->op_type == OP_PADHV 2974 ) 2975 && !(o->op_next->op_private & ~OPpLVAL_INTRO) 2976 && o->op_next->op_next && o->op_next->op_next->op_type == OP_NEXTSTATE 2977 && o->op_next->op_next->op_next && ( 2978 o->op_next->op_next->op_next->op_type == OP_PADSV 2979 || o->op_next->op_next->op_next->op_type == OP_PADAV 2980 || o->op_next->op_next->op_next->op_type == OP_PADHV 2981 ) 2982 && !(o->op_next->op_next->op_next->op_private & ~OPpLVAL_INTRO) 2983 && o->op_next->op_next->op_next->op_next && o->op_next->op_next->op_next->op_next->op_type == OP_NEXTSTATE 2984 && (!CopLABEL((COP*)o)) /* Don't mess with labels */ 2985 && (!CopLABEL((COP*)o->op_next->op_next)) /* ... */ 2986 ) { 2987 OP *pad1, *ns2, *pad2, *ns3, *newop, *newpm; 2988 2989 pad1 = o->op_next; 2990 ns2 = pad1->op_next; 2991 pad2 = ns2->op_next; 2992 ns3 = pad2->op_next; 2993 2994 /* we assume here that the op_next chain is the same as 2995 * the op_sibling chain */ 2996 assert(OpSIBLING(o) == pad1); 2997 assert(OpSIBLING(pad1) == ns2); 2998 assert(OpSIBLING(ns2) == pad2); 2999 assert(OpSIBLING(pad2) == ns3); 3000 3001 /* excise and delete ns2 */ 3002 op_sibling_splice(NULL, pad1, 1, NULL); 3003 op_free(ns2); 3004 3005 /* excise pad1 and pad2 */ 3006 op_sibling_splice(NULL, o, 2, NULL); 3007 3008 /* create new listop, with children consisting of: 3009 * a new pushmark, pad1, pad2. */ 3010 newop = newLISTOP(OP_LIST, 0, pad1, pad2); 3011 newop->op_flags |= OPf_PARENS; 3012 newop->op_flags = (newop->op_flags & ~OPf_WANT) | OPf_WANT_VOID; 3013 3014 /* insert newop between o and ns3 */ 3015 op_sibling_splice(NULL, o, 0, newop); 3016 3017 /*fixup op_next chain */ 3018 newpm = cUNOPx(newop)->op_first; /* pushmark */ 3019 o ->op_next = newpm; 3020 newpm->op_next = pad1; 3021 pad1 ->op_next = pad2; 3022 pad2 ->op_next = newop; /* listop */ 3023 newop->op_next = ns3; 3024 3025 /* Ensure pushmark has this flag if padops do */ 3026 if (pad1->op_flags & OPf_MOD && pad2->op_flags & OPf_MOD) { 3027 newpm->op_flags |= OPf_MOD; 3028 } 3029 3030 break; 3031 } 3032 3033 /* Two NEXTSTATEs in a row serve no purpose. Except if they happen 3034 to carry two labels. For now, take the easier option, and skip 3035 this optimisation if the first NEXTSTATE has a label. 3036 Yves asked what about if they have different hints or features? 3037 Tony thinks that as we remove the first of the pair it should 3038 be fine. 3039 */ 3040 if (!CopLABEL((COP*)o) && !PERLDB_NOOPT) { 3041 OP *nextop = o->op_next; 3042 while (nextop) { 3043 switch (nextop->op_type) { 3044 case OP_NULL: 3045 case OP_SCALAR: 3046 case OP_LINESEQ: 3047 case OP_SCOPE: 3048 nextop = nextop->op_next; 3049 continue; 3050 } 3051 break; 3052 } 3053 3054 if (nextop && (nextop->op_type == OP_NEXTSTATE)) { 3055 op_null(o); 3056 if (oldop) 3057 oldop->op_next = nextop; 3058 o = nextop; 3059 /* Skip (old)oldop assignment since the current oldop's 3060 op_next already points to the next op. */ 3061 goto redo; 3062 } 3063 } 3064 break; 3065 3066 case OP_CONCAT: 3067 if (o->op_next && o->op_next->op_type == OP_STRINGIFY) { 3068 if (o->op_next->op_private & OPpTARGET_MY) { 3069 if (o->op_flags & OPf_STACKED) /* chained concats */ 3070 break; /* ignore_optimization */ 3071 else { 3072 /* assert(PL_opargs[o->op_type] & OA_TARGLEX); */ 3073 o->op_targ = o->op_next->op_targ; 3074 o->op_next->op_targ = 0; 3075 o->op_private |= OPpTARGET_MY; 3076 } 3077 } 3078 op_null(o->op_next); 3079 } 3080 break; 3081 case OP_STUB: 3082 if ((o->op_flags & OPf_WANT) != OPf_WANT_LIST) { 3083 break; /* Scalar stub must produce undef. List stub is noop */ 3084 } 3085 goto nothin; 3086 case OP_NULL: 3087 if (o->op_targ == OP_NEXTSTATE 3088 || o->op_targ == OP_DBSTATE) 3089 { 3090 PL_curcop = ((COP*)o); 3091 } 3092 /* XXX: We avoid setting op_seq here to prevent later calls 3093 to rpeep() from mistakenly concluding that optimisation 3094 has already occurred. This doesn't fix the real problem, 3095 though (See 20010220.007 (#5874)). AMS 20010719 */ 3096 /* op_seq functionality is now replaced by op_opt */ 3097 o->op_opt = 0; 3098 /* FALLTHROUGH */ 3099 case OP_SCALAR: 3100 case OP_LINESEQ: 3101 case OP_SCOPE: 3102 nothin: 3103 if (oldop) { 3104 oldop->op_next = o->op_next; 3105 o->op_opt = 0; 3106 continue; 3107 } 3108 break; 3109 3110 case OP_PUSHMARK: 3111 3112 /* Given 3113 5 repeat/DOLIST 3114 3 ex-list 3115 1 pushmark 3116 2 scalar or const 3117 4 const[0] 3118 convert repeat into a stub with no kids. 3119 */ 3120 if (o->op_next->op_type == OP_CONST 3121 || ( o->op_next->op_type == OP_PADSV 3122 && !(o->op_next->op_private & OPpLVAL_INTRO)) 3123 || ( o->op_next->op_type == OP_GV 3124 && o->op_next->op_next->op_type == OP_RV2SV 3125 && !(o->op_next->op_next->op_private 3126 & (OPpLVAL_INTRO|OPpOUR_INTRO)))) 3127 { 3128 const OP *kid = o->op_next->op_next; 3129 if (o->op_next->op_type == OP_GV) 3130 kid = kid->op_next; 3131 /* kid is now the ex-list. */ 3132 if (kid->op_type == OP_NULL 3133 && (kid = kid->op_next)->op_type == OP_CONST 3134 /* kid is now the repeat count. */ 3135 && kid->op_next->op_type == OP_REPEAT 3136 && kid->op_next->op_private & OPpREPEAT_DOLIST 3137 && (kid->op_next->op_flags & OPf_WANT) == OPf_WANT_LIST 3138 && SvIOK(kSVOP_sv) && SvIVX(kSVOP_sv) == 0 3139 && oldop) 3140 { 3141 o = kid->op_next; /* repeat */ 3142 oldop->op_next = o; 3143 op_free(cBINOPo->op_first); 3144 op_free(cBINOPo->op_last ); 3145 o->op_flags &=~ OPf_KIDS; 3146 /* stub is a baseop; repeat is a binop */ 3147 STATIC_ASSERT_STMT(sizeof(OP) <= sizeof(BINOP)); 3148 OpTYPE_set(o, OP_STUB); 3149 o->op_private = 0; 3150 break; 3151 } 3152 } 3153 3154 /* If the pushmark is associated with an empty anonhash 3155 * or anonlist, null out the pushmark and swap in a 3156 * specialised op for the parent. 3157 * 4 <@> anonhash sK* ->5 3158 * 3 <0> pushmark s ->4 3159 * becomes: 3160 * 3 <@> emptyavhv sK* ->4 3161 * - <0> pushmark s ->3 3162 */ 3163 if (!OpHAS_SIBLING(o) && (o->op_next == o->op_sibparent) && ( 3164 (o->op_next->op_type == OP_ANONHASH) || 3165 (o->op_next->op_type == OP_ANONLIST) ) && 3166 (o->op_next->op_flags & OPf_SPECIAL) ) { 3167 3168 OP* anon = o->op_next; 3169 /* These next two are _potentially_ a padsv and an sassign */ 3170 OP* padsv = anon->op_next; 3171 OP* sassign = (padsv) ? padsv->op_next: NULL; 3172 3173 anon->op_private = (anon->op_type == OP_ANONLIST) ? 3174 0 : OPpEMPTYAVHV_IS_HV; 3175 OpTYPE_set(anon, OP_EMPTYAVHV); 3176 op_null(o); 3177 o = anon; 3178 if (oldop) /* A previous optimization may have NULLED it */ 3179 oldop->op_next = anon; 3180 3181 /* Further optimise scalar assignment of an empty anonhash 3182 * or anonlist by subsuming the padsv & sassign OPs. */ 3183 if ((padsv->op_type == OP_PADSV) && 3184 !(padsv->op_private & OPpDEREF) && 3185 sassign && (sassign->op_type == OP_SASSIGN) ){ 3186 3187 /* Take some public flags from the sassign */ 3188 anon->op_flags = OPf_KIDS | OPf_SPECIAL | 3189 (anon->op_flags & OPf_PARENS) | 3190 (sassign->op_flags & (OPf_WANT|OPf_PARENS)); 3191 3192 /* Take some private flags from the padsv */ 3193 anon->op_private |= OPpTARGET_MY | 3194 (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)); 3195 3196 /* Take the targ slot from the padsv*/ 3197 anon->op_targ = padsv->op_targ; 3198 padsv->op_targ = 0; 3199 3200 /* Clean up */ 3201 anon->op_next = sassign->op_next; 3202 op_null(padsv); 3203 op_null(sassign); 3204 } 3205 break; 3206 3207 } 3208 3209 3210 /* Convert a series of PAD ops for my vars plus support into a 3211 * single padrange op. Basically 3212 * 3213 * pushmark -> pad[ahs]v -> pad[ahs]?v -> ... -> (list) -> rest 3214 * 3215 * becomes, depending on circumstances, one of 3216 * 3217 * padrange ----------------------------------> (list) -> rest 3218 * padrange --------------------------------------------> rest 3219 * 3220 * where all the pad indexes are sequential and of the same type 3221 * (INTRO or not). 3222 * We convert the pushmark into a padrange op, then skip 3223 * any other pad ops, and possibly some trailing ops. 3224 * Note that we don't null() the skipped ops, to make it 3225 * easier for Deparse to undo this optimisation (and none of 3226 * the skipped ops are holding any resources). It also makes 3227 * it easier for find_uninit_var(), as it can just ignore 3228 * padrange, and examine the original pad ops. 3229 */ 3230 { 3231 OP *p; 3232 OP *followop = NULL; /* the op that will follow the padrange op */ 3233 U8 count = 0; 3234 U8 intro = 0; 3235 PADOFFSET base = 0; /* init only to stop compiler whining */ 3236 bool gvoid = 0; /* init only to stop compiler whining */ 3237 bool defav = 0; /* seen (...) = @_ */ 3238 bool reuse = 0; /* reuse an existing padrange op */ 3239 3240 /* look for a pushmark -> gv[_] -> rv2av */ 3241 3242 { 3243 OP *rv2av, *q; 3244 p = o->op_next; 3245 if ( p->op_type == OP_GV 3246 && cGVOPx_gv(p) == PL_defgv 3247 && (rv2av = p->op_next) 3248 && rv2av->op_type == OP_RV2AV 3249 && !(rv2av->op_flags & OPf_REF) 3250 && !(rv2av->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB)) 3251 && ((rv2av->op_flags & OPf_WANT) == OPf_WANT_LIST) 3252 ) { 3253 q = rv2av->op_next; 3254 if (q->op_type == OP_NULL) 3255 q = q->op_next; 3256 if (q->op_type == OP_PUSHMARK) { 3257 defav = 1; 3258 p = q; 3259 } 3260 } 3261 } 3262 if (!defav) { 3263 p = o; 3264 } 3265 3266 /* scan for PAD ops */ 3267 3268 for (p = p->op_next; p; p = p->op_next) { 3269 if (p->op_type == OP_NULL) 3270 continue; 3271 3272 if (( p->op_type != OP_PADSV 3273 && p->op_type != OP_PADAV 3274 && p->op_type != OP_PADHV 3275 ) 3276 /* any private flag other than INTRO? e.g. STATE */ 3277 || (p->op_private & ~OPpLVAL_INTRO) 3278 ) 3279 break; 3280 3281 /* let $a[N] potentially be optimised into AELEMFAST_LEX 3282 * instead */ 3283 if ( p->op_type == OP_PADAV 3284 && p->op_next 3285 && p->op_next->op_type == OP_CONST 3286 && p->op_next->op_next 3287 && p->op_next->op_next->op_type == OP_AELEM 3288 ) 3289 break; 3290 3291 /* for 1st padop, note what type it is and the range 3292 * start; for the others, check that it's the same type 3293 * and that the targs are contiguous */ 3294 if (count == 0) { 3295 intro = (p->op_private & OPpLVAL_INTRO); 3296 base = p->op_targ; 3297 gvoid = OP_GIMME(p,0) == G_VOID; 3298 } 3299 else { 3300 if ((p->op_private & OPpLVAL_INTRO) != intro) 3301 break; 3302 /* Note that you'd normally expect targs to be 3303 * contiguous in my($a,$b,$c), but that's not the case 3304 * when external modules start doing things, e.g. 3305 * Function::Parameters */ 3306 if (p->op_targ != base + count) 3307 break; 3308 assert(p->op_targ == base + count); 3309 /* Either all the padops or none of the padops should 3310 be in void context. Since we only do the optimisa- 3311 tion for av/hv when the aggregate itself is pushed 3312 on to the stack (one item), there is no need to dis- 3313 tinguish list from scalar context. */ 3314 if (gvoid != (OP_GIMME(p,0) == G_VOID)) 3315 break; 3316 } 3317 3318 /* for AV, HV, only when we're not flattening */ 3319 if ( p->op_type != OP_PADSV 3320 && !gvoid 3321 && !(p->op_flags & OPf_REF) 3322 ) 3323 break; 3324 3325 if (count >= OPpPADRANGE_COUNTMASK) 3326 break; 3327 3328 /* there's a biggest base we can fit into a 3329 * SAVEt_CLEARPADRANGE in pp_padrange. 3330 * (The sizeof() stuff will be constant-folded, and is 3331 * intended to avoid getting "comparison is always false" 3332 * compiler warnings. See the comments above 3333 * MEM_WRAP_CHECK for more explanation on why we do this 3334 * in a weird way to avoid compiler warnings.) 3335 */ 3336 if ( intro 3337 && (8*sizeof(base) > 3338 8*sizeof(UV)-OPpPADRANGE_COUNTSHIFT-SAVE_TIGHT_SHIFT 3339 ? (Size_t)base 3340 : (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT)) 3341 ) > 3342 (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT)) 3343 ) 3344 break; 3345 3346 /* Success! We've got another valid pad op to optimise away */ 3347 count++; 3348 followop = p->op_next; 3349 } 3350 3351 if (count < 1 || (count == 1 && !defav)) 3352 break; 3353 3354 /* pp_padrange in specifically compile-time void context 3355 * skips pushing a mark and lexicals; in all other contexts 3356 * (including unknown till runtime) it pushes a mark and the 3357 * lexicals. We must be very careful then, that the ops we 3358 * optimise away would have exactly the same effect as the 3359 * padrange. 3360 * In particular in void context, we can only optimise to 3361 * a padrange if we see the complete sequence 3362 * pushmark, pad*v, ...., list 3363 * which has the net effect of leaving the markstack as it 3364 * was. Not pushing onto the stack (whereas padsv does touch 3365 * the stack) makes no difference in void context. 3366 */ 3367 assert(followop); 3368 if (gvoid) { 3369 if (followop->op_type == OP_LIST 3370 && OP_GIMME(followop,0) == G_VOID 3371 ) 3372 { 3373 followop = followop->op_next; /* skip OP_LIST */ 3374 3375 /* consolidate two successive my(...);'s */ 3376 3377 if ( oldoldop 3378 && oldoldop->op_type == OP_PADRANGE 3379 && (oldoldop->op_flags & OPf_WANT) == OPf_WANT_VOID 3380 && (oldoldop->op_private & OPpLVAL_INTRO) == intro 3381 && !(oldoldop->op_flags & OPf_SPECIAL) 3382 ) { 3383 U8 old_count; 3384 assert(oldoldop->op_next == oldop); 3385 assert( oldop->op_type == OP_NEXTSTATE 3386 || oldop->op_type == OP_DBSTATE); 3387 assert(oldop->op_next == o); 3388 3389 old_count 3390 = (oldoldop->op_private & OPpPADRANGE_COUNTMASK); 3391 3392 /* Do not assume pad offsets for $c and $d are con- 3393 tiguous in 3394 my ($a,$b,$c); 3395 my ($d,$e,$f); 3396 */ 3397 if ( oldoldop->op_targ + old_count == base 3398 && old_count < OPpPADRANGE_COUNTMASK - count) { 3399 base = oldoldop->op_targ; 3400 count += old_count; 3401 reuse = 1; 3402 } 3403 } 3404 3405 /* if there's any immediately following singleton 3406 * my var's; then swallow them and the associated 3407 * nextstates; i.e. 3408 * my ($a,$b); my $c; my $d; 3409 * is treated as 3410 * my ($a,$b,$c,$d); 3411 */ 3412 3413 while ( ((p = followop->op_next)) 3414 && ( p->op_type == OP_PADSV 3415 || p->op_type == OP_PADAV 3416 || p->op_type == OP_PADHV) 3417 && (p->op_flags & OPf_WANT) == OPf_WANT_VOID 3418 && (p->op_private & OPpLVAL_INTRO) == intro 3419 && !(p->op_private & ~OPpLVAL_INTRO) 3420 && p->op_next 3421 && ( p->op_next->op_type == OP_NEXTSTATE 3422 || p->op_next->op_type == OP_DBSTATE) 3423 && count < OPpPADRANGE_COUNTMASK 3424 && base + count == p->op_targ 3425 ) { 3426 count++; 3427 followop = p->op_next; 3428 } 3429 } 3430 else 3431 break; 3432 } 3433 3434 if (reuse) { 3435 assert(oldoldop->op_type == OP_PADRANGE); 3436 oldoldop->op_next = followop; 3437 oldoldop->op_private = (intro | count); 3438 o = oldoldop; 3439 oldop = NULL; 3440 oldoldop = NULL; 3441 } 3442 else { 3443 /* Convert the pushmark into a padrange. 3444 * To make Deparse easier, we guarantee that a padrange was 3445 * *always* formerly a pushmark */ 3446 assert(o->op_type == OP_PUSHMARK); 3447 o->op_next = followop; 3448 OpTYPE_set(o, OP_PADRANGE); 3449 o->op_targ = base; 3450 /* bit 7: INTRO; bit 6..0: count */ 3451 o->op_private = (intro | count); 3452 o->op_flags = ((o->op_flags & ~(OPf_WANT|OPf_SPECIAL)) 3453 | gvoid * OPf_WANT_VOID 3454 | (defav ? OPf_SPECIAL : 0)); 3455 } 3456 break; 3457 } 3458 3459 case OP_RV2AV: 3460 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) 3461 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); 3462 break; 3463 3464 case OP_RV2HV: 3465 case OP_PADHV: 3466 /*'keys %h' in void or scalar context: skip the OP_KEYS 3467 * and perform the functionality directly in the RV2HV/PADHV 3468 * op 3469 */ 3470 if (o->op_flags & OPf_REF) { 3471 OP *k = o->op_next; 3472 U8 want = (k->op_flags & OPf_WANT); 3473 if ( k 3474 && k->op_type == OP_KEYS 3475 && ( want == OPf_WANT_VOID 3476 || want == OPf_WANT_SCALAR) 3477 && !(k->op_private & OPpMAYBE_LVSUB) 3478 && !(k->op_flags & OPf_MOD) 3479 ) { 3480 o->op_next = k->op_next; 3481 o->op_flags &= ~(OPf_REF|OPf_WANT); 3482 o->op_flags |= want; 3483 o->op_private |= (o->op_type == OP_PADHV ? 3484 OPpPADHV_ISKEYS : OPpRV2HV_ISKEYS); 3485 /* for keys(%lex), hold onto the OP_KEYS's targ 3486 * since padhv doesn't have its own targ to return 3487 * an int with */ 3488 if (!(o->op_type ==OP_PADHV && want == OPf_WANT_SCALAR)) 3489 op_null(k); 3490 } 3491 } 3492 3493 /* see if %h is used in boolean context */ 3494 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) 3495 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL); 3496 3497 3498 if (o->op_type != OP_PADHV) 3499 break; 3500 /* FALLTHROUGH */ 3501 case OP_PADAV: 3502 if ( o->op_type == OP_PADAV 3503 && (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR 3504 ) 3505 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); 3506 /* FALLTHROUGH */ 3507 case OP_PADSV: 3508 /* Skip over state($x) in void context. */ 3509 if (oldop && o->op_private == (OPpPAD_STATE|OPpLVAL_INTRO) 3510 && (o->op_flags & OPf_WANT) == OPf_WANT_VOID) 3511 { 3512 oldop->op_next = o->op_next; 3513 goto redo_nextstate; 3514 } 3515 if (o->op_type != OP_PADAV) 3516 break; 3517 /* FALLTHROUGH */ 3518 case OP_GV: 3519 if (o->op_type == OP_PADAV || o->op_next->op_type == OP_RV2AV) { 3520 OP* const pop = (o->op_type == OP_PADAV) ? 3521 o->op_next : o->op_next->op_next; 3522 IV i; 3523 if (pop && pop->op_type == OP_CONST && 3524 ((PL_op = pop->op_next)) && 3525 pop->op_next->op_type == OP_AELEM && 3526 !(pop->op_next->op_private & 3527 (OPpLVAL_INTRO|OPpLVAL_DEFER|OPpDEREF|OPpMAYBE_LVSUB)) && 3528 (i = SvIV(cSVOPx(pop)->op_sv)) >= -128 && i <= 127) 3529 { 3530 GV *gv; 3531 if (cSVOPx(pop)->op_private & OPpCONST_STRICT) 3532 no_bareword_allowed(pop); 3533 if (o->op_type == OP_GV) 3534 op_null(o->op_next); 3535 op_null(pop->op_next); 3536 op_null(pop); 3537 o->op_flags |= pop->op_next->op_flags & OPf_MOD; 3538 o->op_next = pop->op_next->op_next; 3539 o->op_ppaddr = PL_ppaddr[OP_AELEMFAST]; 3540 o->op_private = (U8)i; 3541 if (o->op_type == OP_GV) { 3542 gv = cGVOPo_gv; 3543 GvAVn(gv); 3544 o->op_type = OP_AELEMFAST; 3545 } 3546 else 3547 o->op_type = OP_AELEMFAST_LEX; 3548 } 3549 if (o->op_type != OP_GV) 3550 break; 3551 } 3552 3553 /* Remove $foo from the op_next chain in void context. */ 3554 if (oldop 3555 && ( o->op_next->op_type == OP_RV2SV 3556 || o->op_next->op_type == OP_RV2AV 3557 || o->op_next->op_type == OP_RV2HV ) 3558 && (o->op_next->op_flags & OPf_WANT) == OPf_WANT_VOID 3559 && !(o->op_next->op_private & OPpLVAL_INTRO)) 3560 { 3561 oldop->op_next = o->op_next->op_next; 3562 /* Reprocess the previous op if it is a nextstate, to 3563 allow double-nextstate optimisation. */ 3564 redo_nextstate: 3565 if (oldop->op_type == OP_NEXTSTATE) { 3566 oldop->op_opt = 0; 3567 o = oldop; 3568 oldop = oldoldop; 3569 oldoldop = NULL; 3570 goto redo; 3571 } 3572 o = oldop->op_next; 3573 goto redo; 3574 } 3575 else if (o->op_next->op_type == OP_RV2SV) { 3576 if (!(o->op_next->op_private & OPpDEREF)) { 3577 op_null(o->op_next); 3578 o->op_private |= o->op_next->op_private & (OPpLVAL_INTRO 3579 | OPpOUR_INTRO); 3580 o->op_next = o->op_next->op_next; 3581 OpTYPE_set(o, OP_GVSV); 3582 } 3583 } 3584 else if (o->op_next->op_type == OP_READLINE 3585 && o->op_next->op_next->op_type == OP_CONCAT 3586 && (o->op_next->op_next->op_flags & OPf_STACKED)) 3587 { 3588 /* Turn "$a .= <FH>" into an OP_RCATLINE. AMS 20010917 */ 3589 OpTYPE_set(o, OP_RCATLINE); 3590 o->op_flags |= OPf_STACKED; 3591 op_null(o->op_next->op_next); 3592 op_null(o->op_next); 3593 } 3594 3595 break; 3596 3597 case OP_NOT: 3598 break; 3599 3600 case OP_AND: 3601 case OP_OR: 3602 case OP_DOR: 3603 case OP_CMPCHAIN_AND: 3604 case OP_PUSHDEFER: 3605 while (cLOGOP->op_other->op_type == OP_NULL) 3606 cLOGOP->op_other = cLOGOP->op_other->op_next; 3607 while (o->op_next && ( o->op_type == o->op_next->op_type 3608 || o->op_next->op_type == OP_NULL)) 3609 o->op_next = o->op_next->op_next; 3610 3611 /* If we're an OR and our next is an AND in void context, we'll 3612 follow its op_other on short circuit, same for reverse. 3613 We can't do this with OP_DOR since if it's true, its return 3614 value is the underlying value which must be evaluated 3615 by the next op. */ 3616 if (o->op_next && 3617 ( 3618 (IS_AND_OP(o) && IS_OR_OP(o->op_next)) 3619 || (IS_OR_OP(o) && IS_AND_OP(o->op_next)) 3620 ) 3621 && (o->op_next->op_flags & OPf_WANT) == OPf_WANT_VOID 3622 ) { 3623 o->op_next = cLOGOPx(o->op_next)->op_other; 3624 } 3625 DEFER(cLOGOP->op_other); 3626 o->op_opt = 1; 3627 break; 3628 3629 case OP_GREPWHILE: 3630 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) 3631 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); 3632 /* FALLTHROUGH */ 3633 case OP_COND_EXPR: 3634 case OP_MAPWHILE: 3635 case OP_ANDASSIGN: 3636 case OP_ORASSIGN: 3637 case OP_DORASSIGN: 3638 case OP_RANGE: 3639 case OP_ONCE: 3640 case OP_ARGDEFELEM: 3641 while (cLOGOP->op_other->op_type == OP_NULL) 3642 cLOGOP->op_other = cLOGOP->op_other->op_next; 3643 DEFER(cLOGOP->op_other); 3644 break; 3645 3646 case OP_ENTERLOOP: 3647 case OP_ENTERITER: 3648 while (cLOOP->op_redoop->op_type == OP_NULL) 3649 cLOOP->op_redoop = cLOOP->op_redoop->op_next; 3650 while (cLOOP->op_nextop->op_type == OP_NULL) 3651 cLOOP->op_nextop = cLOOP->op_nextop->op_next; 3652 while (cLOOP->op_lastop->op_type == OP_NULL) 3653 cLOOP->op_lastop = cLOOP->op_lastop->op_next; 3654 /* a while(1) loop doesn't have an op_next that escapes the 3655 * loop, so we have to explicitly follow the op_lastop to 3656 * process the rest of the code */ 3657 DEFER(cLOOP->op_lastop); 3658 break; 3659 3660 case OP_ENTERTRY: 3661 assert(cLOGOPo->op_other->op_type == OP_LEAVETRY); 3662 DEFER(cLOGOPo->op_other); 3663 break; 3664 3665 case OP_ENTERTRYCATCH: 3666 assert(cLOGOPo->op_other->op_type == OP_CATCH); 3667 /* catch body is the ->op_other of the OP_CATCH */ 3668 DEFER(cLOGOPx(cLOGOPo->op_other)->op_other); 3669 break; 3670 3671 case OP_SUBST: 3672 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) 3673 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); 3674 assert(!(cPMOP->op_pmflags & PMf_ONCE)); 3675 while (cPMOP->op_pmstashstartu.op_pmreplstart && 3676 cPMOP->op_pmstashstartu.op_pmreplstart->op_type == OP_NULL) 3677 cPMOP->op_pmstashstartu.op_pmreplstart 3678 = cPMOP->op_pmstashstartu.op_pmreplstart->op_next; 3679 DEFER(cPMOP->op_pmstashstartu.op_pmreplstart); 3680 break; 3681 3682 case OP_SORT: { 3683 OP *oright; 3684 3685 if (o->op_flags & OPf_SPECIAL) { 3686 /* first arg is a code block */ 3687 OP * const nullop = OpSIBLING(cLISTOP->op_first); 3688 OP * kid = cUNOPx(nullop)->op_first; 3689 3690 assert(nullop->op_type == OP_NULL); 3691 assert(kid->op_type == OP_SCOPE 3692 || (kid->op_type == OP_NULL && kid->op_targ == OP_LEAVE)); 3693 /* since OP_SORT doesn't have a handy op_other-style 3694 * field that can point directly to the start of the code 3695 * block, store it in the otherwise-unused op_next field 3696 * of the top-level OP_NULL. This will be quicker at 3697 * run-time, and it will also allow us to remove leading 3698 * OP_NULLs by just messing with op_nexts without 3699 * altering the basic op_first/op_sibling layout. */ 3700 kid = kLISTOP->op_first; 3701 assert( 3702 (kid->op_type == OP_NULL 3703 && ( kid->op_targ == OP_NEXTSTATE 3704 || kid->op_targ == OP_DBSTATE )) 3705 || kid->op_type == OP_STUB 3706 || kid->op_type == OP_ENTER 3707 || (PL_parser && PL_parser->error_count)); 3708 nullop->op_next = kid->op_next; 3709 DEFER(nullop->op_next); 3710 } 3711 3712 /* check that RHS of sort is a single plain array */ 3713 oright = cUNOPo->op_first; 3714 if (!oright || oright->op_type != OP_PUSHMARK) 3715 break; 3716 3717 if (o->op_private & OPpSORT_INPLACE) 3718 break; 3719 3720 /* reverse sort ... can be optimised. */ 3721 if (!OpHAS_SIBLING(cUNOPo)) { 3722 /* Nothing follows us on the list. */ 3723 OP * const reverse = o->op_next; 3724 3725 if (reverse->op_type == OP_REVERSE && 3726 (reverse->op_flags & OPf_WANT) == OPf_WANT_LIST) { 3727 OP * const pushmark = cUNOPx(reverse)->op_first; 3728 if (pushmark && (pushmark->op_type == OP_PUSHMARK) 3729 && (OpSIBLING(cUNOPx(pushmark)) == o)) { 3730 /* reverse -> pushmark -> sort */ 3731 o->op_private |= OPpSORT_REVERSE; 3732 op_null(reverse); 3733 pushmark->op_next = oright->op_next; 3734 op_null(oright); 3735 } 3736 } 3737 } 3738 3739 break; 3740 } 3741 3742 case OP_REVERSE: { 3743 OP *ourmark, *theirmark, *ourlast, *iter, *expushmark, *rv2av; 3744 OP *gvop = NULL; 3745 LISTOP *enter, *exlist; 3746 3747 if (o->op_private & OPpSORT_INPLACE) 3748 break; 3749 3750 enter = cLISTOPx(o->op_next); 3751 if (!enter) 3752 break; 3753 if (enter->op_type == OP_NULL) { 3754 enter = cLISTOPx(enter->op_next); 3755 if (!enter) 3756 break; 3757 } 3758 /* for $a (...) will have OP_GV then OP_RV2GV here. 3759 for (...) just has an OP_GV. */ 3760 if (enter->op_type == OP_GV) { 3761 gvop = (OP *) enter; 3762 enter = cLISTOPx(enter->op_next); 3763 if (!enter) 3764 break; 3765 if (enter->op_type == OP_RV2GV) { 3766 enter = cLISTOPx(enter->op_next); 3767 if (!enter) 3768 break; 3769 } 3770 } 3771 3772 if (enter->op_type != OP_ENTERITER) 3773 break; 3774 3775 iter = enter->op_next; 3776 if (!iter || iter->op_type != OP_ITER) 3777 break; 3778 3779 expushmark = enter->op_first; 3780 if (!expushmark || expushmark->op_type != OP_NULL 3781 || expushmark->op_targ != OP_PUSHMARK) 3782 break; 3783 3784 exlist = cLISTOPx(OpSIBLING(expushmark)); 3785 if (!exlist || exlist->op_type != OP_NULL 3786 || exlist->op_targ != OP_LIST) 3787 break; 3788 3789 if (exlist->op_last != o) { 3790 /* Mmm. Was expecting to point back to this op. */ 3791 break; 3792 } 3793 theirmark = exlist->op_first; 3794 if (!theirmark || theirmark->op_type != OP_PUSHMARK) 3795 break; 3796 3797 if (OpSIBLING(theirmark) != o) { 3798 /* There's something between the mark and the reverse, eg 3799 for (1, reverse (...)) 3800 so no go. */ 3801 break; 3802 } 3803 3804 ourmark = cLISTOPo->op_first; 3805 if (!ourmark || ourmark->op_type != OP_PUSHMARK) 3806 break; 3807 3808 ourlast = cLISTOPo->op_last; 3809 if (!ourlast || ourlast->op_next != o) 3810 break; 3811 3812 rv2av = OpSIBLING(ourmark); 3813 if (rv2av && rv2av->op_type == OP_RV2AV && !OpHAS_SIBLING(rv2av) 3814 && rv2av->op_flags == (OPf_WANT_LIST | OPf_KIDS)) { 3815 /* We're just reversing a single array. */ 3816 rv2av->op_flags = OPf_WANT_SCALAR | OPf_KIDS | OPf_REF; 3817 enter->op_flags |= OPf_STACKED; 3818 } 3819 3820 /* We don't have control over who points to theirmark, so sacrifice 3821 ours. */ 3822 theirmark->op_next = ourmark->op_next; 3823 theirmark->op_flags = ourmark->op_flags; 3824 ourlast->op_next = gvop ? gvop : (OP *) enter; 3825 op_null(ourmark); 3826 op_null(o); 3827 enter->op_private |= OPpITER_REVERSED; 3828 iter->op_private |= OPpITER_REVERSED; 3829 3830 oldoldop = NULL; 3831 oldop = ourlast; 3832 o = oldop->op_next; 3833 goto redo; 3834 NOT_REACHED; /* NOTREACHED */ 3835 break; 3836 } 3837 3838 case OP_UNDEF: 3839 if ((o->op_flags & OPf_KIDS) && 3840 (cUNOPx(o)->op_first->op_type == OP_PADSV)) { 3841 3842 /* Convert: 3843 * undef 3844 * padsv[$x] 3845 * to: 3846 * undef[$x] 3847 */ 3848 3849 OP * padsv = cUNOPx(o)->op_first; 3850 o->op_private = OPpTARGET_MY | 3851 (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)); 3852 o->op_targ = padsv->op_targ; padsv->op_targ = 0; 3853 op_null(padsv); 3854 /* Optimizer does NOT seem to fix up the padsv op_next ptr */ 3855 if (oldoldop) 3856 oldoldop->op_next = o; 3857 oldop = oldoldop; 3858 oldoldop = NULL; 3859 3860 } else if (o->op_next->op_type == OP_PADSV) { 3861 OP * padsv = o->op_next; 3862 OP * sassign = (padsv->op_next && 3863 padsv->op_next->op_type == OP_SASSIGN) ? 3864 padsv->op_next : NULL; 3865 if (sassign && cBINOPx(sassign)->op_first == o) { 3866 /* Convert: 3867 * sassign 3868 * undef 3869 * padsv[$x] 3870 * to: 3871 * undef[$x] 3872 * NOTE: undef does not have the "T" flag set in 3873 * regen/opcodes, as this would cause 3874 * S_maybe_targlex to do the optimization. 3875 * Seems easier to keep it all here, rather 3876 * than have an undef-specific branch in 3877 * S_maybe_targlex just to add the 3878 * OPpUNDEF_KEEP_PV flag. 3879 */ 3880 o->op_private = OPpTARGET_MY | OPpUNDEF_KEEP_PV | 3881 (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)); 3882 o->op_targ = padsv->op_targ; padsv->op_targ = 0; 3883 op_null(padsv); 3884 op_null(sassign); 3885 /* Optimizer DOES seems to fix up the op_next ptrs */ 3886 } 3887 } 3888 break; 3889 3890 case OP_QR: 3891 case OP_MATCH: 3892 if (!(cPMOP->op_pmflags & PMf_ONCE)) { 3893 assert (!cPMOP->op_pmstashstartu.op_pmreplstart); 3894 } 3895 break; 3896 3897 case OP_RUNCV: 3898 if (!(o->op_private & OPpOFFBYONE) && !CvCLONE(PL_compcv) 3899 && (!CvANON(PL_compcv) || (!PL_cv_has_eval && !PL_perldb))) 3900 { 3901 SV *sv; 3902 if (CvEVAL(PL_compcv)) sv = &PL_sv_undef; 3903 else { 3904 sv = newRV((SV *)PL_compcv); 3905 sv_rvweaken(sv); 3906 SvREADONLY_on(sv); 3907 } 3908 OpTYPE_set(o, OP_CONST); 3909 o->op_flags |= OPf_SPECIAL; 3910 cSVOPo->op_sv = sv; 3911 } 3912 break; 3913 3914 case OP_SASSIGN: { 3915 if (OP_GIMME(o,0) == G_VOID 3916 || ( o->op_next->op_type == OP_LINESEQ 3917 && ( o->op_next->op_next->op_type == OP_LEAVESUB 3918 || ( o->op_next->op_next->op_type == OP_RETURN 3919 && !CvLVALUE(PL_compcv))))) 3920 { 3921 OP *right = cBINOP->op_first; 3922 if (right) { 3923 /* sassign 3924 * RIGHT 3925 * substr 3926 * pushmark 3927 * arg1 3928 * arg2 3929 * ... 3930 * becomes 3931 * 3932 * ex-sassign 3933 * substr 3934 * pushmark 3935 * RIGHT 3936 * arg1 3937 * arg2 3938 * ... 3939 */ 3940 OP *left = OpSIBLING(right); 3941 if (left->op_type == OP_SUBSTR 3942 && (left->op_private & 7) < 4) { 3943 op_null(o); 3944 /* cut out right */ 3945 op_sibling_splice(o, NULL, 1, NULL); 3946 /* and insert it as second child of OP_SUBSTR */ 3947 op_sibling_splice(left, cBINOPx(left)->op_first, 0, 3948 right); 3949 left->op_private |= OPpSUBSTR_REPL_FIRST; 3950 left->op_flags = 3951 (o->op_flags & ~OPf_WANT) | OPf_WANT_VOID; 3952 } 3953 } 3954 } 3955 OP* rhs = cBINOPx(o)->op_first; 3956 OP* lval = cBINOPx(o)->op_last; 3957 3958 /* Combine a simple SASSIGN OP with a PADSV lvalue child OP 3959 * into a single OP. */ 3960 3961 /* This optimization covers arbitrarily complicated RHS OP 3962 * trees. Separate optimizations may exist for specific, 3963 * single RHS OPs, such as: 3964 * "my $foo = undef;" or "my $bar = $other_padsv;" */ 3965 3966 if (!(o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV)) 3967 && lval && (lval->op_type == OP_PADSV) && 3968 !(lval->op_private & OPpDEREF) 3969 /* skip if padrange has already gazumped the padsv */ 3970 && (lval == oldop) 3971 /* Memoize::Once produces a non-standard SASSIGN that 3972 * doesn't actually point to pp_sassign, has only one 3973 * child (PADSV), and gets to it via op_other rather 3974 * than op_next. Don't try to optimize this. */ 3975 && (lval != rhs) 3976 ) { 3977 /* SASSIGN's bitfield flags, such as op_moresib and 3978 * op_slabbed, will be carried over unchanged. */ 3979 OpTYPE_set(o, OP_PADSV_STORE); 3980 3981 /* Explicitly craft the new OP's op_flags, carrying 3982 * some bits over from the SASSIGN */ 3983 o->op_flags = ( 3984 OPf_KIDS | OPf_STACKED | 3985 (o->op_flags & (OPf_WANT|OPf_PARENS)) 3986 ); 3987 3988 /* Reset op_private flags, taking relevant private flags 3989 * from the PADSV */ 3990 o->op_private = (lval->op_private & 3991 (OPpLVAL_INTRO|OPpPAD_STATE|OPpDEREF)); 3992 3993 /* Steal the targ from the PADSV */ 3994 o->op_targ = lval->op_targ; lval->op_targ = 0; 3995 3996 /* Fixup op_next ptrs */ 3997 assert(oldop->op_type == OP_PADSV); 3998 /* oldoldop can be arbitrarily deep in the RHS OP tree */ 3999 oldoldop->op_next = o; 4000 4001 /* Even when (rhs != oldoldop), rhs might still have a 4002 * relevant op_next ptr to lval. This is definitely true 4003 * when rhs is OP_NULL with a LOGOP kid (e.g. orassign). 4004 * There may be other cases. */ 4005 if (rhs->op_next == lval) 4006 rhs->op_next = o; 4007 4008 /* Now null-out the PADSV */ 4009 op_null(lval); 4010 4011 /* NULL the previous op ptrs, so rpeep can continue */ 4012 oldoldop = NULL; oldop = NULL; 4013 } 4014 4015 /* Combine a simple SASSIGN OP with an AELEMFAST_LEX lvalue 4016 * into a single OP. This optimization covers arbitrarily 4017 * complicated RHS OP trees. */ 4018 4019 if (!(o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV)) 4020 && (lval->op_type == OP_NULL) && (lval->op_private == 2) && 4021 (cBINOPx(lval)->op_first->op_type == OP_AELEMFAST_LEX) 4022 ) { 4023 OP * lex = cBINOPx(lval)->op_first; 4024 /* SASSIGN's bitfield flags, such as op_moresib and 4025 * op_slabbed, will be carried over unchanged. */ 4026 OpTYPE_set(o, OP_AELEMFASTLEX_STORE); 4027 4028 /* Explicitly craft the new OP's op_flags, carrying 4029 * some bits over from the SASSIGN */ 4030 o->op_flags = ( 4031 OPf_KIDS | OPf_STACKED | 4032 (o->op_flags & (OPf_WANT|OPf_PARENS)) 4033 ); 4034 4035 /* Copy the AELEMFAST_LEX op->private, which contains 4036 * the key index. */ 4037 o->op_private = lex->op_private; 4038 4039 /* Take the targ from the AELEMFAST_LEX */ 4040 o->op_targ = lex->op_targ; lex->op_targ = 0; 4041 4042 assert(oldop->op_type == OP_AELEMFAST_LEX); 4043 /* oldoldop can be arbitrarily deep in the RHS OP tree */ 4044 oldoldop->op_next = o; 4045 4046 /* Even when (rhs != oldoldop), rhs might still have a 4047 * relevant op_next ptr to lex. (Updating it here can 4048 * also cause other ops in the RHS to get the desired 4049 * op_next pointer, presumably thanks to the finalizer.) 4050 * This is definitely truewhen rhs is OP_NULL with a 4051 * LOGOP kid (e.g. orassign). There may be other cases. */ 4052 if (rhs->op_next == lex) 4053 rhs->op_next = o; 4054 4055 /* Now null-out the AELEMFAST_LEX */ 4056 op_null(lex); 4057 4058 /* NULL the previous op ptrs, so rpeep can continue */ 4059 oldop = oldoldop; oldoldop = NULL; 4060 } 4061 4062 break; 4063 } 4064 4065 case OP_AASSIGN: { 4066 int l, r, lr, lscalars, rscalars; 4067 4068 /* handle common vars detection, e.g. ($a,$b) = ($b,$a). 4069 Note that we do this now rather than in newASSIGNOP(), 4070 since only by now are aliased lexicals flagged as such 4071 4072 See the essay "Common vars in list assignment" above for 4073 the full details of the rationale behind all the conditions 4074 below. 4075 4076 PL_generation sorcery: 4077 To detect whether there are common vars, the global var 4078 PL_generation is incremented for each assign op we scan. 4079 Then we run through all the lexical variables on the LHS, 4080 of the assignment, setting a spare slot in each of them to 4081 PL_generation. Then we scan the RHS, and if any lexicals 4082 already have that value, we know we've got commonality. 4083 Also, if the generation number is already set to 4084 PERL_INT_MAX, then the variable is involved in aliasing, so 4085 we also have potential commonality in that case. 4086 */ 4087 4088 PL_generation++; 4089 /* scan LHS */ 4090 lscalars = 0; 4091 l = S_aassign_scan(aTHX_ cLISTOPo->op_last, FALSE, &lscalars); 4092 /* scan RHS */ 4093 rscalars = 0; 4094 r = S_aassign_scan(aTHX_ cLISTOPo->op_first, TRUE, &rscalars); 4095 lr = (l|r); 4096 4097 4098 /* After looking for things which are *always* safe, this main 4099 * if/else chain selects primarily based on the type of the 4100 * LHS, gradually working its way down from the more dangerous 4101 * to the more restrictive and thus safer cases */ 4102 4103 if ( !l /* () = ....; */ 4104 || !r /* .... = (); */ 4105 || !(l & ~AAS_SAFE_SCALAR) /* (undef, pos()) = ...; */ 4106 || !(r & ~AAS_SAFE_SCALAR) /* ... = (1,2,length,undef); */ 4107 || (lscalars < 2) /* (undef, $x) = ... */ 4108 ) { 4109 NOOP; /* always safe */ 4110 } 4111 else if (l & AAS_DANGEROUS) { 4112 /* always dangerous */ 4113 o->op_private |= OPpASSIGN_COMMON_SCALAR; 4114 o->op_private |= OPpASSIGN_COMMON_AGG; 4115 } 4116 else if (l & (AAS_PKG_SCALAR|AAS_PKG_AGG)) { 4117 /* package vars are always dangerous - too many 4118 * aliasing possibilities */ 4119 if (l & AAS_PKG_SCALAR) 4120 o->op_private |= OPpASSIGN_COMMON_SCALAR; 4121 if (l & AAS_PKG_AGG) 4122 o->op_private |= OPpASSIGN_COMMON_AGG; 4123 } 4124 else if (l & ( AAS_MY_SCALAR|AAS_MY_AGG 4125 |AAS_LEX_SCALAR|AAS_LEX_AGG)) 4126 { 4127 /* LHS contains only lexicals and safe ops */ 4128 4129 if (l & (AAS_MY_AGG|AAS_LEX_AGG)) 4130 o->op_private |= OPpASSIGN_COMMON_AGG; 4131 4132 if (l & (AAS_MY_SCALAR|AAS_LEX_SCALAR)) { 4133 if (lr & AAS_LEX_SCALAR_COMM) 4134 o->op_private |= OPpASSIGN_COMMON_SCALAR; 4135 else if ( !(l & AAS_LEX_SCALAR) 4136 && (r & AAS_DEFAV)) 4137 { 4138 /* falsely mark 4139 * my (...) = @_ 4140 * as scalar-safe for performance reasons. 4141 * (it will still have been marked _AGG if necessary */ 4142 NOOP; 4143 } 4144 else if (r & (AAS_PKG_SCALAR|AAS_PKG_AGG|AAS_DANGEROUS)) 4145 /* if there are only lexicals on the LHS and no 4146 * common ones on the RHS, then we assume that the 4147 * only way those lexicals could also get 4148 * on the RHS is via some sort of dereffing or 4149 * closure, e.g. 4150 * $r = \$lex; 4151 * ($lex, $x) = (1, $$r) 4152 * and in this case we assume the var must have 4153 * a bumped ref count. So if its ref count is 1, 4154 * it must only be on the LHS. 4155 */ 4156 o->op_private |= OPpASSIGN_COMMON_RC1; 4157 } 4158 } 4159 4160 /* ... = ($x) 4161 * may have to handle aggregate on LHS, but we can't 4162 * have common scalars. */ 4163 if (rscalars < 2) 4164 o->op_private &= 4165 ~(OPpASSIGN_COMMON_SCALAR|OPpASSIGN_COMMON_RC1); 4166 4167 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) 4168 S_check_for_bool_cxt(o, 1, OPpASSIGN_TRUEBOOL, 0); 4169 break; 4170 } 4171 4172 case OP_REF: 4173 case OP_BLESSED: 4174 /* if the op is used in boolean context, set the TRUEBOOL flag 4175 * which enables an optimisation at runtime which avoids creating 4176 * a stack temporary for known-true package names */ 4177 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) 4178 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL); 4179 break; 4180 4181 case OP_LENGTH: 4182 /* see if the op is used in known boolean context, 4183 * but not if OA_TARGLEX optimisation is enabled */ 4184 if ( (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR 4185 && !(o->op_private & OPpTARGET_MY) 4186 ) 4187 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); 4188 break; 4189 4190 case OP_POS: 4191 /* see if the op is used in known boolean context */ 4192 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR) 4193 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0); 4194 break; 4195 4196 case OP_CUSTOM: { 4197 Perl_cpeep_t cpeep = 4198 XopENTRYCUSTOM(o, xop_peep); 4199 if (cpeep) 4200 cpeep(aTHX_ o, oldop); 4201 break; 4202 } 4203 4204 } 4205 /* did we just null the current op? If so, re-process it to handle 4206 * eliding "empty" ops from the chain */ 4207 if (o->op_type == OP_NULL && oldop && oldop->op_next == o) { 4208 o->op_opt = 0; 4209 o = oldop; 4210 } 4211 else { 4212 oldoldop = oldop; 4213 oldop = o; 4214 } 4215 } 4216 LEAVE; 4217} 4218 4219void 4220Perl_peep(pTHX_ OP *o) 4221{ 4222 CALL_RPEEP(o); 4223} 4224 4225/* 4226 * ex: set ts=8 sts=4 sw=4 et: 4227 */ 4228