1/*    peep.c
2 *
3 *    Copyright (C) 1991-2022 by Larry Wall and others
4 *
5 *    You may distribute under the terms of either the GNU General Public
6 *    License or the Artistic License, as specified in the README file.
7 *
8 */
9
10/*
11 * Aragorn sped on up the hill. Every now and again he bent to the ground.
12 * Hobbits go light, and their footprints are not easy even for a Ranger to
13 * read, but not far from the top a spring crossed the path, and in the wet
14 * earth he saw what he was seeking.
15 * 'I read the signs aright,' he said to himself. 'Frodo ran to the hill-top.
16 * I wonder what he saw there? But he returned by the same way, and went down
17 * the hill again.'
18 */
19
20/* This file contains functions for optimizing and finalizing the OP
21 * structures that hold a compiled perl program
22 */
23
24#include "EXTERN.h"
25#define PERL_IN_PEEP_C
26#include "perl.h"
27
28
29#define CALL_RPEEP(o) PL_rpeepp(aTHX_ o)
30
31
32static void
33S_scalar_slice_warning(pTHX_ const OP *o)
34{
35    OP *kid;
36    const bool is_hash = o->op_type == OP_HSLICE
37                || (o->op_type == OP_NULL && o->op_targ == OP_HSLICE);
38    SV *name;
39
40    if (!(o->op_private & OPpSLICEWARNING))
41        return;
42    if (PL_parser && PL_parser->error_count)
43        /* This warning can be nonsensical when there is a syntax error. */
44        return;
45
46    kid = cLISTOPo->op_first;
47    kid = OpSIBLING(kid); /* get past pushmark */
48    /* weed out false positives: any ops that can return lists */
49    switch (kid->op_type) {
50    case OP_BACKTICK:
51    case OP_GLOB:
52    case OP_READLINE:
53    case OP_MATCH:
54    case OP_RV2AV:
55    case OP_EACH:
56    case OP_VALUES:
57    case OP_KEYS:
58    case OP_SPLIT:
59    case OP_LIST:
60    case OP_SORT:
61    case OP_REVERSE:
62    case OP_ENTERSUB:
63    case OP_CALLER:
64    case OP_LSTAT:
65    case OP_STAT:
66    case OP_READDIR:
67    case OP_SYSTEM:
68    case OP_TMS:
69    case OP_LOCALTIME:
70    case OP_GMTIME:
71    case OP_ENTEREVAL:
72        return;
73    }
74
75    /* Don't warn if we have a nulled list either. */
76    if (kid->op_type == OP_NULL && kid->op_targ == OP_LIST)
77        return;
78
79    assert(OpSIBLING(kid));
80    name = op_varname(OpSIBLING(kid));
81    if (!name) /* XS module fiddling with the op tree */
82        return;
83    warn_elem_scalar_context(kid, name, is_hash, true);
84}
85
86
87/* info returned by S_sprintf_is_multiconcatable() */
88
89struct sprintf_ismc_info {
90    SSize_t nargs;    /* num of args to sprintf (not including the format) */
91    char  *start;     /* start of raw format string */
92    char  *end;       /* bytes after end of raw format string */
93    STRLEN total_len; /* total length (in bytes) of format string, not
94                         including '%s' and  half of '%%' */
95    STRLEN variant;   /* number of bytes by which total_len_p would grow
96                         if upgraded to utf8 */
97    bool   utf8;      /* whether the format is utf8 */
98};
99
100/* is the OP_SPRINTF o suitable for converting into a multiconcat op?
101 * i.e. its format argument is a const string with only '%s' and '%%'
102 * formats, and the number of args is known, e.g.
103 *    sprintf "a=%s f=%s", $a[0], scalar(f());
104 * but not
105 *    sprintf "i=%d a=%s f=%s", $i, @a, f();
106 *
107 * If successful, the sprintf_ismc_info struct pointed to by info will be
108 * populated.
109 */
110
111STATIC bool
112S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info)
113{
114    OP    *pm, *constop, *kid;
115    SV    *sv;
116    char  *s, *e, *p;
117    SSize_t nargs, nformats;
118    STRLEN cur, total_len, variant;
119    bool   utf8;
120
121    /* if sprintf's behaviour changes, die here so that someone
122     * can decide whether to enhance this function or skip optimising
123     * under those new circumstances */
124    assert(!(o->op_flags & OPf_STACKED));
125    assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX));
126    assert(!(o->op_private & ~OPpARG4_MASK));
127
128    pm = cUNOPo->op_first;
129    if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */
130        return FALSE;
131    constop = OpSIBLING(pm);
132    if (!constop || constop->op_type != OP_CONST)
133        return FALSE;
134    sv = cSVOPx_sv(constop);
135    if (SvMAGICAL(sv) || !SvPOK(sv))
136        return FALSE;
137
138    s = SvPV(sv, cur);
139    e = s + cur;
140
141    /* Scan format for %% and %s and work out how many %s there are.
142     * Abandon if other format types are found.
143     */
144
145    nformats  = 0;
146    total_len = 0;
147    variant   = 0;
148
149    for (p = s; p < e; p++) {
150        if (*p != '%') {
151            total_len++;
152            if (!UTF8_IS_INVARIANT(*p))
153                variant++;
154            continue;
155        }
156        p++;
157        if (p >= e)
158            return FALSE; /* lone % at end gives "Invalid conversion" */
159        if (*p == '%')
160            total_len++;
161        else if (*p == 's')
162            nformats++;
163        else
164            return FALSE;
165    }
166
167    if (!nformats || nformats > PERL_MULTICONCAT_MAXARG)
168        return FALSE;
169
170    utf8 = cBOOL(SvUTF8(sv));
171    if (utf8)
172        variant = 0;
173
174    /* scan args; they must all be in scalar cxt */
175
176    nargs = 0;
177    kid = OpSIBLING(constop);
178
179    while (kid) {
180        if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR)
181            return FALSE;
182        nargs++;
183        kid = OpSIBLING(kid);
184    }
185
186    if (nargs != nformats)
187        return FALSE; /* e.g. sprintf("%s%s", $a); */
188
189
190    info->nargs      = nargs;
191    info->start      = s;
192    info->end        = e;
193    info->total_len  = total_len;
194    info->variant    = variant;
195    info->utf8       = utf8;
196
197    return TRUE;
198}
199
200/* S_maybe_multiconcat():
201 *
202 * given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly
203 * convert it (and its children) into an OP_MULTICONCAT. See the code
204 * comments just before pp_multiconcat() for the full details of what
205 * OP_MULTICONCAT supports.
206 *
207 * Basically we're looking for an optree with a chain of OP_CONCATS down
208 * the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or
209 * OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g.
210 *
211 *      $x = "$a$b-$c"
212 *
213 *  looks like
214 *
215 *      SASSIGN
216 *         |
217 *      STRINGIFY   -- PADSV[$x]
218 *         |
219 *         |
220 *      ex-PUSHMARK -- CONCAT/S
221 *                        |
222 *                     CONCAT/S  -- PADSV[$d]
223 *                        |
224 *                     CONCAT    -- CONST["-"]
225 *                        |
226 *                     PADSV[$a] -- PADSV[$b]
227 *
228 * Note that at this stage the OP_SASSIGN may have already been optimised
229 * away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT.
230 */
231
232STATIC void
233S_maybe_multiconcat(pTHX_ OP *o)
234{
235    OP *lastkidop;   /* the right-most of any kids unshifted onto o */
236    OP *topop;       /* the top-most op in the concat tree (often equals o,
237                        unless there are assign/stringify ops above it */
238    OP *parentop;    /* the parent op of topop (or itself if no parent) */
239    OP *targmyop;    /* the op (if any) with the OPpTARGET_MY flag */
240    OP *targetop;    /* the op corresponding to target=... or target.=... */
241    OP *stringop;    /* the OP_STRINGIFY op, if any */
242    OP *nextop;      /* used for recreating the op_next chain without consts */
243    OP *kid;         /* general-purpose op pointer */
244    UNOP_AUX_item *aux;
245    UNOP_AUX_item *lenp;
246    char *const_str, *p;
247    struct sprintf_ismc_info sprintf_info;
248
249                     /* store info about each arg in args[];
250                      * toparg is the highest used slot; argp is a general
251                      * pointer to args[] slots */
252    struct {
253        void *p;      /* initially points to const sv (or null for op);
254                         later, set to SvPV(constsv), with ... */
255        STRLEN len;   /* ... len set to SvPV(..., len) */
256    } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1];
257
258    SSize_t nargs  = 0;
259    SSize_t nconst = 0;
260    SSize_t nadjconst  = 0; /* adjacent consts - may be demoted to args */
261    STRLEN variant;
262    bool utf8 = FALSE;
263    bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op;
264                                 the last-processed arg will the LHS of one,
265                                 as args are processed in reverse order */
266    U8   stacked_last = 0;   /* whether the last seen concat op was STACKED */
267    STRLEN total_len  = 0;   /* sum of the lengths of the const segments */
268    U8 flags          = 0;   /* what will become the op_flags and ... */
269    U8 private_flags  = 0;   /* ... op_private of the multiconcat op */
270    bool is_sprintf = FALSE; /* we're optimising an sprintf */
271    bool is_targable  = FALSE; /* targetop is an OPpTARGET_MY candidate */
272    bool prev_was_const = FALSE; /* previous arg was a const */
273
274    /* -----------------------------------------------------------------
275     * Phase 1:
276     *
277     * Examine the optree non-destructively to determine whether it's
278     * suitable to be converted into an OP_MULTICONCAT. Accumulate
279     * information about the optree in args[].
280     */
281
282    argp     = args;
283    targmyop = NULL;
284    targetop = NULL;
285    stringop = NULL;
286    topop    = o;
287    parentop = o;
288
289    assert(   o->op_type == OP_SASSIGN
290           || o->op_type == OP_CONCAT
291           || o->op_type == OP_SPRINTF
292           || o->op_type == OP_STRINGIFY);
293
294    Zero(&sprintf_info, 1, struct sprintf_ismc_info);
295
296    /* first see if, at the top of the tree, there is an assign,
297     * append and/or stringify */
298
299    if (topop->op_type == OP_SASSIGN) {
300        /* expr = ..... */
301        if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN])
302            return;
303        if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
304            return;
305        assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */
306
307        parentop = topop;
308        topop = cBINOPo->op_first;
309        targetop = OpSIBLING(topop);
310        if (!targetop) /* probably some sort of syntax error */
311            return;
312
313        /* don't optimise away assign in 'local $foo = ....' */
314        if (   (targetop->op_private & OPpLVAL_INTRO)
315            /* these are the common ops which do 'local', but
316             * not all */
317            && (   targetop->op_type == OP_GVSV
318                || targetop->op_type == OP_RV2SV
319                || targetop->op_type == OP_AELEM
320                || targetop->op_type == OP_HELEM
321                )
322        )
323            return;
324    }
325    else if (   topop->op_type == OP_CONCAT
326             && (topop->op_flags & OPf_STACKED)
327             && (!(topop->op_private & OPpCONCAT_NESTED))
328            )
329    {
330        /* expr .= ..... */
331
332        /* OPpTARGET_MY shouldn't be able to be set here. If it is,
333         * decide what to do about it */
334        assert(!(o->op_private & OPpTARGET_MY));
335
336        /* barf on unknown flags */
337        assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY)));
338        private_flags |= OPpMULTICONCAT_APPEND;
339        targetop = cBINOPo->op_first;
340        parentop = topop;
341        topop    = OpSIBLING(targetop);
342
343        /* $x .= <FOO> gets optimised to rcatline instead */
344        if (topop->op_type == OP_READLINE)
345            return;
346    }
347
348    if (targetop) {
349        /* Can targetop (the LHS) if it's a padsv, be optimised
350         * away and use OPpTARGET_MY instead?
351         */
352        if (    (targetop->op_type == OP_PADSV)
353            && !(targetop->op_private & OPpDEREF)
354            && !(targetop->op_private & OPpPAD_STATE)
355               /* we don't support 'my $x .= ...' */
356            && (   o->op_type == OP_SASSIGN
357                || !(targetop->op_private & OPpLVAL_INTRO))
358        )
359            is_targable = TRUE;
360    }
361
362    if (topop->op_type == OP_STRINGIFY) {
363        if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY])
364            return;
365        stringop = topop;
366
367        /* barf on unknown flags */
368        assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY)));
369
370        if ((topop->op_private & OPpTARGET_MY)) {
371            if (o->op_type == OP_SASSIGN)
372                return; /* can't have two assigns */
373            targmyop = topop;
374        }
375
376        private_flags |= OPpMULTICONCAT_STRINGIFY;
377        parentop = topop;
378        topop = cBINOPx(topop)->op_first;
379        assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK));
380        topop = OpSIBLING(topop);
381    }
382
383    if (topop->op_type == OP_SPRINTF) {
384        if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF])
385            return;
386        if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) {
387            nargs     = sprintf_info.nargs;
388            total_len = sprintf_info.total_len;
389            variant   = sprintf_info.variant;
390            utf8      = sprintf_info.utf8;
391            is_sprintf = TRUE;
392            private_flags |= OPpMULTICONCAT_FAKE;
393            toparg = argp;
394            /* we have an sprintf op rather than a concat optree.
395             * Skip most of the code below which is associated with
396             * processing that optree. We also skip phase 2, determining
397             * whether its cost effective to optimise, since for sprintf,
398             * multiconcat is *always* faster */
399            goto create_aux;
400        }
401        /* note that even if the sprintf itself isn't multiconcatable,
402         * the expression as a whole may be, e.g. in
403         *    $x .= sprintf("%d",...)
404         * the sprintf op will be left as-is, but the concat/S op may
405         * be upgraded to multiconcat
406         */
407    }
408    else if (topop->op_type == OP_CONCAT) {
409        if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT])
410            return;
411
412        if ((topop->op_private & OPpTARGET_MY)) {
413            if (o->op_type == OP_SASSIGN || targmyop)
414                return; /* can't have two assigns */
415            targmyop = topop;
416        }
417    }
418
419    /* Is it safe to convert a sassign/stringify/concat op into
420     * a multiconcat? */
421    assert((PL_opargs[OP_SASSIGN]   & OA_CLASS_MASK) == OA_BINOP);
422    assert((PL_opargs[OP_CONCAT]    & OA_CLASS_MASK) == OA_BINOP);
423    assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP);
424    assert((PL_opargs[OP_SPRINTF]   & OA_CLASS_MASK) == OA_LISTOP);
425    STATIC_ASSERT_STMT(   STRUCT_OFFSET(BINOP,    op_last)
426                       == STRUCT_OFFSET(UNOP_AUX, op_aux));
427    STATIC_ASSERT_STMT(   STRUCT_OFFSET(LISTOP,   op_last)
428                       == STRUCT_OFFSET(UNOP_AUX, op_aux));
429
430    /* Now scan the down the tree looking for a series of
431     * CONCAT/OPf_STACKED ops on the LHS (with the last one not
432     * stacked). For example this tree:
433     *
434     *     |
435     *   CONCAT/STACKED
436     *     |
437     *   CONCAT/STACKED -- EXPR5
438     *     |
439     *   CONCAT/STACKED -- EXPR4
440     *     |
441     *   CONCAT -- EXPR3
442     *     |
443     *   EXPR1  -- EXPR2
444     *
445     * corresponds to an expression like
446     *
447     *   (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5)
448     *
449     * Record info about each EXPR in args[]: in particular, whether it is
450     * a stringifiable OP_CONST and if so what the const sv is.
451     *
452     * The reason why the last concat can't be STACKED is the difference
453     * between
454     *
455     *    ((($a .= $a) .= $a) .= $a) .= $a
456     *
457     * and
458     *    $a . $a . $a . $a . $a
459     *
460     * The main difference between the optrees for those two constructs
461     * is the presence of the last STACKED. As well as modifying $a,
462     * the former sees the changed $a between each concat, so if $s is
463     * initially 'a', the first returns 'a' x 16, while the latter returns
464     * 'a' x 5. And pp_multiconcat can't handle that kind of thing.
465     */
466
467    kid = topop;
468
469    for (;;) {
470        OP *argop;
471        SV *sv;
472        bool last = FALSE;
473
474        if (    kid->op_type == OP_CONCAT
475            && !kid_is_last
476        ) {
477            OP *k1, *k2;
478            k1 = cUNOPx(kid)->op_first;
479            k2 = OpSIBLING(k1);
480            /* shouldn't happen except maybe after compile err? */
481            if (!k2)
482                return;
483
484            /* avoid turning (A . B . ($lex = C) ...)  into  (A . B . C ...) */
485            if (kid->op_private & OPpTARGET_MY)
486                kid_is_last = TRUE;
487
488            stacked_last = (kid->op_flags & OPf_STACKED);
489            if (!stacked_last)
490                kid_is_last = TRUE;
491
492            kid   = k1;
493            argop = k2;
494        }
495        else {
496            argop = kid;
497            last = TRUE;
498        }
499
500        if (   nargs + nadjconst  >  PERL_MULTICONCAT_MAXARG        - 2
501            || (argp - args + 1)  > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2)
502        {
503            /* At least two spare slots are needed to decompose both
504             * concat args. If there are no slots left, continue to
505             * examine the rest of the optree, but don't push new values
506             * on args[]. If the optree as a whole is legal for conversion
507             * (in particular that the last concat isn't STACKED), then
508             * the first PERL_MULTICONCAT_MAXARG elements of the optree
509             * can be converted into an OP_MULTICONCAT now, with the first
510             * child of that op being the remainder of the optree -
511             * which may itself later be converted to a multiconcat op
512             * too.
513             */
514            if (last) {
515                /* the last arg is the rest of the optree */
516                argp++->p = NULL;
517                nargs++;
518            }
519        }
520        else if (   argop->op_type == OP_CONST
521            && ((sv = cSVOPx_sv(argop)))
522            /* defer stringification until runtime of 'constant'
523             * things that might stringify variantly, e.g. the radix
524             * point of NVs, or overloaded RVs */
525            && (SvPOK(sv) || SvIOK(sv))
526            && (!SvGMAGICAL(sv))
527        ) {
528            if (argop->op_private & OPpCONST_STRICT)
529                no_bareword_allowed(argop);
530            argp++->p = sv;
531            utf8   |= cBOOL(SvUTF8(sv));
532            nconst++;
533            if (prev_was_const)
534                /* this const may be demoted back to a plain arg later;
535                 * make sure we have enough arg slots left */
536                nadjconst++;
537            prev_was_const = !prev_was_const;
538        }
539        else {
540            argp++->p = NULL;
541            nargs++;
542            prev_was_const = FALSE;
543        }
544
545        if (last)
546            break;
547    }
548
549    toparg = argp - 1;
550
551    if (stacked_last)
552        return; /* we don't support ((A.=B).=C)...) */
553
554    /* look for two adjacent consts and don't fold them together:
555     *     $o . "a" . "b"
556     * should do
557     *     $o->concat("a")->concat("b")
558     * rather than
559     *     $o->concat("ab")
560     * (but $o .=  "a" . "b" should still fold)
561     */
562    {
563        bool seen_nonconst = FALSE;
564        for (argp = toparg; argp >= args; argp--) {
565            if (argp->p == NULL) {
566                seen_nonconst = TRUE;
567                continue;
568            }
569            if (!seen_nonconst)
570                continue;
571            if (argp[1].p) {
572                /* both previous and current arg were constants;
573                 * leave the current OP_CONST as-is */
574                argp->p = NULL;
575                nconst--;
576                nargs++;
577            }
578        }
579    }
580
581    /* -----------------------------------------------------------------
582     * Phase 2:
583     *
584     * At this point we have determined that the optree *can* be converted
585     * into a multiconcat. Having gathered all the evidence, we now decide
586     * whether it *should*.
587     */
588
589
590    /* we need at least one concat action, e.g.:
591     *
592     *  Y . Z
593     *  X = Y . Z
594     *  X .= Y
595     *
596     * otherwise we could be doing something like $x = "foo", which
597     * if treated as a concat, would fail to COW.
598     */
599    if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2)
600        return;
601
602    /* Benchmarking seems to indicate that we gain if:
603     * * we optimise at least two actions into a single multiconcat
604     *    (e.g concat+concat, sassign+concat);
605     * * or if we can eliminate at least 1 OP_CONST;
606     * * or if we can eliminate a padsv via OPpTARGET_MY
607     */
608
609    if (
610           /* eliminated at least one OP_CONST */
611           nconst >= 1
612           /* eliminated an OP_SASSIGN */
613        || o->op_type == OP_SASSIGN
614           /* eliminated an OP_PADSV */
615        || (!targmyop && is_targable)
616    )
617        /* definitely a net gain to optimise */
618        goto optimise;
619
620    /* ... if not, what else? */
621
622    /* special-case '$lex1 = expr . $lex1' (where expr isn't lex1):
623     * multiconcat is faster (due to not creating a temporary copy of
624     * $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is
625     * faster.
626     */
627    if (   nconst == 0
628         && nargs == 2
629         && targmyop
630         && topop->op_type == OP_CONCAT
631    ) {
632        PADOFFSET t = targmyop->op_targ;
633        OP *k1 = cBINOPx(topop)->op_first;
634        OP *k2 = cBINOPx(topop)->op_last;
635        if (   k2->op_type == OP_PADSV
636            && k2->op_targ == t
637            && (   k1->op_type != OP_PADSV
638                || k1->op_targ != t)
639        )
640            goto optimise;
641    }
642
643    /* need at least two concats */
644    if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3)
645        return;
646
647
648
649    /* -----------------------------------------------------------------
650     * Phase 3:
651     *
652     * At this point the optree has been verified as ok to be optimised
653     * into an OP_MULTICONCAT. Now start changing things.
654     */
655
656   optimise:
657
658    /* stringify all const args and determine utf8ness */
659
660    variant = 0;
661    for (argp = args; argp <= toparg; argp++) {
662        SV *sv = (SV*)argp->p;
663        if (!sv)
664            continue; /* not a const op */
665        if (utf8 && !SvUTF8(sv))
666            sv_utf8_upgrade_nomg(sv);
667        argp->p = SvPV_nomg(sv, argp->len);
668        total_len += argp->len;
669
670        /* see if any strings would grow if converted to utf8 */
671        if (!utf8) {
672            variant += variant_under_utf8_count((U8 *) argp->p,
673                                                (U8 *) argp->p + argp->len);
674        }
675    }
676
677    /* create and populate aux struct */
678
679  create_aux:
680
681    aux = (UNOP_AUX_item*)PerlMemShared_malloc(
682                    sizeof(UNOP_AUX_item)
683                    *  (
684                           PERL_MULTICONCAT_HEADER_SIZE
685                         + ((nargs + 1) * (variant ? 2 : 1))
686                        )
687                    );
688    const_str = (char *)PerlMemShared_malloc(total_len ? total_len : 1);
689
690    /* Extract all the non-const expressions from the concat tree then
691     * dispose of the old tree, e.g. convert the tree from this:
692     *
693     *  o => SASSIGN
694     *         |
695     *       STRINGIFY   -- TARGET
696     *         |
697     *       ex-PUSHMARK -- CONCAT
698     *                        |
699     *                      CONCAT -- EXPR5
700     *                        |
701     *                      CONCAT -- EXPR4
702     *                        |
703     *                      CONCAT -- EXPR3
704     *                        |
705     *                      EXPR1  -- EXPR2
706     *
707     *
708     * to:
709     *
710     *  o => MULTICONCAT
711     *         |
712     *       ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET
713     *
714     * except that if EXPRi is an OP_CONST, it's discarded.
715     *
716     * During the conversion process, EXPR ops are stripped from the tree
717     * and unshifted onto o. Finally, any of o's remaining original
718     * children are discarded and o is converted into an OP_MULTICONCAT.
719     *
720     * In this middle of this, o may contain both: unshifted args on the
721     * left, and some remaining original args on the right. lastkidop
722     * is set to point to the right-most unshifted arg to delineate
723     * between the two sets.
724     */
725
726
727    if (is_sprintf) {
728        /* create a copy of the format with the %'s removed, and record
729         * the sizes of the const string segments in the aux struct */
730        char *q, *oldq;
731        lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
732
733        p    = sprintf_info.start;
734        q    = const_str;
735        oldq = q;
736        for (; p < sprintf_info.end; p++) {
737            if (*p == '%') {
738                p++;
739                if (*p != '%') {
740                    (lenp++)->ssize = q - oldq;
741                    oldq = q;
742                    continue;
743                }
744            }
745            *q++ = *p;
746        }
747        lenp->ssize = q - oldq;
748        assert((STRLEN)(q - const_str) == total_len);
749
750        /* Attach all the args (i.e. the kids of the sprintf) to o (which
751         * may or may not be topop) The pushmark and const ops need to be
752         * kept in case they're an op_next entry point.
753         */
754        lastkidop = cLISTOPx(topop)->op_last;
755        kid = cUNOPx(topop)->op_first; /* pushmark */
756        op_null(kid);
757        op_null(OpSIBLING(kid));       /* const */
758        if (o != topop) {
759            kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */
760            op_sibling_splice(o, NULL, 0, kid); /* and attach to o */
761            lastkidop->op_next = o;
762        }
763    }
764    else {
765        p = const_str;
766        lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
767
768        lenp->ssize = -1;
769
770        /* Concatenate all const strings into const_str.
771         * Note that args[] contains the RHS args in reverse order, so
772         * we scan args[] from top to bottom to get constant strings
773         * in L-R order
774         */
775        for (argp = toparg; argp >= args; argp--) {
776            if (!argp->p)
777                /* not a const op */
778                (++lenp)->ssize = -1;
779            else {
780                STRLEN l = argp->len;
781                Copy(argp->p, p, l, char);
782                p += l;
783                if (lenp->ssize == -1)
784                    lenp->ssize = l;
785                else
786                    lenp->ssize += l;
787            }
788        }
789
790        kid = topop;
791        nextop = o;
792        lastkidop = NULL;
793
794        for (argp = args; argp <= toparg; argp++) {
795            /* only keep non-const args, except keep the first-in-next-chain
796             * arg no matter what it is (but nulled if OP_CONST), because it
797             * may be the entry point to this subtree from the previous
798             * op_next.
799             */
800            bool last = (argp == toparg);
801            OP *prev;
802
803            /* set prev to the sibling *before* the arg to be cut out,
804             * e.g. when cutting EXPR:
805             *
806             *         |
807             * kid=  CONCAT
808             *         |
809             * prev= CONCAT -- EXPR
810             *         |
811             */
812            if (argp == args && kid->op_type != OP_CONCAT) {
813                /* in e.g. '$x .= f(1)' there's no RHS concat tree
814                 * so the expression to be cut isn't kid->op_last but
815                 * kid itself */
816                OP *o1, *o2;
817                /* find the op before kid */
818                o1 = NULL;
819                o2 = cUNOPx(parentop)->op_first;
820                while (o2 && o2 != kid) {
821                    o1 = o2;
822                    o2 = OpSIBLING(o2);
823                }
824                assert(o2 == kid);
825                prev = o1;
826                kid  = parentop;
827            }
828            else if (kid == o && lastkidop)
829                prev = last ? lastkidop : OpSIBLING(lastkidop);
830            else
831                prev = last ? NULL : cUNOPx(kid)->op_first;
832
833            if (!argp->p || last) {
834                /* cut RH op */
835                OP *aop = op_sibling_splice(kid, prev, 1, NULL);
836                /* and unshift to front of o */
837                op_sibling_splice(o, NULL, 0, aop);
838                /* record the right-most op added to o: later we will
839                 * free anything to the right of it */
840                if (!lastkidop)
841                    lastkidop = aop;
842                aop->op_next = nextop;
843                if (last) {
844                    if (argp->p)
845                        /* null the const at start of op_next chain */
846                        op_null(aop);
847                }
848                else if (prev)
849                    nextop = prev->op_next;
850            }
851
852            /* the last two arguments are both attached to the same concat op */
853            if (argp < toparg - 1)
854                kid = prev;
855        }
856    }
857
858    /* Populate the aux struct */
859
860    aux[PERL_MULTICONCAT_IX_NARGS].ssize     = nargs;
861    aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv    = utf8 ? NULL : const_str;
862    aux[PERL_MULTICONCAT_IX_PLAIN_LEN].ssize = utf8 ?    0 : total_len;
863    aux[PERL_MULTICONCAT_IX_UTF8_PV].pv     = const_str;
864    aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize  = total_len;
865
866    /* if variant > 0, calculate a variant const string and lengths where
867     * the utf8 version of the string will take 'variant' more bytes than
868     * the plain one. */
869
870    if (variant) {
871        char              *p = const_str;
872        STRLEN          ulen = total_len + variant;
873        UNOP_AUX_item  *lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
874        UNOP_AUX_item *ulens = lens + (nargs + 1);
875        char             *up = (char*)PerlMemShared_malloc(ulen);
876        SSize_t            n;
877
878        aux[PERL_MULTICONCAT_IX_UTF8_PV].pv    = up;
879        aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = ulen;
880
881        for (n = 0; n < (nargs + 1); n++) {
882            SSize_t i;
883            char * orig_up = up;
884            for (i = (lens++)->ssize; i > 0; i--) {
885                U8 c = *p++;
886                append_utf8_from_native_byte(c, (U8**)&up);
887            }
888            (ulens++)->ssize = (i < 0) ? i : up - orig_up;
889        }
890    }
891
892    if (stringop) {
893        /* if there was a top(ish)-level OP_STRINGIFY, we need to keep
894         * that op's first child - an ex-PUSHMARK - because the op_next of
895         * the previous op may point to it (i.e. it's the entry point for
896         * the o optree)
897         */
898        OP *pmop =
899            (stringop == o)
900                ? op_sibling_splice(o, lastkidop, 1, NULL)
901                : op_sibling_splice(stringop, NULL, 1, NULL);
902        assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK));
903        op_sibling_splice(o, NULL, 0, pmop);
904        if (!lastkidop)
905            lastkidop = pmop;
906    }
907
908    /* Optimise
909     *    target  = A.B.C...
910     *    target .= A.B.C...
911     */
912
913    if (targetop) {
914        assert(!targmyop);
915
916        if (o->op_type == OP_SASSIGN) {
917            /* Move the target subtree from being the last of o's children
918             * to being the last of o's preserved children.
919             * Note the difference between 'target = ...' and 'target .= ...':
920             * for the former, target is executed last; for the latter,
921             * first.
922             */
923            kid = OpSIBLING(lastkidop);
924            op_sibling_splice(o, kid, 1, NULL); /* cut target op */
925            op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */
926            lastkidop->op_next = kid->op_next;
927            lastkidop = targetop;
928        }
929        else {
930            /* Move the target subtree from being the first of o's
931             * original children to being the first of *all* o's children.
932             */
933            if (lastkidop) {
934                op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */
935                op_sibling_splice(o, NULL, 0, targetop);  /* and paste*/
936            }
937            else {
938                /* if the RHS of .= doesn't contain a concat (e.g.
939                 * $x .= "foo"), it gets missed by the "strip ops from the
940                 * tree and add to o" loop earlier */
941                assert(topop->op_type != OP_CONCAT);
942                if (stringop) {
943                    /* in e.g. $x .= "$y", move the $y expression
944                     * from being a child of OP_STRINGIFY to being the
945                     * second child of the OP_CONCAT
946                     */
947                    assert(cUNOPx(stringop)->op_first == topop);
948                    op_sibling_splice(stringop, NULL, 1, NULL);
949                    op_sibling_splice(o, cUNOPo->op_first, 0, topop);
950                }
951                assert(topop == OpSIBLING(cBINOPo->op_first));
952                if (toparg->p)
953                    op_null(topop);
954                lastkidop = topop;
955            }
956        }
957
958        if (is_targable) {
959            /* optimise
960             *  my $lex  = A.B.C...
961             *     $lex  = A.B.C...
962             *     $lex .= A.B.C...
963             * The original padsv op is kept but nulled in case it's the
964             * entry point for the optree (which it will be for
965             * '$lex .=  ... '
966             */
967            private_flags |= OPpTARGET_MY;
968            private_flags |= (targetop->op_private & OPpLVAL_INTRO);
969            o->op_targ = targetop->op_targ;
970            targetop->op_targ = 0;
971            op_null(targetop);
972        }
973        else
974            flags |= OPf_STACKED;
975    }
976    else if (targmyop) {
977        private_flags |= OPpTARGET_MY;
978        if (o != targmyop) {
979            o->op_targ = targmyop->op_targ;
980            targmyop->op_targ = 0;
981        }
982    }
983
984    /* detach the emaciated husk of the sprintf/concat optree and free it */
985    for (;;) {
986        kid = op_sibling_splice(o, lastkidop, 1, NULL);
987        if (!kid)
988            break;
989        op_free(kid);
990    }
991
992    /* and convert o into a multiconcat */
993
994    o->op_flags        = (flags|OPf_KIDS|stacked_last
995                         |(o->op_flags & (OPf_WANT|OPf_PARENS)));
996    o->op_private      = private_flags;
997    o->op_type         = OP_MULTICONCAT;
998    o->op_ppaddr       = PL_ppaddr[OP_MULTICONCAT];
999    cUNOP_AUXo->op_aux = aux;
1000}
1001
1002
1003/*
1004=for apidoc_section $optree_manipulation
1005
1006=for apidoc optimize_optree
1007
1008This function applies some optimisations to the optree in top-down order.
1009It is called before the peephole optimizer, which processes ops in
1010execution order. Note that finalize_optree() also does a top-down scan,
1011but is called *after* the peephole optimizer.
1012
1013=cut
1014*/
1015
1016void
1017Perl_optimize_optree(pTHX_ OP* o)
1018{
1019    PERL_ARGS_ASSERT_OPTIMIZE_OPTREE;
1020
1021    ENTER;
1022    SAVEVPTR(PL_curcop);
1023
1024    optimize_op(o);
1025
1026    LEAVE;
1027}
1028
1029
1030#define warn_implicit_snail_cvsig(o)  S_warn_implicit_snail_cvsig(aTHX_ o)
1031static void
1032S_warn_implicit_snail_cvsig(pTHX_ OP *o)
1033{
1034    CV *cv = PL_compcv;
1035    while(cv && CvEVAL(cv))
1036        cv = CvOUTSIDE(cv);
1037
1038    if(cv && CvSIGNATURE(cv))
1039        Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__ARGS_ARRAY_WITH_SIGNATURES),
1040            "Implicit use of @_ in %s with signatured subroutine is experimental", OP_DESC(o));
1041}
1042
1043
1044#define OP_ZOOM(o)  (OP_TYPE_IS(o, OP_NULL) ? cUNOPx(o)->op_first : (o))
1045
1046/* helper for optimize_optree() which optimises one op then recurses
1047 * to optimise any children.
1048 */
1049
1050STATIC void
1051S_optimize_op(pTHX_ OP* o)
1052{
1053    OP *top_op = o;
1054
1055    PERL_ARGS_ASSERT_OPTIMIZE_OP;
1056
1057    while (1) {
1058        OP * next_kid = NULL;
1059
1060        assert(o->op_type != OP_FREED);
1061
1062        switch (o->op_type) {
1063        case OP_NEXTSTATE:
1064        case OP_DBSTATE:
1065            PL_curcop = ((COP*)o);		/* for warnings */
1066            break;
1067
1068
1069        case OP_CONCAT:
1070        case OP_SASSIGN:
1071        case OP_STRINGIFY:
1072        case OP_SPRINTF:
1073            S_maybe_multiconcat(aTHX_ o);
1074            break;
1075
1076        case OP_SUBST:
1077            if (cPMOPo->op_pmreplrootu.op_pmreplroot) {
1078                /* we can't assume that op_pmreplroot->op_sibparent == o
1079                 * and that it is thus possible to walk back up the tree
1080                 * past op_pmreplroot. So, although we try to avoid
1081                 * recursing through op trees, do it here. After all,
1082                 * there are unlikely to be many nested s///e's within
1083                 * the replacement part of a s///e.
1084                 */
1085                optimize_op(cPMOPo->op_pmreplrootu.op_pmreplroot);
1086            }
1087            break;
1088
1089        case OP_RV2AV:
1090        {
1091            OP *first = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL;
1092            CV *cv = PL_compcv;
1093            while(cv && CvEVAL(cv))
1094                cv = CvOUTSIDE(cv);
1095
1096            if(cv && CvSIGNATURE(cv) &&
1097                    OP_TYPE_IS(first, OP_GV) && cGVOPx_gv(first) == PL_defgv) {
1098                OP *parent = op_parent(o);
1099                while(OP_TYPE_IS(parent, OP_NULL))
1100                    parent = op_parent(parent);
1101
1102                Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__ARGS_ARRAY_WITH_SIGNATURES),
1103                    "Use of @_ in %s with signatured subroutine is experimental", OP_DESC(parent));
1104            }
1105            break;
1106        }
1107
1108        case OP_SHIFT:
1109        case OP_POP:
1110            if(!CvUNIQUE(PL_compcv) && !(o->op_flags & OPf_KIDS))
1111                warn_implicit_snail_cvsig(o);
1112            break;
1113
1114        case OP_ENTERSUB:
1115            if(!(o->op_flags & OPf_STACKED))
1116                warn_implicit_snail_cvsig(o);
1117            break;
1118
1119        case OP_GOTO:
1120        {
1121            OP *first = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL;
1122            OP *ffirst;
1123            if(OP_TYPE_IS(first, OP_SREFGEN) &&
1124                    (ffirst = OP_ZOOM(cUNOPx(first)->op_first)) &&
1125                    OP_TYPE_IS(ffirst, OP_RV2CV))
1126                warn_implicit_snail_cvsig(o);
1127            break;
1128        }
1129
1130        default:
1131            break;
1132        }
1133
1134        if (o->op_flags & OPf_KIDS)
1135            next_kid = cUNOPo->op_first;
1136
1137        /* if a kid hasn't been nominated to process, continue with the
1138         * next sibling, or if no siblings left, go back to the parent's
1139         * siblings and so on
1140         */
1141        while (!next_kid) {
1142            if (o == top_op)
1143                return; /* at top; no parents/siblings to try */
1144            if (OpHAS_SIBLING(o))
1145                next_kid = o->op_sibparent;
1146            else
1147                o = o->op_sibparent; /*try parent's next sibling */
1148        }
1149
1150      /* this label not yet used. Goto here if any code above sets
1151       * next-kid
1152       get_next_op:
1153       */
1154        o = next_kid;
1155    }
1156}
1157
1158/*
1159=for apidoc finalize_optree
1160
1161This function finalizes the optree.  Should be called directly after
1162the complete optree is built.  It does some additional
1163checking which can't be done in the normal C<ck_>xxx functions and makes
1164the tree thread-safe.
1165
1166=cut
1167*/
1168
1169void
1170Perl_finalize_optree(pTHX_ OP* o)
1171{
1172    PERL_ARGS_ASSERT_FINALIZE_OPTREE;
1173
1174    ENTER;
1175    SAVEVPTR(PL_curcop);
1176
1177    finalize_op(o);
1178
1179    LEAVE;
1180}
1181
1182
1183/*
1184=for apidoc traverse_op_tree
1185
1186Return the next op in a depth-first traversal of the op tree,
1187returning NULL when the traversal is complete.
1188
1189The initial call must supply the root of the tree as both top and o.
1190
1191For now it's static, but it may be exposed to the API in the future.
1192
1193=cut
1194*/
1195
1196STATIC OP*
1197S_traverse_op_tree(pTHX_ OP *top, OP *o) {
1198    OP *sib;
1199
1200    PERL_ARGS_ASSERT_TRAVERSE_OP_TREE;
1201
1202    if ((o->op_flags & OPf_KIDS) && cUNOPo->op_first) {
1203        return cUNOPo->op_first;
1204    }
1205    else if ((sib = OpSIBLING(o))) {
1206        return sib;
1207    }
1208    else {
1209        OP *parent = o->op_sibparent;
1210        assert(!(o->op_moresib));
1211        while (parent && parent != top) {
1212            OP *sib = OpSIBLING(parent);
1213            if (sib)
1214                return sib;
1215            parent = parent->op_sibparent;
1216        }
1217
1218        return NULL;
1219    }
1220}
1221
1222STATIC void
1223S_finalize_op(pTHX_ OP* o)
1224{
1225    OP * const top = o;
1226    PERL_ARGS_ASSERT_FINALIZE_OP;
1227
1228    do {
1229        assert(o->op_type != OP_FREED);
1230
1231        switch (o->op_type) {
1232        case OP_NEXTSTATE:
1233        case OP_DBSTATE:
1234            PL_curcop = ((COP*)o);		/* for warnings */
1235            break;
1236        case OP_EXEC:
1237            if (OpHAS_SIBLING(o)) {
1238                OP *sib = OpSIBLING(o);
1239                if ((  sib->op_type == OP_NEXTSTATE || sib->op_type == OP_DBSTATE)
1240                    && ckWARN(WARN_EXEC)
1241                    && OpHAS_SIBLING(sib))
1242                {
1243                    const OPCODE type = OpSIBLING(sib)->op_type;
1244                    if (type != OP_EXIT && type != OP_WARN && type != OP_DIE) {
1245                        const line_t oldline = CopLINE(PL_curcop);
1246                        CopLINE_set(PL_curcop, CopLINE((COP*)sib));
1247                        Perl_warner(aTHX_ packWARN(WARN_EXEC),
1248                            "Statement unlikely to be reached");
1249                        Perl_warner(aTHX_ packWARN(WARN_EXEC),
1250                            "\t(Maybe you meant system() when you said exec()?)\n");
1251                        CopLINE_set(PL_curcop, oldline);
1252                    }
1253                }
1254            }
1255            break;
1256
1257        case OP_GV:
1258            if ((o->op_private & OPpEARLY_CV) && ckWARN(WARN_PROTOTYPE)) {
1259                GV * const gv = cGVOPo_gv;
1260                if (SvTYPE(gv) == SVt_PVGV && GvCV(gv) && SvPVX_const(GvCV(gv))) {
1261                    /* XXX could check prototype here instead of just carping */
1262                    SV * const sv = sv_newmortal();
1263                    gv_efullname3(sv, gv, NULL);
1264                    Perl_warner(aTHX_ packWARN(WARN_PROTOTYPE),
1265                                "%" SVf "() called too early to check prototype",
1266                                SVfARG(sv));
1267                }
1268            }
1269            break;
1270
1271        case OP_CONST:
1272            if (cSVOPo->op_private & OPpCONST_STRICT)
1273                no_bareword_allowed(o);
1274#ifdef USE_ITHREADS
1275            /* FALLTHROUGH */
1276        case OP_HINTSEVAL:
1277            op_relocate_sv(&cSVOPo->op_sv, &o->op_targ);
1278#endif
1279            break;
1280
1281#ifdef USE_ITHREADS
1282            /* Relocate all the METHOP's SVs to the pad for thread safety. */
1283        case OP_METHOD_NAMED:
1284        case OP_METHOD_SUPER:
1285        case OP_METHOD_REDIR:
1286        case OP_METHOD_REDIR_SUPER:
1287            op_relocate_sv(&cMETHOPo->op_u.op_meth_sv, &o->op_targ);
1288            break;
1289#endif
1290
1291        case OP_HELEM: {
1292            UNOP *rop;
1293            SVOP *key_op;
1294            OP *kid;
1295
1296            if ((key_op = cSVOPx(cBINOPo->op_last))->op_type != OP_CONST)
1297                break;
1298
1299            rop = cUNOPx(cBINOPo->op_first);
1300
1301            goto check_keys;
1302
1303            case OP_HSLICE:
1304                S_scalar_slice_warning(aTHX_ o);
1305                /* FALLTHROUGH */
1306
1307            case OP_KVHSLICE:
1308                kid = OpSIBLING(cLISTOPo->op_first);
1309            if (/* I bet there's always a pushmark... */
1310                OP_TYPE_ISNT_AND_WASNT_NN(kid, OP_LIST)
1311                && OP_TYPE_ISNT_NN(kid, OP_CONST))
1312            {
1313                break;
1314            }
1315
1316            key_op = cSVOPx(kid->op_type == OP_CONST
1317                             ? kid
1318                             : OpSIBLING(kLISTOP->op_first));
1319
1320            rop = cUNOPx(cLISTOPo->op_last);
1321
1322        check_keys:
1323            if (o->op_private & OPpLVAL_INTRO || rop->op_type != OP_RV2HV)
1324                rop = NULL;
1325            check_hash_fields_and_hekify(rop, key_op, 1);
1326            break;
1327        }
1328        case OP_NULL:
1329            if (o->op_targ != OP_HSLICE && o->op_targ != OP_ASLICE)
1330                break;
1331            /* FALLTHROUGH */
1332        case OP_ASLICE:
1333            S_scalar_slice_warning(aTHX_ o);
1334            break;
1335
1336        case OP_SUBST: {
1337            if (cPMOPo->op_pmreplrootu.op_pmreplroot)
1338                finalize_op(cPMOPo->op_pmreplrootu.op_pmreplroot);
1339            break;
1340        }
1341        default:
1342            break;
1343        }
1344
1345#ifdef DEBUGGING
1346        if (o->op_flags & OPf_KIDS) {
1347            OP *kid;
1348
1349            /* check that op_last points to the last sibling, and that
1350             * the last op_sibling/op_sibparent field points back to the
1351             * parent, and that the only ops with KIDS are those which are
1352             * entitled to them */
1353            U32 type = o->op_type;
1354            U32 family;
1355            bool has_last;
1356
1357            if (type == OP_NULL) {
1358                type = o->op_targ;
1359                /* ck_glob creates a null UNOP with ex-type GLOB
1360                 * (which is a list op. So pretend it wasn't a listop */
1361                if (type == OP_GLOB)
1362                    type = OP_NULL;
1363            }
1364            family = PL_opargs[type] & OA_CLASS_MASK;
1365
1366            has_last = (   family == OA_BINOP
1367                        || family == OA_LISTOP
1368                        || family == OA_PMOP
1369                        || family == OA_LOOP
1370                       );
1371            assert(  has_last /* has op_first and op_last, or ...
1372                  ... has (or may have) op_first: */
1373                  || family == OA_UNOP
1374                  || family == OA_UNOP_AUX
1375                  || family == OA_LOGOP
1376                  || family == OA_BASEOP_OR_UNOP
1377                  || family == OA_FILESTATOP
1378                  || family == OA_LOOPEXOP
1379                  || family == OA_METHOP
1380                  || type == OP_CUSTOM
1381                  || type == OP_NULL /* new_logop does this */
1382                  );
1383
1384            for (kid = cUNOPo->op_first; kid; kid = OpSIBLING(kid)) {
1385                if (!OpHAS_SIBLING(kid)) {
1386                    if (has_last)
1387                        assert(kid == cLISTOPo->op_last);
1388                    assert(kid->op_sibparent == o);
1389                }
1390            }
1391        }
1392#endif
1393    } while (( o = traverse_op_tree(top, o)) != NULL);
1394}
1395
1396
1397/*
1398   ---------------------------------------------------------
1399
1400   Common vars in list assignment
1401
1402   There now follows some enums and static functions for detecting
1403   common variables in list assignments. Here is a little essay I wrote
1404   for myself when trying to get my head around this. DAPM.
1405
1406   ----
1407
1408   First some random observations:
1409
1410   * If a lexical var is an alias of something else, e.g.
1411       for my $x ($lex, $pkg, $a[0]) {...}
1412     then the act of aliasing will increase the reference count of the SV
1413
1414   * If a package var is an alias of something else, it may still have a
1415     reference count of 1, depending on how the alias was created, e.g.
1416     in *a = *b, $a may have a refcount of 1 since the GP is shared
1417     with a single GvSV pointer to the SV. So If it's an alias of another
1418     package var, then RC may be 1; if it's an alias of another scalar, e.g.
1419     a lexical var or an array element, then it will have RC > 1.
1420
1421   * There are many ways to create a package alias; ultimately, XS code
1422     may quite legally do GvSV(gv) = SvREFCNT_inc(sv) for example, so
1423     run-time tracing mechanisms are unlikely to be able to catch all cases.
1424
1425   * When the LHS is all my declarations, the same vars can't appear directly
1426     on the RHS, but they can indirectly via closures, aliasing and lvalue
1427     subs. But those techniques all involve an increase in the lexical
1428     scalar's ref count.
1429
1430   * When the LHS is all lexical vars (but not necessarily my declarations),
1431     it is possible for the same lexicals to appear directly on the RHS, and
1432     without an increased ref count, since the stack isn't refcounted.
1433     This case can be detected at compile time by scanning for common lex
1434     vars with PL_generation.
1435
1436   * lvalue subs defeat common var detection, but they do at least
1437     return vars with a temporary ref count increment. Also, you can't
1438     tell at compile time whether a sub call is lvalue.
1439
1440
1441   So...
1442
1443   A: There are a few circumstances where there definitely can't be any
1444     commonality:
1445
1446       LHS empty:  () = (...);
1447       RHS empty:  (....) = ();
1448       RHS contains only constants or other 'can't possibly be shared'
1449           elements (e.g. ops that return PADTMPs):  (...) = (1,2, length)
1450           i.e. they only contain ops not marked as dangerous, whose children
1451           are also not dangerous;
1452       LHS ditto;
1453       LHS contains a single scalar element: e.g. ($x) = (....); because
1454           after $x has been modified, it won't be used again on the RHS;
1455       RHS contains a single element with no aggregate on LHS: e.g.
1456           ($a,$b,$c)  = ($x); again, once $a has been modified, its value
1457           won't be used again.
1458
1459   B: If LHS are all 'my' lexical var declarations (or safe ops, which
1460     we can ignore):
1461
1462       my ($a, $b, @c) = ...;
1463
1464       Due to closure and goto tricks, these vars may already have content.
1465       For the same reason, an element on the RHS may be a lexical or package
1466       alias of one of the vars on the left, or share common elements, for
1467       example:
1468
1469           my ($x,$y) = f(); # $x and $y on both sides
1470           sub f : lvalue { ($x,$y) = (1,2); $y, $x }
1471
1472       and
1473
1474           my $ra = f();
1475           my @a = @$ra;  # elements of @a on both sides
1476           sub f { @a = 1..4; \@a }
1477
1478
1479       First, just consider scalar vars on LHS:
1480
1481           RHS is safe only if (A), or in addition,
1482               * contains only lexical *scalar* vars, where neither side's
1483                 lexicals have been flagged as aliases
1484
1485           If RHS is not safe, then it's always legal to check LHS vars for
1486           RC==1, since the only RHS aliases will always be associated
1487           with an RC bump.
1488
1489           Note that in particular, RHS is not safe if:
1490
1491               * it contains package scalar vars; e.g.:
1492
1493                   f();
1494                   my ($x, $y) = (2, $x_alias);
1495                   sub f { $x = 1; *x_alias = \$x; }
1496
1497               * It contains other general elements, such as flattened or
1498               * spliced or single array or hash elements, e.g.
1499
1500                   f();
1501                   my ($x,$y) = @a; # or $a[0] or @a{@b} etc
1502
1503                   sub f {
1504                       ($x, $y) = (1,2);
1505                       use feature 'refaliasing';
1506                       \($a[0], $a[1]) = \($y,$x);
1507                   }
1508
1509                 It doesn't matter if the array/hash is lexical or package.
1510
1511               * it contains a function call that happens to be an lvalue
1512                 sub which returns one or more of the above, e.g.
1513
1514                   f();
1515                   my ($x,$y) = f();
1516
1517                   sub f : lvalue {
1518                       ($x, $y) = (1,2);
1519                       *x1 = \$x;
1520                       $y, $x1;
1521                   }
1522
1523                   (so a sub call on the RHS should be treated the same
1524                   as having a package var on the RHS).
1525
1526               * any other "dangerous" thing, such an op or built-in that
1527                 returns one of the above, e.g. pp_preinc
1528
1529
1530           If RHS is not safe, what we can do however is at compile time flag
1531           that the LHS are all my declarations, and at run time check whether
1532           all the LHS have RC == 1, and if so skip the full scan.
1533
1534       Now consider array and hash vars on LHS: e.g. my (...,@a) = ...;
1535
1536           Here the issue is whether there can be elements of @a on the RHS
1537           which will get prematurely freed when @a is cleared prior to
1538           assignment. This is only a problem if the aliasing mechanism
1539           is one which doesn't increase the refcount - only if RC == 1
1540           will the RHS element be prematurely freed.
1541
1542           Because the array/hash is being INTROed, it or its elements
1543           can't directly appear on the RHS:
1544
1545               my (@a) = ($a[0], @a, etc) # NOT POSSIBLE
1546
1547           but can indirectly, e.g.:
1548
1549               my $r = f();
1550               my (@a) = @$r;
1551               sub f { @a = 1..3; \@a }
1552
1553           So if the RHS isn't safe as defined by (A), we must always
1554           mortalise and bump the ref count of any remaining RHS elements
1555           when assigning to a non-empty LHS aggregate.
1556
1557           Lexical scalars on the RHS aren't safe if they've been involved in
1558           aliasing, e.g.
1559
1560               use feature 'refaliasing';
1561
1562               f();
1563               \(my $lex) = \$pkg;
1564               my @a = ($lex,3); # equivalent to ($a[0],3)
1565
1566               sub f {
1567                   @a = (1,2);
1568                   \$pkg = \$a[0];
1569               }
1570
1571           Similarly with lexical arrays and hashes on the RHS:
1572
1573               f();
1574               my @b;
1575               my @a = (@b);
1576
1577               sub f {
1578                   @a = (1,2);
1579                   \$b[0] = \$a[1];
1580                   \$b[1] = \$a[0];
1581               }
1582
1583
1584
1585   C: As (B), but in addition the LHS may contain non-intro lexicals, e.g.
1586       my $a; ($a, my $b) = (....);
1587
1588       The difference between (B) and (C) is that it is now physically
1589       possible for the LHS vars to appear on the RHS too, where they
1590       are not reference counted; but in this case, the compile-time
1591       PL_generation sweep will detect such common vars.
1592
1593       So the rules for (C) differ from (B) in that if common vars are
1594       detected, the runtime "test RC==1" optimisation can no longer be used,
1595       and a full mark and sweep is required
1596
1597   D: As (C), but in addition the LHS may contain package vars.
1598
1599       Since package vars can be aliased without a corresponding refcount
1600       increase, all bets are off. It's only safe if (A). E.g.
1601
1602           my ($x, $y) = (1,2);
1603
1604           for $x_alias ($x) {
1605               ($x_alias, $y) = (3, $x); # whoops
1606           }
1607
1608       Ditto for LHS aggregate package vars.
1609
1610   E: Any other dangerous ops on LHS, e.g.
1611           (f(), $a[0], @$r) = (...);
1612
1613       this is similar to (E) in that all bets are off. In addition, it's
1614       impossible to determine at compile time whether the LHS
1615       contains a scalar or an aggregate, e.g.
1616
1617           sub f : lvalue { @a }
1618           (f()) = 1..3;
1619
1620* ---------------------------------------------------------
1621*/
1622
1623/* A set of bit flags returned by S_aassign_scan(). Each flag indicates
1624 * that at least one of the things flagged was seen.
1625 */
1626
1627enum {
1628    AAS_MY_SCALAR       = 0x001, /* my $scalar */
1629    AAS_MY_AGG          = 0x002, /* aggregate: my @array or my %hash */
1630    AAS_LEX_SCALAR      = 0x004, /* $lexical */
1631    AAS_LEX_AGG         = 0x008, /* @lexical or %lexical aggregate */
1632    AAS_LEX_SCALAR_COMM = 0x010, /* $lexical seen on both sides */
1633    AAS_PKG_SCALAR      = 0x020, /* $scalar (where $scalar is pkg var) */
1634    AAS_PKG_AGG         = 0x040, /* package @array or %hash aggregate */
1635    AAS_DANGEROUS       = 0x080, /* an op (other than the above)
1636                                         that's flagged OA_DANGEROUS */
1637    AAS_SAFE_SCALAR     = 0x100, /* produces at least one scalar SV that's
1638                                        not in any of the categories above */
1639    AAS_DEFAV           = 0x200  /* contains just a single '@_' on RHS */
1640};
1641
1642/* helper function for S_aassign_scan().
1643 * check a PAD-related op for commonality and/or set its generation number.
1644 * Returns a boolean indicating whether its shared */
1645
1646static bool
1647S_aassign_padcheck(pTHX_ OP* o, bool rhs)
1648{
1649    if (PAD_COMPNAME_GEN(o->op_targ) == PERL_INT_MAX)
1650        /* lexical used in aliasing */
1651        return TRUE;
1652
1653    if (rhs)
1654        return cBOOL(PAD_COMPNAME_GEN(o->op_targ) == (STRLEN)PL_generation);
1655    else
1656        PAD_COMPNAME_GEN_set(o->op_targ, PL_generation);
1657
1658    return FALSE;
1659}
1660
1661/*
1662  Helper function for OPpASSIGN_COMMON* detection in rpeep().
1663  It scans the left or right hand subtree of the aassign op, and returns a
1664  set of flags indicating what sorts of things it found there.
1665  'rhs' indicates whether we're scanning the LHS or RHS. If the former, we
1666  set PL_generation on lexical vars; if the latter, we see if
1667  PL_generation matches.
1668  'scalars_p' is a pointer to a counter of the number of scalar SVs seen.
1669  This fn will increment it by the number seen. It's not intended to
1670  be an accurate count (especially as many ops can push a variable
1671  number of SVs onto the stack); rather it's used as to test whether there
1672  can be at most 1 SV pushed; so it's only meanings are "0, 1, many".
1673*/
1674
1675static int
1676S_aassign_scan(pTHX_ OP* o, bool rhs, int *scalars_p)
1677{
1678    OP *top_op           = o;
1679    OP *effective_top_op = o;
1680    int all_flags = 0;
1681
1682    while (1) {
1683        bool top = o == effective_top_op;
1684        int flags = 0;
1685        OP* next_kid = NULL;
1686
1687        /* first, look for a solitary @_ on the RHS */
1688        if (   rhs
1689            && top
1690            && (o->op_flags & OPf_KIDS)
1691            && OP_TYPE_IS_OR_WAS(o, OP_LIST)
1692        ) {
1693            OP *kid = cUNOPo->op_first;
1694            if (   (   kid->op_type == OP_PUSHMARK
1695                    || kid->op_type == OP_PADRANGE) /* ex-pushmark */
1696                && ((kid = OpSIBLING(kid)))
1697                && !OpHAS_SIBLING(kid)
1698                && kid->op_type == OP_RV2AV
1699                && !(kid->op_flags & OPf_REF)
1700                && !(kid->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB))
1701                && ((kid->op_flags & OPf_WANT) == OPf_WANT_LIST)
1702                && ((kid = cUNOPx(kid)->op_first))
1703                && kid->op_type == OP_GV
1704                && cGVOPx_gv(kid) == PL_defgv
1705            )
1706                flags = AAS_DEFAV;
1707        }
1708
1709        switch (o->op_type) {
1710        case OP_GVSV:
1711            (*scalars_p)++;
1712            all_flags |= AAS_PKG_SCALAR;
1713            goto do_next;
1714
1715        case OP_PADAV:
1716        case OP_PADHV:
1717            (*scalars_p) += 2;
1718            /* if !top, could be e.g. @a[0,1] */
1719            all_flags |=  (top && (o->op_flags & OPf_REF))
1720                            ? ((o->op_private & OPpLVAL_INTRO)
1721                                ? AAS_MY_AGG : AAS_LEX_AGG)
1722                            : AAS_DANGEROUS;
1723            goto do_next;
1724
1725        case OP_PADSV:
1726            {
1727                int comm = S_aassign_padcheck(aTHX_ o, rhs)
1728                            ?  AAS_LEX_SCALAR_COMM : 0;
1729                (*scalars_p)++;
1730                all_flags |= (o->op_private & OPpLVAL_INTRO)
1731                    ? (AAS_MY_SCALAR|comm) : (AAS_LEX_SCALAR|comm);
1732                goto do_next;
1733
1734            }
1735
1736        case OP_RV2AV:
1737        case OP_RV2HV:
1738            (*scalars_p) += 2;
1739            if (cUNOPx(o)->op_first->op_type != OP_GV)
1740                all_flags |= AAS_DANGEROUS; /* @{expr}, %{expr} */
1741            /* @pkg, %pkg */
1742            /* if !top, could be e.g. @a[0,1] */
1743            else if (top && (o->op_flags & OPf_REF))
1744                all_flags |= AAS_PKG_AGG;
1745            else
1746                all_flags |= AAS_DANGEROUS;
1747            goto do_next;
1748
1749        case OP_RV2SV:
1750            (*scalars_p)++;
1751            if (cUNOPx(o)->op_first->op_type != OP_GV) {
1752                (*scalars_p) += 2;
1753                all_flags |= AAS_DANGEROUS; /* ${expr} */
1754            }
1755            else
1756                all_flags |= AAS_PKG_SCALAR; /* $pkg */
1757            goto do_next;
1758
1759        case OP_SPLIT:
1760            if (o->op_private & OPpSPLIT_ASSIGN) {
1761                /* the assign in @a = split() has been optimised away
1762                 * and the @a attached directly to the split op
1763                 * Treat the array as appearing on the RHS, i.e.
1764                 *    ... = (@a = split)
1765                 * is treated like
1766                 *    ... = @a;
1767                 */
1768
1769                if (o->op_flags & OPf_STACKED) {
1770                    /* @{expr} = split() - the array expression is tacked
1771                     * on as an extra child to split - process kid */
1772                    next_kid = cLISTOPo->op_last;
1773                    goto do_next;
1774                }
1775
1776                /* ... else array is directly attached to split op */
1777                (*scalars_p) += 2;
1778                all_flags |= (PL_op->op_private & OPpSPLIT_LEX)
1779                                ? ((o->op_private & OPpLVAL_INTRO)
1780                                    ? AAS_MY_AGG : AAS_LEX_AGG)
1781                                : AAS_PKG_AGG;
1782                goto do_next;
1783            }
1784            (*scalars_p)++;
1785            /* other args of split can't be returned */
1786            all_flags |= AAS_SAFE_SCALAR;
1787            goto do_next;
1788
1789        case OP_UNDEF:
1790            /* undef on LHS following a var is significant, e.g.
1791             *    my $x = 1;
1792             *    @a = (($x, undef) = (2 => $x));
1793             *    # @a shoul be (2,1) not (2,2)
1794             *
1795             * undef on RHS counts as a scalar:
1796             *   ($x, $y)    = (undef, $x); # 2 scalars on RHS: unsafe
1797             */
1798            if ((!rhs && *scalars_p) || rhs)
1799                (*scalars_p)++;
1800            flags = AAS_SAFE_SCALAR;
1801            break;
1802
1803        case OP_PUSHMARK:
1804        case OP_STUB:
1805            /* these are all no-ops; they don't push a potentially common SV
1806             * onto the stack, so they are neither AAS_DANGEROUS nor
1807             * AAS_SAFE_SCALAR */
1808            goto do_next;
1809
1810        case OP_PADRANGE: /* Ignore padrange; checking its siblings is enough */
1811            break;
1812
1813        case OP_NULL:
1814        case OP_LIST:
1815            /* these do nothing, but may have children */
1816            break;
1817
1818        default:
1819            if (PL_opargs[o->op_type] & OA_DANGEROUS) {
1820                (*scalars_p) += 2;
1821                flags = AAS_DANGEROUS;
1822                break;
1823            }
1824
1825            if (   (PL_opargs[o->op_type] & OA_TARGLEX)
1826                && (o->op_private & OPpTARGET_MY))
1827            {
1828                (*scalars_p)++;
1829                all_flags |= S_aassign_padcheck(aTHX_ o, rhs)
1830                                ? AAS_LEX_SCALAR_COMM : AAS_LEX_SCALAR;
1831                goto do_next;
1832            }
1833
1834            /* if its an unrecognised, non-dangerous op, assume that it
1835             * is the cause of at least one safe scalar */
1836            (*scalars_p)++;
1837            flags = AAS_SAFE_SCALAR;
1838            break;
1839        }
1840
1841        all_flags |= flags;
1842
1843        /* by default, process all kids next
1844         * XXX this assumes that all other ops are "transparent" - i.e. that
1845         * they can return some of their children. While this true for e.g.
1846         * sort and grep, it's not true for e.g. map. We really need a
1847         * 'transparent' flag added to regen/opcodes
1848         */
1849        if (o->op_flags & OPf_KIDS) {
1850            next_kid = cUNOPo->op_first;
1851            /* these ops do nothing but may have children; but their
1852             * children should also be treated as top-level */
1853            if (   o == effective_top_op
1854                && (o->op_type == OP_NULL || o->op_type == OP_LIST)
1855            )
1856                effective_top_op = next_kid;
1857        }
1858
1859
1860        /* If next_kid is set, someone in the code above wanted us to process
1861         * that kid and all its remaining siblings.  Otherwise, work our way
1862         * back up the tree */
1863      do_next:
1864        while (!next_kid) {
1865            if (o == top_op)
1866                return all_flags; /* at top; no parents/siblings to try */
1867            if (OpHAS_SIBLING(o)) {
1868                next_kid = o->op_sibparent;
1869                if (o == effective_top_op)
1870                    effective_top_op = next_kid;
1871            }
1872            else if (o == effective_top_op)
1873              effective_top_op = o->op_sibparent;
1874            o = o->op_sibparent; /* try parent's next sibling */
1875        }
1876        o = next_kid;
1877    } /* while */
1878}
1879
1880/* S_maybe_multideref(): given an op_next chain of ops beginning at 'start'
1881 * that potentially represent a series of one or more aggregate derefs
1882 * (such as $a->[1]{$key}), examine the chain, and if appropriate, convert
1883 * the whole chain to a single OP_MULTIDEREF op (maybe with a few
1884 * additional ops left in too).
1885 *
1886 * The caller will have already verified that the first few ops in the
1887 * chain following 'start' indicate a multideref candidate, and will have
1888 * set 'orig_o' to the point further on in the chain where the first index
1889 * expression (if any) begins.  'orig_action' specifies what type of
1890 * beginning has already been determined by the ops between start..orig_o
1891 * (e.g.  $lex_ary[], $pkg_ary->{}, expr->[], etc).
1892 *
1893 * 'hints' contains any hints flags that need adding (currently just
1894 * OPpHINT_STRICT_REFS) as found in any rv2av/hv skipped by the caller.
1895 */
1896
1897STATIC void
1898S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints)
1899{
1900    int pass;
1901    UNOP_AUX_item *arg_buf = NULL;
1902    bool reset_start_targ  = FALSE; /* start->op_targ needs zeroing */
1903    int index_skip         = -1;    /* don't output index arg on this action */
1904
1905    /* similar to regex compiling, do two passes; the first pass
1906     * determines whether the op chain is convertible and calculates the
1907     * buffer size; the second pass populates the buffer and makes any
1908     * changes necessary to ops (such as moving consts to the pad on
1909     * threaded builds).
1910     *
1911     * NB: for things like Coverity, note that both passes take the same
1912     * path through the logic tree (except for 'if (pass)' bits), since
1913     * both passes are following the same op_next chain; and in
1914     * particular, if it would return early on the second pass, it would
1915     * already have returned early on the first pass.
1916     */
1917    for (pass = 0; pass < 2; pass++) {
1918        OP *o                = orig_o;
1919        UV action            = orig_action;
1920        OP *first_elem_op    = NULL;  /* first seen aelem/helem */
1921        OP *top_op           = NULL;  /* highest [ah]elem/exists/del/rv2[ah]v */
1922        int action_count     = 0;     /* number of actions seen so far */
1923        int action_ix        = 0;     /* action_count % (actions per IV) */
1924        bool next_is_hash    = FALSE; /* is the next lookup to be a hash? */
1925        bool is_last         = FALSE; /* no more derefs to follow */
1926        bool maybe_aelemfast = FALSE; /* we can replace with aelemfast? */
1927        UV action_word       = 0;     /* all actions so far */
1928        size_t argi          = 0;
1929        UNOP_AUX_item *action_ptr = arg_buf;
1930
1931        argi++; /* reserve slot for first action word */
1932
1933        switch (action) {
1934        case MDEREF_HV_gvsv_vivify_rv2hv_helem:
1935        case MDEREF_HV_gvhv_helem:
1936            next_is_hash = TRUE;
1937            /* FALLTHROUGH */
1938        case MDEREF_AV_gvsv_vivify_rv2av_aelem:
1939        case MDEREF_AV_gvav_aelem:
1940            if (pass) {
1941#ifdef USE_ITHREADS
1942                arg_buf[argi].pad_offset = cPADOPx(start)->op_padix;
1943                /* stop it being swiped when nulled */
1944                cPADOPx(start)->op_padix = 0;
1945#else
1946                arg_buf[argi].sv = cSVOPx(start)->op_sv;
1947                cSVOPx(start)->op_sv = NULL;
1948#endif
1949            }
1950            argi++;
1951            break;
1952
1953        case MDEREF_HV_padhv_helem:
1954        case MDEREF_HV_padsv_vivify_rv2hv_helem:
1955            next_is_hash = TRUE;
1956            /* FALLTHROUGH */
1957        case MDEREF_AV_padav_aelem:
1958        case MDEREF_AV_padsv_vivify_rv2av_aelem:
1959            if (pass) {
1960                arg_buf[argi].pad_offset = start->op_targ;
1961                /* we skip setting op_targ = 0 for now, since the intact
1962                 * OP_PADXV is needed by check_hash_fields_and_hekify */
1963                reset_start_targ = TRUE;
1964            }
1965            argi++;
1966            break;
1967
1968        case MDEREF_HV_pop_rv2hv_helem:
1969            next_is_hash = TRUE;
1970            /* FALLTHROUGH */
1971        case MDEREF_AV_pop_rv2av_aelem:
1972            break;
1973
1974        default:
1975            NOT_REACHED; /* NOTREACHED */
1976            return;
1977        }
1978
1979        while (!is_last) {
1980            /* look for another (rv2av/hv; get index;
1981             * aelem/helem/exists/delele) sequence */
1982
1983            OP *kid;
1984            bool is_deref;
1985            bool ok;
1986            UV index_type = MDEREF_INDEX_none;
1987
1988            if (action_count) {
1989                /* if this is not the first lookup, consume the rv2av/hv  */
1990
1991                /* for N levels of aggregate lookup, we normally expect
1992                 * that the first N-1 [ah]elem ops will be flagged as
1993                 * /DEREF (so they autovivify if necessary), and the last
1994                 * lookup op not to be.
1995                 * For other things (like @{$h{k1}{k2}}) extra scope or
1996                 * leave ops can appear, so abandon the effort in that
1997                 * case */
1998                if (o->op_type != OP_RV2AV && o->op_type != OP_RV2HV)
1999                    return;
2000
2001                /* rv2av or rv2hv sKR/1 */
2002
2003                ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2004                                            |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2005                if (o->op_flags != (OPf_WANT_SCALAR|OPf_KIDS|OPf_REF))
2006                    return;
2007
2008                /* at this point, we wouldn't expect any of these
2009                 * possible private flags:
2010                 * OPpMAYBE_LVSUB, OPpOUR_INTRO, OPpLVAL_INTRO
2011                 * OPpTRUEBOOL, OPpMAYBE_TRUEBOOL (rv2hv only)
2012                 */
2013                ASSUME(!(o->op_private &
2014                    ~(OPpHINT_STRICT_REFS|OPpARG1_MASK|OPpSLICEWARNING)));
2015
2016                hints = (o->op_private & OPpHINT_STRICT_REFS);
2017
2018                /* make sure the type of the previous /DEREF matches the
2019                 * type of the next lookup */
2020                ASSUME(o->op_type == (next_is_hash ? OP_RV2HV : OP_RV2AV));
2021                top_op = o;
2022
2023                action = next_is_hash
2024                            ? MDEREF_HV_vivify_rv2hv_helem
2025                            : MDEREF_AV_vivify_rv2av_aelem;
2026                o = o->op_next;
2027            }
2028
2029            /* if this is the second pass, and we're at the depth where
2030             * previously we encountered a non-simple index expression,
2031             * stop processing the index at this point */
2032            if (action_count != index_skip) {
2033
2034                /* look for one or more simple ops that return an array
2035                 * index or hash key */
2036
2037                switch (o->op_type) {
2038                case OP_PADSV:
2039                    /* it may be a lexical var index */
2040                    ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_PARENS
2041                                            |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2042                    ASSUME(!(o->op_private &
2043                            ~(OPpPAD_STATE|OPpDEREF|OPpLVAL_INTRO)));
2044
2045                    if (   OP_GIMME(o,0) == G_SCALAR
2046                        && !(o->op_flags & (OPf_REF|OPf_MOD))
2047                        && o->op_private == 0)
2048                    {
2049                        if (pass)
2050                            arg_buf[argi].pad_offset = o->op_targ;
2051                        argi++;
2052                        index_type = MDEREF_INDEX_padsv;
2053                        o = o->op_next;
2054                    }
2055                    break;
2056
2057                case OP_CONST:
2058                    if (next_is_hash) {
2059                        /* it's a constant hash index */
2060                        if (!(SvFLAGS(cSVOPo_sv) & (SVf_IOK|SVf_NOK|SVf_POK)))
2061                            /* "use constant foo => FOO; $h{+foo}" for
2062                             * some weird FOO, can leave you with constants
2063                             * that aren't simple strings. It's not worth
2064                             * the extra hassle for those edge cases */
2065                            break;
2066
2067                        {
2068                            UNOP *rop = NULL;
2069                            OP * helem_op = o->op_next;
2070
2071                            ASSUME(   helem_op->op_type == OP_HELEM
2072                                   || helem_op->op_type == OP_NULL
2073                                   || pass == 0);
2074                            if (helem_op->op_type == OP_HELEM) {
2075                                rop = cUNOPx(cBINOPx(helem_op)->op_first);
2076                                if (   helem_op->op_private & OPpLVAL_INTRO
2077                                    || rop->op_type != OP_RV2HV
2078                                )
2079                                    rop = NULL;
2080                            }
2081                            /* on first pass just check; on second pass
2082                             * hekify */
2083                            check_hash_fields_and_hekify(rop, cSVOPo, pass);
2084                        }
2085
2086                        if (pass) {
2087#ifdef USE_ITHREADS
2088                            /* Relocate sv to the pad for thread safety */
2089                            op_relocate_sv(&cSVOPo->op_sv, &o->op_targ);
2090                            arg_buf[argi].pad_offset = o->op_targ;
2091                            o->op_targ = 0;
2092#else
2093                            arg_buf[argi].sv = cSVOPx_sv(o);
2094#endif
2095                        }
2096                    }
2097                    else {
2098                        /* it's a constant array index */
2099                        IV iv;
2100                        SV *ix_sv = cSVOPo->op_sv;
2101                        if (!SvIOK(ix_sv))
2102                            break;
2103                        iv = SvIV(ix_sv);
2104
2105                        if (   action_count == 0
2106                            && iv >= -128
2107                            && iv <= 127
2108                            && (   action == MDEREF_AV_padav_aelem
2109                                || action == MDEREF_AV_gvav_aelem)
2110                        )
2111                            maybe_aelemfast = TRUE;
2112
2113                        if (pass) {
2114                            arg_buf[argi].iv = iv;
2115                            SvREFCNT_dec_NN(cSVOPo->op_sv);
2116                        }
2117                    }
2118                    if (pass)
2119                        /* we've taken ownership of the SV */
2120                        cSVOPo->op_sv = NULL;
2121                    argi++;
2122                    index_type = MDEREF_INDEX_const;
2123                    o = o->op_next;
2124                    break;
2125
2126                case OP_GV:
2127                    /* it may be a package var index */
2128
2129                    ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_PARENS|OPf_SPECIAL)));
2130                    ASSUME(!(o->op_private & ~(OPpEARLY_CV)));
2131                    if (  (o->op_flags & ~(OPf_PARENS|OPf_SPECIAL)) != OPf_WANT_SCALAR
2132                        || o->op_private != 0
2133                    )
2134                        break;
2135
2136                    kid = o->op_next;
2137                    if (kid->op_type != OP_RV2SV)
2138                        break;
2139
2140                    ASSUME(!(kid->op_flags &
2141                            ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_REF
2142                             |OPf_SPECIAL|OPf_PARENS)));
2143                    ASSUME(!(kid->op_private &
2144                                    ~(OPpARG1_MASK
2145                                     |OPpHINT_STRICT_REFS|OPpOUR_INTRO
2146                                     |OPpDEREF|OPpLVAL_INTRO)));
2147                    if(   (kid->op_flags &~ OPf_PARENS)
2148                            != (OPf_WANT_SCALAR|OPf_KIDS)
2149                       || (kid->op_private & ~(OPpARG1_MASK|HINT_STRICT_REFS))
2150                    )
2151                        break;
2152
2153                    if (pass) {
2154#ifdef USE_ITHREADS
2155                        arg_buf[argi].pad_offset = cPADOPx(o)->op_padix;
2156                        /* stop it being swiped when nulled */
2157                        cPADOPx(o)->op_padix = 0;
2158#else
2159                        arg_buf[argi].sv = cSVOPx(o)->op_sv;
2160                        cSVOPo->op_sv = NULL;
2161#endif
2162                    }
2163                    argi++;
2164                    index_type = MDEREF_INDEX_gvsv;
2165                    o = kid->op_next;
2166                    break;
2167
2168                } /* switch */
2169            } /* action_count != index_skip */
2170
2171            action |= index_type;
2172
2173
2174            /* at this point we have either:
2175             *   * detected what looks like a simple index expression,
2176             *     and expect the next op to be an [ah]elem, or
2177             *     an nulled  [ah]elem followed by a delete or exists;
2178             *  * found a more complex expression, so something other
2179             *    than the above follows.
2180             */
2181
2182            /* possibly an optimised away [ah]elem (where op_next is
2183             * exists or delete) */
2184            if (o->op_type == OP_NULL)
2185                o = o->op_next;
2186
2187            /* at this point we're looking for an OP_AELEM, OP_HELEM,
2188             * OP_EXISTS or OP_DELETE */
2189
2190            /* if a custom array/hash access checker is in scope,
2191             * abandon optimisation attempt */
2192            if (  (o->op_type == OP_AELEM || o->op_type == OP_HELEM)
2193               && PL_check[o->op_type] != Perl_ck_null)
2194                return;
2195            /* similarly for customised exists and delete */
2196            if (  (o->op_type == OP_EXISTS)
2197               && PL_check[o->op_type] != Perl_ck_exists)
2198                return;
2199            if (  (o->op_type == OP_DELETE)
2200               && PL_check[o->op_type] != Perl_ck_delete)
2201                return;
2202
2203            if (   o->op_type != OP_AELEM
2204                || (o->op_private &
2205                      (OPpLVAL_INTRO|OPpLVAL_DEFER|OPpDEREF|OPpMAYBE_LVSUB))
2206                )
2207                maybe_aelemfast = FALSE;
2208
2209            /* look for aelem/helem/exists/delete. If it's not the last elem
2210             * lookup, it *must* have OPpDEREF_AV/HV, but not many other
2211             * flags; if it's the last, then it mustn't have
2212             * OPpDEREF_AV/HV, but may have lots of other flags, like
2213             * OPpLVAL_INTRO etc
2214             */
2215
2216            if (   index_type == MDEREF_INDEX_none
2217                || (   o->op_type != OP_AELEM  && o->op_type != OP_HELEM
2218                    && o->op_type != OP_EXISTS && o->op_type != OP_DELETE)
2219            )
2220                ok = FALSE;
2221            else {
2222                /* we have aelem/helem/exists/delete with valid simple index */
2223
2224                is_deref =    (o->op_type == OP_AELEM || o->op_type == OP_HELEM)
2225                           && (   (o->op_private & OPpDEREF) == OPpDEREF_AV
2226                               || (o->op_private & OPpDEREF) == OPpDEREF_HV);
2227
2228                /* This doesn't make much sense but is legal:
2229                 *    @{ local $x[0][0] } = 1
2230                 * Since scope exit will undo the autovivification,
2231                 * don't bother in the first place. The OP_LEAVE
2232                 * assertion is in case there are other cases of both
2233                 * OPpLVAL_INTRO and OPpDEREF which don't include a scope
2234                 * exit that would undo the local - in which case this
2235                 * block of code would need rethinking.
2236                 */
2237                if (is_deref && (o->op_private & OPpLVAL_INTRO)) {
2238#ifdef DEBUGGING
2239                    OP *n = o->op_next;
2240                    while (n && (  n->op_type == OP_NULL
2241                                || n->op_type == OP_LIST
2242                                || n->op_type == OP_SCALAR))
2243                        n = n->op_next;
2244                    assert(n && n->op_type == OP_LEAVE);
2245#endif
2246                    o->op_private &= ~OPpDEREF;
2247                    is_deref = FALSE;
2248                }
2249
2250                if (is_deref) {
2251                    ASSUME(!(o->op_flags &
2252                                 ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_PARENS)));
2253                    ASSUME(!(o->op_private & ~(OPpARG2_MASK|OPpDEREF)));
2254
2255                    ok =    (o->op_flags &~ OPf_PARENS)
2256                               == (OPf_WANT_SCALAR|OPf_KIDS|OPf_MOD)
2257                         && !(o->op_private & ~(OPpDEREF|OPpARG2_MASK));
2258                }
2259                else if (o->op_type == OP_EXISTS) {
2260                    ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2261                                |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2262                    ASSUME(!(o->op_private & ~(OPpARG1_MASK|OPpEXISTS_SUB)));
2263                    ok =  !(o->op_private & ~OPpARG1_MASK);
2264                }
2265                else if (o->op_type == OP_DELETE) {
2266                    ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2267                                |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2268                    ASSUME(!(o->op_private &
2269                                    ~(OPpARG1_MASK|OPpSLICE|OPpLVAL_INTRO)));
2270                    /* don't handle slices or 'local delete'; the latter
2271                     * is fairly rare, and has a complex runtime */
2272                    ok =  !(o->op_private & ~OPpARG1_MASK);
2273                    if (OP_TYPE_IS_OR_WAS(cUNOPo->op_first, OP_AELEM))
2274                        /* skip handling run-tome error */
2275                        ok = (ok && cBOOL(o->op_flags & OPf_SPECIAL));
2276                }
2277                else {
2278                    ASSUME(o->op_type == OP_AELEM || o->op_type == OP_HELEM);
2279                    ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_MOD
2280                                            |OPf_PARENS|OPf_REF|OPf_SPECIAL)));
2281                    ASSUME(!(o->op_private & ~(OPpARG2_MASK|OPpMAYBE_LVSUB
2282                                    |OPpLVAL_DEFER|OPpDEREF|OPpLVAL_INTRO)));
2283                    ok = (o->op_private & OPpDEREF) != OPpDEREF_SV;
2284                }
2285            }
2286
2287            if (ok) {
2288                if (!first_elem_op)
2289                    first_elem_op = o;
2290                top_op = o;
2291                if (is_deref) {
2292                    next_is_hash = cBOOL((o->op_private & OPpDEREF) == OPpDEREF_HV);
2293                    o = o->op_next;
2294                }
2295                else {
2296                    is_last = TRUE;
2297                    action |= MDEREF_FLAG_last;
2298                }
2299            }
2300            else {
2301                /* at this point we have something that started
2302                 * promisingly enough (with rv2av or whatever), but failed
2303                 * to find a simple index followed by an
2304                 * aelem/helem/exists/delete. If this is the first action,
2305                 * give up; but if we've already seen at least one
2306                 * aelem/helem, then keep them and add a new action with
2307                 * MDEREF_INDEX_none, which causes it to do the vivify
2308                 * from the end of the previous lookup, and do the deref,
2309                 * but stop at that point. So $a[0][expr] will do one
2310                 * av_fetch, vivify and deref, then continue executing at
2311                 * expr */
2312                if (!action_count)
2313                    return;
2314                is_last = TRUE;
2315                index_skip = action_count;
2316                action |= MDEREF_FLAG_last;
2317                if (index_type != MDEREF_INDEX_none)
2318                    argi--;
2319            }
2320
2321            action_word |= (action << (action_ix * MDEREF_SHIFT));
2322            action_ix++;
2323            action_count++;
2324            /* if there's no space for the next action, reserve a new slot
2325             * for it *before* we start adding args for that action */
2326            if ((action_ix + 1) * MDEREF_SHIFT > UVSIZE*8) {
2327                if (pass) {
2328                    action_ptr->uv = action_word;
2329                    action_ptr = arg_buf + argi;
2330                }
2331                action_word = 0;
2332                argi++;
2333                action_ix = 0;
2334            }
2335        } /* while !is_last */
2336
2337        /* success! */
2338
2339        if (!action_ix)
2340            /* slot reserved for next action word not now needed */
2341            argi--;
2342        else if (pass)
2343            action_ptr->uv = action_word;
2344
2345        if (pass) {
2346            OP *mderef;
2347            OP *p, *q;
2348
2349            mderef = newUNOP_AUX(OP_MULTIDEREF, 0, NULL, arg_buf);
2350            if (index_skip == -1) {
2351                mderef->op_flags = o->op_flags
2352                        & (OPf_WANT|OPf_MOD|(next_is_hash ? OPf_SPECIAL : 0));
2353                if (o->op_type == OP_EXISTS)
2354                    mderef->op_private = OPpMULTIDEREF_EXISTS;
2355                else if (o->op_type == OP_DELETE)
2356                    mderef->op_private = OPpMULTIDEREF_DELETE;
2357                else
2358                    mderef->op_private = o->op_private
2359                        & (OPpMAYBE_LVSUB|OPpLVAL_DEFER|OPpLVAL_INTRO);
2360            }
2361            /* accumulate strictness from every level (although I don't think
2362             * they can actually vary) */
2363            mderef->op_private |= hints;
2364
2365            /* integrate the new multideref op into the optree and the
2366             * op_next chain.
2367             *
2368             * In general an op like aelem or helem has two child
2369             * sub-trees: the aggregate expression (a_expr) and the
2370             * index expression (i_expr):
2371             *
2372             *     aelem
2373             *       |
2374             *     a_expr - i_expr
2375             *
2376             * The a_expr returns an AV or HV, while the i-expr returns an
2377             * index. In general a multideref replaces most or all of a
2378             * multi-level tree, e.g.
2379             *
2380             *     exists
2381             *       |
2382             *     ex-aelem
2383             *       |
2384             *     rv2av  - i_expr1
2385             *       |
2386             *     helem
2387             *       |
2388             *     rv2hv  - i_expr2
2389             *       |
2390             *     aelem
2391             *       |
2392             *     a_expr - i_expr3
2393             *
2394             * With multideref, all the i_exprs will be simple vars or
2395             * constants, except that i_expr1 may be arbitrary in the case
2396             * of MDEREF_INDEX_none.
2397             *
2398             * The bottom-most a_expr will be either:
2399             *   1) a simple var (so padXv or gv+rv2Xv);
2400             *   2) a simple scalar var dereferenced (e.g. $r->[0]):
2401             *      so a simple var with an extra rv2Xv;
2402             *   3) or an arbitrary expression.
2403             *
2404             * 'start', the first op in the execution chain, will point to
2405             *   1),2): the padXv or gv op;
2406             *   3):    the rv2Xv which forms the last op in the a_expr
2407             *          execution chain, and the top-most op in the a_expr
2408             *          subtree.
2409             *
2410             * For all cases, the 'start' node is no longer required,
2411             * but we can't free it since one or more external nodes
2412             * may point to it. E.g. consider
2413             *     $h{foo} = $a ? $b : $c
2414             * Here, both the op_next and op_other branches of the
2415             * cond_expr point to the gv[*h] of the hash expression, so
2416             * we can't free the 'start' op.
2417             *
2418             * For expr->[...], we need to save the subtree containing the
2419             * expression; for the other cases, we just need to save the
2420             * start node.
2421             * So in all cases, we null the start op and keep it around by
2422             * making it the child of the multideref op; for the expr->
2423             * case, the expr will be a subtree of the start node.
2424             *
2425             * So in the simple 1,2 case the  optree above changes to
2426             *
2427             *     ex-exists
2428             *       |
2429             *     multideref
2430             *       |
2431             *     ex-gv (or ex-padxv)
2432             *
2433             *  with the op_next chain being
2434             *
2435             *  -> ex-gv -> multideref -> op-following-ex-exists ->
2436             *
2437             *  In the 3 case, we have
2438             *
2439             *     ex-exists
2440             *       |
2441             *     multideref
2442             *       |
2443             *     ex-rv2xv
2444             *       |
2445             *    rest-of-a_expr
2446             *      subtree
2447             *
2448             *  and
2449             *
2450             *  -> rest-of-a_expr subtree ->
2451             *    ex-rv2xv -> multideref -> op-following-ex-exists ->
2452             *
2453             *
2454             * Where the last i_expr is non-simple (i.e. MDEREF_INDEX_none,
2455             * e.g. $a[0]{foo}[$x+1], the next rv2xv is nulled and the
2456             * multideref attached as the child, e.g.
2457             *
2458             *     exists
2459             *       |
2460             *     ex-aelem
2461             *       |
2462             *     ex-rv2av  - i_expr1
2463             *       |
2464             *     multideref
2465             *       |
2466             *     ex-whatever
2467             *
2468             */
2469
2470            /* if we free this op, don't free the pad entry */
2471            if (reset_start_targ)
2472                start->op_targ = 0;
2473
2474
2475            /* Cut the bit we need to save out of the tree and attach to
2476             * the multideref op, then free the rest of the tree */
2477
2478            /* find parent of node to be detached (for use by splice) */
2479            p = first_elem_op;
2480            if (   orig_action == MDEREF_AV_pop_rv2av_aelem
2481                || orig_action == MDEREF_HV_pop_rv2hv_helem)
2482            {
2483                /* there is an arbitrary expression preceding us, e.g.
2484                 * expr->[..]? so we need to save the 'expr' subtree */
2485                if (p->op_type == OP_EXISTS || p->op_type == OP_DELETE)
2486                    p = cUNOPx(p)->op_first;
2487                ASSUME(   start->op_type == OP_RV2AV
2488                       || start->op_type == OP_RV2HV);
2489            }
2490            else {
2491                /* either a padXv or rv2Xv+gv, maybe with an ex-Xelem
2492                 * above for exists/delete. */
2493                while (   (p->op_flags & OPf_KIDS)
2494                       && cUNOPx(p)->op_first != start
2495                )
2496                    p = cUNOPx(p)->op_first;
2497            }
2498            ASSUME(cUNOPx(p)->op_first == start);
2499
2500            /* detach from main tree, and re-attach under the multideref */
2501            op_sibling_splice(mderef, NULL, 0,
2502                    op_sibling_splice(p, NULL, 1, NULL));
2503            op_null(start);
2504
2505            start->op_next = mderef;
2506
2507            mderef->op_next = index_skip == -1 ? o->op_next : o;
2508
2509            /* excise and free the original tree, and replace with
2510             * the multideref op */
2511            p = op_sibling_splice(top_op, NULL, -1, mderef);
2512            while (p) {
2513                q = OpSIBLING(p);
2514                op_free(p);
2515                p = q;
2516            }
2517            op_null(top_op);
2518        }
2519        else {
2520            Size_t size = argi;
2521
2522            if (maybe_aelemfast && action_count == 1)
2523                return;
2524
2525            arg_buf = (UNOP_AUX_item*)PerlMemShared_malloc(
2526                                sizeof(UNOP_AUX_item) * (size + 1));
2527            /* for dumping etc: store the length in a hidden first slot;
2528             * we set the op_aux pointer to the second slot */
2529            arg_buf->uv = size;
2530            arg_buf++;
2531        }
2532    } /* for (pass = ...) */
2533}
2534
2535/* See if the ops following o are such that o will always be executed in
2536 * boolean context: that is, the SV which o pushes onto the stack will
2537 * only ever be consumed by later ops via SvTRUE(sv) or similar.
2538 * If so, set a suitable private flag on o. Normally this will be
2539 * bool_flag; but see below why maybe_flag is needed too.
2540 *
2541 * Typically the two flags you pass will be the generic OPpTRUEBOOL and
2542 * OPpMAYBE_TRUEBOOL, buts it's possible that for some ops those bits may
2543 * already be taken, so you'll have to give that op two different flags.
2544 *
2545 * More explanation of 'maybe_flag' and 'safe_and' parameters.
2546 * The binary logical ops &&, ||, // (plus 'if' and 'unless' which use
2547 * those underlying ops) short-circuit, which means that rather than
2548 * necessarily returning a truth value, they may return the LH argument,
2549 * which may not be boolean. For example in $x = (keys %h || -1), keys
2550 * should return a key count rather than a boolean, even though its
2551 * sort-of being used in boolean context.
2552 *
2553 * So we only consider such logical ops to provide boolean context to
2554 * their LH argument if they themselves are in void or boolean context.
2555 * However, sometimes the context isn't known until run-time. In this
2556 * case the op is marked with the maybe_flag flag it.
2557 *
2558 * Consider the following.
2559 *
2560 *     sub f { ....;  if (%h) { .... } }
2561 *
2562 * This is actually compiled as
2563 *
2564 *     sub f { ....;  %h && do { .... } }
2565 *
2566 * Here we won't know until runtime whether the final statement (and hence
2567 * the &&) is in void context and so is safe to return a boolean value.
2568 * So mark o with maybe_flag rather than the bool_flag.
2569 * Note that there is cost associated with determining context at runtime
2570 * (e.g. a call to block_gimme()), so it may not be worth setting (at
2571 * compile time) and testing (at runtime) maybe_flag if the scalar verses
2572 * boolean costs savings are marginal.
2573 *
2574 * However, we can do slightly better with && (compared to || and //):
2575 * this op only returns its LH argument when that argument is false. In
2576 * this case, as long as the op promises to return a false value which is
2577 * valid in both boolean and scalar contexts, we can mark an op consumed
2578 * by && with bool_flag rather than maybe_flag.
2579 * For example as long as pp_padhv and pp_rv2hv return &PL_sv_zero rather
2580 * than &PL_sv_no for a false result in boolean context, then it's safe. An
2581 * op which promises to handle this case is indicated by setting safe_and
2582 * to true.
2583 */
2584
2585static void
2586S_check_for_bool_cxt(OP*o, bool safe_and, U8 bool_flag, U8 maybe_flag)
2587{
2588    OP *lop;
2589    U8 flag = 0;
2590
2591    assert((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR);
2592
2593    /* OPpTARGET_MY and boolean context probably don't mix well.
2594     * If someone finds a valid use case, maybe add an extra flag to this
2595     * function which indicates its safe to do so for this op? */
2596    assert(!(   (PL_opargs[o->op_type] & OA_TARGLEX)
2597             && (o->op_private & OPpTARGET_MY)));
2598
2599    lop = o->op_next;
2600
2601    while (lop) {
2602        switch (lop->op_type) {
2603        case OP_NULL:
2604        case OP_SCALAR:
2605            break;
2606
2607        /* these two consume the stack argument in the scalar case,
2608         * and treat it as a boolean in the non linenumber case */
2609        case OP_FLIP:
2610        case OP_FLOP:
2611            if (   ((lop->op_flags & OPf_WANT) == OPf_WANT_LIST)
2612                || (lop->op_private & OPpFLIP_LINENUM))
2613            {
2614                lop = NULL;
2615                break;
2616            }
2617            /* FALLTHROUGH */
2618        /* these never leave the original value on the stack */
2619        case OP_NOT:
2620        case OP_XOR:
2621        case OP_COND_EXPR:
2622        case OP_GREPWHILE:
2623            flag = bool_flag;
2624            lop = NULL;
2625            break;
2626
2627        /* OR DOR and AND evaluate their arg as a boolean, but then may
2628         * leave the original scalar value on the stack when following the
2629         * op_next route. If not in void context, we need to ensure
2630         * that whatever follows consumes the arg only in boolean context
2631         * too.
2632         */
2633        case OP_AND:
2634            if (safe_and) {
2635                flag = bool_flag;
2636                lop = NULL;
2637                break;
2638            }
2639            /* FALLTHROUGH */
2640        case OP_OR:
2641        case OP_DOR:
2642            if ((lop->op_flags & OPf_WANT) == OPf_WANT_VOID) {
2643                flag = bool_flag;
2644                lop = NULL;
2645            }
2646            else if (!(lop->op_flags & OPf_WANT)) {
2647                /* unknown context - decide at runtime */
2648                flag = maybe_flag;
2649                lop = NULL;
2650            }
2651            break;
2652
2653        default:
2654            lop = NULL;
2655            break;
2656        }
2657
2658        if (lop)
2659            lop = lop->op_next;
2660    }
2661
2662    o->op_private |= flag;
2663}
2664
2665/* mechanism for deferring recursion in rpeep() */
2666
2667#define MAX_DEFERRED 4
2668
2669#define DEFER(o) \
2670  STMT_START { \
2671    if (defer_ix == (MAX_DEFERRED-1)) { \
2672        OP **defer = defer_queue[defer_base]; \
2673        CALL_RPEEP(*defer); \
2674        op_prune_chain_head(defer); \
2675        defer_base = (defer_base + 1) % MAX_DEFERRED; \
2676        defer_ix--; \
2677    } \
2678    defer_queue[(defer_base + ++defer_ix) % MAX_DEFERRED] = &(o); \
2679  } STMT_END
2680
2681#define IS_AND_OP(o)   (o->op_type == OP_AND)
2682#define IS_OR_OP(o)    (o->op_type == OP_OR)
2683
2684/* A peephole optimizer.  We visit the ops in the order they're to execute.
2685 * See the comments at the top of this file for more details about when
2686 * peep() is called */
2687
2688void
2689Perl_rpeep(pTHX_ OP *o)
2690{
2691    OP* oldop = NULL;
2692    OP* oldoldop = NULL;
2693    OP** defer_queue[MAX_DEFERRED] = { NULL }; /* small queue of deferred branches */
2694    int defer_base = 0;
2695    int defer_ix = -1;
2696
2697    if (!o || o->op_opt)
2698        return;
2699
2700    assert(o->op_type != OP_FREED);
2701
2702    ENTER;
2703    SAVEOP();
2704    SAVEVPTR(PL_curcop);
2705    for (;; o = o->op_next) {
2706        if (o && o->op_opt)
2707            o = NULL;
2708        if (!o) {
2709            while (defer_ix >= 0) {
2710                OP **defer =
2711                        defer_queue[(defer_base + defer_ix--) % MAX_DEFERRED];
2712                CALL_RPEEP(*defer);
2713                op_prune_chain_head(defer);
2714            }
2715            break;
2716        }
2717
2718      redo:
2719
2720        /* oldoldop -> oldop -> o should be a chain of 3 adjacent ops */
2721        assert(!oldoldop || oldoldop->op_next == oldop);
2722        assert(!oldop    || oldop->op_next    == o);
2723
2724        /* By default, this op has now been optimised. A couple of cases below
2725           clear this again.  */
2726        o->op_opt = 1;
2727        PL_op = o;
2728
2729        /* look for a series of 1 or more aggregate derefs, e.g.
2730         *   $a[1]{foo}[$i]{$k}
2731         * and replace with a single OP_MULTIDEREF op.
2732         * Each index must be either a const, or a simple variable,
2733         *
2734         * First, look for likely combinations of starting ops,
2735         * corresponding to (global and lexical variants of)
2736         *     $a[...]   $h{...}
2737         *     $r->[...] $r->{...}
2738         *     (preceding expression)->[...]
2739         *     (preceding expression)->{...}
2740         * and if so, call maybe_multideref() to do a full inspection
2741         * of the op chain and if appropriate, replace with an
2742         * OP_MULTIDEREF
2743         */
2744        {
2745            UV action;
2746            OP *o2 = o;
2747            U8 hints = 0;
2748
2749            switch (o2->op_type) {
2750            case OP_GV:
2751                /* $pkg[..]   :   gv[*pkg]
2752                 * $pkg->[...]:   gv[*pkg]; rv2sv sKM/DREFAV */
2753
2754                /* Fail if there are new op flag combinations that we're
2755                 * not aware of, rather than:
2756                 *  * silently failing to optimise, or
2757                 *  * silently optimising the flag away.
2758                 * If this ASSUME starts failing, examine what new flag
2759                 * has been added to the op, and decide whether the
2760                 * optimisation should still occur with that flag, then
2761                 * update the code accordingly. This applies to all the
2762                 * other ASSUMEs in the block of code too.
2763                 */
2764                ASSUME(!(o2->op_flags &
2765                            ~(OPf_WANT|OPf_MOD|OPf_PARENS|OPf_SPECIAL)));
2766                ASSUME(!(o2->op_private & ~OPpEARLY_CV));
2767
2768                o2 = o2->op_next;
2769
2770                if (o2->op_type == OP_RV2AV) {
2771                    action = MDEREF_AV_gvav_aelem;
2772                    goto do_deref;
2773                }
2774
2775                if (o2->op_type == OP_RV2HV) {
2776                    action = MDEREF_HV_gvhv_helem;
2777                    goto do_deref;
2778                }
2779
2780                if (o2->op_type != OP_RV2SV)
2781                    break;
2782
2783                /* at this point we've seen gv,rv2sv, so the only valid
2784                 * construct left is $pkg->[] or $pkg->{} */
2785
2786                ASSUME(!(o2->op_flags & OPf_STACKED));
2787                if ((o2->op_flags & (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2788                            != (OPf_WANT_SCALAR|OPf_MOD))
2789                    break;
2790
2791                ASSUME(!(o2->op_private & ~(OPpARG1_MASK|HINT_STRICT_REFS
2792                                    |OPpOUR_INTRO|OPpDEREF|OPpLVAL_INTRO)));
2793                if (o2->op_private & (OPpOUR_INTRO|OPpLVAL_INTRO))
2794                    break;
2795                if (   (o2->op_private & OPpDEREF) != OPpDEREF_AV
2796                    && (o2->op_private & OPpDEREF) != OPpDEREF_HV)
2797                    break;
2798
2799                o2 = o2->op_next;
2800                if (o2->op_type == OP_RV2AV) {
2801                    action = MDEREF_AV_gvsv_vivify_rv2av_aelem;
2802                    goto do_deref;
2803                }
2804                if (o2->op_type == OP_RV2HV) {
2805                    action = MDEREF_HV_gvsv_vivify_rv2hv_helem;
2806                    goto do_deref;
2807                }
2808                break;
2809
2810            case OP_PADSV:
2811                /* $lex->[...]: padsv[$lex] sM/DREFAV */
2812
2813                ASSUME(!(o2->op_flags &
2814                    ~(OPf_WANT|OPf_PARENS|OPf_REF|OPf_MOD|OPf_SPECIAL)));
2815                if ((o2->op_flags &
2816                        (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2817                     != (OPf_WANT_SCALAR|OPf_MOD))
2818                    break;
2819
2820                ASSUME(!(o2->op_private &
2821                                ~(OPpPAD_STATE|OPpDEREF|OPpLVAL_INTRO)));
2822                /* skip if state or intro, or not a deref */
2823                if (      o2->op_private != OPpDEREF_AV
2824                       && o2->op_private != OPpDEREF_HV)
2825                    break;
2826
2827                o2 = o2->op_next;
2828                if (o2->op_type == OP_RV2AV) {
2829                    action = MDEREF_AV_padsv_vivify_rv2av_aelem;
2830                    goto do_deref;
2831                }
2832                if (o2->op_type == OP_RV2HV) {
2833                    action = MDEREF_HV_padsv_vivify_rv2hv_helem;
2834                    goto do_deref;
2835                }
2836                break;
2837
2838            case OP_PADAV:
2839            case OP_PADHV:
2840                /*    $lex[..]:  padav[@lex:1,2] sR *
2841                 * or $lex{..}:  padhv[%lex:1,2] sR */
2842                ASSUME(!(o2->op_flags & ~(OPf_WANT|OPf_MOD|OPf_PARENS|
2843                                            OPf_REF|OPf_SPECIAL)));
2844                if ((o2->op_flags &
2845                        (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2846                     != (OPf_WANT_SCALAR|OPf_REF))
2847                    break;
2848                if (o2->op_flags != (OPf_WANT_SCALAR|OPf_REF))
2849                    break;
2850                /* OPf_PARENS isn't currently used in this case;
2851                 * if that changes, let us know! */
2852                ASSUME(!(o2->op_flags & OPf_PARENS));
2853
2854                /* at this point, we wouldn't expect any of the remaining
2855                 * possible private flags:
2856                 * OPpPAD_STATE, OPpLVAL_INTRO, OPpTRUEBOOL,
2857                 * OPpMAYBE_TRUEBOOL, OPpMAYBE_LVSUB
2858                 *
2859                 * OPpSLICEWARNING shouldn't affect runtime
2860                 */
2861                ASSUME(!(o2->op_private & ~(OPpSLICEWARNING)));
2862
2863                action = o2->op_type == OP_PADAV
2864                            ? MDEREF_AV_padav_aelem
2865                            : MDEREF_HV_padhv_helem;
2866                o2 = o2->op_next;
2867                S_maybe_multideref(aTHX_ o, o2, action, 0);
2868                break;
2869
2870
2871            case OP_RV2AV:
2872            case OP_RV2HV:
2873                action = o2->op_type == OP_RV2AV
2874                            ? MDEREF_AV_pop_rv2av_aelem
2875                            : MDEREF_HV_pop_rv2hv_helem;
2876                /* FALLTHROUGH */
2877            do_deref:
2878                /* (expr)->[...]:  rv2av sKR/1;
2879                 * (expr)->{...}:  rv2hv sKR/1; */
2880
2881                ASSUME(o2->op_type == OP_RV2AV || o2->op_type == OP_RV2HV);
2882
2883                ASSUME(!(o2->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2884                                |OPf_REF|OPf_MOD|OPf_STACKED|OPf_SPECIAL)));
2885                if (o2->op_flags != (OPf_WANT_SCALAR|OPf_KIDS|OPf_REF))
2886                    break;
2887
2888                /* at this point, we wouldn't expect any of these
2889                 * possible private flags:
2890                 * OPpMAYBE_LVSUB, OPpLVAL_INTRO
2891                 * OPpTRUEBOOL, OPpMAYBE_TRUEBOOL, (rv2hv only)
2892                 */
2893                ASSUME(!(o2->op_private &
2894                    ~(OPpHINT_STRICT_REFS|OPpARG1_MASK|OPpSLICEWARNING
2895                     |OPpOUR_INTRO)));
2896                hints |= (o2->op_private & OPpHINT_STRICT_REFS);
2897
2898                o2 = o2->op_next;
2899
2900                S_maybe_multideref(aTHX_ o, o2, action, hints);
2901                break;
2902
2903            default:
2904                break;
2905            }
2906        }
2907
2908
2909        switch (o->op_type) {
2910        case OP_DBSTATE:
2911            PL_curcop = ((COP*)o);		/* for warnings */
2912            break;
2913        case OP_NEXTSTATE:
2914            PL_curcop = ((COP*)o);		/* for warnings */
2915
2916            /* Optimise a "return ..." at the end of a sub to just be "...".
2917             * This saves 2 ops. Before:
2918             * 1  <;> nextstate(main 1 -e:1) v ->2
2919             * 4  <@> return K ->5
2920             * 2    <0> pushmark s ->3
2921             * -    <1> ex-rv2sv sK/1 ->4
2922             * 3      <#> gvsv[*cat] s ->4
2923             *
2924             * After:
2925             * -  <@> return K ->-
2926             * -    <0> pushmark s ->2
2927             * -    <1> ex-rv2sv sK/1 ->-
2928             * 2      <$> gvsv(*cat) s ->3
2929             */
2930            {
2931                OP *next = o->op_next;
2932                OP *sibling = OpSIBLING(o);
2933                if (   OP_TYPE_IS(next, OP_PUSHMARK)
2934                    && OP_TYPE_IS(sibling, OP_RETURN)
2935                    && OP_TYPE_IS(sibling->op_next, OP_LINESEQ)
2936                    && ( OP_TYPE_IS(sibling->op_next->op_next, OP_LEAVESUB)
2937                       ||OP_TYPE_IS(sibling->op_next->op_next,
2938                                    OP_LEAVESUBLV))
2939                    && cUNOPx(sibling)->op_first == next
2940                    && OpHAS_SIBLING(next) && OpSIBLING(next)->op_next
2941                    && next->op_next
2942                ) {
2943                    /* Look through the PUSHMARK's siblings for one that
2944                     * points to the RETURN */
2945                    OP *top = OpSIBLING(next);
2946                    while (top && top->op_next) {
2947                        if (top->op_next == sibling) {
2948                            top->op_next = sibling->op_next;
2949                            o->op_next = next->op_next;
2950                            break;
2951                        }
2952                        top = OpSIBLING(top);
2953                    }
2954                }
2955            }
2956
2957            /* Optimise 'my $x; my $y;' into 'my ($x, $y);'
2958             *
2959             * This latter form is then suitable for conversion into padrange
2960             * later on. Convert:
2961             *
2962             *   nextstate1 -> padop1 -> nextstate2 -> padop2 -> nextstate3
2963             *
2964             * into:
2965             *
2966             *   nextstate1 ->     listop     -> nextstate3
2967             *                 /            \
2968             *         pushmark -> padop1 -> padop2
2969             */
2970            if (o->op_next && (
2971                    o->op_next->op_type == OP_PADSV
2972                 || o->op_next->op_type == OP_PADAV
2973                 || o->op_next->op_type == OP_PADHV
2974                )
2975                && !(o->op_next->op_private & ~OPpLVAL_INTRO)
2976                && o->op_next->op_next && o->op_next->op_next->op_type == OP_NEXTSTATE
2977                && o->op_next->op_next->op_next && (
2978                    o->op_next->op_next->op_next->op_type == OP_PADSV
2979                 || o->op_next->op_next->op_next->op_type == OP_PADAV
2980                 || o->op_next->op_next->op_next->op_type == OP_PADHV
2981                )
2982                && !(o->op_next->op_next->op_next->op_private & ~OPpLVAL_INTRO)
2983                && o->op_next->op_next->op_next->op_next && o->op_next->op_next->op_next->op_next->op_type == OP_NEXTSTATE
2984                && (!CopLABEL((COP*)o)) /* Don't mess with labels */
2985                && (!CopLABEL((COP*)o->op_next->op_next)) /* ... */
2986            ) {
2987                OP *pad1, *ns2, *pad2, *ns3, *newop, *newpm;
2988
2989                pad1 =    o->op_next;
2990                ns2  = pad1->op_next;
2991                pad2 =  ns2->op_next;
2992                ns3  = pad2->op_next;
2993
2994                /* we assume here that the op_next chain is the same as
2995                 * the op_sibling chain */
2996                assert(OpSIBLING(o)    == pad1);
2997                assert(OpSIBLING(pad1) == ns2);
2998                assert(OpSIBLING(ns2)  == pad2);
2999                assert(OpSIBLING(pad2) == ns3);
3000
3001                /* excise and delete ns2 */
3002                op_sibling_splice(NULL, pad1, 1, NULL);
3003                op_free(ns2);
3004
3005                /* excise pad1 and pad2 */
3006                op_sibling_splice(NULL, o, 2, NULL);
3007
3008                /* create new listop, with children consisting of:
3009                 * a new pushmark, pad1, pad2. */
3010                newop = newLISTOP(OP_LIST, 0, pad1, pad2);
3011                newop->op_flags |= OPf_PARENS;
3012                newop->op_flags = (newop->op_flags & ~OPf_WANT) | OPf_WANT_VOID;
3013
3014                /* insert newop between o and ns3 */
3015                op_sibling_splice(NULL, o, 0, newop);
3016
3017                /*fixup op_next chain */
3018                newpm = cUNOPx(newop)->op_first; /* pushmark */
3019                o    ->op_next = newpm;
3020                newpm->op_next = pad1;
3021                pad1 ->op_next = pad2;
3022                pad2 ->op_next = newop; /* listop */
3023                newop->op_next = ns3;
3024
3025                /* Ensure pushmark has this flag if padops do */
3026                if (pad1->op_flags & OPf_MOD && pad2->op_flags & OPf_MOD) {
3027                    newpm->op_flags |= OPf_MOD;
3028                }
3029
3030                break;
3031            }
3032
3033            /* Two NEXTSTATEs in a row serve no purpose. Except if they happen
3034               to carry two labels. For now, take the easier option, and skip
3035               this optimisation if the first NEXTSTATE has a label.
3036               Yves asked what about if they have different hints or features?
3037               Tony thinks that as we remove the first of the pair it should
3038               be fine.
3039            */
3040            if (!CopLABEL((COP*)o) && !PERLDB_NOOPT) {
3041                OP *nextop = o->op_next;
3042                while (nextop) {
3043                    switch (nextop->op_type) {
3044                        case OP_NULL:
3045                        case OP_SCALAR:
3046                        case OP_LINESEQ:
3047                        case OP_SCOPE:
3048                            nextop = nextop->op_next;
3049                            continue;
3050                    }
3051                    break;
3052                }
3053
3054                if (nextop && (nextop->op_type == OP_NEXTSTATE)) {
3055                    op_null(o);
3056                    if (oldop)
3057                        oldop->op_next = nextop;
3058                    o = nextop;
3059                    /* Skip (old)oldop assignment since the current oldop's
3060                       op_next already points to the next op.  */
3061                    goto redo;
3062                }
3063            }
3064            break;
3065
3066        case OP_CONCAT:
3067            if (o->op_next && o->op_next->op_type == OP_STRINGIFY) {
3068                if (o->op_next->op_private & OPpTARGET_MY) {
3069                    if (o->op_flags & OPf_STACKED) /* chained concats */
3070                        break; /* ignore_optimization */
3071                    else {
3072                        /* assert(PL_opargs[o->op_type] & OA_TARGLEX); */
3073                        o->op_targ = o->op_next->op_targ;
3074                        o->op_next->op_targ = 0;
3075                        o->op_private |= OPpTARGET_MY;
3076                    }
3077                }
3078                op_null(o->op_next);
3079            }
3080            break;
3081        case OP_STUB:
3082            if ((o->op_flags & OPf_WANT) != OPf_WANT_LIST) {
3083                break; /* Scalar stub must produce undef.  List stub is noop */
3084            }
3085            goto nothin;
3086        case OP_NULL:
3087            if (o->op_targ == OP_NEXTSTATE
3088                || o->op_targ == OP_DBSTATE)
3089            {
3090                PL_curcop = ((COP*)o);
3091            }
3092            /* XXX: We avoid setting op_seq here to prevent later calls
3093               to rpeep() from mistakenly concluding that optimisation
3094               has already occurred. This doesn't fix the real problem,
3095               though (See 20010220.007 (#5874)). AMS 20010719 */
3096            /* op_seq functionality is now replaced by op_opt */
3097            o->op_opt = 0;
3098            /* FALLTHROUGH */
3099        case OP_SCALAR:
3100        case OP_LINESEQ:
3101        case OP_SCOPE:
3102        nothin:
3103            if (oldop) {
3104                oldop->op_next = o->op_next;
3105                o->op_opt = 0;
3106                continue;
3107            }
3108            break;
3109
3110        case OP_PUSHMARK:
3111
3112            /* Given
3113                 5 repeat/DOLIST
3114                 3   ex-list
3115                 1     pushmark
3116                 2     scalar or const
3117                 4   const[0]
3118               convert repeat into a stub with no kids.
3119             */
3120            if (o->op_next->op_type == OP_CONST
3121             || (  o->op_next->op_type == OP_PADSV
3122                && !(o->op_next->op_private & OPpLVAL_INTRO))
3123             || (  o->op_next->op_type == OP_GV
3124                && o->op_next->op_next->op_type == OP_RV2SV
3125                && !(o->op_next->op_next->op_private
3126                        & (OPpLVAL_INTRO|OPpOUR_INTRO))))
3127            {
3128                const OP *kid = o->op_next->op_next;
3129                if (o->op_next->op_type == OP_GV)
3130                   kid = kid->op_next;
3131                /* kid is now the ex-list.  */
3132                if (kid->op_type == OP_NULL
3133                 && (kid = kid->op_next)->op_type == OP_CONST
3134                    /* kid is now the repeat count.  */
3135                 && kid->op_next->op_type == OP_REPEAT
3136                 && kid->op_next->op_private & OPpREPEAT_DOLIST
3137                 && (kid->op_next->op_flags & OPf_WANT) == OPf_WANT_LIST
3138                 && SvIOK(kSVOP_sv) && SvIVX(kSVOP_sv) == 0
3139                 && oldop)
3140                {
3141                    o = kid->op_next; /* repeat */
3142                    oldop->op_next = o;
3143                    op_free(cBINOPo->op_first);
3144                    op_free(cBINOPo->op_last );
3145                    o->op_flags &=~ OPf_KIDS;
3146                    /* stub is a baseop; repeat is a binop */
3147                    STATIC_ASSERT_STMT(sizeof(OP) <= sizeof(BINOP));
3148                    OpTYPE_set(o, OP_STUB);
3149                    o->op_private = 0;
3150                    break;
3151                }
3152            }
3153
3154            /* If the pushmark is associated with an empty anonhash
3155             * or anonlist, null out the pushmark and swap in a
3156             * specialised op for the parent.
3157             *     4        <@> anonhash sK* ->5
3158             *     3           <0> pushmark s ->4
3159             * becomes:
3160             *     3        <@> emptyavhv sK* ->4
3161             *     -           <0> pushmark s ->3
3162             */
3163            if (!OpHAS_SIBLING(o) && (o->op_next == o->op_sibparent) && (
3164                (o->op_next->op_type == OP_ANONHASH) ||
3165                (o->op_next->op_type == OP_ANONLIST) ) &&
3166                (o->op_next->op_flags & OPf_SPECIAL) ) {
3167
3168                OP* anon = o->op_next;
3169                /* These next two are _potentially_ a padsv and an sassign */
3170                OP* padsv = anon->op_next;
3171                OP* sassign = (padsv) ? padsv->op_next: NULL;
3172
3173                anon->op_private = (anon->op_type == OP_ANONLIST) ?
3174                                                0 : OPpEMPTYAVHV_IS_HV;
3175                OpTYPE_set(anon, OP_EMPTYAVHV);
3176                op_null(o);
3177                o = anon;
3178                if (oldop) /* A previous optimization may have NULLED it */
3179                    oldop->op_next = anon;
3180
3181                /* Further optimise scalar assignment of an empty anonhash
3182                 * or anonlist by subsuming the padsv & sassign OPs. */
3183                if ((padsv->op_type == OP_PADSV) &&
3184                    !(padsv->op_private & OPpDEREF) &&
3185                    sassign && (sassign->op_type == OP_SASSIGN) ){
3186
3187                    /* Take some public flags from the sassign */
3188                    anon->op_flags = OPf_KIDS | OPf_SPECIAL |
3189                        (anon->op_flags & OPf_PARENS) |
3190                        (sassign->op_flags & (OPf_WANT|OPf_PARENS));
3191
3192                    /* Take some private flags from the padsv */
3193                    anon->op_private |= OPpTARGET_MY |
3194                        (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE));
3195
3196                    /* Take the targ slot from the padsv*/
3197                    anon->op_targ = padsv->op_targ;
3198                    padsv->op_targ = 0;
3199
3200                    /* Clean up */
3201                    anon->op_next = sassign->op_next;
3202                    op_null(padsv);
3203                    op_null(sassign);
3204                }
3205                break;
3206
3207            }
3208
3209
3210            /* Convert a series of PAD ops for my vars plus support into a
3211             * single padrange op. Basically
3212             *
3213             *    pushmark -> pad[ahs]v -> pad[ahs]?v -> ... -> (list) -> rest
3214             *
3215             * becomes, depending on circumstances, one of
3216             *
3217             *    padrange  ----------------------------------> (list) -> rest
3218             *    padrange  --------------------------------------------> rest
3219             *
3220             * where all the pad indexes are sequential and of the same type
3221             * (INTRO or not).
3222             * We convert the pushmark into a padrange op, then skip
3223             * any other pad ops, and possibly some trailing ops.
3224             * Note that we don't null() the skipped ops, to make it
3225             * easier for Deparse to undo this optimisation (and none of
3226             * the skipped ops are holding any resources). It also makes
3227             * it easier for find_uninit_var(), as it can just ignore
3228             * padrange, and examine the original pad ops.
3229             */
3230        {
3231            OP *p;
3232            OP *followop = NULL; /* the op that will follow the padrange op */
3233            U8 count = 0;
3234            U8 intro = 0;
3235            PADOFFSET base = 0; /* init only to stop compiler whining */
3236            bool gvoid = 0;     /* init only to stop compiler whining */
3237            bool defav = 0;  /* seen (...) = @_ */
3238            bool reuse = 0;  /* reuse an existing padrange op */
3239
3240            /* look for a pushmark -> gv[_] -> rv2av */
3241
3242            {
3243                OP *rv2av, *q;
3244                p = o->op_next;
3245                if (   p->op_type == OP_GV
3246                    && cGVOPx_gv(p) == PL_defgv
3247                    && (rv2av = p->op_next)
3248                    && rv2av->op_type == OP_RV2AV
3249                    && !(rv2av->op_flags & OPf_REF)
3250                    && !(rv2av->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB))
3251                    && ((rv2av->op_flags & OPf_WANT) == OPf_WANT_LIST)
3252                ) {
3253                    q = rv2av->op_next;
3254                    if (q->op_type == OP_NULL)
3255                        q = q->op_next;
3256                    if (q->op_type == OP_PUSHMARK) {
3257                        defav = 1;
3258                        p = q;
3259                    }
3260                }
3261            }
3262            if (!defav) {
3263                p = o;
3264            }
3265
3266            /* scan for PAD ops */
3267
3268            for (p = p->op_next; p; p = p->op_next) {
3269                if (p->op_type == OP_NULL)
3270                    continue;
3271
3272                if ((     p->op_type != OP_PADSV
3273                       && p->op_type != OP_PADAV
3274                       && p->op_type != OP_PADHV
3275                    )
3276                      /* any private flag other than INTRO? e.g. STATE */
3277                   || (p->op_private & ~OPpLVAL_INTRO)
3278                )
3279                    break;
3280
3281                /* let $a[N] potentially be optimised into AELEMFAST_LEX
3282                 * instead */
3283                if (   p->op_type == OP_PADAV
3284                    && p->op_next
3285                    && p->op_next->op_type == OP_CONST
3286                    && p->op_next->op_next
3287                    && p->op_next->op_next->op_type == OP_AELEM
3288                )
3289                    break;
3290
3291                /* for 1st padop, note what type it is and the range
3292                 * start; for the others, check that it's the same type
3293                 * and that the targs are contiguous */
3294                if (count == 0) {
3295                    intro = (p->op_private & OPpLVAL_INTRO);
3296                    base = p->op_targ;
3297                    gvoid = OP_GIMME(p,0) == G_VOID;
3298                }
3299                else {
3300                    if ((p->op_private & OPpLVAL_INTRO) != intro)
3301                        break;
3302                    /* Note that you'd normally  expect targs to be
3303                     * contiguous in my($a,$b,$c), but that's not the case
3304                     * when external modules start doing things, e.g.
3305                     * Function::Parameters */
3306                    if (p->op_targ != base + count)
3307                        break;
3308                    assert(p->op_targ == base + count);
3309                    /* Either all the padops or none of the padops should
3310                       be in void context.  Since we only do the optimisa-
3311                       tion for av/hv when the aggregate itself is pushed
3312                       on to the stack (one item), there is no need to dis-
3313                       tinguish list from scalar context.  */
3314                    if (gvoid != (OP_GIMME(p,0) == G_VOID))
3315                        break;
3316                }
3317
3318                /* for AV, HV, only when we're not flattening */
3319                if (   p->op_type != OP_PADSV
3320                    && !gvoid
3321                    && !(p->op_flags & OPf_REF)
3322                )
3323                    break;
3324
3325                if (count >= OPpPADRANGE_COUNTMASK)
3326                    break;
3327
3328                /* there's a biggest base we can fit into a
3329                 * SAVEt_CLEARPADRANGE in pp_padrange.
3330                 * (The sizeof() stuff will be constant-folded, and is
3331                 * intended to avoid getting "comparison is always false"
3332                 * compiler warnings. See the comments above
3333                 * MEM_WRAP_CHECK for more explanation on why we do this
3334                 * in a weird way to avoid compiler warnings.)
3335                 */
3336                if (   intro
3337                    && (8*sizeof(base) >
3338                        8*sizeof(UV)-OPpPADRANGE_COUNTSHIFT-SAVE_TIGHT_SHIFT
3339                        ? (Size_t)base
3340                        : (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT))
3341                        ) >
3342                        (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT))
3343                )
3344                    break;
3345
3346                /* Success! We've got another valid pad op to optimise away */
3347                count++;
3348                followop = p->op_next;
3349            }
3350
3351            if (count < 1 || (count == 1 && !defav))
3352                break;
3353
3354            /* pp_padrange in specifically compile-time void context
3355             * skips pushing a mark and lexicals; in all other contexts
3356             * (including unknown till runtime) it pushes a mark and the
3357             * lexicals. We must be very careful then, that the ops we
3358             * optimise away would have exactly the same effect as the
3359             * padrange.
3360             * In particular in void context, we can only optimise to
3361             * a padrange if we see the complete sequence
3362             *     pushmark, pad*v, ...., list
3363             * which has the net effect of leaving the markstack as it
3364             * was.  Not pushing onto the stack (whereas padsv does touch
3365             * the stack) makes no difference in void context.
3366             */
3367            assert(followop);
3368            if (gvoid) {
3369                if (followop->op_type == OP_LIST
3370                        && OP_GIMME(followop,0) == G_VOID
3371                   )
3372                {
3373                    followop = followop->op_next; /* skip OP_LIST */
3374
3375                    /* consolidate two successive my(...);'s */
3376
3377                    if (   oldoldop
3378                        && oldoldop->op_type == OP_PADRANGE
3379                        && (oldoldop->op_flags & OPf_WANT) == OPf_WANT_VOID
3380                        && (oldoldop->op_private & OPpLVAL_INTRO) == intro
3381                        && !(oldoldop->op_flags & OPf_SPECIAL)
3382                    ) {
3383                        U8 old_count;
3384                        assert(oldoldop->op_next == oldop);
3385                        assert(   oldop->op_type == OP_NEXTSTATE
3386                               || oldop->op_type == OP_DBSTATE);
3387                        assert(oldop->op_next == o);
3388
3389                        old_count
3390                            = (oldoldop->op_private & OPpPADRANGE_COUNTMASK);
3391
3392                       /* Do not assume pad offsets for $c and $d are con-
3393                          tiguous in
3394                            my ($a,$b,$c);
3395                            my ($d,$e,$f);
3396                        */
3397                        if (  oldoldop->op_targ + old_count == base
3398                           && old_count < OPpPADRANGE_COUNTMASK - count) {
3399                            base = oldoldop->op_targ;
3400                            count += old_count;
3401                            reuse = 1;
3402                        }
3403                    }
3404
3405                    /* if there's any immediately following singleton
3406                     * my var's; then swallow them and the associated
3407                     * nextstates; i.e.
3408                     *    my ($a,$b); my $c; my $d;
3409                     * is treated as
3410                     *    my ($a,$b,$c,$d);
3411                     */
3412
3413                    while (    ((p = followop->op_next))
3414                            && (  p->op_type == OP_PADSV
3415                               || p->op_type == OP_PADAV
3416                               || p->op_type == OP_PADHV)
3417                            && (p->op_flags & OPf_WANT) == OPf_WANT_VOID
3418                            && (p->op_private & OPpLVAL_INTRO) == intro
3419                            && !(p->op_private & ~OPpLVAL_INTRO)
3420                            && p->op_next
3421                            && (   p->op_next->op_type == OP_NEXTSTATE
3422                                || p->op_next->op_type == OP_DBSTATE)
3423                            && count < OPpPADRANGE_COUNTMASK
3424                            && base + count == p->op_targ
3425                    ) {
3426                        count++;
3427                        followop = p->op_next;
3428                    }
3429                }
3430                else
3431                    break;
3432            }
3433
3434            if (reuse) {
3435                assert(oldoldop->op_type == OP_PADRANGE);
3436                oldoldop->op_next = followop;
3437                oldoldop->op_private = (intro | count);
3438                o = oldoldop;
3439                oldop = NULL;
3440                oldoldop = NULL;
3441            }
3442            else {
3443                /* Convert the pushmark into a padrange.
3444                 * To make Deparse easier, we guarantee that a padrange was
3445                 * *always* formerly a pushmark */
3446                assert(o->op_type == OP_PUSHMARK);
3447                o->op_next = followop;
3448                OpTYPE_set(o, OP_PADRANGE);
3449                o->op_targ = base;
3450                /* bit 7: INTRO; bit 6..0: count */
3451                o->op_private = (intro | count);
3452                o->op_flags = ((o->op_flags & ~(OPf_WANT|OPf_SPECIAL))
3453                              | gvoid * OPf_WANT_VOID
3454                              | (defav ? OPf_SPECIAL : 0));
3455            }
3456            break;
3457        }
3458
3459        case OP_RV2AV:
3460            if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3461                S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3462            break;
3463
3464        case OP_RV2HV:
3465        case OP_PADHV:
3466            /*'keys %h' in void or scalar context: skip the OP_KEYS
3467             * and perform the functionality directly in the RV2HV/PADHV
3468             * op
3469             */
3470            if (o->op_flags & OPf_REF) {
3471                OP *k = o->op_next;
3472                U8 want = (k->op_flags & OPf_WANT);
3473                if (   k
3474                    && k->op_type == OP_KEYS
3475                    && (   want == OPf_WANT_VOID
3476                        || want == OPf_WANT_SCALAR)
3477                    && !(k->op_private & OPpMAYBE_LVSUB)
3478                    && !(k->op_flags & OPf_MOD)
3479                ) {
3480                    o->op_next     = k->op_next;
3481                    o->op_flags   &= ~(OPf_REF|OPf_WANT);
3482                    o->op_flags   |= want;
3483                    o->op_private |= (o->op_type == OP_PADHV ?
3484                                      OPpPADHV_ISKEYS : OPpRV2HV_ISKEYS);
3485                    /* for keys(%lex), hold onto the OP_KEYS's targ
3486                     * since padhv doesn't have its own targ to return
3487                     * an int with */
3488                    if (!(o->op_type ==OP_PADHV && want == OPf_WANT_SCALAR))
3489                        op_null(k);
3490                }
3491            }
3492
3493            /* see if %h is used in boolean context */
3494            if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3495                S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL);
3496
3497
3498            if (o->op_type != OP_PADHV)
3499                break;
3500            /* FALLTHROUGH */
3501        case OP_PADAV:
3502            if (   o->op_type == OP_PADAV
3503                && (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR
3504            )
3505                S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3506            /* FALLTHROUGH */
3507        case OP_PADSV:
3508            /* Skip over state($x) in void context.  */
3509            if (oldop && o->op_private == (OPpPAD_STATE|OPpLVAL_INTRO)
3510             && (o->op_flags & OPf_WANT) == OPf_WANT_VOID)
3511            {
3512                oldop->op_next = o->op_next;
3513                goto redo_nextstate;
3514            }
3515            if (o->op_type != OP_PADAV)
3516                break;
3517            /* FALLTHROUGH */
3518        case OP_GV:
3519            if (o->op_type == OP_PADAV || o->op_next->op_type == OP_RV2AV) {
3520                OP* const pop = (o->op_type == OP_PADAV) ?
3521                            o->op_next : o->op_next->op_next;
3522                IV i;
3523                if (pop && pop->op_type == OP_CONST &&
3524                    ((PL_op = pop->op_next)) &&
3525                    pop->op_next->op_type == OP_AELEM &&
3526                    !(pop->op_next->op_private &
3527                      (OPpLVAL_INTRO|OPpLVAL_DEFER|OPpDEREF|OPpMAYBE_LVSUB)) &&
3528                    (i = SvIV(cSVOPx(pop)->op_sv)) >= -128 && i <= 127)
3529                {
3530                    GV *gv;
3531                    if (cSVOPx(pop)->op_private & OPpCONST_STRICT)
3532                        no_bareword_allowed(pop);
3533                    if (o->op_type == OP_GV)
3534                        op_null(o->op_next);
3535                    op_null(pop->op_next);
3536                    op_null(pop);
3537                    o->op_flags |= pop->op_next->op_flags & OPf_MOD;
3538                    o->op_next = pop->op_next->op_next;
3539                    o->op_ppaddr = PL_ppaddr[OP_AELEMFAST];
3540                    o->op_private = (U8)i;
3541                    if (o->op_type == OP_GV) {
3542                        gv = cGVOPo_gv;
3543                        GvAVn(gv);
3544                        o->op_type = OP_AELEMFAST;
3545                    }
3546                    else
3547                        o->op_type = OP_AELEMFAST_LEX;
3548                }
3549                if (o->op_type != OP_GV)
3550                    break;
3551            }
3552
3553            /* Remove $foo from the op_next chain in void context.  */
3554            if (oldop
3555             && (  o->op_next->op_type == OP_RV2SV
3556                || o->op_next->op_type == OP_RV2AV
3557                || o->op_next->op_type == OP_RV2HV  )
3558             && (o->op_next->op_flags & OPf_WANT) == OPf_WANT_VOID
3559             && !(o->op_next->op_private & OPpLVAL_INTRO))
3560            {
3561                oldop->op_next = o->op_next->op_next;
3562                /* Reprocess the previous op if it is a nextstate, to
3563                   allow double-nextstate optimisation.  */
3564              redo_nextstate:
3565                if (oldop->op_type == OP_NEXTSTATE) {
3566                    oldop->op_opt = 0;
3567                    o = oldop;
3568                    oldop = oldoldop;
3569                    oldoldop = NULL;
3570                    goto redo;
3571                }
3572                o = oldop->op_next;
3573                goto redo;
3574            }
3575            else if (o->op_next->op_type == OP_RV2SV) {
3576                if (!(o->op_next->op_private & OPpDEREF)) {
3577                    op_null(o->op_next);
3578                    o->op_private |= o->op_next->op_private & (OPpLVAL_INTRO
3579                                                               | OPpOUR_INTRO);
3580                    o->op_next = o->op_next->op_next;
3581                    OpTYPE_set(o, OP_GVSV);
3582                }
3583            }
3584            else if (o->op_next->op_type == OP_READLINE
3585                    && o->op_next->op_next->op_type == OP_CONCAT
3586                    && (o->op_next->op_next->op_flags & OPf_STACKED))
3587            {
3588                /* Turn "$a .= <FH>" into an OP_RCATLINE. AMS 20010917 */
3589                OpTYPE_set(o, OP_RCATLINE);
3590                o->op_flags |= OPf_STACKED;
3591                op_null(o->op_next->op_next);
3592                op_null(o->op_next);
3593            }
3594
3595            break;
3596
3597        case OP_NOT:
3598            break;
3599
3600        case OP_AND:
3601        case OP_OR:
3602        case OP_DOR:
3603        case OP_CMPCHAIN_AND:
3604        case OP_PUSHDEFER:
3605            while (cLOGOP->op_other->op_type == OP_NULL)
3606                cLOGOP->op_other = cLOGOP->op_other->op_next;
3607            while (o->op_next && (   o->op_type == o->op_next->op_type
3608                                  || o->op_next->op_type == OP_NULL))
3609                o->op_next = o->op_next->op_next;
3610
3611            /* If we're an OR and our next is an AND in void context, we'll
3612               follow its op_other on short circuit, same for reverse.
3613               We can't do this with OP_DOR since if it's true, its return
3614               value is the underlying value which must be evaluated
3615               by the next op. */
3616            if (o->op_next &&
3617                (
3618                    (IS_AND_OP(o) && IS_OR_OP(o->op_next))
3619                 || (IS_OR_OP(o) && IS_AND_OP(o->op_next))
3620                )
3621                && (o->op_next->op_flags & OPf_WANT) == OPf_WANT_VOID
3622            ) {
3623                o->op_next = cLOGOPx(o->op_next)->op_other;
3624            }
3625            DEFER(cLOGOP->op_other);
3626            o->op_opt = 1;
3627            break;
3628
3629        case OP_GREPWHILE:
3630            if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3631                S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3632            /* FALLTHROUGH */
3633        case OP_COND_EXPR:
3634        case OP_MAPWHILE:
3635        case OP_ANDASSIGN:
3636        case OP_ORASSIGN:
3637        case OP_DORASSIGN:
3638        case OP_RANGE:
3639        case OP_ONCE:
3640        case OP_ARGDEFELEM:
3641            while (cLOGOP->op_other->op_type == OP_NULL)
3642                cLOGOP->op_other = cLOGOP->op_other->op_next;
3643            DEFER(cLOGOP->op_other);
3644            break;
3645
3646        case OP_ENTERLOOP:
3647        case OP_ENTERITER:
3648            while (cLOOP->op_redoop->op_type == OP_NULL)
3649                cLOOP->op_redoop = cLOOP->op_redoop->op_next;
3650            while (cLOOP->op_nextop->op_type == OP_NULL)
3651                cLOOP->op_nextop = cLOOP->op_nextop->op_next;
3652            while (cLOOP->op_lastop->op_type == OP_NULL)
3653                cLOOP->op_lastop = cLOOP->op_lastop->op_next;
3654            /* a while(1) loop doesn't have an op_next that escapes the
3655             * loop, so we have to explicitly follow the op_lastop to
3656             * process the rest of the code */
3657            DEFER(cLOOP->op_lastop);
3658            break;
3659
3660        case OP_ENTERTRY:
3661            assert(cLOGOPo->op_other->op_type == OP_LEAVETRY);
3662            DEFER(cLOGOPo->op_other);
3663            break;
3664
3665        case OP_ENTERTRYCATCH:
3666            assert(cLOGOPo->op_other->op_type == OP_CATCH);
3667            /* catch body is the ->op_other of the OP_CATCH */
3668            DEFER(cLOGOPx(cLOGOPo->op_other)->op_other);
3669            break;
3670
3671        case OP_SUBST:
3672            if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3673                S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3674            assert(!(cPMOP->op_pmflags & PMf_ONCE));
3675            while (cPMOP->op_pmstashstartu.op_pmreplstart &&
3676                   cPMOP->op_pmstashstartu.op_pmreplstart->op_type == OP_NULL)
3677                cPMOP->op_pmstashstartu.op_pmreplstart
3678                    = cPMOP->op_pmstashstartu.op_pmreplstart->op_next;
3679            DEFER(cPMOP->op_pmstashstartu.op_pmreplstart);
3680            break;
3681
3682        case OP_SORT: {
3683            OP *oright;
3684
3685            if (o->op_flags & OPf_SPECIAL) {
3686                /* first arg is a code block */
3687                OP * const nullop = OpSIBLING(cLISTOP->op_first);
3688                OP * kid          = cUNOPx(nullop)->op_first;
3689
3690                assert(nullop->op_type == OP_NULL);
3691                assert(kid->op_type == OP_SCOPE
3692                 || (kid->op_type == OP_NULL && kid->op_targ == OP_LEAVE));
3693                /* since OP_SORT doesn't have a handy op_other-style
3694                 * field that can point directly to the start of the code
3695                 * block, store it in the otherwise-unused op_next field
3696                 * of the top-level OP_NULL. This will be quicker at
3697                 * run-time, and it will also allow us to remove leading
3698                 * OP_NULLs by just messing with op_nexts without
3699                 * altering the basic op_first/op_sibling layout. */
3700                kid = kLISTOP->op_first;
3701                assert(
3702                      (kid->op_type == OP_NULL
3703                      && (  kid->op_targ == OP_NEXTSTATE
3704                         || kid->op_targ == OP_DBSTATE  ))
3705                    || kid->op_type == OP_STUB
3706                    || kid->op_type == OP_ENTER
3707                    || (PL_parser && PL_parser->error_count));
3708                nullop->op_next = kid->op_next;
3709                DEFER(nullop->op_next);
3710            }
3711
3712            /* check that RHS of sort is a single plain array */
3713            oright = cUNOPo->op_first;
3714            if (!oright || oright->op_type != OP_PUSHMARK)
3715                break;
3716
3717            if (o->op_private & OPpSORT_INPLACE)
3718                break;
3719
3720            /* reverse sort ... can be optimised.  */
3721            if (!OpHAS_SIBLING(cUNOPo)) {
3722                /* Nothing follows us on the list. */
3723                OP * const reverse = o->op_next;
3724
3725                if (reverse->op_type == OP_REVERSE &&
3726                    (reverse->op_flags & OPf_WANT) == OPf_WANT_LIST) {
3727                    OP * const pushmark = cUNOPx(reverse)->op_first;
3728                    if (pushmark && (pushmark->op_type == OP_PUSHMARK)
3729                        && (OpSIBLING(cUNOPx(pushmark)) == o)) {
3730                        /* reverse -> pushmark -> sort */
3731                        o->op_private |= OPpSORT_REVERSE;
3732                        op_null(reverse);
3733                        pushmark->op_next = oright->op_next;
3734                        op_null(oright);
3735                    }
3736                }
3737            }
3738
3739            break;
3740        }
3741
3742        case OP_REVERSE: {
3743            OP *ourmark, *theirmark, *ourlast, *iter, *expushmark, *rv2av;
3744            OP *gvop = NULL;
3745            LISTOP *enter, *exlist;
3746
3747            if (o->op_private & OPpSORT_INPLACE)
3748                break;
3749
3750            enter = cLISTOPx(o->op_next);
3751            if (!enter)
3752                break;
3753            if (enter->op_type == OP_NULL) {
3754                enter = cLISTOPx(enter->op_next);
3755                if (!enter)
3756                    break;
3757            }
3758            /* for $a (...) will have OP_GV then OP_RV2GV here.
3759               for (...) just has an OP_GV.  */
3760            if (enter->op_type == OP_GV) {
3761                gvop = (OP *) enter;
3762                enter = cLISTOPx(enter->op_next);
3763                if (!enter)
3764                    break;
3765                if (enter->op_type == OP_RV2GV) {
3766                  enter = cLISTOPx(enter->op_next);
3767                  if (!enter)
3768                    break;
3769                }
3770            }
3771
3772            if (enter->op_type != OP_ENTERITER)
3773                break;
3774
3775            iter = enter->op_next;
3776            if (!iter || iter->op_type != OP_ITER)
3777                break;
3778
3779            expushmark = enter->op_first;
3780            if (!expushmark || expushmark->op_type != OP_NULL
3781                || expushmark->op_targ != OP_PUSHMARK)
3782                break;
3783
3784            exlist = cLISTOPx(OpSIBLING(expushmark));
3785            if (!exlist || exlist->op_type != OP_NULL
3786                || exlist->op_targ != OP_LIST)
3787                break;
3788
3789            if (exlist->op_last != o) {
3790                /* Mmm. Was expecting to point back to this op.  */
3791                break;
3792            }
3793            theirmark = exlist->op_first;
3794            if (!theirmark || theirmark->op_type != OP_PUSHMARK)
3795                break;
3796
3797            if (OpSIBLING(theirmark) != o) {
3798                /* There's something between the mark and the reverse, eg
3799                   for (1, reverse (...))
3800                   so no go.  */
3801                break;
3802            }
3803
3804            ourmark = cLISTOPo->op_first;
3805            if (!ourmark || ourmark->op_type != OP_PUSHMARK)
3806                break;
3807
3808            ourlast = cLISTOPo->op_last;
3809            if (!ourlast || ourlast->op_next != o)
3810                break;
3811
3812            rv2av = OpSIBLING(ourmark);
3813            if (rv2av && rv2av->op_type == OP_RV2AV && !OpHAS_SIBLING(rv2av)
3814                && rv2av->op_flags == (OPf_WANT_LIST | OPf_KIDS)) {
3815                /* We're just reversing a single array.  */
3816                rv2av->op_flags = OPf_WANT_SCALAR | OPf_KIDS | OPf_REF;
3817                enter->op_flags |= OPf_STACKED;
3818            }
3819
3820            /* We don't have control over who points to theirmark, so sacrifice
3821               ours.  */
3822            theirmark->op_next = ourmark->op_next;
3823            theirmark->op_flags = ourmark->op_flags;
3824            ourlast->op_next = gvop ? gvop : (OP *) enter;
3825            op_null(ourmark);
3826            op_null(o);
3827            enter->op_private |= OPpITER_REVERSED;
3828            iter->op_private |= OPpITER_REVERSED;
3829
3830            oldoldop = NULL;
3831            oldop    = ourlast;
3832            o        = oldop->op_next;
3833            goto redo;
3834            NOT_REACHED; /* NOTREACHED */
3835            break;
3836        }
3837
3838        case OP_UNDEF:
3839            if ((o->op_flags & OPf_KIDS) &&
3840                (cUNOPx(o)->op_first->op_type == OP_PADSV)) {
3841
3842                /* Convert:
3843                 *     undef
3844                 *       padsv[$x]
3845                 * to:
3846                 *     undef[$x]
3847                 */
3848
3849                OP * padsv = cUNOPx(o)->op_first;
3850                o->op_private = OPpTARGET_MY |
3851                        (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE));
3852                o->op_targ = padsv->op_targ; padsv->op_targ = 0;
3853                op_null(padsv);
3854                /* Optimizer does NOT seem to fix up the padsv op_next ptr */
3855                if (oldoldop)
3856                    oldoldop->op_next = o;
3857                oldop = oldoldop;
3858                oldoldop = NULL;
3859
3860            } else if (o->op_next->op_type == OP_PADSV) {
3861                OP * padsv = o->op_next;
3862                OP * sassign = (padsv->op_next &&
3863                        padsv->op_next->op_type == OP_SASSIGN) ?
3864                        padsv->op_next : NULL;
3865                if (sassign && cBINOPx(sassign)->op_first == o) {
3866                    /* Convert:
3867                     *     sassign
3868                     *       undef
3869                     *       padsv[$x]
3870                     * to:
3871                     *     undef[$x]
3872                     * NOTE: undef does not have the "T" flag set in
3873                     *       regen/opcodes, as this would cause
3874                     *       S_maybe_targlex to do the optimization.
3875                     *       Seems easier to keep it all here, rather
3876                     *       than have an undef-specific branch in
3877                     *       S_maybe_targlex just to add the
3878                     *       OPpUNDEF_KEEP_PV flag.
3879                     */
3880                     o->op_private = OPpTARGET_MY | OPpUNDEF_KEEP_PV |
3881                         (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE));
3882                     o->op_targ = padsv->op_targ; padsv->op_targ = 0;
3883                     op_null(padsv);
3884                     op_null(sassign);
3885                     /* Optimizer DOES seems to fix up the op_next ptrs */
3886                }
3887            }
3888            break;
3889
3890        case OP_QR:
3891        case OP_MATCH:
3892            if (!(cPMOP->op_pmflags & PMf_ONCE)) {
3893                assert (!cPMOP->op_pmstashstartu.op_pmreplstart);
3894            }
3895            break;
3896
3897        case OP_RUNCV:
3898            if (!(o->op_private & OPpOFFBYONE) && !CvCLONE(PL_compcv)
3899             && (!CvANON(PL_compcv) || (!PL_cv_has_eval && !PL_perldb)))
3900            {
3901                SV *sv;
3902                if (CvEVAL(PL_compcv)) sv = &PL_sv_undef;
3903                else {
3904                    sv = newRV((SV *)PL_compcv);
3905                    sv_rvweaken(sv);
3906                    SvREADONLY_on(sv);
3907                }
3908                OpTYPE_set(o, OP_CONST);
3909                o->op_flags |= OPf_SPECIAL;
3910                cSVOPo->op_sv = sv;
3911            }
3912            break;
3913
3914        case OP_SASSIGN: {
3915            if (OP_GIMME(o,0) == G_VOID
3916             || (  o->op_next->op_type == OP_LINESEQ
3917                && (  o->op_next->op_next->op_type == OP_LEAVESUB
3918                   || (  o->op_next->op_next->op_type == OP_RETURN
3919                      && !CvLVALUE(PL_compcv)))))
3920            {
3921                OP *right = cBINOP->op_first;
3922                if (right) {
3923                    /*   sassign
3924                    *      RIGHT
3925                    *      substr
3926                    *         pushmark
3927                    *         arg1
3928                    *         arg2
3929                    *         ...
3930                    * becomes
3931                    *
3932                    *  ex-sassign
3933                    *     substr
3934                    *        pushmark
3935                    *        RIGHT
3936                    *        arg1
3937                    *        arg2
3938                    *        ...
3939                    */
3940                    OP *left = OpSIBLING(right);
3941                    if (left->op_type == OP_SUBSTR
3942                         && (left->op_private & 7) < 4) {
3943                        op_null(o);
3944                        /* cut out right */
3945                        op_sibling_splice(o, NULL, 1, NULL);
3946                        /* and insert it as second child of OP_SUBSTR */
3947                        op_sibling_splice(left, cBINOPx(left)->op_first, 0,
3948                                    right);
3949                        left->op_private |= OPpSUBSTR_REPL_FIRST;
3950                        left->op_flags =
3951                            (o->op_flags & ~OPf_WANT) | OPf_WANT_VOID;
3952                    }
3953                }
3954            }
3955            OP* rhs = cBINOPx(o)->op_first;
3956            OP* lval = cBINOPx(o)->op_last;
3957
3958            /* Combine a simple SASSIGN OP with a PADSV lvalue child OP
3959             * into a single OP. */
3960
3961            /* This optimization covers arbitrarily complicated RHS OP
3962             * trees. Separate optimizations may exist for specific,
3963             * single RHS OPs, such as:
3964             * "my $foo = undef;" or "my $bar = $other_padsv;" */
3965
3966            if (!(o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
3967                 && lval && (lval->op_type == OP_PADSV) &&
3968                !(lval->op_private & OPpDEREF)
3969                 /* skip if padrange has already gazumped the padsv */
3970                 && (lval == oldop)
3971                 /* Memoize::Once produces a non-standard SASSIGN that
3972                  * doesn't actually point to pp_sassign, has only one
3973                  * child (PADSV), and gets to it via op_other rather
3974                  * than op_next. Don't try to optimize this. */
3975                 && (lval != rhs)
3976               ) {
3977                /* SASSIGN's bitfield flags, such as op_moresib and
3978                 * op_slabbed, will be carried over unchanged. */
3979                OpTYPE_set(o, OP_PADSV_STORE);
3980
3981                /* Explicitly craft the new OP's op_flags, carrying
3982                 * some bits over from the SASSIGN */
3983                o->op_flags = (
3984                    OPf_KIDS | OPf_STACKED |
3985                    (o->op_flags & (OPf_WANT|OPf_PARENS))
3986                );
3987
3988                /* Reset op_private flags, taking relevant private flags
3989                 * from the PADSV */
3990                o->op_private = (lval->op_private &
3991                                (OPpLVAL_INTRO|OPpPAD_STATE|OPpDEREF));
3992
3993                /* Steal the targ from the PADSV */
3994                o->op_targ = lval->op_targ; lval->op_targ = 0;
3995
3996                /* Fixup op_next ptrs */
3997                assert(oldop->op_type == OP_PADSV);
3998                /* oldoldop can be arbitrarily deep in the RHS OP tree */
3999                oldoldop->op_next = o;
4000
4001                /* Even when (rhs != oldoldop), rhs might still have a
4002                 * relevant op_next ptr to lval. This is definitely true
4003                 * when rhs is OP_NULL with a LOGOP kid (e.g. orassign).
4004                 * There may be other cases. */
4005                if (rhs->op_next == lval)
4006                    rhs->op_next = o;
4007
4008                /* Now null-out the PADSV */
4009                op_null(lval);
4010
4011                /* NULL the previous op ptrs, so rpeep can continue */
4012                oldoldop = NULL; oldop = NULL;
4013            }
4014
4015            /* Combine a simple SASSIGN OP with an AELEMFAST_LEX lvalue
4016             * into a single OP. This optimization covers arbitrarily
4017             * complicated RHS OP trees. */
4018
4019            if (!(o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
4020                && (lval->op_type == OP_NULL) && (lval->op_private == 2) &&
4021                (cBINOPx(lval)->op_first->op_type == OP_AELEMFAST_LEX)
4022            ) {
4023                OP * lex = cBINOPx(lval)->op_first;
4024                /* SASSIGN's bitfield flags, such as op_moresib and
4025                 * op_slabbed, will be carried over unchanged. */
4026                OpTYPE_set(o, OP_AELEMFASTLEX_STORE);
4027
4028                /* Explicitly craft the new OP's op_flags, carrying
4029                 * some bits over from the SASSIGN */
4030                o->op_flags = (
4031                    OPf_KIDS | OPf_STACKED |
4032                    (o->op_flags & (OPf_WANT|OPf_PARENS))
4033                );
4034
4035                /* Copy the AELEMFAST_LEX op->private, which contains
4036                 * the key index. */
4037                o->op_private = lex->op_private;
4038
4039                /* Take the targ from the AELEMFAST_LEX */
4040                o->op_targ = lex->op_targ; lex->op_targ = 0;
4041
4042                assert(oldop->op_type == OP_AELEMFAST_LEX);
4043                /* oldoldop can be arbitrarily deep in the RHS OP tree */
4044                oldoldop->op_next = o;
4045
4046                /* Even when (rhs != oldoldop), rhs might still have a
4047                 * relevant op_next ptr to lex. (Updating it here can
4048                 * also cause other ops in the RHS to get the desired
4049                 * op_next pointer, presumably thanks to the finalizer.)
4050                 * This is definitely truewhen rhs is OP_NULL with a
4051                 * LOGOP kid (e.g. orassign). There may be other cases. */
4052                if (rhs->op_next == lex)
4053                    rhs->op_next = o;
4054
4055                /* Now null-out the AELEMFAST_LEX */
4056                op_null(lex);
4057
4058                /* NULL the previous op ptrs, so rpeep can continue */
4059                oldop = oldoldop; oldoldop = NULL;
4060            }
4061
4062            break;
4063        }
4064
4065        case OP_AASSIGN: {
4066            int l, r, lr, lscalars, rscalars;
4067
4068            /* handle common vars detection, e.g. ($a,$b) = ($b,$a).
4069               Note that we do this now rather than in newASSIGNOP(),
4070               since only by now are aliased lexicals flagged as such
4071
4072               See the essay "Common vars in list assignment" above for
4073               the full details of the rationale behind all the conditions
4074               below.
4075
4076               PL_generation sorcery:
4077               To detect whether there are common vars, the global var
4078               PL_generation is incremented for each assign op we scan.
4079               Then we run through all the lexical variables on the LHS,
4080               of the assignment, setting a spare slot in each of them to
4081               PL_generation.  Then we scan the RHS, and if any lexicals
4082               already have that value, we know we've got commonality.
4083               Also, if the generation number is already set to
4084               PERL_INT_MAX, then the variable is involved in aliasing, so
4085               we also have potential commonality in that case.
4086             */
4087
4088            PL_generation++;
4089            /* scan LHS */
4090            lscalars = 0;
4091            l = S_aassign_scan(aTHX_ cLISTOPo->op_last,  FALSE, &lscalars);
4092            /* scan RHS */
4093            rscalars = 0;
4094            r = S_aassign_scan(aTHX_ cLISTOPo->op_first, TRUE, &rscalars);
4095            lr = (l|r);
4096
4097
4098            /* After looking for things which are *always* safe, this main
4099             * if/else chain selects primarily based on the type of the
4100             * LHS, gradually working its way down from the more dangerous
4101             * to the more restrictive and thus safer cases */
4102
4103            if (   !l                      /* () = ....; */
4104                || !r                      /* .... = (); */
4105                || !(l & ~AAS_SAFE_SCALAR) /* (undef, pos()) = ...; */
4106                || !(r & ~AAS_SAFE_SCALAR) /* ... = (1,2,length,undef); */
4107                || (lscalars < 2)          /* (undef, $x) = ... */
4108            ) {
4109                NOOP; /* always safe */
4110            }
4111            else if (l & AAS_DANGEROUS) {
4112                /* always dangerous */
4113                o->op_private |= OPpASSIGN_COMMON_SCALAR;
4114                o->op_private |= OPpASSIGN_COMMON_AGG;
4115            }
4116            else if (l & (AAS_PKG_SCALAR|AAS_PKG_AGG)) {
4117                /* package vars are always dangerous - too many
4118                 * aliasing possibilities */
4119                if (l & AAS_PKG_SCALAR)
4120                    o->op_private |= OPpASSIGN_COMMON_SCALAR;
4121                if (l & AAS_PKG_AGG)
4122                    o->op_private |= OPpASSIGN_COMMON_AGG;
4123            }
4124            else if (l & ( AAS_MY_SCALAR|AAS_MY_AGG
4125                          |AAS_LEX_SCALAR|AAS_LEX_AGG))
4126            {
4127                /* LHS contains only lexicals and safe ops */
4128
4129                if (l & (AAS_MY_AGG|AAS_LEX_AGG))
4130                    o->op_private |= OPpASSIGN_COMMON_AGG;
4131
4132                if (l & (AAS_MY_SCALAR|AAS_LEX_SCALAR)) {
4133                    if (lr & AAS_LEX_SCALAR_COMM)
4134                        o->op_private |= OPpASSIGN_COMMON_SCALAR;
4135                    else if (   !(l & AAS_LEX_SCALAR)
4136                             && (r & AAS_DEFAV))
4137                    {
4138                        /* falsely mark
4139                         *    my (...) = @_
4140                         * as scalar-safe for performance reasons.
4141                         * (it will still have been marked _AGG if necessary */
4142                        NOOP;
4143                    }
4144                    else if (r  & (AAS_PKG_SCALAR|AAS_PKG_AGG|AAS_DANGEROUS))
4145                        /* if there are only lexicals on the LHS and no
4146                         * common ones on the RHS, then we assume that the
4147                         * only way those lexicals could also get
4148                         * on the RHS is via some sort of dereffing or
4149                         * closure, e.g.
4150                         *    $r = \$lex;
4151                         *    ($lex, $x) = (1, $$r)
4152                         * and in this case we assume the var must have
4153                         *  a bumped ref count. So if its ref count is 1,
4154                         *  it must only be on the LHS.
4155                         */
4156                        o->op_private |= OPpASSIGN_COMMON_RC1;
4157                }
4158            }
4159
4160            /* ... = ($x)
4161             * may have to handle aggregate on LHS, but we can't
4162             * have common scalars. */
4163            if (rscalars < 2)
4164                o->op_private &=
4165                        ~(OPpASSIGN_COMMON_SCALAR|OPpASSIGN_COMMON_RC1);
4166
4167            if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4168                S_check_for_bool_cxt(o, 1, OPpASSIGN_TRUEBOOL, 0);
4169            break;
4170        }
4171
4172        case OP_REF:
4173        case OP_BLESSED:
4174            /* if the op is used in boolean context, set the TRUEBOOL flag
4175             * which enables an optimisation at runtime which avoids creating
4176             * a stack temporary for known-true package names */
4177            if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4178                S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL);
4179            break;
4180
4181        case OP_LENGTH:
4182            /* see if the op is used in known boolean context,
4183             * but not if OA_TARGLEX optimisation is enabled */
4184            if (   (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR
4185                && !(o->op_private & OPpTARGET_MY)
4186            )
4187                S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
4188            break;
4189
4190        case OP_POS:
4191            /* see if the op is used in known boolean context */
4192            if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4193                S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
4194            break;
4195
4196        case OP_CUSTOM: {
4197            Perl_cpeep_t cpeep =
4198                XopENTRYCUSTOM(o, xop_peep);
4199            if (cpeep)
4200                cpeep(aTHX_ o, oldop);
4201            break;
4202        }
4203
4204        }
4205        /* did we just null the current op? If so, re-process it to handle
4206         * eliding "empty" ops from the chain */
4207        if (o->op_type == OP_NULL && oldop && oldop->op_next == o) {
4208            o->op_opt = 0;
4209            o = oldop;
4210        }
4211        else {
4212            oldoldop = oldop;
4213            oldop = o;
4214        }
4215    }
4216    LEAVE;
4217}
4218
4219void
4220Perl_peep(pTHX_ OP *o)
4221{
4222    CALL_RPEEP(o);
4223}
4224
4225/*
4226 * ex: set ts=8 sts=4 sw=4 et:
4227 */
4228